commit 9aa104f: [Project] Start mime structures refactoring

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Jul 12 16:42:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-07-10 15:01:41 +0100
URL: https://github.com/rspamd/rspamd/commit/9aa104f6a709198527538c6553a0a96db1ecc5e9

[Project] Start mime structures refactoring

---
 CMakeLists.txt             |  1 +
 src/libmime/message.c      | 27 +++++++++++++++++
 src/libmime/message.h      | 38 ++++++++++++++++++++++++
 src/libmime/mime_headers.c | 22 +++++++-------
 src/libmime/mime_headers.h | 72 ++++++++++++++++++++++++----------------------
 src/libserver/dkim.c       |  4 +--
 src/libserver/protocol.c   |  2 ++
 src/libserver/task.c       |  8 +-----
 src/libserver/task.h       | 23 +++------------
 9 files changed, 123 insertions(+), 74 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 06cc9fd30..88aea0b62 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -771,6 +771,7 @@ IF(NOT CMAKE_C_OPT_FLAGS)
 	ENDIF()
 	ELSE(ENABLE_OPTIMIZATION MATCHES "ON")
 		IF(ENABLE_FULL_DEBUG MATCHES "ON")
+			ADD_DEFINITIONS(-DFULL_DEBUG)
 			SET(CMAKE_C_OPT_FLAGS "-g -O0")
 		ELSE(ENABLE_FULL_DEBUG MATCHES "ON")
 			SET(CMAKE_C_OPT_FLAGS "-g -O2")
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 482287769..1d9da26f2 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1091,6 +1091,22 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
 	task->queue_id = mid;
 }
 
+static void
+rspamd_message_dtor (struct rspamd_message *msg)
+{
+	/* Intentionally empty for now: nothing held by msg is released here */
+}
+
+struct rspamd_message*
+rspamd_message_new (struct rspamd_task *task)
+{
+	struct rspamd_message *msg;
+
+	/* Zero-filled allocation taken from the task's memory pool */
+	msg = rspamd_mempool_alloc0 (task->task_pool, sizeof (*msg));
+	return msg;
+}
+
 gboolean
 rspamd_message_parse (struct rspamd_task *task)
 {
@@ -1593,3 +1609,14 @@ rspamd_message_get_mime_header_array (struct rspamd_task *task,
 
 	return ret;
 }
+
+struct rspamd_message *rspamd_message_ref (struct rspamd_message *msg)
+{
+	REF_RETAIN (msg);
+	return msg; /* hand back msg: the function is declared to return it */
+}
+
+void rspamd_message_unref (struct rspamd_message *msg)
+{
+	REF_RELEASE (msg); /* counterpart of REF_RETAIN in rspamd_message_ref */
+}
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 17c4ec5b9..7d58fa88f 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -12,6 +12,8 @@
 #include "cryptobox.h"
 #include "mime_headers.h"
 #include "content_type.h"
+#include "libutil/ref.h"
+#include "libutil/str_util.h"
 
 #include <unicode/uchar.h>
 #include <unicode/utext.h>
@@ -129,6 +131,36 @@ struct rspamd_mime_text_part {
 	guint unicode_scripts;
 };
 
+struct rspamd_message {
+	const gchar *message_id;		/**< message id										*/
+	gchar *subject;					/**< message subject								*/
+
+	GPtrArray *parts;				/**< list of parsed parts							*/
+	GPtrArray *text_parts;			/**< list of text parts								*/
+	struct {
+		const gchar *begin;
+		gsize len;
+		const gchar *body_start;
+	} raw_headers_content;			/**< raw headers region (begin/len) and body start pointer */
+	GPtrArray *received;			/**< list of received headers						*/
+	GHashTable *urls;				/**< list of parsed urls							*/
+	GHashTable *emails;				/**< list of parsed emails							*/
+	GHashTable *raw_headers;		/**< list of raw headers							*/
+	GQueue *headers_order;			/**< order of raw headers							*/
+	GPtrArray *rcpt_mime;			/**< recipients parsed from To/Cc/Bcc mime headers	*/
+	GPtrArray *from_mime;			/**< senders parsed from the From mime header		*/
+	enum rspamd_newlines_type nlines_type; /**< type of newlines (detected on most of headers) */
+	ref_entry_t ref;				/**< refcount state; presumably what REF_RETAIN/REF_RELEASE operate on - TODO confirm */
+};
+
+#ifndef FULL_DEBUG
+#define MESSAGE_FIELD(task, field) ((task)->message->field)
+#else /* FULL_DEBUG: log and assert when the task has no message; GNU statement expression keeps the result an lvalue */
+#define MESSAGE_FIELD(task, field) (*({ \
+	if (!(task)->message) {msg_err_task("no message when getting field %s", #field); g_assert(0);} \
+	&((task)->message->field); }))
+#endif
+
 /**
  * Parse and pre-process mime message
  * @param task worker_task object
@@ -191,6 +223,12 @@ enum rspamd_cte rspamd_cte_from_string (const gchar *str);
  */
 const gchar *rspamd_cte_to_string (enum rspamd_cte ct);
 
+struct rspamd_message* rspamd_message_new (struct rspamd_task *task);
+
+struct rspamd_message *rspamd_message_ref (struct rspamd_message *msg);
+
+void rspamd_message_unref (struct rspamd_message *msg);
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index cf6d0f763..952a163b9 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -44,31 +44,31 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
 			g_ptr_array_add (task->received, recv);
 		}
 
-		rh->type = RSPAMD_HEADER_RECEIVED;
+		rh->flags = RSPAMD_HEADER_RECEIVED;
 		break;
 	case 0x76F31A09F4352521ULL:	/* to */
 		task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
 				rh->decoded, strlen (rh->decoded), task->rcpt_mime);
-		rh->type = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+		rh->flags = RSPAMD_HEADER_TO|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
 		break;
 	case 0x7EB117C1480B76ULL:	/* cc */
 		task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
 				rh->decoded, strlen (rh->decoded), task->rcpt_mime);
-		rh->type = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+		rh->flags = RSPAMD_HEADER_CC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
 		break;
 	case 0xE4923E11C4989C8DULL:	/* bcc */
 		task->rcpt_mime = rspamd_email_address_from_mime (task->task_pool,
 				rh->decoded, strlen (rh->decoded), task->rcpt_mime);
-		rh->type = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
+		rh->flags = RSPAMD_HEADER_BCC|RSPAMD_HEADER_RCPT|RSPAMD_HEADER_UNIQUE;
 		break;
 	case 0x41E1985EDC1CBDE4ULL:	/* from */
 		task->from_mime = rspamd_email_address_from_mime (task->task_pool,
 				rh->decoded, strlen (rh->decoded), task->from_mime);
-		rh->type = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE;
+		rh->flags = RSPAMD_HEADER_FROM|RSPAMD_HEADER_SENDER|RSPAMD_HEADER_UNIQUE;
 		break;
 	case 0x43A558FC7C240226ULL:	/* message-id */ {
 
-		rh->type = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE;
+		rh->flags = RSPAMD_HEADER_MESSAGE_ID|RSPAMD_HEADER_UNIQUE;
 		p = rh->decoded;
 		end = p + strlen (p);
 
@@ -107,20 +107,20 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
 		if (task->subject == NULL) {
 			task->subject = rh->decoded;
 		}
-		rh->type = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE;
+		rh->flags = RSPAMD_HEADER_SUBJECT|RSPAMD_HEADER_UNIQUE;
 		break;
 	case 0xEE4AA2EAAC61D6F4ULL:	/* return-path */
 		if (task->from_envelope == NULL) {
 			task->from_envelope = rspamd_email_address_from_smtp (rh->decoded,
 					strlen (rh->decoded));
 		}
-		rh->type = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE;
+		rh->flags = RSPAMD_HEADER_RETURN_PATH|RSPAMD_HEADER_UNIQUE;
 		break;
 	case 0xB9EEFAD2E93C2161ULL:	/* delivered-to */
 		if (task->deliver_to == NULL) {
 			task->deliver_to = rh->decoded;
 		}
-		rh->type = RSPAMD_HEADER_DELIVERED_TO;
+		rh->flags = RSPAMD_HEADER_DELIVERED_TO;
 		break;
 	case 0x2EC3BFF3C393FC10ULL: /* date */
 	case 0xAC0DDB1A1D214CAULL: /* sender */
@@ -128,7 +128,7 @@ rspamd_mime_header_check_special (struct rspamd_task *task,
 	case 0x81CD9E9131AB6A9AULL: /* content-type */
 	case 0xC39BD9A75AA25B60ULL: /* content-transfer-encoding */
 	case 0xB3F6704CB3AD6589ULL: /* references */
-		rh->type = RSPAMD_HEADER_UNIQUE;
+		rh->flags = RSPAMD_HEADER_UNIQUE;
 		break;
 	}
 }
@@ -472,7 +472,7 @@ rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
 		while (cur) {
 			nh = cur->data;
 
-			if (nh->name && nh->type != RSPAMD_HEADER_RECEIVED) {
+			if (nh->name && nh->flags != RSPAMD_HEADER_RECEIVED) {
 				rspamd_cryptobox_hash_update (&hs, nh->name, strlen (nh->name));
 			}
 
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index 5cb300978..60fd7b697 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -31,20 +31,22 @@ enum rspamd_rfc2047_encoding {
 	RSPAMD_RFC2047_BASE64,
 };
 
-enum rspamd_mime_header_special_type {
-	RSPAMD_HEADER_GENERIC = 0,
-	RSPAMD_HEADER_RECEIVED = 1 << 0,
-	RSPAMD_HEADER_TO = 1 << 2,
-	RSPAMD_HEADER_CC = 1 << 3,
-	RSPAMD_HEADER_BCC = 1 << 4,
-	RSPAMD_HEADER_FROM = 1 << 5,
-	RSPAMD_HEADER_MESSAGE_ID = 1 << 6,
-	RSPAMD_HEADER_SUBJECT = 1 << 7,
-	RSPAMD_HEADER_RETURN_PATH = 1 << 8,
-	RSPAMD_HEADER_DELIVERED_TO = 1 << 9,
-	RSPAMD_HEADER_SENDER = 1 << 10,
-	RSPAMD_HEADER_RCPT = 1 << 11,
-	RSPAMD_HEADER_UNIQUE = 1 << 12
+enum rspamd_mime_header_flags {
+	RSPAMD_HEADER_GENERIC = 0u,
+	RSPAMD_HEADER_RECEIVED = 1u << 0u,
+	RSPAMD_HEADER_TO = 1u << 2u,
+	RSPAMD_HEADER_CC = 1u << 3u,
+	RSPAMD_HEADER_BCC = 1u << 4u,
+	RSPAMD_HEADER_FROM = 1u << 5u,
+	RSPAMD_HEADER_MESSAGE_ID = 1u << 6u,
+	RSPAMD_HEADER_SUBJECT = 1u << 7u,
+	RSPAMD_HEADER_RETURN_PATH = 1u << 8u,
+	RSPAMD_HEADER_DELIVERED_TO = 1u << 9u,
+	RSPAMD_HEADER_SENDER = 1u << 10u,
+	RSPAMD_HEADER_RCPT = 1u << 11u,
+	RSPAMD_HEADER_UNIQUE = 1u << 12u,
+	RSPAMD_HEADER_EMPTY_SEPARATOR = 1u << 13u,
+	RSPAMD_HEADER_TAB_SEPARATED = 1u << 14u,
 };
 
 struct rspamd_mime_header {
@@ -52,32 +54,31 @@ struct rspamd_mime_header {
 	gchar *value;
 	const gchar *raw_value; /* As it is in the message (unfolded and unparsed) */
 	gsize raw_len;
-	gboolean tab_separated;
-	gboolean empty_separator;
 	guint order;
-	enum rspamd_mime_header_special_type type;
+	int flags; /* see enum rspamd_mime_header_flags */
 	gchar *separator;
 	gchar *decoded;
+	struct rspamd_mime_header *prev, *next; /* Headers with the same name */
+	struct rspamd_mime_header *ord_prev, *ord_next; /* Overall order of headers */
 };
 
 enum rspamd_received_type {
 	RSPAMD_RECEIVED_SMTP = 0,
-	RSPAMD_RECEIVED_ESMTP,
-	RSPAMD_RECEIVED_ESMTPA,
-	RSPAMD_RECEIVED_ESMTPS,
-	RSPAMD_RECEIVED_ESMTPSA,
-	RSPAMD_RECEIVED_LMTP,
-	RSPAMD_RECEIVED_IMAP,
-	RSPAMD_RECEIVED_LOCAL,
-	RSPAMD_RECEIVED_HTTP,
-	RSPAMD_RECEIVED_MAPI,
-	RSPAMD_RECEIVED_UNKNOWN
+	RSPAMD_RECEIVED_ESMTP = 1u << 0u,
+	RSPAMD_RECEIVED_ESMTPA = 1u << 1u,
+	RSPAMD_RECEIVED_ESMTPS = 1u << 2u,
+	RSPAMD_RECEIVED_ESMTPSA = 1u << 3u,
+	RSPAMD_RECEIVED_LMTP = 1u << 4u,
+	RSPAMD_RECEIVED_IMAP = 1u << 5u,
+	RSPAMD_RECEIVED_LOCAL = 1u << 6u,
+	RSPAMD_RECEIVED_HTTP = 1u << 7u,
+	RSPAMD_RECEIVED_MAPI = 1u << 8u,
+	RSPAMD_RECEIVED_UNKNOWN = 1u << 9u,
+	RSPAMD_RECEIVED_FLAG_ARTIFICIAL =  (1u << 10u),
+	RSPAMD_RECEIVED_FLAG_SSL =  (1u << 11u),
+	RSPAMD_RECEIVED_FLAG_AUTHENTICATED =  (1u << 12u),
 };
 
-#define RSPAMD_RECEIVED_FLAG_ARTIFICIAL (1 << 0)
-#define RSPAMD_RECEIVED_FLAG_SSL (1 << 1)
-#define RSPAMD_RECEIVED_FLAG_AUTHENTICATED (1 << 2)
-
 struct received_header {
 	const gchar *from_hostname;
 	const gchar *from_ip;
@@ -88,8 +89,8 @@ struct received_header {
 	rspamd_inet_addr_t *addr;
 	struct rspamd_mime_header *hdr;
 	time_t timestamp;
-	enum rspamd_received_type type;
-	gint flags;
+	gint flags; /* See enum rspamd_received_type */
+	struct received_header *prev, *next;
 };
 
 /**
@@ -100,8 +101,9 @@ struct received_header {
  * @param len
  * @param check_newlines
  */
-void rspamd_mime_headers_process (struct rspamd_task *task, GHashTable *target,
-								  GQueue *order,
+void rspamd_mime_headers_process (struct rspamd_task *task,
+								  GHashTable *target,
+								  struct rspamd_mime_header **order_ptr,
 								  const gchar *in, gsize len,
 								  gboolean check_newlines);
 
diff --git a/src/libserver/dkim.c b/src/libserver/dkim.c
index 9386c5cdc..c0ea29f08 100644
--- a/src/libserver/dkim.c
+++ b/src/libserver/dkim.c
@@ -2175,7 +2175,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx,
 		if (ar) {
 			/* Check uniqueness of the header */
 			rh = g_ptr_array_index (ar, 0);
-			if ((rh->type & RSPAMD_HEADER_UNIQUE) && ar->len > 1) {
+			if ((rh->flags & RSPAMD_HEADER_UNIQUE) && ar->len > 1) {
 				guint64 random_cookie = ottery_rand_uint64 ();
 
 				msg_warn_dkim ("header %s is intended to be unique by"
@@ -2210,7 +2210,7 @@ rspamd_dkim_canonize_header (struct rspamd_dkim_common_ctx *ctx,
 						(gint)rh->raw_len, rh->raw_value);
 			}
 			else {
-				if (ctx->is_sign && (rh->type & RSPAMD_HEADER_FROM)) {
+				if (ctx->is_sign && (rh->flags & RSPAMD_HEADER_FROM)) {
 					/* Special handling of the From handling when rewrite is done */
 					gboolean has_rewrite = FALSE;
 					guint i;
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index bef7a0452..ddd072882 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -1409,6 +1409,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
 	gint flags = RSPAMD_PROTOCOL_DEFAULT;
 	struct rspamd_action *action;
 
+#if 0
 	/* Write custom headers */
 	g_hash_table_iter_init (&hiter, task->reply_headers);
 	while (g_hash_table_iter_next (&hiter, &h, &v)) {
@@ -1416,6 +1417,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
 
 		rspamd_http_message_add_header (msg, hn->begin, hv->begin);
 	}
+#endif
 
 	flags |= RSPAMD_PROTOCOL_URLS;
 
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 04be61744..88ee730a3 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -129,15 +129,9 @@ rspamd_task_new (struct rspamd_worker *worker, struct rspamd_config *cfg,
 	new_task->request_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
 			rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
 			rspamd_request_header_dtor);
-	rspamd_mempool_add_destructor (new_task->task_pool,
-		(rspamd_mempool_destruct_t) g_hash_table_unref,
-		new_task->request_headers);
-	new_task->reply_headers = g_hash_table_new_full (rspamd_ftok_icase_hash,
-			rspamd_ftok_icase_equal, rspamd_fstring_mapped_ftok_free,
-			rspamd_fstring_mapped_ftok_free);
 	rspamd_mempool_add_destructor (new_task->task_pool,
 			(rspamd_mempool_destruct_t) g_hash_table_unref,
-			new_task->reply_headers);
+			new_task->request_headers);
 	rspamd_mempool_add_destructor (new_task->task_pool,
 			(rspamd_mempool_destruct_t) g_hash_table_unref,
 			new_task->raw_headers);
diff --git a/src/libserver/task.h b/src/libserver/task.h
index ac55dd910..00caf3ab6 100644
--- a/src/libserver/task.h
+++ b/src/libserver/task.h
@@ -134,6 +134,7 @@ enum rspamd_task_stage {
 struct rspamd_email_address;
 struct rspamd_lang_detector;
 enum rspamd_newlines_type;
+struct rspamd_message;
 
 /**
  * Worker task structure
@@ -147,15 +148,12 @@ struct rspamd_task {
 	gulong message_len;                                /**< Message length									*/
 	gchar *helo;                                    /**< helo header value								*/
 	gchar *queue_id;                                /**< queue id if specified							*/
-	const gchar *message_id;                        /**< message id										*/
 	rspamd_inet_addr_t *from_addr;                    /**< from addr for a task							*/
 	rspamd_inet_addr_t *client_addr;                /**< address of connected socket					*/
 	gchar *deliver_to;                                /**< address to deliver								*/
 	gchar *user;                                    /**< user to deliver								*/
-	gchar *subject;                                    /**< subject (for non-mime)							*/
 	const gchar *hostname;                            /**< hostname reported by MTA						*/
 	GHashTable *request_headers;                    /**< HTTP headers in a request						*/
-	GHashTable *reply_headers;                        /**< Custom reply headers							*/
 	struct {
 		const gchar *begin;
 		gsize len;
@@ -163,29 +161,14 @@ struct rspamd_task {
 	} msg;                                            /**< message buffer									*/
 	struct rspamd_http_connection *http_conn;        /**< HTTP server connection							*/
 	struct rspamd_async_session *s;                /**< async session object							*/
-	GPtrArray *parts;                                /**< list of parsed parts							*/
-	GPtrArray *text_parts;                            /**< list of text parts								*/
-	struct {
-		const gchar *begin;
-		gsize len;
-		const gchar *body_start;
-	} raw_headers_content;                /**< list of raw headers							*/
-	GPtrArray *received;                            /**< list of received headers						*/
-	GHashTable *urls;                                /**< list of parsed urls							*/
-	GHashTable *emails;                                /**< list of parsed emails							*/
-	GHashTable *raw_headers;                        /**< list of raw headers							*/
-	GQueue *headers_order;                            /**< order of raw headers							*/
 	struct rspamd_metric_result *result;            /**< Metric result									*/
 	GHashTable *lua_cache;                            /**< cache of lua objects							*/
 	GPtrArray *tokens;                                /**< statistics tokens */
 	GArray *meta_words;                                /**< rspamd_stat_token_t produced from meta headers
 														(e.g. Subject) */
 
-	GPtrArray *rcpt_mime;
 	GPtrArray *rcpt_envelope;                        /**< array of rspamd_email_address					*/
-	GPtrArray *from_mime;
 	struct rspamd_email_address *from_envelope;
-	enum rspamd_newlines_type nlines_type;            /**< type of newlines (detected on most of headers 	*/
 
 	ucl_object_t *messages;                            /**< list of messages that would be reported		*/
 	struct rspamd_re_runtime *re_rt;                /**< regexp runtime									*/
@@ -215,6 +198,7 @@ struct rspamd_task {
 
 	const gchar *classifier;                        /**< Classifier to learn (if needed)				*/
 	struct rspamd_lang_detector *lang_det;            /**< Languages detector								*/
+	struct rspamd_message *message;
 	guchar digest[16];
 };
 
@@ -252,7 +236,8 @@ gboolean rspamd_task_fin (void *arg);
  * @return
  */
 gboolean rspamd_task_load_message (struct rspamd_task *task,
-								   struct rspamd_http_message *msg, const gchar *start, gsize len);
+								   struct rspamd_http_message *msg,
+								   const gchar *start, gsize len);
 
 /**
  * Process task


More information about the Commits mailing list