commit ed3e234: [Project] Adopt libmime code

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Jul 12 16:42:19 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-07-12 10:45:43 +0100
URL: https://github.com/rspamd/rspamd/commit/ed3e23421f58da0cdf9938cbe3c72de95752c80f

[Project] Adopt libmime code

---
 src/libmime/archives.c      |   4 +-
 src/libmime/filter.c        |   4 +-
 src/libmime/images.c        |  20 ++----
 src/libmime/mime_encoding.c |   5 +-
 src/libmime/mime_parser.c   | 153 ++++++++++++++++++++------------------------
 5 files changed, 84 insertions(+), 102 deletions(-)

diff --git a/src/libmime/archives.c b/src/libmime/archives.c
index c19991eb6..b1c1624a4 100644
--- a/src/libmime/archives.c
+++ b/src/libmime/archives.c
@@ -1906,9 +1906,7 @@ rspamd_archives_process (struct rspamd_task *task)
 	const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
 	const guchar gz_magic[] = {0x1F, 0x8B};
 
-	for (i = 0; i < task->parts->len; i ++) {
-		part = g_ptr_array_index (task->parts, i);
-
+	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
 		if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_IMAGE))) {
 			if (part->parsed_data.len > 0) {
 				if (rspamd_archive_cheat_detect (part, "zip",
diff --git a/src/libmime/filter.c b/src/libmime/filter.c
index a040cda1d..83a9881d6 100644
--- a/src/libmime/filter.c
+++ b/src/libmime/filter.c
@@ -139,14 +139,14 @@ rspamd_add_passthrough_result (struct rspamd_task *task,
 	if (!isnan (target_score)) {
 
 		msg_info_task ("<%s>: set pre-result to '%s' %s(%.2f): '%s' from %s(%d)",
-				task->message_id, action->name,
+				MESSAGE_FIELD (task, message_id), action->name,
 				flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
 				target_score,
 				message, module, priority);
 	}
 	else {
 		msg_info_task ("<%s>: set pre-result to '%s' %s(no score): '%s' from %s(%d)",
-				task->message_id, action->name,
+				MESSAGE_FIELD (task, message_id), action->name,
 				flags & RSPAMD_PASSTHROUGH_LEAST ? "*least " : "",
 				message, module, priority);
 	}
diff --git a/src/libmime/images.c b/src/libmime/images.c
index 787417ab3..cb59bc88e 100644
--- a/src/libmime/images.c
+++ b/src/libmime/images.c
@@ -54,9 +54,7 @@ rspamd_images_process (struct rspamd_task *task)
 
 	RSPAMD_FTOK_ASSIGN (&srch, "image");
 
-	for (i = 0; i < task->parts->len; i ++) {
-		part = g_ptr_array_index (task->parts, i);
-
+	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
 		if (!(part->flags & (RSPAMD_MIME_PART_TEXT|RSPAMD_MIME_PART_ARCHIVE))) {
 			if (rspamd_ftok_cmp (&part->ct->type, &srch) == 0 &&
 				part->parsed_data.len > 0) {
@@ -603,17 +601,15 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part)
 	struct html_image *himg;
 	const gchar *cid, *html_cid;
 	guint cid_len, i, j;
-	GPtrArray *ar;
 	struct rspamd_image *img;
 
 
 	img = rspamd_maybe_process_image (task->task_pool, &part->parsed_data);
 
 	if (img != NULL) {
-		msg_debug_images ("detected %s image of size %ud x %ud in message <%s>",
+		msg_debug_images ("detected %s image of size %ud x %ud",
 			rspamd_image_type_str (img->type),
-			img->width, img->height,
-			task->message_id);
+			img->width, img->height);
 
 		if (part->cd) {
 			img->filename = &part->cd->filename;
@@ -625,11 +621,10 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part)
 		part->specific.img = img;
 
 		/* Check Content-Id */
-		ar = rspamd_message_get_header_from_hash (part->raw_headers,
-				task->task_pool, "Content-Id", FALSE);
+		rh = rspamd_message_get_header_from_hash (part->raw_headers,
+				"Content-Id");
 
-		if (ar != NULL && ar->len > 0) {
-			rh = g_ptr_array_index (ar, 0);
+		if (rh) {
 			cid = rh->decoded;
 
 			if (*cid == '<') {
@@ -643,9 +638,8 @@ process_image (struct rspamd_task *task, struct rspamd_mime_part *part)
 					cid_len --;
 				}
 
-				for (i = 0; i < task->text_parts->len; i ++) {
-					tp = g_ptr_array_index (task->text_parts, i);
 
+				PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
 					if (IS_PART_HTML (tp) && tp->html != NULL &&
 							tp->html->images != NULL) {
 						for (j = 0; j < tp->html->images->len; j ++) {
diff --git a/src/libmime/mime_encoding.c b/src/libmime/mime_encoding.c
index 8dc7da12e..4622ee032 100644
--- a/src/libmime/mime_encoding.c
+++ b/src/libmime/mime_encoding.c
@@ -667,7 +667,8 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 	}
 
 	if (charset == NULL) {
-		msg_info_task ("<%s>: has invalid charset", task->message_id);
+		msg_info_task ("<%s>: has invalid charset",
+				MESSAGE_FIELD (task, message_id));
 		SET_PART_RAW (text_part);
 		text_part->utf_raw_content = part_content;
 
@@ -690,7 +691,7 @@ rspamd_mime_text_part_maybe_convert (struct rspamd_task *task,
 		if (!rspamd_mime_text_part_utf8_convert (task, text_part,
 				part_content, charset, &err)) {
 			msg_warn_task ("<%s>: cannot convert from %s to utf8: %s",
-					task->message_id,
+					MESSAGE_FIELD (task, message_id),
 					charset,
 					err ? err->message : "unknown problem");
 			SET_PART_RAW (text_part);
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index 6572f4e88..9fe9e7b1f 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+
 #include "config.h"
 #include "task.h"
 #include "mime_parser.h"
@@ -21,6 +22,7 @@
 #include "message.h"
 #include "multipattern.h"
 #include "contrib/libottery/ottery.h"
+#include "contrib/uthash/utlist.h"
 
 struct rspamd_mime_parser_lib_ctx {
 	struct rspamd_multipattern *mp_boundary;
@@ -256,21 +258,17 @@ rspamd_mime_part_get_cte_heuristic (struct rspamd_task *task,
 
 static void
 rspamd_mime_part_get_cte (struct rspamd_task *task,
-		GHashTable *hdrs,
-		struct rspamd_mime_part *part,
-		gboolean apply_heuristic)
+						  khash_t(rspamd_mime_headers_htb) *hdrs,
+						  struct rspamd_mime_part *part,
+						  gboolean apply_heuristic)
 {
-	struct rspamd_mime_header *hdr;
+	struct rspamd_mime_header *hdr, *cur;
 	guint i;
-	GPtrArray *hdrs_cte;
 	enum rspamd_cte cte = RSPAMD_CTE_UNKNOWN;
 
-	hdrs_cte = rspamd_message_get_header_from_hash (hdrs,
-			task->task_pool,
-			"Content-Transfer-Encoding", FALSE);
-
-	if (hdrs_cte == NULL) {
+	hdr = rspamd_message_get_header_from_hash (hdrs, "Content-Transfer-Encoding");
 
+	if (hdr == NULL) {
 		if (part->parent_part && part->parent_part->cte != RSPAMD_CTE_UNKNOWN &&
 				!(part->parent_part->flags & RSPAMD_MIME_PART_MISSING_CTE)) {
 			part->cte = part->parent_part->cte;
@@ -287,12 +285,11 @@ rspamd_mime_part_get_cte (struct rspamd_task *task,
 		part->flags |= RSPAMD_MIME_PART_MISSING_CTE;
 	}
 	else {
-		for (i = 0; i < hdrs_cte->len; i ++) {
+		DL_FOREACH (hdr, cur) {
 			gsize hlen;
 			gchar lc_buf[128];
 
-			hdr = g_ptr_array_index (hdrs_cte, i);
-			hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", hdr->value);
+			hlen = rspamd_snprintf (lc_buf, sizeof (lc_buf), "%s", cur->value);
 			rspamd_str_lc (lc_buf, hlen);
 			cte = rspamd_mime_parse_cte (lc_buf, hlen);
 
@@ -337,19 +334,17 @@ check_cte:
 static void
 rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part)
 {
-	struct rspamd_mime_header *hdr;
+	struct rspamd_mime_header *hdr, *cur;
 	guint i;
-	GPtrArray *hdrs;
 	struct rspamd_content_disposition *cd = NULL;
 	rspamd_ftok_t srch;
 	struct rspamd_content_type_param *found;
 
-	hdrs = rspamd_message_get_header_from_hash (part->raw_headers,
-			task->task_pool,
-			"Content-Disposition", FALSE);
+	hdr = rspamd_message_get_header_from_hash (part->raw_headers,
+			"Content-Disposition");
 
 
-	if (hdrs == NULL) {
+	if (hdr == NULL) {
 		cd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cd));
 		cd->type = RSPAMD_CT_INLINE;
 
@@ -370,15 +365,13 @@ rspamd_mime_part_get_cd (struct rspamd_task *task, struct rspamd_mime_part *part
 		}
 	}
 	else {
-		for (i = 0; i < hdrs->len; i ++) {
+		DL_FOREACH (hdr, cur) {
 			gsize hlen;
-
-			hdr = g_ptr_array_index (hdrs, i);
 			cd = NULL;
 
-			if (hdr->decoded) {
-				hlen = strlen (hdr->decoded);
-				cd = rspamd_content_disposition_parse (hdr->decoded, hlen,
+			if (cur->decoded) {
+				hlen = strlen (cur->decoded);
+				cd = rspamd_content_disposition_parse (cur->decoded, hlen,
 						task->task_pool);
 			}
 
@@ -517,8 +510,8 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
 		g_assert_not_reached ();
 	}
 
-	part->id = task->parts->len;
-	g_ptr_array_add (task->parts, part);
+	part->id = MESSAGE_FIELD (task, parts)->len;
+	g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
 	msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte",
 			&part->ct->type, &part->ct->subtype, part->parsed_data.len,
 			part->raw_data.len, rspamd_cte_to_string (part->cte));
@@ -546,12 +539,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
 		GError **err)
 {
 	struct rspamd_content_type *ct, *sel = NULL;
-	struct rspamd_mime_header *hdr;
-	GPtrArray *hdrs = NULL;
+	struct rspamd_mime_header *hdr = NULL, *cur;
 	struct rspamd_mime_part *npart;
 	GString str;
 	goffset hdr_pos, body_pos;
-	guint i;
 	enum rspamd_mime_parse_error ret = RSPAMD_MIME_PARSE_FATAL;
 
 
@@ -592,9 +583,8 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
 	npart = rspamd_mempool_alloc0 (task->task_pool,
 			sizeof (struct rspamd_mime_part));
 	npart->parent_part = multipart;
-	npart->raw_headers =  g_hash_table_new_full (rspamd_strcase_hash,
-			rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
-	npart->headers_order = g_queue_new ();
+	npart->raw_headers =  rspamd_message_headers_new ();
+	npart->headers_order = NULL;
 
 	if (multipart) {
 		if (multipart->specific.mp->children == NULL) {
@@ -612,15 +602,14 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
 
 		if (npart->raw_headers_len > 0) {
 			rspamd_mime_headers_process (task, npart->raw_headers,
-					npart->headers_order,
+					&npart->headers_order,
 					npart->raw_headers_str,
 					npart->raw_headers_len,
 					FALSE);
 		}
 
-		hdrs = rspamd_message_get_header_from_hash (npart->raw_headers,
-				task->task_pool,
-				"Content-Type", FALSE);
+		hdr = rspamd_message_get_header_from_hash (npart->raw_headers,
+				"Content-Type");
 
 	}
 	else {
@@ -631,11 +620,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
 	}
 
 
-	if (hdrs != NULL) {
+	if (hdr != NULL) {
 
-		for (i = 0; i < hdrs->len; i ++) {
-			hdr = g_ptr_array_index (hdrs, i);
-			ct = rspamd_content_type_parse (hdr->decoded, strlen (hdr->decoded),
+		DL_FOREACH (hdr, cur) {
+			ct = rspamd_content_type_parse (cur->decoded, strlen (cur->decoded),
 					task->task_pool);
 
 			/* Here we prefer multipart content-type or any content-type */
@@ -848,8 +836,8 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task,
 		return RSPAMD_MIME_PARSE_NESTING;
 	}
 
-	part->id = task->parts->len;
-	g_ptr_array_add (task->parts, part);
+	part->id = MESSAGE_FIELD (task, parts)->len;
+	g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
 	st->nesting ++;
 	rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE);
 
@@ -1098,8 +1086,7 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 		GError **err)
 {
 	struct rspamd_content_type *ct, *sel = NULL;
-	struct rspamd_mime_header *hdr;
-	GPtrArray *hdrs = NULL;
+	struct rspamd_mime_header *hdr = NULL, *cur;
 	const gchar *pbegin, *p;
 	gsize plen, len;
 	struct rspamd_mime_part *npart;
@@ -1159,42 +1146,45 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 
 		if (hdr_pos > 0 && hdr_pos < str.len) {
 
-			task->raw_headers_content.begin = str.str;
-			task->raw_headers_content.len = hdr_pos;
-			task->raw_headers_content.body_start = str.str + body_pos;
+			MESSAGE_FIELD (task, raw_headers_content).begin = str.str;
+			MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos;
+			MESSAGE_FIELD (task, raw_headers_content).body_start = str.str + body_pos;
 
-			if (task->raw_headers_content.len > 0) {
-				rspamd_mime_headers_process (task, task->raw_headers,
-						task->headers_order,
-						task->raw_headers_content.begin,
-						task->raw_headers_content.len,
+			if (MESSAGE_FIELD (task, raw_headers_content).len > 0) {
+				rspamd_mime_headers_process (task,
+						MESSAGE_FIELD (task, raw_headers),
+						&MESSAGE_FIELD (task, headers_order),
+						MESSAGE_FIELD (task, raw_headers_content).begin,
+						MESSAGE_FIELD (task, raw_headers_content).len,
 						TRUE);
 			}
 
-			hdrs = rspamd_message_get_header_from_hash (task->raw_headers,
-					task->task_pool,
-					"Content-Type", FALSE);
+			hdr = rspamd_message_get_header_from_hash (
+					MESSAGE_FIELD (task, raw_headers),
+					"Content-Type");
 		}
 		else {
 			/* First apply heuristic, maybe we have just headers */
 			hdr_pos = rspamd_mime_parser_headers_heuristic (&str, &body_pos);
 
 			if (hdr_pos > 0 && hdr_pos <= str.len) {
-				task->raw_headers_content.begin = str.str;
-				task->raw_headers_content.len = hdr_pos;
-				task->raw_headers_content.body_start = str.str + body_pos;
-
-				if (task->raw_headers_content.len > 0) {
-					rspamd_mime_headers_process (task, task->raw_headers,
-							task->headers_order,
-							task->raw_headers_content.begin,
-							task->raw_headers_content.len,
+				MESSAGE_FIELD (task, raw_headers_content).begin = str.str;
+				MESSAGE_FIELD (task, raw_headers_content).len = hdr_pos;
+				MESSAGE_FIELD (task, raw_headers_content).body_start = str.str +
+						body_pos;
+
+				if (MESSAGE_FIELD (task, raw_headers_content).len > 0) {
+					rspamd_mime_headers_process (task,
+							MESSAGE_FIELD (task, raw_headers),
+							&MESSAGE_FIELD (task, headers_order),
+							MESSAGE_FIELD (task, raw_headers_content).begin,
+							MESSAGE_FIELD (task, raw_headers_content).len,
 							TRUE);
 				}
 
-				hdrs = rspamd_message_get_header_from_hash (task->raw_headers,
-						task->task_pool,
-						"Content-Type", FALSE);
+				hdr = rspamd_message_get_header_from_hash (
+						MESSAGE_FIELD (task, raw_headers),
+						"Content-Type");
 				task->flags |= RSPAMD_TASK_FLAG_BROKEN_HEADERS;
 			}
 			else {
@@ -1204,7 +1194,8 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 
 		pbegin = st->start + body_pos;
 		plen = st->end - pbegin;
-		npart->raw_headers = g_hash_table_ref (task->raw_headers);
+		/* TODO: check if it is correct */
+		npart->raw_headers = NULL;
 		npart->headers_order = NULL;
 	}
 	else {
@@ -1227,9 +1218,8 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 		str.len = part->parsed_data.len;
 
 		hdr_pos = rspamd_string_find_eoh (&str, &body_pos);
-		npart->raw_headers =  g_hash_table_new_full (rspamd_strcase_hash,
-				rspamd_strcase_equal, NULL, rspamd_ptr_array_free_hard);
-		npart->headers_order = g_queue_new ();
+		npart->raw_headers =  rspamd_message_headers_new ();
+		npart->headers_order = NULL;
 
 		if (hdr_pos > 0 && hdr_pos < str.len) {
 			npart->raw_headers_str = str.str;
@@ -1237,16 +1227,16 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 			npart->raw_data.begin = str.str + body_pos;
 
 			if (npart->raw_headers_len > 0) {
-				rspamd_mime_headers_process (task, npart->raw_headers,
-						npart->headers_order,
+				rspamd_mime_headers_process (task,
+						npart->raw_headers,
+						&npart->headers_order,
 						npart->raw_headers_str,
 						npart->raw_headers_len,
 						FALSE);
 			}
 
-			hdrs = rspamd_message_get_header_from_hash (npart->raw_headers,
-					task->task_pool,
-					"Content-Type", FALSE);
+			hdr = rspamd_message_get_header_from_hash (npart->raw_headers,
+					"Content-Type");
 		}
 		else {
 			body_pos = 0;
@@ -1260,13 +1250,12 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 	npart->raw_data.len = plen;
 	npart->parent_part = part;
 
-	if (hdrs == NULL) {
+	if (hdr == NULL) {
 		sel = NULL;
 	}
 	else {
-		for (i = 0; i < hdrs->len; i ++) {
-			hdr = g_ptr_array_index (hdrs, i);
-			ct = rspamd_content_type_parse (hdr->decoded, strlen (hdr->decoded),
+		DL_FOREACH (hdr, cur) {
+			ct = rspamd_content_type_parse (cur->decoded, strlen (cur->decoded),
 					task->task_pool);
 
 			/* Here we prefer multipart content-type or any content-type */
@@ -1408,7 +1397,7 @@ rspamd_mime_parse_task (struct rspamd_task *task, GError **err)
 
 	st = g_malloc0 (sizeof (*st));
 	st->stack = g_ptr_array_sized_new (4);
-	st->pos = task->raw_headers_content.body_start;
+	st->pos = MESSAGE_FIELD (task, raw_headers_content).body_start;
 	st->end = task->msg.begin + task->msg.len;
 	st->boundaries = g_array_sized_new (FALSE, FALSE,
 			sizeof (struct rspamd_mime_boundary), 8);


More information about the Commits mailing list