commit 8e83bc2: [Minor] Core: Store raw boundary in multipart node

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Dec 27 18:28:09 UTC 2018


Author: Vsevolod Stakhov
Date: 2018-12-17 13:54:55 +0000
URL: https://github.com/rspamd/rspamd/commit/8e83bc2d1076daeb0de350b3fff6551054488efe

[Minor] Core: Store raw boundary in multipart node

---
 src/libmime/content_type.c | 71 ++++++++++++++++++++++++++++------------------
 src/libmime/content_type.h | 13 +++++----
 src/libmime/message.h      |  3 +-
 src/libmime/mime_parser.c  | 14 +++++++--
 src/libserver/task.c       |  4 +--
 src/lua/lua_mimepart.c     |  6 ++--
 6 files changed, 69 insertions(+), 42 deletions(-)

diff --git a/src/libmime/content_type.c b/src/libmime/content_type.c
index 91c09e4bc..6b99953f2 100644
--- a/src/libmime/content_type.c
+++ b/src/libmime/content_type.c
@@ -21,43 +21,28 @@
 void
 rspamd_content_type_add_param (rspamd_mempool_t *pool,
 		struct rspamd_content_type *ct,
-		const gchar *name_start, const gchar *name_end,
-		const gchar *value_start, const gchar *value_end)
+		gchar *name_start, gchar *name_end,
+		gchar *value_start, gchar *value_end)
 {
 	rspamd_ftok_t srch;
 	struct rspamd_content_type_param *found = NULL, *nparam;
 
 	g_assert (ct != NULL);
 
-	srch.begin = name_start;
-	srch.len = name_end - name_start;
-
-	if (ct->attrs) {
-		found = g_hash_table_lookup (ct->attrs, &srch);
-	}
-	else {
-		ct->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
-				rspamd_ftok_icase_equal);
-	}
 
 	nparam = rspamd_mempool_alloc (pool, sizeof (*nparam));
 	nparam->name.begin = name_start;
 	nparam->name.len = name_end - name_start;
+	rspamd_str_lc (name_start, name_end - name_start);
+
 	nparam->value.begin = value_start;
 	nparam->value.len = value_end - value_start;
 
-	if (!found) {
-		DL_APPEND (found, nparam);
-		g_hash_table_insert (ct->attrs, &nparam->name, nparam);
-	}
-	else {
-		DL_APPEND (found, nparam);
-	}
-
 	RSPAMD_FTOK_ASSIGN (&srch, "charset");
 
 	if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) {
 		/* Adjust charset */
+		found = nparam;
 		ct->charset.begin = nparam->value.begin;
 		ct->charset.len = nparam->value.len;
 	}
@@ -65,17 +50,47 @@ rspamd_content_type_add_param (rspamd_mempool_t *pool,
 	RSPAMD_FTOK_ASSIGN (&srch, "boundary");
 
 	if (rspamd_ftok_cmp (&nparam->name, &srch) == 0) {
+		found = nparam;
+		gchar *lc_boundary;
 		/* Adjust boundary */
-		ct->boundary.begin = nparam->value.begin;
+		lc_boundary = rspamd_mempool_alloc (pool, nparam->value.len);
+		memcpy (lc_boundary, nparam->value.begin, nparam->value.len);
+		rspamd_str_lc (lc_boundary, nparam->value.len);
+		ct->boundary.begin = lc_boundary;
 		ct->boundary.len = nparam->value.len;
+		/* Preserve original (case sensitive) boundary */
+		ct->orig_boundary.begin = nparam->value.begin;
+		ct->orig_boundary.len = nparam->value.len;
+	}
+
+	if (!found) {
+		srch.begin = nparam->name.begin;
+		srch.len = nparam->name.len;
+
+		rspamd_str_lc (value_start, value_end - value_start);
+
+		if (ct->attrs) {
+			found = g_hash_table_lookup (ct->attrs, &srch);
+		} else {
+			ct->attrs = g_hash_table_new (rspamd_ftok_icase_hash,
+					rspamd_ftok_icase_equal);
+		}
+
+		if (!found) {
+			DL_APPEND (found, nparam);
+			g_hash_table_insert (ct->attrs, &nparam->name, nparam);
+		}
+		else {
+			DL_APPEND (found, nparam);
+		}
 	}
 }
 
 static struct rspamd_content_type *
-rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool)
+rspamd_content_type_parser (gchar *in, gsize len, rspamd_mempool_t *pool)
 {
 	guint obraces = 0, ebraces = 0, qlen = 0;
-	const gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
+	gchar *p, *c, *end, *pname_start = NULL, *pname_end = NULL;
 	struct rspamd_content_type *res = NULL, val;
 	gboolean eqsign_seen = FALSE;
 	enum {
@@ -95,7 +110,7 @@ rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool)
 	c = p;
 	end = p + len;
 	memset (&val, 0, sizeof (val));
-	val.lc_data = (gchar *)in;
+	val.cpy = in;
 
 	while (p < end) {
 		switch (state) {
@@ -346,6 +361,9 @@ rspamd_content_type_parser (const gchar *in, gsize len, rspamd_mempool_t *pool)
 	if (val.type.len > 0) {
 		res = rspamd_mempool_alloc (pool, sizeof (val));
 		memcpy (res, &val, sizeof (val));
+
+		/* Lowercase common thingies */
+
 	}
 
 	return res;
@@ -359,9 +377,8 @@ rspamd_content_type_parse (const gchar *in,
 	rspamd_ftok_t srch;
 	gchar *lc_data;
 
-	lc_data = rspamd_mempool_alloc (pool, len);
-	memcpy (lc_data, in, len);
-	rspamd_str_lc (lc_data, len);
+	lc_data = rspamd_mempool_alloc (pool, len + 1);
+	rspamd_strlcpy (lc_data, in, len + 1);
 
 	if ((res = rspamd_content_type_parser (lc_data, len, pool)) != NULL) {
 		if (res->attrs) {
diff --git a/src/libmime/content_type.h b/src/libmime/content_type.h
index e71077911..c4dc5896e 100644
--- a/src/libmime/content_type.h
+++ b/src/libmime/content_type.h
@@ -41,11 +41,12 @@ struct rspamd_content_type_param {
 };
 
 struct rspamd_content_type {
-	gchar *lc_data;
+	gchar *cpy;
 	rspamd_ftok_t type;
 	rspamd_ftok_t subtype;
 	rspamd_ftok_t charset;
 	rspamd_ftok_t boundary;
+	rspamd_ftok_t orig_boundary;
 	enum rspamd_content_type_flags flags;
 	GHashTable *attrs; /* Can be empty */
 };
@@ -66,16 +67,16 @@ struct rspamd_content_disposition {
 /**
  * Adds new parameter to content type structure
  * @param ct
- * @param name_start
+ * @param name_start (can be modified)
  * @param name_end
- * @param value_start
+ * @param value_start (can be modified)
  * @param value_end
  */
 void
 rspamd_content_type_add_param (rspamd_mempool_t *pool,
-		struct rspamd_content_type *ct,
-		const gchar *name_start, const gchar *name_end,
-		const gchar *value_start, const gchar *value_end);
+							   struct rspamd_content_type *ct,
+							   gchar *name_start,  gchar *name_end,
+							   gchar *value_start,  gchar *value_end);
 
 /**
  * Parse content type from the header (performs copy + lowercase)
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 25c88cc3a..19e8b40b5 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -43,6 +43,7 @@ struct rspamd_mime_text_part;
 
 struct rspamd_mime_multipart {
 	GPtrArray *children;
+	rspamd_ftok_t boundary;
 };
 
 struct rspamd_mime_part {
@@ -64,7 +65,7 @@ struct rspamd_mime_part {
 	guint id;
 
 	union {
-		struct rspamd_mime_multipart mp;
+		struct rspamd_mime_multipart *mp;
 		struct rspamd_mime_text_part *txt;
 		struct rspamd_image *img;
 		struct rspamd_archive *arch;
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index 5cfda74bf..3b84cbbbf 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -571,11 +571,11 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
 	npart->headers_order = g_queue_new ();
 
 	if (multipart) {
-		if (multipart->specific.mp.children == NULL) {
-			multipart->specific.mp.children = g_ptr_array_sized_new (2);
+		if (multipart->specific.mp->children == NULL) {
+			multipart->specific.mp->children = g_ptr_array_sized_new (2);
 		}
 
-		g_ptr_array_add (multipart->specific.mp.children, npart);
+		g_ptr_array_add (multipart->specific.mp->children, npart);
 	}
 
 	if (hdr_pos > 0 && hdr_pos < str.len) {
@@ -635,6 +635,10 @@ rspamd_mime_process_multipart_node (struct rspamd_task *task,
 	if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
 		st->nesting ++;
 		g_ptr_array_add (st->stack, npart);
+		npart->specific.mp = rspamd_mempool_alloc0 (task->task_pool,
+				sizeof (struct rspamd_mime_multipart));
+		memcpy (&npart->specific.mp->boundary, &sel->orig_boundary,
+				sizeof (rspamd_ftok_t));
 		ret = rspamd_mime_parse_multipart_part (task, npart, st, err);
 	}
 	else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) {
@@ -1265,6 +1269,10 @@ rspamd_mime_parse_message (struct rspamd_task *task,
 	if (sel->flags & RSPAMD_CONTENT_TYPE_MULTIPART) {
 		g_ptr_array_add (nst->stack, npart);
 		nst->nesting ++;
+		npart->specific.mp = rspamd_mempool_alloc0 (task->task_pool,
+				sizeof (struct rspamd_mime_multipart));
+		memcpy (&npart->specific.mp->boundary, &sel->orig_boundary,
+				sizeof (rspamd_ftok_t));
 		ret = rspamd_mime_parse_multipart_part (task, npart, nst, err);
 	}
 	else if (sel->flags & RSPAMD_CONTENT_TYPE_MESSAGE) {
diff --git a/src/libserver/task.c b/src/libserver/task.c
index 664715fea..eaa379361 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -249,8 +249,8 @@ rspamd_task_free (struct rspamd_task *task)
 			}
 
 			if (IS_CT_MULTIPART (p->ct)) {
-				if (p->specific.mp.children) {
-					g_ptr_array_free (p->specific.mp.children, TRUE);
+				if (p->specific.mp->children) {
+					g_ptr_array_free (p->specific.mp->children, TRUE);
 				}
 			}
 		}
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 56fc0d302..340da7aa2 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -1595,13 +1595,13 @@ lua_mimepart_get_children (lua_State * L)
 		return luaL_error (L, "invalid arguments");
 	}
 
-	if (!IS_CT_MULTIPART (part->ct) || part->specific.mp.children == NULL) {
+	if (!IS_CT_MULTIPART (part->ct) || part->specific.mp->children == NULL) {
 		lua_pushnil (L);
 	}
 	else {
-		lua_createtable (L, part->specific.mp.children->len, 0);
+		lua_createtable (L, part->specific.mp->children->len, 0);
 
-		PTR_ARRAY_FOREACH (part->specific.mp.children, i, cur) {
+		PTR_ARRAY_FOREACH (part->specific.mp->children, i, cur) {
 			pcur = lua_newuserdata (L, sizeof (*pcur));
 			*pcur = cur;
 			rspamd_lua_setclass (L, "rspamd{mimepart}", -1);


More information about the Commits mailing list