commit cfdf17e: [Fix] Avoid collisions in mempool variables by changing fuzzy caching logic

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Feb 24 12:07:06 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-02-24 11:59:40 +0000
URL: https://github.com/rspamd/rspamd/commit/cfdf17ea1a26abce66d11319a02faaa33132cf9e (HEAD -> master)

[Fix] Avoid collisions in mempool variables by changing fuzzy caching logic

---
 src/libmime/message.c     |  2 +-
 src/libmime/message.h     |  2 +-
 src/libmime/mime_parser.c |  4 +--
 src/lua/lua_mimepart.c    |  2 +-
 src/plugins/fuzzy_check.c | 68 ++++++++++++++++++++++++++++++++---------------
 5 files changed, 52 insertions(+), 26 deletions(-)

diff --git a/src/libmime/message.c b/src/libmime/message.c
index e5f244a3e..a43e109b5 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -901,7 +901,7 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
 	part->raw_data.len = len;
 	part->parsed_data.begin = start;
 	part->parsed_data.len = len;
-	part->id = MESSAGE_FIELD (task, parts)->len;
+	part->part_number = MESSAGE_FIELD (task, parts)->len;
 	part->raw_headers = rspamd_message_headers_new ();
 	part->headers_order = NULL;
 
diff --git a/src/libmime/message.h b/src/libmime/message.h
index eeba0a053..91d6e13d4 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -97,7 +97,7 @@ struct rspamd_mime_part {
 	enum rspamd_cte cte;
 	guint flags;
 	enum rspamd_mime_part_type part_type;
-	guint id;
+	guint part_number;
 
 	union {
 		struct rspamd_mime_multipart *mp;
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index ce8162401..91df7e684 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -611,7 +611,7 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
 		g_assert_not_reached ();
 	}
 
-	part->id = MESSAGE_FIELD (task, parts)->len;
+	part->part_number = MESSAGE_FIELD (task, parts)->len;
 	g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
 	msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte",
 			&part->ct->type, &part->ct->subtype, part->parsed_data.len,
@@ -945,7 +945,7 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task,
 		return RSPAMD_MIME_PARSE_NESTING;
 	}
 
-	part->id = MESSAGE_FIELD (task, parts)->len;
+	part->part_number = MESSAGE_FIELD (task, parts)->len;
 	g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
 	st->nesting ++;
 	rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE);
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 81e8bed59..76e43b4c5 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -1962,7 +1962,7 @@ lua_mimepart_get_id (lua_State * L)
 		return luaL_error (L, "invalid arguments");
 	}
 
-	lua_pushinteger (L, part->id);
+	lua_pushinteger (L, part->part_number);
 
 	return 1;
 }
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 4df88e2a8..475417147 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -1343,35 +1343,59 @@ struct rspamd_cached_shingles {
 	guchar digest[rspamd_cryptobox_HASHBYTES];
 };
 
+
 static struct rspamd_cached_shingles *
 fuzzy_cmd_get_cached (struct fuzzy_rule *rule,
-		rspamd_mempool_t *pool,
-		gpointer p)
+					  struct rspamd_task *task,
+					  struct rspamd_mime_part *mp)
 {
 	gchar key[32];
 	gint key_part;
+	struct rspamd_cached_shingles **cached;
 
 	memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
-	rspamd_snprintf (key, sizeof (key), "%p%s%d", p, rule->algorithm_str,
+	rspamd_snprintf (key, sizeof (key), "%s%d", rule->algorithm_str,
 			key_part);
 
-	return rspamd_mempool_get_variable (pool, key);
+	cached = (struct rspamd_cached_shingles **)rspamd_mempool_get_variable (
+			task->task_pool, key);
+
+	if (cached && cached[mp->part_number]) {
+		return cached[mp->part_number];
+	}
+
+	return NULL;
 }
 
 static void
 fuzzy_cmd_set_cached (struct fuzzy_rule *rule,
-		rspamd_mempool_t *pool,
-		gpointer p,
-		struct rspamd_cached_shingles *data)
+					  struct rspamd_task *task,
+					  struct rspamd_mime_part *mp,
+					  struct rspamd_cached_shingles *data)
 {
 	gchar key[32];
 	gint key_part;
+	struct rspamd_cached_shingles **cached;
 
 	memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
-	rspamd_snprintf (key, sizeof (key), "%p%s%d", p, rule->algorithm_str,
+	rspamd_snprintf (key, sizeof (key), "%s%d", rule->algorithm_str,
 			key_part);
-	/* Key is copied */
-	rspamd_mempool_set_variable (pool, key, data, NULL);
+
+	cached = (struct rspamd_cached_shingles **)rspamd_mempool_get_variable (
+			task->task_pool, key);
+
+	if (cached) {
+		cached[mp->part_number] = data;
+	}
+	else {
+		cached = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cached) *
+				(MESSAGE_FIELD (task, parts)->len + 1));
+		cached[mp->part_number] = data;
+
+		rspamd_mempool_set_variable (task->task_pool, key, cached, NULL);
+	}
+
+
 }
 
 static gboolean
@@ -1447,7 +1471,7 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 	GArray *words;
 	struct fuzzy_cmd_io *io;
 
-	cached = fuzzy_cmd_get_cached (rule, pool, mp);
+	cached = fuzzy_cmd_get_cached (rule, task, mp);
 
 	if (cached) {
 		/* Copy cached */
@@ -1538,7 +1562,7 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 		 * Since it is copied when obtained from the cache, it is safe to use
 		 * it this way.
 		 */
-		fuzzy_cmd_set_cached (rule, pool, mp, cached);
+		fuzzy_cmd_set_cached (rule, task, mp, cached);
 	}
 
 	io = rspamd_mempool_alloc (pool, sizeof (*io));
@@ -1601,12 +1625,13 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 	return io;
 }
 
+#if 0
 static struct fuzzy_cmd_io *
 fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
 						   int c,
 						   gint flag,
 						   guint32 weight,
-						   rspamd_mempool_t *pool,
+						   struct rspamd_task *task,
 						   struct rspamd_image *img,
 						   struct rspamd_mime_part *mp)
 {
@@ -1616,11 +1641,11 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
 	struct rspamd_shingle *sh;
 	struct rspamd_cached_shingles *cached;
 
-	cached = fuzzy_cmd_get_cached (rule, pool, mp);
+	cached = fuzzy_cmd_get_cached (rule, task, mp);
 
 	if (cached) {
 		/* Copy cached */
-		encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+		encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
 		shcmd = &encshcmd->cmd;
 		memcpy (&shcmd->sgl, cached->sh, sizeof (struct rspamd_shingle));
 		memcpy (shcmd->basic.digest, cached->digest,
@@ -1628,14 +1653,14 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
 		shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
 	}
 	else {
-		encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+		encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
 		shcmd = &encshcmd->cmd;
 
 		/*
 		 * Generate shingles
 		 */
 		sh = rspamd_shingles_from_image (img->dct,
-				rule->shingles_key->str, pool,
+				rule->shingles_key->str, task->task_pool,
 				rspamd_shingles_default_filter, NULL,
 				rule->alg);
 		if (sh != NULL) {
@@ -1652,7 +1677,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
 				(const guchar *)img->dct, RSPAMD_DCT_LEN / NBBY,
 				rule->hash_key->str, rule->hash_key->len);
 
-		msg_debug_pool ("loading shingles of type %s with key %*xs",
+		msg_debug_task ("loading shingles of type %s with key %*xs",
 				rule->algorithm_str,
 				16, rule->shingles_key->str);
 
@@ -1663,10 +1688,10 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
 		 * Since it is copied when obtained from the cache, it is safe to use
 		 * it this way.
 		 */
-		cached = rspamd_mempool_alloc (pool, sizeof (*cached));
+		cached = rspamd_mempool_alloc (task->task_pool, sizeof (*cached));
 		cached->sh = sh;
 		memcpy (cached->digest, shcmd->basic.digest, sizeof (cached->digest));
-		fuzzy_cmd_set_cached (rule, pool, mp, cached);
+		fuzzy_cmd_set_cached (rule, task, mp, cached);
 	}
 
 	shcmd->basic.tag = ottery_rand_uint32 ();
@@ -1678,7 +1703,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
 		shcmd->basic.value = weight;
 	}
 
-	io = rspamd_mempool_alloc (pool, sizeof (*io));
+	io = rspamd_mempool_alloc (task->task_pool, sizeof (*io));
 	io->part = mp;
 	io->tag = shcmd->basic.tag;
 	io->flags = FUZZY_CMD_FLAG_IMAGE;
@@ -1697,6 +1722,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
 
 	return io;
 }
+#endif
 
 static struct fuzzy_cmd_io *
 fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,


More information about the Commits mailing list