commit cfdf17e: [Fix] Avoid collisions in mempool variables by changing fuzzy caching logic
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Feb 24 12:07:06 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-02-24 11:59:40 +0000
URL: https://github.com/rspamd/rspamd/commit/cfdf17ea1a26abce66d11319a02faaa33132cf9e (HEAD -> master)
[Fix] Avoid collisions in mempool variables by changing fuzzy caching logic
---
src/libmime/message.c | 2 +-
src/libmime/message.h | 2 +-
src/libmime/mime_parser.c | 4 +--
src/lua/lua_mimepart.c | 2 +-
src/plugins/fuzzy_check.c | 68 ++++++++++++++++++++++++++++++++---------------
5 files changed, 52 insertions(+), 26 deletions(-)
diff --git a/src/libmime/message.c b/src/libmime/message.c
index e5f244a3e..a43e109b5 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -901,7 +901,7 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
part->raw_data.len = len;
part->parsed_data.begin = start;
part->parsed_data.len = len;
- part->id = MESSAGE_FIELD (task, parts)->len;
+ part->part_number = MESSAGE_FIELD (task, parts)->len;
part->raw_headers = rspamd_message_headers_new ();
part->headers_order = NULL;
diff --git a/src/libmime/message.h b/src/libmime/message.h
index eeba0a053..91d6e13d4 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -97,7 +97,7 @@ struct rspamd_mime_part {
enum rspamd_cte cte;
guint flags;
enum rspamd_mime_part_type part_type;
- guint id;
+ guint part_number;
union {
struct rspamd_mime_multipart *mp;
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index ce8162401..91df7e684 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -611,7 +611,7 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
g_assert_not_reached ();
}
- part->id = MESSAGE_FIELD (task, parts)->len;
+ part->part_number = MESSAGE_FIELD (task, parts)->len;
g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte",
&part->ct->type, &part->ct->subtype, part->parsed_data.len,
@@ -945,7 +945,7 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task,
return RSPAMD_MIME_PARSE_NESTING;
}
- part->id = MESSAGE_FIELD (task, parts)->len;
+ part->part_number = MESSAGE_FIELD (task, parts)->len;
g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
st->nesting ++;
rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE);
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index 81e8bed59..76e43b4c5 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -1962,7 +1962,7 @@ lua_mimepart_get_id (lua_State * L)
return luaL_error (L, "invalid arguments");
}
- lua_pushinteger (L, part->id);
+ lua_pushinteger (L, part->part_number);
return 1;
}
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 4df88e2a8..475417147 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -1343,35 +1343,59 @@ struct rspamd_cached_shingles {
guchar digest[rspamd_cryptobox_HASHBYTES];
};
+
static struct rspamd_cached_shingles *
fuzzy_cmd_get_cached (struct fuzzy_rule *rule,
- rspamd_mempool_t *pool,
- gpointer p)
+ struct rspamd_task *task,
+ struct rspamd_mime_part *mp)
{
gchar key[32];
gint key_part;
+ struct rspamd_cached_shingles **cached;
memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
- rspamd_snprintf (key, sizeof (key), "%p%s%d", p, rule->algorithm_str,
+ rspamd_snprintf (key, sizeof (key), "%s%d", rule->algorithm_str,
key_part);
- return rspamd_mempool_get_variable (pool, key);
+ cached = (struct rspamd_cached_shingles **)rspamd_mempool_get_variable (
+ task->task_pool, key);
+
+ if (cached && cached[mp->part_number]) {
+ return cached[mp->part_number];
+ }
+
+ return NULL;
}
static void
fuzzy_cmd_set_cached (struct fuzzy_rule *rule,
- rspamd_mempool_t *pool,
- gpointer p,
- struct rspamd_cached_shingles *data)
+ struct rspamd_task *task,
+ struct rspamd_mime_part *mp,
+ struct rspamd_cached_shingles *data)
{
gchar key[32];
gint key_part;
+ struct rspamd_cached_shingles **cached;
memcpy (&key_part, rule->shingles_key->str, sizeof (key_part));
- rspamd_snprintf (key, sizeof (key), "%p%s%d", p, rule->algorithm_str,
+ rspamd_snprintf (key, sizeof (key), "%s%d", rule->algorithm_str,
key_part);
- /* Key is copied */
- rspamd_mempool_set_variable (pool, key, data, NULL);
+
+ cached = (struct rspamd_cached_shingles **)rspamd_mempool_get_variable (
+ task->task_pool, key);
+
+ if (cached) {
+ cached[mp->part_number] = data;
+ }
+ else {
+ cached = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cached) *
+ (MESSAGE_FIELD (task, parts)->len + 1));
+ cached[mp->part_number] = data;
+
+ rspamd_mempool_set_variable (task->task_pool, key, cached, NULL);
+ }
+
+
}
static gboolean
@@ -1447,7 +1471,7 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
GArray *words;
struct fuzzy_cmd_io *io;
- cached = fuzzy_cmd_get_cached (rule, pool, mp);
+ cached = fuzzy_cmd_get_cached (rule, task, mp);
if (cached) {
/* Copy cached */
@@ -1538,7 +1562,7 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
* Since it is copied when obtained from the cache, it is safe to use
* it this way.
*/
- fuzzy_cmd_set_cached (rule, pool, mp, cached);
+ fuzzy_cmd_set_cached (rule, task, mp, cached);
}
io = rspamd_mempool_alloc (pool, sizeof (*io));
@@ -1601,12 +1625,13 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
return io;
}
+#if 0
static struct fuzzy_cmd_io *
fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
int c,
gint flag,
guint32 weight,
- rspamd_mempool_t *pool,
+ struct rspamd_task *task,
struct rspamd_image *img,
struct rspamd_mime_part *mp)
{
@@ -1616,11 +1641,11 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
struct rspamd_shingle *sh;
struct rspamd_cached_shingles *cached;
- cached = fuzzy_cmd_get_cached (rule, pool, mp);
+ cached = fuzzy_cmd_get_cached (rule, task, mp);
if (cached) {
/* Copy cached */
- encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+ encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
shcmd = &encshcmd->cmd;
memcpy (&shcmd->sgl, cached->sh, sizeof (struct rspamd_shingle));
memcpy (shcmd->basic.digest, cached->digest,
@@ -1628,14 +1653,14 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
shcmd->basic.shingles_count = RSPAMD_SHINGLE_SIZE;
}
else {
- encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+ encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
shcmd = &encshcmd->cmd;
/*
* Generate shingles
*/
sh = rspamd_shingles_from_image (img->dct,
- rule->shingles_key->str, pool,
+ rule->shingles_key->str, task->task_pool,
rspamd_shingles_default_filter, NULL,
rule->alg);
if (sh != NULL) {
@@ -1652,7 +1677,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
(const guchar *)img->dct, RSPAMD_DCT_LEN / NBBY,
rule->hash_key->str, rule->hash_key->len);
- msg_debug_pool ("loading shingles of type %s with key %*xs",
+ msg_debug_task ("loading shingles of type %s with key %*xs",
rule->algorithm_str,
16, rule->shingles_key->str);
@@ -1663,10 +1688,10 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
* Since it is copied when obtained from the cache, it is safe to use
* it this way.
*/
- cached = rspamd_mempool_alloc (pool, sizeof (*cached));
+ cached = rspamd_mempool_alloc (task->task_pool, sizeof (*cached));
cached->sh = sh;
memcpy (cached->digest, shcmd->basic.digest, sizeof (cached->digest));
- fuzzy_cmd_set_cached (rule, pool, mp, cached);
+ fuzzy_cmd_set_cached (rule, task, mp, cached);
}
shcmd->basic.tag = ottery_rand_uint32 ();
@@ -1678,7 +1703,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
shcmd->basic.value = weight;
}
- io = rspamd_mempool_alloc (pool, sizeof (*io));
+ io = rspamd_mempool_alloc (task->task_pool, sizeof (*io));
io->part = mp;
io->tag = shcmd->basic.tag;
io->flags = FUZZY_CMD_FLAG_IMAGE;
@@ -1697,6 +1722,7 @@ fuzzy_cmd_from_image_part (struct fuzzy_rule *rule,
return io;
}
+#endif
static struct fuzzy_cmd_io *
fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
More information about the Commits
mailing list