commit 2511a18: [Project] Add methods to append data to fuzzy requests

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jul 7 21:28:13 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-07-07 16:56:02 +0100
URL: https://github.com/rspamd/rspamd/commit/2511a18378e2ffdf4b59c5537e585291a98db182

[Project] Add methods to append data to fuzzy requests

---
 src/plugins/fuzzy_check.c | 144 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 125 insertions(+), 19 deletions(-)

diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index c12a127c5..8798c0aca 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -84,6 +84,7 @@ struct fuzzy_rule {
 	double max_score;
 	gboolean read_only;
 	gboolean skip_unknown;
+	gboolean no_share;
 	gint learn_condition_cb;
 	struct rspamd_hash_map_helper *skip_map;
 	struct fuzzy_ctx *ctx;
@@ -432,6 +433,10 @@ fuzzy_parse_rule (struct rspamd_config *cfg, const ucl_object_t *obj,
 		rule->skip_unknown = ucl_obj_toboolean (value);
 	}
 
+	if ((value = ucl_object_lookup (obj, "no_share")) != NULL) {
+		rule->no_share = ucl_obj_toboolean (value);
+	}
+
 	if ((value = ucl_object_lookup (obj, "algorithm")) != NULL) {
 		rule->algorithm_str = ucl_object_tostring (value);
 
@@ -1461,6 +1466,109 @@ fuzzy_rule_check_mimepart (struct rspamd_task *task,
 	return FALSE;
 }
 
+#define MAX_FUZZY_DOMAIN 64
+
+/* Bytes needed for the optional extensions (From domain, source IP); 0 if no_share */
+static guint
+fuzzy_cmd_extension_length (struct rspamd_task *task,
+							struct fuzzy_rule *rule)
+{
+	guint total = 0;
+
+	if (rule->no_share) {
+		return 0;
+	}
+
+	/* From domain: type byte + length byte + at most MAX_FUZZY_DOMAIN bytes */
+	if (MESSAGE_FIELD (task, from_mime) && MESSAGE_FIELD (task, from_mime)->len > 0) {
+		struct rspamd_email_address *addr = g_ptr_array_index (MESSAGE_FIELD (task,
+				from_mime), 0);
+		if (addr->domain_len > 0) {
+			total += 2 + MIN (MAX_FUZZY_DOMAIN, addr->domain_len);
+		}
+	}
+
+	/* Source IP: type byte + raw address. NOTE(review): from_addr assumed nullable */
+	if (task->from_addr && rspamd_inet_address_get_af (task->from_addr) == AF_INET) {
+		total += sizeof (struct in_addr) + 1;
+	}
+	else if (task->from_addr && rspamd_inet_address_get_af (task->from_addr) == AF_INET6) {
+		total += sizeof (struct in6_addr) + 1;
+	}
+
+	return total;
+}
+
+/*
+ * Write the optional extensions (From domain, source IP) into dest.
+ * Never writes more than available bytes; an item that does not fit is skipped.
+ * Skips an empty domain, matching fuzzy_cmd_extension_length (); returns bytes written.
+ */
+static guint
+fuzzy_cmd_write_extensions (struct rspamd_task *task,
+							struct fuzzy_rule *rule,
+							guchar *dest,
+							gsize available)
+{
+	guint written = 0;
+
+	if (rule->no_share) {
+		return 0;
+	}
+
+	/* From domain: type byte, length byte, then the domain bytes */
+	if (MESSAGE_FIELD (task, from_mime) && MESSAGE_FIELD (task, from_mime)->len > 0) {
+		struct rspamd_email_address *addr = g_ptr_array_index (MESSAGE_FIELD (task,
+				from_mime), 0);
+		guint dlen = MIN (MAX_FUZZY_DOMAIN, addr->domain_len);
+		guint to_write = dlen + 2;
+
+		/* to_write is never 0, so test the domain length itself */
+		if (dlen > 0 && to_write <= available) {
+			*dest++ = RSPAMD_FUZZY_EXT_SOURCE_DOMAIN;
+			*dest++ = dlen;
+
+			/* Over-long domains are trimmed from the left: keep the last dlen bytes */
+			memcpy (dest, addr->domain + (addr->domain_len - dlen), dlen);
+			dest += dlen;
+
+			available -= to_write;
+			written += to_write;
+		}
+	}
+
+	/* Source IP: type byte + raw address. NOTE(review): from_addr assumed nullable */
+	if (task->from_addr != NULL) {
+		gint af = rspamd_inet_address_get_af (task->from_addr);
+		gsize need = 0;
+		guchar type = 0;
+
+		if (af == AF_INET) {
+			type = RSPAMD_FUZZY_EXT_SOURCE_IP4;
+			need = sizeof (struct in_addr) + 1;
+		}
+		else if (af == AF_INET6) {
+			type = RSPAMD_FUZZY_EXT_SOURCE_IP6;
+			need = sizeof (struct in6_addr) + 1;
+		}
+
+		/* Any other address family leaves need at 0 and is skipped */
+		if (need > 0 && available >= need) {
+			guint klen;
+			guchar *inet_data = rspamd_inet_address_get_hash_key (task->from_addr, &klen);
+
+			*dest++ = type;
+			memcpy (dest, inet_data, klen);
+			dest += klen;
+
+			available -= klen + 1;
+			written += klen + 1;
+		}
+	}
+
+	return written;
+}
+
 /*
  * Create fuzzy command from a text part
  */
@@ -1471,7 +1579,6 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 						  gint flag,
 						  guint32 weight,
 						  gboolean short_text,
-						  rspamd_mempool_t *pool,
 						  struct rspamd_mime_text_part *part,
 						  struct rspamd_mime_part *mp)
 {
@@ -1492,14 +1599,14 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 	if (cached) {
 		/* Copy cached */
 		if (short_text) {
-			enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
+			enccmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*enccmd));
 			cmd = &enccmd->cmd;
 			memcpy (cmd->digest, cached->digest,
 					sizeof (cached->digest));
 			cmd->shingles_count = 0;
 		}
 		else if (cached->sh) {
-			encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+			encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
 			shcmd = &encshcmd->cmd;
 			memcpy (&shcmd->sgl, cached->sh, sizeof (struct rspamd_shingle));
 			memcpy (shcmd->basic.digest, cached->digest,
@@ -1511,10 +1618,10 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 		}
 	}
 	else {
-		cached = rspamd_mempool_alloc (pool, sizeof (*cached));
+		cached = rspamd_mempool_alloc (task->task_pool, sizeof (*cached));
 
 		if (short_text) {
-			enccmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+			enccmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
 			cmd = &enccmd->cmd;
 			rspamd_cryptobox_hash_init (&st, rule->hash_key->str,
 					rule->hash_key->len);
@@ -1533,14 +1640,14 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 			cached->sh = NULL;
 		}
 		else {
-			encshcmd = rspamd_mempool_alloc0 (pool, sizeof (*encshcmd));
+			encshcmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*encshcmd));
 			shcmd = &encshcmd->cmd;
 
 			/*
 			 * Generate hash from all words in the part
 			 */
 			rspamd_cryptobox_hash_init (&st, rule->hash_key->str, rule->hash_key->len);
-			words = fuzzy_preprocess_words (part, pool);
+			words = fuzzy_preprocess_words (part, task->task_pool);
 
 			for (i = 0; i < words->len; i ++) {
 				word = &g_array_index (words, rspamd_stat_token_t, i);
@@ -1555,11 +1662,11 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 			rspamd_cryptobox_hash_final (&st, shcmd->basic.digest);
 
 
-			msg_debug_pool ("loading shingles of type %s with key %*xs",
+			msg_debug_task ("loading shingles of type %s with key %*xs",
 					rule->algorithm_str,
 					16, rule->shingles_key->str);
 			sh = rspamd_shingles_from_text (words,
-					rule->shingles_key->str, pool,
+					rule->shingles_key->str, task->task_pool,
 					rspamd_shingles_default_filter, NULL,
 					rule->alg);
 			if (sh != NULL) {
@@ -1581,7 +1688,7 @@ fuzzy_cmd_from_text_part (struct rspamd_task *task,
 		fuzzy_cmd_set_cached (rule, task, mp, cached);
 	}
 
-	io = rspamd_mempool_alloc (pool, sizeof (*io));
+	io = rspamd_mempool_alloc (task->task_pool, sizeof (*io));
 	io->part = mp;
 
 	if (!short_text) {
@@ -1745,7 +1852,7 @@ fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
 						  int c,
 						  gint flag,
 						  guint32 weight,
-						  rspamd_mempool_t *pool,
+						  struct rspamd_task *task,
 						  guchar digest[rspamd_cryptobox_HASHBYTES],
 						  struct rspamd_mime_part *mp)
 {
@@ -1754,11 +1861,11 @@ fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
 	struct fuzzy_cmd_io *io;
 
 	if (rule->peer_key) {
-		enccmd = rspamd_mempool_alloc0 (pool, sizeof (*enccmd));
+		enccmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*enccmd));
 		cmd = &enccmd->cmd;
 	}
 	else {
-		cmd = rspamd_mempool_alloc0 (pool, sizeof (*cmd));
+		cmd = rspamd_mempool_alloc0 (task->task_pool, sizeof (*cmd));
 	}
 
 	cmd->cmd = c;
@@ -1771,7 +1878,7 @@ fuzzy_cmd_from_data_part (struct fuzzy_rule *rule,
 	cmd->tag = ottery_rand_uint32 ();
 	memcpy (cmd->digest, digest, sizeof (cmd->digest));
 
-	io = rspamd_mempool_alloc (pool, sizeof (*io));
+	io = rspamd_mempool_alloc (task->task_pool, sizeof (*io));
 	io->flags = 0;
 	io->tag = cmd->tag;
 	io->part = mp;
@@ -2795,7 +2902,6 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
 							flag,
 							value,
 							!fuzzy_check,
-							task->task_pool,
 							part,
 							mime_part);
 				}
@@ -2804,7 +2910,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
 					image = mime_part->specific.img;
 
 					io = fuzzy_cmd_from_data_part (rule, c, flag, value,
-							task->task_pool,
+							task,
 							image->parent->digest,
 							mime_part);
 					io->flags |= FUZZY_CMD_FLAG_IMAGE;
@@ -2847,7 +2953,7 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
 								if (hlen == rspamd_cryptobox_HASHBYTES) {
 									io = fuzzy_cmd_from_data_part (rule, c,
 											flag, value,
-											task->task_pool,
+											task,
 											(guchar *)h,
 											mime_part);
 
@@ -2866,14 +2972,14 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
 						 */
 						io = fuzzy_cmd_from_data_part (rule, c,
 								flag, value,
-								task->task_pool,
+								task,
 								mime_part->digest,
 								mime_part);
 					}
 				}
 				else {
 					io = fuzzy_cmd_from_data_part (rule, c, flag, value,
-							task->task_pool,
+							task,
 							mime_part->digest, mime_part);
 				}
 


More information about the Commits mailing list