commit b54a829: [Feature] Implement content hashes

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Jan 22 12:21:09 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-01-22 12:16:05 +0000
URL: https://github.com/rspamd/rspamd/commit/b54a829032d8685e19e8bc70e0110c459d330b90 (HEAD -> master)

[Feature] Implement content hashes

---
 lualib/lua_fuzzy.lua      |   9 ++++
 src/plugins/fuzzy_check.c | 129 ++++++++++++++++++++++++++++++++++++----------
 2 files changed, 112 insertions(+), 26 deletions(-)

diff --git a/lualib/lua_fuzzy.lua b/lualib/lua_fuzzy.lua
index ea74b4131..fdae76e62 100644
--- a/lualib/lua_fuzzy.lua
+++ b/lualib/lua_fuzzy.lua
@@ -307,6 +307,15 @@ exports.check_mime_part = function(task, part, rule_id)
     return true,false
   end
 
+  if part:is_specific() then
+    local sp = part:get_specific()
+
+    if type(sp) == 'table' and sp.fuzzy_hashes then
+      lua_util.debugm(N, task, 'check specific part %s', part:get_id())
+      return true,false
+    end
+  end
+
   if part:is_attachment() then
     return mime_types_check(task, part, rule)
   end
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index e8f02652d..c7bd0e6e9 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -112,6 +112,7 @@ struct fuzzy_ctx {
 enum fuzzy_result_type {
 	FUZZY_RESULT_TXT,
 	FUZZY_RESULT_IMG,
+	FUZZY_RESULT_CONTENT,
 	FUZZY_RESULT_BIN
 };
 
@@ -155,6 +156,7 @@ struct fuzzy_learn_session {
 #define FUZZY_CMD_FLAG_REPLIED (1 << 0)
 #define FUZZY_CMD_FLAG_SENT (1 << 1)
 #define FUZZY_CMD_FLAG_IMAGE (1 << 2)
+#define FUZZY_CMD_FLAG_CONTENT (1 << 3)
 
 #define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0)
 #define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
@@ -1943,24 +1945,32 @@ fuzzy_insert_result (struct fuzzy_client_session *session,
 
 	nval = fuzzy_normalize (rep->v1.value, weight);
 
-	if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) {
-		if (!io->part || io->part->parsed_data.len <= short_image_limit) {
-			nval *= rspamd_normalize_probability (rep->v1.prob, 0.5);
-		}
-
-		type = "img";
-		res->type = FUZZY_RESULT_IMG;
-	}
-	else {
-		/* Calc real probability */
-		nval *= sqrtf (rep->v1.prob);
+	if (io) {
+		if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
+			if (!io->part || io->part->parsed_data.len <= short_image_limit) {
+				nval *= rspamd_normalize_probability (rep->v1.prob, 0.5);
+			}
 
-		if (cmd->shingles_count > 0) {
-			type = "txt";
-			res->type = FUZZY_RESULT_TXT;
+			type = "img";
+			res->type = FUZZY_RESULT_IMG;
 		}
 		else {
-			res->type = FUZZY_RESULT_BIN;
+			/* Calc real probability */
+			nval *= sqrtf (rep->v1.prob);
+
+			if (cmd->shingles_count > 0) {
+				type = "txt";
+				res->type = FUZZY_RESULT_TXT;
+			}
+			else {
+				if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
+					type = "content";
+					res->type = FUZZY_RESULT_CONTENT;
+				}
+				else {
+					res->type = FUZZY_RESULT_BIN;
+				}
+			}
 		}
 	}
 
@@ -2484,18 +2494,23 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg)
 
 				ftype = "bin";
 
-				if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) {
-					ftype = "img";
-				}
-				else if (cmd->shingles_count > 0) {
-					ftype = "txt";
-				}
+				if (io) {
+					if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
+						ftype = "img";
+					}
+					else if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
+						ftype = "content";
+					}
+					else if (cmd->shingles_count > 0) {
+						ftype = "txt";
+					}
 
-				if (io->cmd.cmd == FUZZY_WRITE) {
-					op = "added";
-				}
-				else if (io->cmd.cmd == FUZZY_DEL) {
-					op = "deleted";
+					if (io->cmd.cmd == FUZZY_WRITE) {
+						op = "added";
+					}
+					else if (io->cmd.cmd == FUZZY_DEL) {
+						op = "deleted";
+					}
 				}
 
 				if (rep->v1.prob > 0.5) {
@@ -2741,6 +2756,68 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
 							mime_part);
 					io->flags |= FUZZY_CMD_FLAG_IMAGE;
 				}
+				else if (mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
+					const struct rspamd_lua_specific_part *lua_spec;
+
+					lua_spec = &mime_part->specific.lua_specific;
+
+					if (lua_spec->type == RSPAMD_LUA_PART_TABLE) {
+						lua_State *L = (lua_State *)task->cfg->lua_state;
+						gint old_top;
+
+						old_top = lua_gettop (L);
+						/* Push table */
+						lua_rawgeti (L, LUA_REGISTRYINDEX, lua_spec->cbref);
+						lua_pushstring (L, "fuzzy_hashes");
+						lua_gettable (L, -2);
+
+						if (lua_type (L, -1) == LUA_TTABLE) {
+
+							for (lua_pushnil (L); lua_next (L, 2); lua_pop (L, 1)) {
+								const gchar *h = NULL;
+								gsize hlen = 0;
+
+								if (lua_isstring (L, -1)) {
+									h = lua_tolstring (L, -1, &hlen);
+								}
+								else if (lua_type (L, -1) == LUA_TUSERDATA) {
+									struct rspamd_lua_text *t;
+
+									t = lua_check_text (L, -1);
+
+									if (t) {
+										h = t->start;
+										hlen = t->len;
+									}
+ 								}
+
+								if (hlen == rspamd_cryptobox_HASHBYTES) {
+									io = fuzzy_cmd_from_data_part (rule, c,
+											flag, value,
+											task->task_pool,
+											(guchar *)h,
+											mime_part);
+
+									if (io) {
+										io->flags |= FUZZY_CMD_FLAG_CONTENT;
+										g_ptr_array_add (res, io);
+									}
+								}
+							}
+						}
+
+						lua_settop (L, old_top);
+
+						/*
+						 * Add part itself as well
+						 */
+						io = fuzzy_cmd_from_data_part (rule, c,
+								flag, value,
+								task->task_pool,
+								mime_part->digest,
+								mime_part);
+					}
+				}
 				else {
 					io = fuzzy_cmd_from_data_part (rule, c, flag, value,
 							task->task_pool,


More information about the Commits mailing list