commit cac6696: [Feature] Add controller endpoint to get fuzzy hashes from messages

Vsevolod Stakhov vsevolod at rspamd.com
Sat May 20 14:28:03 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-05-20 15:22:43 +0100
URL: https://github.com/rspamd/rspamd/commit/cac66961924d22e2454db745ccef118a08ea6184

[Feature] Add controller endpoint to get fuzzy hashes from messages
Sample usage:

```
curl -XPOST 'http://localhost:11334/plugins/fuzzy/hashes?flag=1' --data-binary '@-' < file
```

Sample output:
```json
{
  "hashes": {
    "local": [
      "24b6e7de2f489778d828c827079c48bacb086f816d0a7acabbe42e8d0da703b89b913176ad67eefaf5b54fa59f5e0ecfc7015846c4043fcfb0c7a4ed7a235025",
      "72789777cbec926f4143de4c08c87acc3fbf3b909b5c39f1edcf82ed12e2d8bc2f56be8d68ee681feccf44ca04e3eca5b8ec039cb84a0d40e22258c370a10cbb"
    ],
    "rspamd.com": [
      "24b6e7de2f489778d828c827079c48bacb086f816d0a7acabbe42e8d0da703b89b913176ad67eefaf5b54fa59f5e0ecfc7015846c4043fcfb0c7a4ed7a235025",
      "72789777cbec926f4143de4c08c87acc3fbf3b909b5c39f1edcf82ed12e2d8bc2f56be8d68ee681feccf44ca04e3eca5b8ec039cb84a0d40e22258c370a10cbb"
    ]
  },
  "success": true
}
```

Issue: #4489

---
 rules/controller/fuzzy.lua |  46 ++++++++++++++++++++
 rules/controller/init.lua  |   1 +
 src/plugins/fuzzy_check.c  | 105 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 152 insertions(+)

diff --git a/rules/controller/fuzzy.lua b/rules/controller/fuzzy.lua
new file mode 100644
index 000000000..7e4c96fe1
--- /dev/null
+++ b/rules/controller/fuzzy.lua
@@ -0,0 +1,46 @@
+--[[
+Copyright (c) 2023, Vsevolod Stakhov <vsevolod at rspamd.com>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+-- Controller handler: reply with hex fuzzy hashes for `rule` (symbol) or `flag` (number).
+local function handle_gen_fuzzy(task, conn, req_params)
+  if type(rspamd_plugins.fuzzy_check) ~= 'table' then
+    conn:send_error(404, 'fuzzy_check is not enabled')
+    return
+  end
+  -- A non-numeric `flag` yields nil here and is rejected the same way as a missing one
+  local selector = req_params.rule or tonumber(req_params.flag)
+  if not selector then
+    conn:send_error(404, 'missing rule or flag')
+    return
+  end
+
+  task:process_message() -- process the message first: hex_hashes reads the task's parts
+  local ret, hashes = pcall(rspamd_plugins.fuzzy_check.hex_hashes, task, selector)
+  if ret then
+    conn:send_ucl({success = true, hashes = hashes})
+  else
+    -- On failure pcall returns the raised error (e.g. "bad flag") in place of the result
+    conn:send_error(500, 'cannot generate hashes: ' .. tostring(hashes))
+  end
+end
+
+return {
+  hashes = { -- endpoint name; init.lua loads this file under the `fuzzy` key
+    handler = handle_gen_fuzzy,
+    need_task = true, -- presumably: the request body is turned into a task for the handler; TODO confirm
+    enable = false -- NOTE(review): looks like "no privileged (enable) access required"; confirm
+  },
+}
\ No newline at end of file
diff --git a/rules/controller/init.lua b/rules/controller/init.lua
index 90bb137c4..9d60200f8 100644
--- a/rules/controller/init.lua
+++ b/rules/controller/init.lua
@@ -28,6 +28,7 @@ local controller_plugin_paths = {
   maps = dofile(local_rules .. "/controller/maps.lua"),
   neural = dofile(local_rules .. "/controller/neural.lua"),
   selectors = dofile(local_rules .. "/controller/selectors.lua"),
+  fuzzy = dofile(local_rules .. "/controller/fuzzy.lua"),
 }
 
 if rspamd_util.file_exists(local_conf .. '/controller.lua') then
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index 842094c54..fd8e56cce 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -194,6 +194,7 @@ static gint fuzzy_attach_controller (struct module_ctx *ctx,
 static gint fuzzy_lua_learn_handler (lua_State *L);
 static gint fuzzy_lua_unlearn_handler (lua_State *L);
 static gint fuzzy_lua_gen_hashes_handler (lua_State *L);
+static gint fuzzy_lua_hex_hashes_handler (lua_State *L);
 
 module_t fuzzy_check_module = {
 		"fuzzy_check",
@@ -1217,6 +1218,9 @@ fuzzy_check_module_config (struct rspamd_config *cfg, bool validate)
 		lua_pushstring (L, "gen_hashes");
 		lua_pushcfunction (L, fuzzy_lua_gen_hashes_handler);
 		lua_settable (L, -3);
+		lua_pushstring (L, "hex_hashes");
+		lua_pushcfunction (L, fuzzy_lua_hex_hashes_handler);
+		lua_settable (L, -3);
 		/* Finish fuzzy_check key */
 		lua_settable (L, -3);
 	}
@@ -4058,6 +4062,107 @@ fuzzy_lua_gen_hashes_handler (lua_State *L)
 	return 1;
 }
 
+/*
+ * Lua: hex_hashes(task, flag|symbol) -> { [rule name] = { hex digest, ... } }
+ * Raises a Lua error if the task is invalid or no non-zero flag can be resolved.
+ */
+static gint
+fuzzy_lua_hex_hashes_handler (lua_State *L)
+{
+	struct rspamd_task *task = lua_check_task (L, 1);
+
+	if (task == NULL) {
+		return luaL_error(L, "invalid arguments");
+	}
+
+	guint flag = 0, weight = 1, send_flags = 0;
+	const gchar *symbol;
+	struct fuzzy_ctx *fuzzy_module_ctx = fuzzy_get_context (task->cfg);
+	struct fuzzy_rule *rule;
+	GPtrArray *commands;
+	gint i;
+
+	if (lua_type (L, 2) == LUA_TNUMBER) {
+		flag = lua_tonumber (L, 2);
+	}
+	else if (lua_type (L, 2) == LUA_TSTRING) {
+		/* Symbol name given: take the flag of the first mapping with that symbol */
+		GHashTableIter it;
+		gpointer k, v;
+		struct fuzzy_mapping *map;
+
+		symbol = lua_tostring (L, 2);
+
+		PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
+			if (flag != 0) {
+				break;
+			}
+
+			g_hash_table_iter_init (&it, rule->mappings);
+
+			while (g_hash_table_iter_next (&it, &k, &v)) {
+				map = v;
+
+				if (g_ascii_strcasecmp (symbol, map->symbol) == 0) {
+					flag = map->fuzzy_flag;
+					break;
+				}
+			}
+		}
+	}
+
+	if (flag == 0) {
+		return luaL_error (L, "bad flag");
+	}
+
+	lua_createtable (L, 0, fuzzy_module_ctx->fuzzy_rules->len);
+
+	PTR_ARRAY_FOREACH (fuzzy_module_ctx->fuzzy_rules, i, rule) {
+		/* Check for flag */
+		if (g_hash_table_lookup (rule->mappings,
+			GINT_TO_POINTER (flag)) == NULL) {
+			msg_debug_task ("skip rule %s as it has no flag %d defined", rule->name, flag);
+			continue;
+		}
+
+		commands = fuzzy_generate_commands (task, rule, FUZZY_CHECK, flag,
+			weight, send_flags);
+
+		lua_pushstring (L, rule->name);
+
+		if (commands != NULL) {
+			lua_createtable (L, commands->len, 0);
+			/* All commands are cached now: read each part's cached digest as hex */
+			struct rspamd_mime_part *mp;
+			gint j, part_idx = 1;
+
+			PTR_ARRAY_FOREACH(MESSAGE_FIELD(task, parts), j, mp) {
+				struct rspamd_cached_shingles *cached;
+
+				cached = fuzzy_cmd_get_cached(rule, task, mp);
+
+				if (cached) {
+					gchar hexbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
+					gint r = rspamd_encode_hex_buf (cached->digest, sizeof(cached->digest), hexbuf,
+							sizeof (hexbuf));
+					lua_pushlstring (L, hexbuf, r);
+					lua_rawseti(L, -2, part_idx++);
+				}
+			}
+
+			g_ptr_array_free (commands, TRUE);
+		}
+		else {
+			lua_pushnil(L);
+		}
+
+		/* res[rule->name] = {hex_hash1, ..., hex_hashn} */
+		lua_settable(L, -3);
+	}
+
+	return 1;
+}
+
 static gboolean
 fuzzy_add_handler (struct rspamd_http_connection_entry *conn_ent,
 	struct rspamd_http_message *msg, struct module_ctx *ctx)


More information about the Commits mailing list