commit b54a829: [Feature] Implement content hashes
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Jan 22 12:21:09 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-01-22 12:16:05 +0000
URL: https://github.com/rspamd/rspamd/commit/b54a829032d8685e19e8bc70e0110c459d330b90 (HEAD -> master)
[Feature] Implement content hashes
---
lualib/lua_fuzzy.lua | 9 ++++
src/plugins/fuzzy_check.c | 129 ++++++++++++++++++++++++++++++++++++----------
2 files changed, 112 insertions(+), 26 deletions(-)
diff --git a/lualib/lua_fuzzy.lua b/lualib/lua_fuzzy.lua
index ea74b4131..fdae76e62 100644
--- a/lualib/lua_fuzzy.lua
+++ b/lualib/lua_fuzzy.lua
@@ -307,6 +307,15 @@ exports.check_mime_part = function(task, part, rule_id)
return true,false
end
+ if part:is_specific() then
+ local sp = part:get_specific()
+
+ if type(sp) == 'table' and sp.fuzzy_hashes then
+ lua_util.debugm(N, task, 'check specific part %s', part:get_id())
+ return true,false
+ end
+ end
+
if part:is_attachment() then
return mime_types_check(task, part, rule)
end
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index e8f02652d..c7bd0e6e9 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -112,6 +112,7 @@ struct fuzzy_ctx {
enum fuzzy_result_type {
FUZZY_RESULT_TXT,
FUZZY_RESULT_IMG,
+ FUZZY_RESULT_CONTENT,
FUZZY_RESULT_BIN
};
@@ -155,6 +156,7 @@ struct fuzzy_learn_session {
#define FUZZY_CMD_FLAG_REPLIED (1 << 0)
#define FUZZY_CMD_FLAG_SENT (1 << 1)
#define FUZZY_CMD_FLAG_IMAGE (1 << 2)
+#define FUZZY_CMD_FLAG_CONTENT (1 << 3)
#define FUZZY_CHECK_FLAG_NOIMAGES (1 << 0)
#define FUZZY_CHECK_FLAG_NOATTACHMENTS (1 << 1)
@@ -1943,24 +1945,32 @@ fuzzy_insert_result (struct fuzzy_client_session *session,
nval = fuzzy_normalize (rep->v1.value, weight);
- if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) {
- if (!io->part || io->part->parsed_data.len <= short_image_limit) {
- nval *= rspamd_normalize_probability (rep->v1.prob, 0.5);
- }
-
- type = "img";
- res->type = FUZZY_RESULT_IMG;
- }
- else {
- /* Calc real probability */
- nval *= sqrtf (rep->v1.prob);
+ if (io) {
+ if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
+ if (!io->part || io->part->parsed_data.len <= short_image_limit) {
+ nval *= rspamd_normalize_probability (rep->v1.prob, 0.5);
+ }
- if (cmd->shingles_count > 0) {
- type = "txt";
- res->type = FUZZY_RESULT_TXT;
+ type = "img";
+ res->type = FUZZY_RESULT_IMG;
}
else {
- res->type = FUZZY_RESULT_BIN;
+ /* Calc real probability */
+ nval *= sqrtf (rep->v1.prob);
+
+ if (cmd->shingles_count > 0) {
+ type = "txt";
+ res->type = FUZZY_RESULT_TXT;
+ }
+ else {
+ if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
+ type = "content";
+ res->type = FUZZY_RESULT_CONTENT;
+ }
+ else {
+ res->type = FUZZY_RESULT_BIN;
+ }
+ }
}
}
@@ -2484,18 +2494,23 @@ fuzzy_controller_io_callback (gint fd, short what, void *arg)
ftype = "bin";
- if (io && (io->flags & FUZZY_CMD_FLAG_IMAGE)) {
- ftype = "img";
- }
- else if (cmd->shingles_count > 0) {
- ftype = "txt";
- }
+ if (io) {
+ if ((io->flags & FUZZY_CMD_FLAG_IMAGE)) {
+ ftype = "img";
+ }
+ else if (io->flags & FUZZY_CMD_FLAG_CONTENT) {
+ ftype = "content";
+ }
+ else if (cmd->shingles_count > 0) {
+ ftype = "txt";
+ }
- if (io->cmd.cmd == FUZZY_WRITE) {
- op = "added";
- }
- else if (io->cmd.cmd == FUZZY_DEL) {
- op = "deleted";
+ if (io->cmd.cmd == FUZZY_WRITE) {
+ op = "added";
+ }
+ else if (io->cmd.cmd == FUZZY_DEL) {
+ op = "deleted";
+ }
}
if (rep->v1.prob > 0.5) {
@@ -2741,6 +2756,68 @@ fuzzy_generate_commands (struct rspamd_task *task, struct fuzzy_rule *rule,
mime_part);
io->flags |= FUZZY_CMD_FLAG_IMAGE;
}
+ else if (mime_part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
+ const struct rspamd_lua_specific_part *lua_spec;
+
+ lua_spec = &mime_part->specific.lua_specific;
+
+ if (lua_spec->type == RSPAMD_LUA_PART_TABLE) {
+ lua_State *L = (lua_State *)task->cfg->lua_state;
+ gint old_top;
+
+ old_top = lua_gettop (L);
+ /* Push table */
+ lua_rawgeti (L, LUA_REGISTRYINDEX, lua_spec->cbref);
+ lua_pushstring (L, "fuzzy_hashes");
+ lua_gettable (L, -2);
+
+ if (lua_type (L, -1) == LUA_TTABLE) {
+
+ for (lua_pushnil (L); lua_next (L, 2); lua_pop (L, 1)) {
+ const gchar *h = NULL;
+ gsize hlen = 0;
+
+ if (lua_isstring (L, -1)) {
+ h = lua_tolstring (L, -1, &hlen);
+ }
+ else if (lua_type (L, -1) == LUA_TUSERDATA) {
+ struct rspamd_lua_text *t;
+
+ t = lua_check_text (L, -1);
+
+ if (t) {
+ h = t->start;
+ hlen = t->len;
+ }
+ }
+
+ if (hlen == rspamd_cryptobox_HASHBYTES) {
+ io = fuzzy_cmd_from_data_part (rule, c,
+ flag, value,
+ task->task_pool,
+ (guchar *)h,
+ mime_part);
+
+ if (io) {
+ io->flags |= FUZZY_CMD_FLAG_CONTENT;
+ g_ptr_array_add (res, io);
+ }
+ }
+ }
+ }
+
+ lua_settop (L, old_top);
+
+ /*
+ * Add part itself as well
+ */
+ io = fuzzy_cmd_from_data_part (rule, c,
+ flag, value,
+ task->task_pool,
+ mime_part->digest,
+ mime_part);
+ }
+ }
else {
io = fuzzy_cmd_from_data_part (rule, c, flag, value,
task->task_pool,
More information about the Commits mailing list