commit 0d99318: [Project] Add bayes learn script

Vsevolod Stakhov vsevolod at rspamd.com
Mon Jul 29 17:49:56 UTC 2024


Author: Vsevolod Stakhov
Date: 2023-12-07 15:01:11 +0000
URL: https://github.com/rspamd/rspamd/commit/0d993187c1b1b37cfd99d3212745927eea0bff7a

[Project] Add bayes learn script

---
 lualib/lua_bayes_redis.lua           | 15 +++++++++++++--
 lualib/redis_scripts/bayes_learn.lua | 25 +++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/lualib/lua_bayes_redis.lua b/lualib/lua_bayes_redis.lua
index 575beff4b..2286295d5 100644
--- a/lualib/lua_bayes_redis.lua
+++ b/lualib/lua_bayes_redis.lua
@@ -42,8 +42,19 @@ local function gen_classify_functor(redis_params, classify_script_id)
 end
 
 local function gen_learn_functor(redis_params, learn_script_id)
-  return function(task, expanded_key, id, is_spam, stat_tokens, callback)
-    -- TODO: write this function
+  return function(task, expanded_key, id, is_spam, symbol, is_unlearn, stat_tokens, callback)
+    local function learn_redis_cb(err, data) -- reports the script outcome through `callback`
+      lua_util.debugm(N, task, 'learn redis cb: %s, %s', err, data)
+      if err then
+        callback(task, false, err)
+      else
+        callback(task, true)
+      end
+    end
+    -- Redis script arguments are strings, so booleans are sent as 'true'/'false'
+    lua_redis.exec_redis_script(learn_script_id,
+        { task = task, is_write = true, key = expanded_key }, -- learning modifies keys
+        learn_redis_cb, { expanded_key, tostring(is_spam), symbol, tostring(is_unlearn), stat_tokens })
   end
 end
 
diff --git a/lualib/redis_scripts/bayes_learn.lua b/lualib/redis_scripts/bayes_learn.lua
new file mode 100644
index 000000000..2b74fcca9
--- /dev/null
+++ b/lualib/redis_scripts/bayes_learn.lua
@@ -0,0 +1,25 @@
+-- Lua script to perform bayes learning
+-- This script accepts the following parameters:
+-- key1 - prefix for bayes tokens (e.g. for per-user classification)
+-- key2 - boolean is_spam, passed as the string 'true' or 'false'
+-- key3 - string symbol
+-- key4 - boolean is_unlearn, passed as the string 'true' or 'false'
+-- key5 - set of tokens encoded in messagepack array of int64_t
+
+local prefix = KEYS[1]
+local is_spam = KEYS[2] == 'true' -- KEYS are strings, and any string (even 'false') is truthy
+local symbol = KEYS[3]
+local is_unlearn = KEYS[4] == 'true'
+local input_tokens = cmsgpack.unpack(KEYS[5])
+
+local prefix_underscore = prefix .. '_'
+local hash_key = is_spam and 'S' or 'H'
+local learned_key = is_spam and 'learns_spam' or 'learns_ham'
+local delta = is_unlearn and -1 or 1 -- unlearning reverts a previous learn
+
+redis.call('SADD', symbol .. '_keys', prefix)
+redis.call('HSET', prefix, 'version', '2') -- new schema
+redis.call('HINCRBY', prefix, learned_key, delta) -- increase or decrease learned count
+for _, token in ipairs(input_tokens) do
+  redis.call('HINCRBY', prefix_underscore .. tostring(token), hash_key, delta)
+end
\ No newline at end of file


More information about the Commits mailing list