commit 933c82f: [Project] Neural: Implement scoring
Vsevolod Stakhov
vsevolod at highsecure.ru
Sat Jul 6 19:56:07 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-07-06 12:41:20 +0100
URL: https://github.com/rspamd/rspamd/commit/933c82f6ed2f4450d3c0cfad7d35a9750918b74e
[Project] Neural: Implement scoring
---
src/plugins/lua/neural.lua | 169 ++++++++++++++++++++++-----------------------
1 file changed, 83 insertions(+), 86 deletions(-)
diff --git a/src/plugins/lua/neural.lua b/src/plugins/lua/neural.lua
index cca6f647c..fdb138321 100644
--- a/src/plugins/lua/neural.lua
+++ b/src/plugins/lua/neural.lua
@@ -50,6 +50,26 @@ local default_options = {
symbol_ham = 'NEURAL_HAM',
}
+-- Rule structure:
+-- * static config fields (see `default_options`)
+-- * prefix - name or defined prefix
+-- * settings - table of settings indexed by settings id, -1 is used when no settings defined
+
+-- Rule settings element defines elements for specific settings id:
+-- * symbols - static symbols profile (defined by config or extracted from symcache)
+-- * name - name of settings id
+-- * digest - digest of all symbols
+-- * ann - dynamic ANN configuration loaded from Redis
+-- * train - train data for ANN (e.g. the currently trained ANN)
+
+-- Settings ANN table is loaded from Redis and represents dynamic profile for ANN
+-- Some elements are directly stored in Redis, ANN is, in turn loaded dynamically
+-- * version - version of ANN loaded from redis
+-- * ann_key - name of ANN key in Redis
+-- * symbols - symbols in THIS PARTICULAR ANN (might be different from set.symbols)
+-- * distance - distance between set.symbols and set.ann.symbols
+-- * ann - kann object
+
local settings = {
rules = {},
prefix = 'rn', -- Neural network default prefix
@@ -212,30 +232,75 @@ local function load_scripts(params)
params)
end
+local function result_to_vector(task, profile)
+ if not profile.zeros then
+ -- Fill zeros vector
+ local zeros = {}
+ for i=1,meta_functions.count_metatokens() do
+ zeros[i] = 0.0
+ end
+ for _,_ in ipairs(profile.symbols) do
+ zeros[#zeros + 1] = 0.0
+ end
+ profile.zeros = zeros
+ end
+
+ local vec = lua_util.shallowcopy(profile.zeros)
+ local mt = meta_functions.rspamd_gen_metatokens(task)
+
+ for i,v in ipairs(mt) do
+ vec[i] = v
+ end
+
+ task:process_ann_tokens(profile.symbols, vec, #mt)
+
+ return vec
+end
local function ann_scores_filter(task)
for _,rule in pairs(settings.rules) do
- local id = '0'
- if rule.use_settings then
- local sid = task:get_settings_id()
- if sid then
- id = tostring(sid)
- end
- end
- if rule.per_user then
- local r = task:get_principal_recipient()
- id = id .. r
+ local sid = task:get_settings_id()
+ local ann
+ local profile
+
+ if sid then
+ if rule.settings[sid] then
+ local set = rule.settings[sid]
+
+ if set.ann then
+ ann = set.ann.ann
+ profile = set.ann
+ else
+ lua_util.debugm(N, task, 'no ann loaded for %s:%s',
+ rule.prefix, set.name)
+ end
+ else
+ lua_util.debugm(N, task, 'no ann defined in %s for settings id %s',
+ rule.prefix, sid)
+ end
+ else
+ if rule.settings[-1] then
+ local set = rule.settings[-1]
+
+ if set.ann then
+ ann = set.ann.ann
+ profile = set.ann
+ else
+ lua_util.debugm(N, task, 'no ann loaded for %s:%s',
+ rule.prefix, set.name)
+ end
+ else
+ lua_util.debugm(N, task, 'no default ann for rule %s',
+ rule.prefix)
+ end
end
- if rule.anns[id] and rule.anns[id].ann then
- local ann_data = task:get_symbols_tokens()
- local mt = meta_functions.rspamd_gen_metatokens(task)
- -- Add filtered meta tokens
- fun.each(function(e) table.insert(ann_data, e) end, mt)
+ if ann then
+ local vec = result_to_vector(task, profile)
local score
- local out = rule.anns[id].ann:apply1(ann_data)
+ local out = ann:apply1(vec)
score = out[1]
local symscore = string.format('%.3f', score)
@@ -262,76 +327,6 @@ local function create_ann(n, nlayers)
return rspamd_kann.new.kann(t)
end
-local function create_train_ann(rule, n, id)
- local prefix = gen_ann_prefix(rule, id)
- if not rule.anns[id] then
- rule.anns[id] = {}
- end
- -- Fix that for flexibe layers number
- if rule.anns[id].ann then
- if not is_ann_valid(rule, prefix, rule.anns[id].ann) then
- rule.anns[id].ann_train = create_ann(n, rule.nlayers)
- rule.anns[id].ann = nil
- rspamd_logger.infox(rspamd_config, 'invalidate existing ANN, create train ANN %s', prefix)
- elseif rule.train.max_usages > 0 and
- rule.anns[id].version % rule.train.max_usages == 0 then
- -- Forget last ann
- rspamd_logger.infox(rspamd_config, 'recreate ANN %s, version %s', prefix,
- rule.anns[id].version)
- rule.anns[id].ann_train = create_ann(n, rule.nlayers)
- else
- rule.anns[id].ann_train = rule.anns[id].ann
- rspamd_logger.infox(rspamd_config, 'reuse ANN for training %s', prefix)
- end
- else
- rule.anns[id].ann_train = create_ann(n, rule.nlayers)
- rspamd_logger.infox(rspamd_config, 'create train ANN %s', prefix)
- rule.anns[id].version = 0
- end
-end
-
-local function load_or_invalidate_ann(rule, data, id, ev_base)
- local ver = data[2]
- local prefix = gen_ann_prefix(rule, id)
-
- if not ver or not tonumber(ver) then
- rspamd_logger.errx(rspamd_config, 'cannot get version for ANN: %s', prefix)
- return
- end
-
- local err,ann_data = rspamd_util.zstd_decompress(data[1])
- local ann
-
- if err or not ann_data then
- rspamd_logger.errx(rspamd_config, 'cannot decompress ANN %s: %s', prefix, err)
- return
- else
- ann = rspamd_kann.load(ann_data)
- end
-
- if is_ann_valid(rule, prefix, ann) then
- if not rule.anns[id] then rule.anns[id] = {} end
- rule.anns[id].ann = ann
- rspamd_logger.infox(rspamd_config, 'loaded ANN %s version %s from redis',
- prefix, ver)
- rule.anns[id].version = tonumber(ver)
- else
- local function redis_invalidate_cb(_err, _data)
- if _err then
- rspamd_logger.errx(rspamd_config, 'cannot invalidate ANN %s from redis: %s', prefix, _err)
- elseif type(_data) == 'string' then
- rspamd_logger.infox(rspamd_config, 'invalidated ANN %s from redis: %s', prefix, _err)
- rule.anns[id].version = 0
- end
- end
- -- Invalidate ANN
- rspamd_logger.infox(rspamd_config, 'invalidate ANN %s', prefix)
- lua_redis.exec_redis_script(redis_maybe_invalidate_id,
- {ev_base = ev_base, is_write = true},
- redis_invalidate_cb,
- {prefix})
- end
-end
local function ann_train_callback(rule, task, score, required_score, id)
local train_opts = rule['train']
@@ -901,6 +896,7 @@ local function cleanup_anns(rule, cfg, ev_base)
end
end
end
+
lua_redis.exec_redis_script(redis_maybe_invalidate_id,
{ev_base = ev_base, is_write = true},
invalidate_cb,
@@ -1095,6 +1091,7 @@ for k,rule in pairs(settings.rules) do
-- We also want to train neural nets when they have enough data
rspamd_config:add_periodic(ev_base, 0.0,
function(_, _)
+ -- Clean old ANNs
cleanup_anns(rule, cfg, ev_base)
return maybe_train_anns(rule, cfg, ev_base, worker)
end)
More information about the Commits
mailing list