[Rspamd-Users] Rspamd statistic settings: How to set learned_ids when users_enabled is true?
Gabriele Nencioni
gabriele.nencioni at register.it
Mon Feb 14 11:05:57 UTC 2022
Hi,
we have configured the statistic module with both a global and a per-user
classifier, and we noticed that the key names stored in the learned_ids Redis
field (hash) are the same for the global and the per-user classifier.
Here is an example of that problem:
~# cat 1644312420.15363.eml | rspamc -c peruser -d
hari at int.billibotton.net -t 90 learn_ham
Results for file: stdin (0.008 seconds)
success = true;
filename = "stdin";
scan_time = 0.008000;
~# cat 1644312420.15363.eml | rspamc -c global -t 90 learn_ham
Results for file: stdin (0.004 seconds)
error = "<20220207080348.008504 at rebelwaltz.it> has been already learned
as ham, ignore it";
filename = "stdin";
scan_time = 0.004000;
~# redis-cli monitor | grep learned_ids
1644505170.215501 [0 127.0.0.1:43620] "HGET" "learned_ids"
"1mpwf7m975idffxpnh9dmzd1qx5y1445kxtynogpyscu1isa457kn3fhmcrq15b7iqkgnf6485knpfcd7hkkkpq7wpmgxbuboaw9tun"
1644505170.217240 [0 127.0.0.1:43622] "HSET" "learned_ids"
"1mpwf7m975idffxpnh9dmzd1qx5y1445kxtynogpyscu1isa457kn3fhmcrq15b7iqkgnf6485knpfcd7hkkkpq7wpmgxbuboaw9tun"
"-1"
1644505182.578004 [0 127.0.0.1:43714] "HGET" "learned_ids"
"1mpwf7m975idffxpnh9dmzd1qx5y1445kxtynogpyscu1isa457kn3fhmcrq15b7iqkgnf6485knpfcd7hkkkpq7wpmgxbuboaw9tun"
So is there a way to set a different key name for each classifier?
Moreover, how is the string used as that key name generated? Or is there a
command to find it out (given a certain message/email)?
(In our example we are talking about the string:
1mpwf7m975idffxpnh9dmzd1qx5y1445kxtynogpyscu1isa457kn3fhmcrq15b7iqkgnf6485knpfcd7hkkkpq7wpmgxbuboaw9tun,
as you can see, it is the same for both classifiers)
Our statistic module configuration follows:
# Bayes classifier named "global", using the Redis backend.
classifier {
  bayes {
    tokenizer {
      name = "osb";
    }
    name = "global";
    backend = 'redis';
    read_servers = "172.20.235.35,172.20.235.36,172.20.235.37,172.20.235.38";
    write_servers = '172.20.235.32';
    timeout = 30s;
    new_schema = true;
    # 10368000 = 120 * 86400, i.e. 120 days if the unit is seconds (TODO confirm unit).
    expire = 10368000;
    lazy = true;
    min_tokens = 11;
    min_learns = 200;
    languages_enabled = true;
    statfile {
      spam = false;
      symbol = "BAYES_HAM";
    }
    statfile {
      spam = true;
      symbol = "BAYES_SPAM";
    }
    # The heredoc body is Lua; the terminator below must stay at the start of its line.
    learn_condition = <<EOD
-- Decide whether a learn request should be accepted.
-- task: the task being learned; is_spam: true when learning as spam;
-- is_unlearn: accepted but not used by this function.
-- Returns true to allow learning, or false plus an explanatory message.
return function(task, is_spam, is_unlearn)
  local learn_type = task:get_request_header('Learn-Type')
  -- Requests carrying the header "Learn-Type: bulk" skip the probability check.
  if not (learn_type and tostring(learn_type) == 'bulk') then
    local prob = task:get_mempool():get_variable('bayes_prob', 'double')
    if prob then
      local in_class = false
      local cl
      if is_spam then
        cl = 'spam'
        in_class = prob >= 0.95
      else
        cl = 'ham'
        in_class = prob <= 0.05
      end
      if in_class then
        -- The format string must stay on one line: a quoted Lua string
        -- cannot contain a raw line break.
        return false, string.format('already in class %s; probability %.2f%%',
          cl, math.abs((prob - 0.5) * 200.0))
      end
    end
  end
  return true
end
EOD;
    .include(try=true; priority=1) "$LOCAL_CONFDIR/local.d/classifier-bayes.conf"
    .include(try=true; priority=10) "$LOCAL_CONFDIR/override.d/classifier-bayes.conf"
  }
}
# Bayes classifier named "peruser", using the Redis backend with per-user options enabled.
classifier {
  bayes {
    tokenizer {
      name = "osb";
    }
    name = "peruser";
    backend = 'redis';
    read_servers = "172.20.235.35,172.20.235.36,172.20.235.37,172.20.235.38";
    write_servers = '172.20.235.32';
    timeout = 30s;
    new_schema = true;
    # 10368000 = 120 * 86400, i.e. 120 days if the unit is seconds (TODO confirm unit).
    expire = 10368000;
    lazy = true;
    min_tokens = 11;
    min_learns = 0;
    languages_enabled = true;
    users_enabled = true;
    per_user = true;
    statfile {
      symbol = "BAYES_HAM_USER";
      spam = false;
    }
    statfile {
      symbol = "BAYES_SPAM_USER";
      spam = true;
    }
    # The heredoc body is Lua; the terminator below must stay at the start of its line.
    learn_condition = <<EOD
-- Decide whether a learn request should be accepted.
-- task: the task being learned; is_spam: true when learning as spam;
-- is_unlearn: accepted but not used by this function.
-- Returns true to allow learning, or false plus an explanatory message.
return function(task, is_spam, is_unlearn)
  local learn_type = task:get_request_header('Learn-Type')
  -- Requests carrying the header "Learn-Type: bulk" skip the probability check.
  if not (learn_type and tostring(learn_type) == 'bulk') then
    local prob = task:get_mempool():get_variable('bayes_prob', 'double')
    if prob then
      local in_class = false
      local cl
      if is_spam then
        cl = 'spam'
        in_class = prob >= 0.95
      else
        cl = 'ham'
        in_class = prob <= 0.05
      end
      if in_class then
        -- The format string must stay on one line: a quoted Lua string
        -- cannot contain a raw line break.
        return false, string.format('already in class %s; probability %.2f%%',
          cl, math.abs((prob - 0.5) * 200.0))
      end
    end
  end
  return true
end
EOD;
    # NOTE(review): these are the same include paths used by the "global"
    # classifier, so both would pick up the same local/override files - confirm
    # that this is intended.
    .include(try=true; priority=1) "$LOCAL_CONFDIR/local.d/classifier-bayes.conf"
    .include(try=true; priority=10) "$LOCAL_CONFDIR/override.d/classifier-bayes.conf"
  }
}
Thanks in advance
--
Gabriele Nencioni
More information about the Users
mailing list