[Rspamd-Users] rspamd per-user statistic user limit

Gabriele Nencioni gabriele.nencioni at register.it
Fri Jul 3 14:36:58 UTC 2020


Hi,
I have upgraded Rspamd from version 1.8 to 2.4, and I have the following
statistic configuration:

cat /etc/rspamd/local.d/statistic.conf
# Bayes classifier named "global": OSB tokenizer, statistics in Redis.
classifier {
    bayes {
        tokenizer {
            name = "osb";
        }
        name = "global";
        backend = 'redis';
        read_servers = 'xxx.xxx.xxx.xxx';
        write_servers = 'xxx.xxx.xxx.xxx';
        timeout = 30s;
        new_schema = true;
        expire = 10368000;
        lazy = true;
        min_tokens = 11;
        min_learns = 200;
        languages_enabled = true;
        statfile {
            spam = false;
            symbol = "BAYES_HAM";
        }
        statfile {
            spam = true;
            symbol = "BAYES_SPAM";
        }
        # Embedded Lua chunk; it must return a function.
        learn_condition = <<EOD
-- Learn condition callback.
--
-- Parameters:
--   task       task object; only get_request_header() and
--              get_mempool() are used here
--   is_spam    truthy when the message is being learned as spam,
--              otherwise it is treated as ham
--   is_unlearn accepted but not used by this function
--
-- Returns:
--   true                 when no objection is raised
--   false, reason string when the stored 'bayes_prob' value already
--                        places the message in the requested class
return function(task, is_spam, is_unlearn)
  local learn_type = task:get_request_header('Learn-Type')

  -- Requests carrying "Learn-Type: bulk" bypass the probability check.
  if not (learn_type and tostring(learn_type) == 'bulk') then
    local prob = task:get_mempool():get_variable('bayes_prob', 'double')

    -- No stored probability means there is nothing to compare against.
    if prob then
      local in_class = false
      local cl
      if is_spam then
        cl = 'spam'
        in_class = prob >= 0.95
      else
        cl = 'ham'
        in_class = prob <= 0.05
      end

      if in_class then
        -- The format string must stay on one line: a quoted Lua
        -- string cannot contain a raw line break.
        -- abs((prob - 0.5) * 200) maps the distance from 0.5 to 0..100.
        return false, string.format(
          'already in class %s; probability %.2f%%',
          cl, math.abs((prob - 0.5) * 200.0))
      end
    end
  end

  return true
end
EOD;
    }
}

# Bayes classifier named "peruser": OSB tokenizer, statistics in Redis,
# with users_enabled and per_user switched on.
classifier {
    bayes {
        tokenizer {
            name = "osb";
        }
        name = "peruser";
        backend = 'redis';
        read_servers = 'xxx.xxx.xxx.xxx';
        write_servers = 'xxx.xxx.xxx.xxx';
        timeout = 30s;
        new_schema = true;
        expire = 10368000;
        lazy = true;
        min_tokens = 11;
        min_learns = 0;
        languages_enabled = true;
        users_enabled = true;
        per_user = true;
        statfile {
            symbol = "BAYES_HAM_USER";
            spam = false;
        }
        statfile {
            symbol = "BAYES_SPAM_USER";
            spam = true;
        }
        # Embedded Lua chunk; it must return a function.
        learn_condition = <<EOD
-- Learn condition callback.
--
-- Parameters:
--   task       task object; only get_request_header() and
--              get_mempool() are used here
--   is_spam    truthy when the message is being learned as spam,
--              otherwise it is treated as ham
--   is_unlearn accepted but not used by this function
--
-- Returns:
--   true                 when no objection is raised
--   false, reason string when the stored 'bayes_prob' value already
--                        places the message in the requested class
return function(task, is_spam, is_unlearn)
  local learn_type = task:get_request_header('Learn-Type')

  -- Requests carrying "Learn-Type: bulk" bypass the probability check.
  if not (learn_type and tostring(learn_type) == 'bulk') then
    local prob = task:get_mempool():get_variable('bayes_prob', 'double')

    -- No stored probability means there is nothing to compare against.
    if prob then
      local in_class = false
      local cl
      if is_spam then
        cl = 'spam'
        in_class = prob >= 0.95
      else
        cl = 'ham'
        in_class = prob <= 0.05
      end

      if in_class then
        -- The format string must stay on one line: a quoted Lua
        -- string cannot contain a raw line break.
        -- abs((prob - 0.5) * 200) maps the distance from 0.5 to 0..100.
        return false, string.format(
          'already in class %s; probability %.2f%%',
          cl, math.abs((prob - 0.5) * 200.0))
      end
    end
  end

  return true
end
EOD;
    }
}

My problem is: with the peruser classifier, every time 1000 users are
reached, the learned and users counters are reset, as you can see from
the rspamc stat output:

~# while date ; do rspamc stat | grep _HAM_USER ; sleep 60 ; done
...
Statfile: BAYES_HAM_USER type: redis; length: 78.07M; free blocks: 0;
total blocks: 2.05M; free: 0.00%; learned: 3301; users: 924; languages: 0
...
Statfile: BAYES_HAM_USER type: redis; length: 84.27M; free blocks: 0;
total blocks: 2.21M; free: 0.00%; learned: 3511; users: 977; languages: 0
...
Statfile: BAYES_HAM_USER type: redis; length: 1.97M; free blocks: 0;
total blocks: 51.86k; free: 0.00%; learned: 66; users: 26; languages: 0


while the number of keys in Redis does not decrease.
(The behavior is the same with BAYES_SPAM_USER and with the old_schema setting.)


On Rspamd 1.8 (where the only configuration difference is the
old_schema setting) I do not have any problem:
~# rspamc stat | grep _USER
Statfile: BAYES_SPAM_USER type: redis; length: 27.55G; free blocks: 0;
total blocks: 725M; free: 0.00%; learned: 2246954; users: 44086;
languages: 0
Statfile: BAYES_HAM_USER type: redis; length: 9.76G; free blocks: 0;
total blocks: 256.83M; free: 0.00%; learned: 561230; users: 33171;
languages: 0


Which setting do I have to change in order to disable that limit?



Thanks in advance
Regards,
--
Gabriele Nencioni


More information about the Users mailing list