commit c716fe4: [Feature] Re-add pyzor support

defkev defkev at gmail.com
Sun Jul 18 11:21:04 UTC 2021


Author: defkev
Date: 2021-07-10 03:29:08 +0200
URL: https://github.com/rspamd/rspamd/commit/c716fe44803e5b9c58425c20b00718c9bce798c5

[Feature] Re-add pyzor support

---
 lualib/lua_scanners/pyzor.lua | 217 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 217 insertions(+)

diff --git a/lualib/lua_scanners/pyzor.lua b/lualib/lua_scanners/pyzor.lua
new file mode 100644
index 000000000..dd9205e7d
--- /dev/null
+++ b/lualib/lua_scanners/pyzor.lua
@@ -0,0 +1,217 @@
+--[[
+Copyright (c) 2021, defkev <defkev at gmail.com>
+Copyright (c) 2018, Carsten Rosenberg <c.rosenberg at heinlein-support.de>
+Copyright (c) 2018, Vsevolod Stakhov <vsevolod at highsecure.ru>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[[
+-- @module pyzor
+-- This module contains pyzor access functions
+--]]
+
+local lua_util = require "lua_util"
+local tcp = require "rspamd_tcp"
+local upstream_list = require "rspamd_upstream_list"
+local rspamd_logger = require "rspamd_logger"
+local common = require "lua_scanners/common"
+
+local N = 'pyzor'
+local categories = {'pyzor','bulk', 'hash', 'scanner'}
+
+local function pyzor_config(opts)
+
+  local pyzor_conf = {
+    text_part_min_words = 2,
+    default_port = 5953,
+    timeout = 15.0,
+    log_clean = false,
+    retransmits = 2,
+    detection_category = "hash",
+    cache_expire = 7200, -- expire redis in one hour
+    message = '${SCANNER}: Pyzor bulk message found: "${VIRUS}"',
+    default_score = 1,
+    action = false,
+  }
+
+  pyzor_conf = lua_util.override_defaults(pyzor_conf, opts)
+
+  if not pyzor_conf.prefix then
+    pyzor_conf.prefix = 'rext_' .. N .. '_'
+  end
+
+  if not pyzor_conf.log_prefix then
+    pyzor_conf.log_prefix = N .. ' (' .. pyzor_conf.detection_category .. ')'
+  end
+
+  if not pyzor_conf['servers'] then
+    rspamd_logger.errx(rspamd_config, 'no servers defined')
+
+    return nil
+  end
+
+  pyzor_conf['upstreams'] = upstream_list.create(rspamd_config,
+      pyzor_conf['servers'],
+      pyzor_conf.default_port)
+
+  if pyzor_conf['upstreams'] then
+    lua_util.add_debug_alias('external_services', N)
+    return pyzor_conf
+  end
+
+  rspamd_logger.errx(rspamd_config, 'cannot parse servers %s',
+      pyzor_conf['servers'])
+  return nil
+end
+
+local function pyzor_check(task, content, digest, rule)
+  local function pyzor_check_uncached ()
+    local upstream = rule.upstreams:get_upstream_round_robin()
+    local addr = upstream:get_addr()
+    local retransmits = rule.retransmits
+
+    local function pyzor_callback(err, data, conn)
+
+      if err then
+
+        -- set current upstream to fail because an error occurred
+        upstream:fail()
+
+        -- retry with another upstream until retransmits exceeds
+        if retransmits > 0 then
+
+          retransmits = retransmits - 1
+
+          -- Select a different upstream!
+          upstream = rule.upstreams:get_upstream_round_robin()
+          addr = upstream:get_addr()
+
+          lua_util.debugm(N, task, '%s: retry IP: %s:%s err: %s',
+              rule.log_prefix, addr, addr:get_port(), err)
+
+          tcp.request({
+            task = task,
+            host = addr:to_string(),
+            port = addr:get_port(),
+            timeout = rule['timeout'],
+            shutdown = true,
+            data = content,
+            callback = pyzor_callback,
+          })
+        else
+          rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits exceed',
+              rule['symbol'], rule['type'])
+          task:insert_result(rule['symbol_fail'], 0.0,
+              'failed to scan and retransmits exceed')
+        end
+      else
+        -- Parse the response
+        if upstream then upstream:ok() end
+        -- pyzor output is unicode (\x09 -> tab, \0a -> newline)
+        --   public.pyzor.org:24441  (200, 'OK')     21285091   206759
+        --   server:port             Code  Diag      Count      WL-Count
+        lua_util.debugm(N, task, '%s: returned data: %s', rule.log_prefix, tostring(data))
+        -- If pyzor would return JSON this wouldn't be necessary
+        local resp = {}
+        for v in string.gmatch(tostring(data), '[^\t]+') do
+          table.insert(resp, v)
+        end
+        -- rspamd_logger.infox(task, 'resp: %s', resp)
+        if resp[2] ~= '(200, \'OK\')' then
+          rspamd_logger.errx(task, "error parsing response: %s", tostring(data))
+          return
+        end
+
+        local whitelisted = tonumber(resp[4])
+        local reported = tonumber(resp[3])
+
+        --rspamd_logger.infox(task, "%s - count=%s wl=%s", addr:to_string(), reported, whitelisted)
+
+        --[[
+        @todo: Implement math function to calc the score dynamically based on return values.
+        Maybe check spamassassin implementation.
+        ]] --
+        local entries = reported - whitelisted
+
+        local weight = 0
+
+        if entries >= 100 then
+          weight = 1.5
+        elseif entries >= 25 then
+          weight = 1.25
+        elseif entries >= 5 then
+          weight = 1.0
+        elseif entries >= 1 and whitelisted == 0 then
+          weight = 0.2
+        end
+
+        if whitelisted >= 100 then
+          weight = weight - 1.5
+        elseif whitelisted >= 25 then
+          weight = weight - 1.25
+        elseif whitelisted >= 5 then
+          weight = weight - 1.0
+        elseif whitelisted >= 1 then
+          weight = weight - 0.2
+        end
+
+        local info = string.format("count=%d wl=%d", reported, whitelisted)
+        local threat_string = string.format("bl_%d_wl_%d", reported, whitelisted)
+
+        if weight > 0 then
+          lua_util.debugm(N, task, '%s: returned result is spam - info: %s', rule.log_prefix, info)
+          common.yield_result(task, rule, threat_string, weight)
+          common.save_cache(task, digest, rule, threat_string, weight)
+        else
+          if rule.log_clean then
+            rspamd_logger.infox(task, '%s: clean, returned result is ham - info: %s', rule.log_prefix, info)
+          else
+            lua_util.debugm(N, task, '%s: returned result is ham - info: %s', rule.log_prefix, info)
+          end
+          common.save_cache(task, digest, rule, 'OK', weight)
+        end
+
+      end
+    end
+
+   if digest == 'da39a3ee5e6b4b0d3255bfef95601890afd80709' then
+     rspamd_logger.infox(task, '%s: not checking default digest', rule.log_prefix)
+     return
+   end
+
+    tcp.request({
+      task = task,
+      host = addr:to_string(),
+      port = addr:get_port(),
+      timeout = rule.timeout,
+      shutdown = true,
+      data = content,
+      callback = pyzor_callback,
+    })
+  end
+  if common.condition_check_and_continue(task, content, rule, digest, pyzor_check_uncached) then
+    return
+  else
+    pyzor_check_uncached()
+  end
+end
+
+
+return {
+  type = categories,
+  description = 'pyzor bulk scanner',
+  configure = pyzor_config,
+  check = pyzor_check,
+  name = N
+}


More information about the Commits mailing list