commit 83f771a: [Minor] lua_scanners - move and extend mime_part matching

Carsten Rosenberg c.rosenberg at heinlein-support.de
Thu Jan 17 15:07:13 UTC 2019


Author: Carsten Rosenberg
Date: 2019-01-13 14:25:14 +0100
URL: https://github.com/rspamd/rspamd/commit/83f771a19c433b610dc9c6f12466b495fbb3ac51

[Minor] lua_scanners - move and extend mime_part matching

---
 lualib/lua_scanners/common.lua        | 109 ++++++++++++++++++++++++++++++--
 src/plugins/lua/antivirus.lua         |  32 +---------
 src/plugins/lua/external_services.lua | 113 +++-------------------------------
 3 files changed, 116 insertions(+), 138 deletions(-)

diff --git a/lualib/lua_scanners/common.lua b/lualib/lua_scanners/common.lua
index 6364a7e8e..1fe82fabb 100644
--- a/lualib/lua_scanners/common.lua
+++ b/lualib/lua_scanners/common.lua
@@ -1,5 +1,6 @@
 --[[
 Copyright (c) 2018, Vsevolod Stakhov <vsevolod at highsecure.ru>
+Copyright (c) 2019, Carsten Rosenberg <c.rosenberg at heinlein-support.de>
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -20,6 +21,7 @@ limitations under the License.
 --]]
 
 local rspamd_logger = require "rspamd_logger"
+local rspamd_regexp = require "rspamd_regexp"
 local lua_util = require "lua_util"
 local lua_redis = require "lua_redis"
 local fun = require "fun"
@@ -178,22 +180,119 @@ local function save_av_cache(task, digest, rule, to_save, dyn_weight)
   return false
 end
 
-local function text_parts_min_words(task, min_words)
-  local filter_func = function(p)
-    return p:get_words_count() >= min_words
+local function create_regex_table(task, patterns)
+  local regex_table = {}
+  if patterns[1] then
+    for i, p in ipairs(patterns) do
+      if type(p) == 'table' then
+        local new_set = {}
+        for k, v in pairs(p) do
+          new_set[k] = rspamd_regexp.create_cached(v)
+        end
+        regex_table[i] = new_set
+      else
+        regex_table[i] = {}
+      end
+    end
+  else
+    for k, v in pairs(patterns) do
+      regex_table[k] = rspamd_regexp.create_cached(v)
+    end
   end
+  return regex_table
+end
 
-  return fun.any(filter_func, task:get_text_parts())
+local function match_filter(task, found, patterns)
+  if type(patterns) ~= 'table' then return false end
+  if not patterns[1] then
+    for _, pat in pairs(patterns) do
+      if pat:match(found) then
+        return true
+      end
+    end
+    return false
+  else
+    for _, p in ipairs(patterns) do
+      for _, pat in ipairs(p) do
+        if pat:match(found) then
+          return true
+        end
+      end
+    end
+    return false
+  end
+end
+
+-- borrowed from mime_types.lua
+-- ext is the last extension, LOWERCASED
+-- ext2 is the second-to-last extension, LOWERCASED
+local function gen_extension(fname)
+  local filename_parts = rspamd_str_split(fname, '.')
 
+  local ext = {}
+  for n = 1, 2 do
+      ext[n] = #filename_parts > n and string.lower(filename_parts[#filename_parts + 1 - n]) or nil
+  end
+  return ext[1],ext[2],filename_parts
 end
 
+local function check_parts_match(task, rule)
+
+  local filter_func = function(p)
+    local content_type,content_subtype = p:get_type()
+    local fname = p:get_filename()
+    local ext, ext2, part_table
+    local extension_check = false
+    local content_type_check = false
+    local text_part_min_words_check = true
+
+    if rule.scan_all_mime_parts == false then
+    -- check file extension and filename regex matching
+      if fname ~= nil then
+        ext,ext2,part_table = gen_extension(fname)
+        lua_util.debugm(rule.module_name, task, '%s: extension found: %s - 2.ext: %s - parts: %s',
+          rule.log_prefix, ext, ext2, part_table)
+        if match_filter(task, ext, rule.mime_parts_filter_ext)
+          or match_filter(task, ext2, rule.mime_parts_filter_ext) then
+          lua_util.debugm(rule.module_name, task, '%s: extension matched: %s', rule.log_prefix, ext)
+          extension_check = true
+        end
+        if match_filter(task, fname, rule.mime_parts_filter_regex) then
+          content_type_check = true
+        end
+      end
+      -- check content type regex matching
+      if content_type ~= nil and content_subtype ~= nil then
+        if match_filter(task, content_type..'/'..content_subtype, rule.mime_parts_filter_regex) then
+          lua_util.debugm(rule.module_name, task, '%s: regex ct: %s', rule.log_prefix,
+            content_type..'/'..content_subtype)
+          content_type_check = true
+        end
+      end
+    end
+
+    -- require a text part to have at least rule.text_part_min_words words
+    if rule.text_part_min_words and p:is_text() then
+      text_part_min_words_check = p:get_words_count() >= tonumber(rule.text_part_min_words)
+    end
+
+    return (rule.scan_image_mime and p:is_image())
+        or (rule.scan_text_mime and text_part_min_words_check)
+        or (p:is_attachment() and rule.scan_all_mime_parts ~= false)
+        or extension_check
+        or content_type_check
+  end
+
+  return fun.filter(filter_func, task:get_parts())
+end
 
 exports.yield_result = yield_result
 exports.match_patterns = match_patterns
 exports.need_av_check = need_av_check
 exports.check_av_cache = check_av_cache
 exports.save_av_cache = save_av_cache
-exports.text_parts_min_words = text_parts_min_words
+exports.create_regex_table = create_regex_table
+exports.check_parts_match = check_parts_match
 
 setmetatable(exports, {
   __call = function(t, override)
diff --git a/src/plugins/lua/antivirus.lua b/src/plugins/lua/antivirus.lua
index 07dbba2f0..0515e3376 100644
--- a/src/plugins/lua/antivirus.lua
+++ b/src/plugins/lua/antivirus.lua
@@ -19,6 +19,7 @@ local rspamd_regexp = require "rspamd_regexp"
 local lua_util = require "lua_util"
 local fun = require "fun"
 local lua_antivirus = require("lua_scanners").filter('antivirus')
+local common = require "lua_scanners/common"
 local redis_params
 
 local N = "antivirus"
@@ -107,26 +108,7 @@ local function add_antivirus_rule(sym, opts)
     return nil
   end
 
-  if type(opts['patterns']) == 'table' then
-    rule['patterns'] = {}
-    if opts['patterns'][1] then
-      for i, p in ipairs(opts['patterns']) do
-        if type(p) == 'table' then
-          local new_set = {}
-          for k, v in pairs(p) do
-            new_set[k] = rspamd_regexp.create_cached(v)
-          end
-          rule['patterns'][i] = new_set
-        else
-          rule['patterns'][i] = {}
-        end
-      end
-    else
-      for k, v in pairs(opts['patterns']) do
-        rule['patterns'][k] = rspamd_regexp.create_cached(v)
-      end
-    end
-  end
+  rule.patterns = common.create_regex_table(task, opts.patterns or {})
 
   if opts['whitelist'] then
     rule['whitelist'] = rspamd_config:add_hash_map(opts['whitelist'])
@@ -134,21 +116,13 @@ local function add_antivirus_rule(sym, opts)
 
   return function(task)
     if rule.scan_mime_parts then
-      local parts = task:get_parts() or {}
-
-      local filter_func = function(p)
-        return (rule.scan_image_mime and p:is_image())
-            or (rule.scan_text_mime and p:is_text())
-            or (p:is_attachment())
-      end
 
       fun.each(function(p)
         local content = p:get_content()
-
         if content and #content > 0 then
           cfg.check(task, content, p:get_digest(), rule)
         end
-      end, fun.filter(filter_func, parts))
+      end, common.check_parts_match(task, rule))
 
     else
       cfg.check(task, task:get_content(), task:get_digest(), rule)
diff --git a/src/plugins/lua/external_services.lua b/src/plugins/lua/external_services.lua
index 192f15f51..3a22c16d5 100644
--- a/src/plugins/lua/external_services.lua
+++ b/src/plugins/lua/external_services.lua
@@ -16,10 +16,10 @@ limitations under the License.
 ]] --
 
 local rspamd_logger = require "rspamd_logger"
-local rspamd_regexp = require "rspamd_regexp"
 local lua_util = require "lua_util"
 local fun = require "fun"
 local lua_scanners = require("lua_scanners").filter('scanner')
+local common = require "lua_scanners/common"
 local redis_params
 
 local N = "external_services"
@@ -62,7 +62,7 @@ local function add_scanner_rule(sym, opts)
   local cfg = lua_scanners[opts['type']]
 
   if not cfg then
-    rspamd_logger.errx(rspamd_config, 'unknown antivirus type: %s',
+    rspamd_logger.errx(rspamd_config, 'unknown external scanner type: %s',
         opts['type'])
     return nil
   end
@@ -82,126 +82,31 @@ local function add_scanner_rule(sym, opts)
     return nil
   end
 
-  local function create_regex_table(task, patterns)
-    local regex_table = {}
-    if patterns[1] then
-      for i, p in ipairs(patterns) do
-        if type(p) == 'table' then
-          local new_set = {}
-          for k, v in pairs(p) do
-            new_set[k] = rspamd_regexp.create_cached(v)
-          end
-          regex_table[i] = new_set
-        else
-          regex_table[i] = {}
-        end
-      end
-    else
-      for k, v in pairs(patterns) do
-        regex_table[k] = rspamd_regexp.create_cached(v)
-      end
-    end
-    return regex_table
-  end
-
-  if opts['mime_parts_filter_regex'] ~= nil
-    or opts['mime_parts_filter_ext'] ~= nil then
+  -- if any mime_part filter is defined, do not scan all attachments
+  if opts.mime_parts_filter_regex ~= nil
+    or opts.mime_parts_filter_ext ~= nil then
       rule.scan_all_mime_parts = false
   end
 
-  rule['patterns'] = create_regex_table(task, opts['patterns'] or {})
+  rule.patterns = common.create_regex_table(task, opts.patterns or {})
 
-  rule['mime_parts_filter_regex'] = create_regex_table(task, opts['mime_parts_filter_regex'] or {})
+  rule.mime_parts_filter_regex = common.create_regex_table(task, opts.mime_parts_filter_regex or {})
 
-  rule['mime_parts_filter_ext'] = create_regex_table(task, opts['mime_parts_filter_ext'] or {})
+  rule.mime_parts_filter_ext = common.create_regex_table(task, opts.mime_parts_filter_ext or {})
 
   if opts['whitelist'] then
     rule['whitelist'] = rspamd_config:add_hash_map(opts['whitelist'])
   end
 
-  local function match_filter(task, found, patterns)
-    if type(patterns) ~= 'table' then
-      lua_util.debugm(N, task, '%s: pattern not table %s', rule.log_prefix, type(patterns))
-      return false
-    end
-    if not patterns[1] then
-      --lua_util.debugm(N, task, '%s: in not pattern[1]', rule['symbol'], rule['type'])
-      for _, pat in pairs(patterns) do
-        if pat:match(found) then
-          return true
-        end
-      end
-      return false
-    else
-      for _, p in ipairs(patterns) do
-        for _, pat in ipairs(p) do
-          if pat:match(found) then
-            return true
-          end
-        end
-      end
-      return false
-    end
-  end
-
-  -- borrowed from mime_types.lua
-  -- ext is the last extension, LOWERCASED
-  -- ext2 is the one before last extension LOWERCASED
-  local function gen_extension(fname)
-    local filename_parts = rspamd_str_split(fname, '.')
-
-    local ext = {}
-    for n = 1, 2 do
-        ext[n] = #filename_parts > n and string.lower(filename_parts[#filename_parts + 1 - n]) or nil
-    end
-  --lua_util.debugm(N, task, '%s: extension found: %s', rule.log_prefix, ext[1])
-    return ext[1],ext[2],filename_parts
-  end
-
   return function(task)
     if rule.scan_mime_parts then
-      local parts = task:get_parts() or {}
-
-      local filter_func = function(p)
-        local content_type,content_subtype = p:get_type()
-        local fname = p:get_filename()
-        local ext,ext2,part_table
-        local extension_check = false
-        local content_type_check = false
-        if fname ~= nil then
-          ext,ext2,part_table = gen_extension(fname)
-          lua_util.debugm(N, task, '%s: extension found: %s - 2.ext: %s - parts: %s',
-            rule.log_prefix, ext, ext2, part_table)
-          if match_filter(task, ext, rule['mime_parts_filter_ext'])
-            or match_filter(task, ext2, rule['mime_parts_filter_ext']) then
-            lua_util.debugm(N, task, '%s: extension matched: %s', rule.log_prefix, ext)
-            extension_check = true
-          end
-          if match_filter(task, fname, rule['mime_parts_filter_regex']) then
-            --lua_util.debugm(N, task, '%s: regex fname: %s', rule.log_prefix, fname)
-            content_type_check = true
-          end
-        end
-        if content_type ~=nil and content_subtype ~= nil then
-          if match_filter(task, content_type..'/'..content_subtype, rule['mime_parts_filter_regex']) then
-            lua_util.debugm(N, task, '%s: regex ct: %s', rule.log_prefix, content_type..'/'..content_subtype)
-            content_type_check = true
-          end
-        end
-
-        return (rule.scan_image_mime and p:is_image())
-            or (rule.scan_text_mime and p:is_text())
-            or (p:get_filename() and rule.scan_all_mime_parts ~= false)
-            or extension_check
-            or content_type_check
-      end
 
       fun.each(function(p)
         local content = p:get_content()
         if content and #content > 0 then
           cfg.check(task, content, p:get_digest(), rule)
         end
-      end, fun.filter(filter_func, parts))
+      end, common.check_parts_match(task, rule))
 
     else
       cfg.check(task, task:get_content(), task:get_digest(), rule)


More information about the Commits mailing list