commit 3214f48: [Minor] external_services - add support for mime_type matching

Carsten Rosenberg c.rosenberg at heinlein-support.de
Thu Jan 17 15:07:04 UTC 2019


Author: Carsten Rosenberg
Date: 2019-01-13 00:26:20 +0100
URL: https://github.com/rspamd/rspamd/commit/3214f4825598eaa511fe982312899a4531475f55

[Minor] external_services - add support for mime_type matching

---
 src/plugins/lua/external_services.lua | 117 +++++++++++++++++++++++++++++++---
 1 file changed, 108 insertions(+), 9 deletions(-)

diff --git a/src/plugins/lua/external_services.lua b/src/plugins/lua/external_services.lua
index d9481461f..192f15f51 100644
--- a/src/plugins/lua/external_services.lua
+++ b/src/plugins/lua/external_services.lua
@@ -1,5 +1,6 @@
 --[[
 Copyright (c) 2019, Vsevolod Stakhov <vsevolod at highsecure.ru>
+Copyright (c) 2019, Carsten Rosenberg <c.rosenberg at heinlein-support.de>
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,6 +18,7 @@ limitations under the License.
 local rspamd_logger = require "rspamd_logger"
 local rspamd_regexp = require "rspamd_regexp"
 local lua_util = require "lua_util"
+local fun = require "fun"
 local lua_scanners = require("lua_scanners").filter('scanner')
 local redis_params
 
@@ -80,33 +82,130 @@ local function add_scanner_rule(sym, opts)
     return nil
   end
 
-  if type(opts['patterns']) == 'table' then
-    rule['patterns'] = {}
-    if opts['patterns'][1] then
-      for i, p in ipairs(opts['patterns']) do
+  local function create_regex_table(task, patterns)
+    local regex_table = {}
+    if patterns[1] then
+      for i, p in ipairs(patterns) do
         if type(p) == 'table' then
           local new_set = {}
           for k, v in pairs(p) do
             new_set[k] = rspamd_regexp.create_cached(v)
           end
-          rule['patterns'][i] = new_set
+          regex_table[i] = new_set
         else
-          rule['patterns'][i] = {}
+          regex_table[i] = {}
         end
       end
     else
-      for k, v in pairs(opts['patterns']) do
-        rule['patterns'][k] = rspamd_regexp.create_cached(v)
+      for k, v in pairs(patterns) do
+        regex_table[k] = rspamd_regexp.create_cached(v)
       end
     end
+    return regex_table
   end
 
+  if opts['mime_parts_filter_regex'] ~= nil
+    or opts['mime_parts_filter_ext'] ~= nil then
+    -- explicit filters switch off the "scan every named part" default
+    rule.scan_all_mime_parts = false
+  end
+  -- Compile the pattern options. A non-table value yields an empty set, and nil
+  -- replaces `task`: none is in scope here and create_regex_table ignores it.
+  for _, key in ipairs({'patterns', 'mime_parts_filter_regex', 'mime_parts_filter_ext'}) do
+    rule[key] = create_regex_table(nil, type(opts[key]) == 'table' and opts[key] or {})
+  end
+
   if opts['whitelist'] then
     rule['whitelist'] = rspamd_config:add_hash_map(opts['whitelist'])
   end
 
+  -- Report whether `found` matches any regexp held in `patterns`.
+  -- `patterns` is either a flat map of regexps or an array of regexp sets, the
+  -- two shapes create_regex_table builds; `task` is only used for debug logging.
+  -- Returns true on the first match, false otherwise (also for a non-table).
+  local function match_filter(task, found, patterns)
+    if type(patterns) ~= 'table' then
+      lua_util.debugm(N, task, '%s: pattern not table %s', rule.log_prefix, type(patterns))
+      return false
+    end
+    -- a flat map is handled as one single set
+    local sets = patterns[1] and patterns or { patterns }
+    for _, set in ipairs(sets) do
+      -- pairs, not ipairs: create_regex_table fills each set through pairs(), so
+      -- a set may hold non-sequence keys (or gaps, should a pattern fail to
+      -- compile) that would end an ipairs walk before all patterns are tried
+      for _, pat in pairs(set) do
+        if pat:match(found) then
+          return true
+        end
+      end
+    end
+
+    return false
+  end
+
+  -- borrowed from mime_types.lua
+  -- Splits fname with rspamd_str_split(fname, '.') and returns three values:
+  -- the last extension, the one before it (both lowercased) and the table of
+  -- parts. An extension is nil when no further part would precede it.
+  local function gen_extension(fname)
+    local pieces = rspamd_str_split(fname, '.')
+    local count = #pieces
+    local last, before_last
+    if count > 1 then last = string.lower(pieces[count]) end
+    if count > 2 then before_last = string.lower(pieces[count - 1]) end
+
+    return last, before_last, pieces
+  end
+
   return function(task)
-    cfg.check(task, task:get_content(), task:get_digest(), rule)
+    if rule.scan_mime_parts then
+      local parts = task:get_parts() or {}
+
+      -- Decide whether mime part `p` goes to the scanner: images/text when
+      -- enabled, named parts unless scan_all_mime_parts is false, or a filter hit
+      local filter_func = function(p)
+        local mtype, msubtype = p:get_type()
+        local attach_name = p:get_filename()
+        local ext_hit, regex_hit = false, false
+        if attach_name ~= nil then
+          local e1, e2, pieces = gen_extension(attach_name)
+          lua_util.debugm(N, task, '%s: extension found: %s - 2.ext: %s - parts: %s',
+            rule.log_prefix, e1, e2, pieces)
+          local ext_res = rule['mime_parts_filter_ext']
+          if match_filter(task, e1, ext_res) or match_filter(task, e2, ext_res) then
+            lua_util.debugm(N, task, '%s: extension matched: %s', rule.log_prefix, e1)
+            ext_hit = true
+          end
+          -- the attachment name is also tested against the regex filter
+          if match_filter(task, attach_name, rule['mime_parts_filter_regex']) then
+            regex_hit = true
+          end
+        end
+        if mtype ~= nil and msubtype ~= nil then
+          local full_type = mtype..'/'..msubtype
+          if match_filter(task, full_type, rule['mime_parts_filter_regex']) then
+            lua_util.debugm(N, task, '%s: regex ct: %s', rule.log_prefix, full_type)
+            regex_hit = true
+          end
+        end
+        return (rule.scan_image_mime and p:is_image())
+            or (rule.scan_text_mime and p:is_text())
+            or (p:get_filename() and rule.scan_all_mime_parts ~= false)
+            or ext_hit
+            or regex_hit
+      end
+
+      fun.each(function(p)
+        local content = p:get_content()
+        if content and #content > 0 then
+          cfg.check(task, content, p:get_digest(), rule)
+        end
+      end, fun.filter(filter_func, parts))
+
+    else
+      cfg.check(task, task:get_content(), task:get_digest(), rule)
+    end
   end
 end
 


More information about the Commits mailing list