commit d6f2dc6: [Minor] Lua_magic: Add ical and vcard support

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Nov 21 14:00:10 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-11-21 13:59:50 +0000
URL: https://github.com/rspamd/rspamd/commit/d6f2dc67078d48e2623d5920e202c888300bd685 (HEAD -> master)

[Minor] Lua_magic: Add ical and vcard support

---
 lualib/lua_magic/heuristics.lua | 46 +++++++++++++++++++++++------------------
 lualib/lua_magic/types.lua      | 10 +++++++++
 2 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index 3da6a84ab..d9d408170 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -61,18 +61,24 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    [[(?i)\s*<html]],
-    [[(?i)\s*<\!DOCTYPE HTML]],
-    [[(?i)\s*<xml]],
-    [[(?i)\s*<body]],
-    [[(?i)\s*<table]],
-    [[(?i)\s*<a]],
-    [[(?i)\s*<p]],
-    [[(?i)\s*<div]],
-    [[(?i)\s*<span]],
+    {[[(?i)\s*<html]], 30},
+    {[[(?i)\s*<\!DOCTYPE HTML]], 30},
+    {[[(?i)\s*<xml]], 20},
+    {[[(?i)\s*<body]], 20},
+    {[[(?i)\s*<table]], 20},
+    {[[(?i)\s*<a]], 10},
+    {[[(?i)\s*<p]], 10},
+    {[[(?i)\s*<div]], 10},
+    {[[(?i)\s*<span]], 10},
   },
   csv = {
-    [[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]]
+    {[[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]], 20}
+  },
+  ics = {
+    {[[^BEGIN:VCALENDAR\r?\n]], 40},
+  },
+  vcf = {
+    {[[^BEGIN:VCARD\r?\n]], 40},
   },
 }
 
@@ -95,7 +101,7 @@ local function compile_tries()
       for _,pat in ipairs(pats) do
         -- These are utf16 strings in fact...
         strs[#strs + 1] = transform_func(pat)
-        indexes[#indexes + 1] = ext
+        indexes[#indexes + 1] = {ext, pat}
       end
     end
 
@@ -131,7 +137,7 @@ local function compile_tries()
         function(pat) return pat end)
     -- Text patterns at the initial fragment
     txt_trie = compile_pats(txt_patterns, txt_patterns_indexes,
-        function(pat) return pat end,
+        function(pat_tbl) return pat_tbl[1] end,
         bit.bor(rspamd_trie.flags.re,
             rspamd_trie.flags.dot_all,
             rspamd_trie.flags.no_start))
@@ -184,8 +190,8 @@ local function detect_ole_format(input, log_obj)
         for n,_ in pairs(matches) do
           if msoffice_clsid_indexes[n] then
             lua_util.debugm(N, log_obj, "found valid clsid for %s",
-                msoffice_clsid_indexes[n])
-            return true,msoffice_clsid_indexes[n]
+                msoffice_clsid_indexes[n][1])
+            return true,msoffice_clsid_indexes[n][1]
           end
         end
       end
@@ -195,7 +201,7 @@ local function detect_ole_format(input, log_obj)
       if matches then
         for n,_ in pairs(matches) do
           if msoffice_patterns_indexes[n] then
-            return true,msoffice_patterns_indexes[n]
+            return true,msoffice_patterns_indexes[n][1]
           end
         end
       end
@@ -295,8 +301,8 @@ local function detect_archive_flaw(part, arch, log_obj)
         for n,_ in pairs(matches) do
           if zip_patterns_indexes[n] then
             lua_util.debugm(N, log_obj, "found zip pattern for %s",
-                zip_patterns_indexes[n])
-            return zip_patterns_indexes[n],40
+                zip_patterns_indexes[n][1])
+            return zip_patterns_indexes[n][1],40
           end
         end
       end
@@ -392,11 +398,11 @@ exports.text_part_heuristic = function(part, log_obj)
       if matches then
         -- Require at least 2 occurrences of those patterns
         for n,positions in pairs(matches) do
-          local ext = txt_patterns_indexes[n]
+          local ext,weight = txt_patterns_indexes[n][1], txt_patterns_indexes[n][2][2]
           if ext then
-            res[ext] = (res[ext] or 0) + 20 * #positions
+            res[ext] = (res[ext] or 0) + weight * #positions
             lua_util.debugm(N, log_obj, "found txt pattern for %s: %s, total: %s",
-                ext, #positions, res[ext])
+                ext, weight * #positions, res[ext])
           end
         end
 
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index 2ee3e62d7..d15eec6e1 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -282,6 +282,16 @@ local types = {
     ct = 'text/csv',
     av_check = false,
   },
+  ics = {
+    type = 'text',
+    ct = 'text/calendar',
+    av_check = false,
+  },
+  vcf = {
+    type = 'text',
+    ct = 'text/vcard',
+    av_check = false,
+  },
   eml = {
     type = 'message',
     ct = 'message/rfc822',


More information about the Commits mailing list