commit eb120f8: [Project] Lua_magic: Add heuristics for Office 2007+

Vsevolod Stakhov vsevolod at highsecure.ru
Sun Sep 8 08:56:03 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-09-08 09:35:01 +0100
URL: https://github.com/rspamd/rspamd/commit/eb120f830eecdbea31bf7c4090c45a7784de682b

[Project] Lua_magic: Add heuristics for Office 2007+

---
 lualib/lua_magic/heuristics.lua | 38 +++++++++++++++++++++++++++++++++++---
 lualib/lua_magic/types.lua      | 15 ++++++++++++++-
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index 6a407f5e9..167edd0c9 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -165,9 +165,42 @@ local function detect_ole_format(input, log_obj)
   until directory_offset >= inplen
 end
 
-
 exports.ole_format_heuristic = detect_ole_format
 
+local function detect_archive_flaw(part, arch) -- scores archive entries; returns type string, confidence (`part` is unused)
+  local arch_type = arch:get_type()
+  local res = {
+    docx = 0,
+    xlsx = 0,
+    pptx = 0,
+    jar = 0,
+  } -- ext + confidence pairs
+
+  -- General msoffice patterns: raise all three Office 2007+ candidates by `incr`
+  local function add_msoffice_confidence(incr)
+    res.docx = res.docx + incr
+    res.xlsx = res.xlsx + incr
+    res.pptx = res.pptx + incr
+  end
+
+  if arch_type == 'zip' then
+    -- Find specific files/folders in zip file; each marker must score its own format
+    local files = arch:get_files() or {}
+    for _,file in ipairs(files) do
+      if file == '[Content_Types].xml' then
+        add_msoffice_confidence(10)
+      elseif file == 'xl/' then
+        res.xlsx = res.xlsx + 30
+      elseif file == 'word/' then
+        res.docx = res.docx + 30
+      elseif file == 'ppt/' then
+        res.pptx = res.pptx + 30
+      end
+    end
+  end
+
+  return arch_type:lower(),40 -- NOTE(review): scores in `res` are not consulted yet; the archive type is always returned
+end
 exports.mime_part_heuristic = function(part)
   if part:is_text() then
     if part:get_text():is_html() then
@@ -184,8 +217,7 @@ exports.mime_part_heuristic = function(part)
 
   if part:is_archive() then
     local arch = part:get_archive()
-    -- TODO: add files heuristics
-    return arch:get_type():lower(),60
+    return detect_archive_flaw(part, arch)
   end
 
   return nil
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index c8850cd18..c5de552c8 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -157,7 +157,20 @@ local types = {
   },
   msg = {
     ct = 'application/vnd.ms-outlook',
-    type = 'executable'
+    type = 'msoffice'
+  },
+  -- newer office (2007+)
+  docx = {
+    ct = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+    type = 'msoffice'
+  },
+  xlsx = {
+    ct = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+    type = 'msoffice'
+  },
+  pptx = {
+    ct = 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    type = 'msoffice'
   },
   -- other
   pgp = {


More information about the Commits mailing list