commit 0b979e1: [Project] Lua_magic: Adopt lua_magic stuff in mime_types

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Sep 9 16:14:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-09-09 16:54:31 +0100
URL: https://github.com/rspamd/rspamd/commit/0b979e1351d1d5ff6d42f42391469ce366c18154

[Project] Lua_magic: Adopt lua_magic stuff in mime_types

---
 src/lua/lua_mimepart.c         |  32 ++++++++-
 src/plugins/lua/mime_types.lua | 146 +++++++++++++++++++++++------------------
 2 files changed, 112 insertions(+), 66 deletions(-)

diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index dfc4ee8fa..fdc793ad8 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -365,18 +365,25 @@ LUA_FUNCTION_DEF (mimepart, get_type_full);
 
 /***
  * @method mime_part:get_detected_type()
- * Extract content-type string of the mime part. Use libmagic detection
+ * Extract content-type string of the mime part. Use lua_magic detection
  * @return {string,string} content type in form 'type','subtype'
  */
 LUA_FUNCTION_DEF (mimepart, get_detected_type);
 
 /***
  * @method mime_part:get_detected_type_full()
- * Extract content-type string of the mime part with all attributes. Use libmagic detection
+ * Extract content-type string of the mime part with all attributes. Use lua_magic detection
  * @return {string,string,table} content type in form 'type','subtype', {attrs}
  */
 LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
 
+/***
+ * @method mime_part:get_detected_ext()
+ * Returns the short (MS-DOS style) file extension that lua_magic detected for this part
+ * @return {string|nil} detected extension (see lua_magic.types), or nil if nothing was detected
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_ext);
+
 /***
  * @method mime_part:get_cte()
  * Extract content-transfer-encoding for a part
@@ -523,6 +530,7 @@ static const struct luaL_reg mimepartlib_m[] = {
 	LUA_INTERFACE_DEF (mimepart, get_type),
 	LUA_INTERFACE_DEF (mimepart, get_type_full),
 	LUA_INTERFACE_DEF (mimepart, get_detected_type),
+	LUA_INTERFACE_DEF (mimepart, get_detected_ext),
 	LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
 	LUA_INTERFACE_DEF (mimepart, get_cte),
 	LUA_INTERFACE_DEF (mimepart, get_filename),
@@ -1494,6 +1502,26 @@ lua_mimepart_get_detected_type_full (lua_State * L)
 	return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
 }
 
+static gint
+lua_mimepart_get_detected_ext (lua_State * L) /* Lua method mime_part:get_detected_ext() */
+{
+	LUA_TRACE_POINT;
+	struct rspamd_mime_part *part = lua_check_mimepart (L); /* presumably reads the part userdata from the Lua stack; helper not shown here */
+
+	if (part == NULL) { /* no usable mime part was obtained */
+		return luaL_error (L, "invalid arguments");
+	}
+
+	if (part->detected_ext) { /* an extension string has been recorded on the part */
+		lua_pushstring (L, part->detected_ext); /* result: the detected extension */
+	}
+	else {
+		lua_pushnil (L); /* result: nil when detected_ext is unset */
+	}
+
+	return 1; /* exactly one Lua return value in either branch */
+}
+
 static gint
 lua_mimepart_get_cte (lua_State * L)
 {
diff --git a/src/plugins/lua/mime_types.lua b/src/plugins/lua/mime_types.lua
index de53b74a7..32d9d62f3 100644
--- a/src/plugins/lua/mime_types.lua
+++ b/src/plugins/lua/mime_types.lua
@@ -24,6 +24,7 @@ local lua_util = require "lua_util"
 local rspamd_util = require "rspamd_util"
 local lua_maps = require "lua_maps"
 local lua_mime = require "lua_mime"
+local lua_magic_types = require "lua_magic/types"
 local fun = require "fun"
 
 local N = "mime_types"
@@ -210,7 +211,7 @@ local function check_mime_type(task)
     return ext[1],ext[2],parts
   end
 
-  local function check_filename(fname, ct, is_archive, part, detected_ct)
+  local function check_filename(fname, ct, is_archive, part, detected_ext)
 
     local has_bad_unicode, char, ch_pos = rspamd_util.has_obscured_unicode(fname)
     if has_bad_unicode then
@@ -229,7 +230,7 @@ local function check_mime_type(task)
     if settings.filename_whitelist and
         settings.filename_whitelist:get_key(fname) then
       logger.debugm("mime_types", task, "skip checking of %s - file is in filename whitelist",
-        fname)
+          fname)
       return
     end
 
@@ -237,11 +238,20 @@ local function check_mime_type(task)
     -- ext is the last extension, LOWERCASED
     -- ext2 is the one before last extension LOWERCASED
 
-    if not ext and detected_ct then
+    local detected
+
+    if not is_archive and detected_ext then
+      detected = lua_magic_types[detected_ext]
+    end
+
+    if not ext or (detected_ext and ext ~= detected_ext) then
       -- Try to find extension by real content type
-      ext = lua_mime.reversed_extensions_map[detected_ct]
+      check_filename('detected.' .. detected_ext, detected.ct,
+          false, part, nil)
     end
 
+    if not ext then return end
+
     local function check_extension(badness_mult, badness_mult2)
       if not badness_mult and not badness_mult2 then return end
       if #parts > 2 then
@@ -257,7 +267,7 @@ local function check_mime_type(task)
 
           -- Double extension + bad extension == VERY bad
           task:insert_result(settings['symbol_double_extension'], badness_mult,
-            string.format(".%s.%s", ext2, ext))
+              string.format(".%s.%s", ext2, ext))
           task:insert_result('MIME_TRACE', 0.0,
               string.format("%s:%s", part:get_id(), '-'))
           return
@@ -286,7 +296,7 @@ local function check_mime_type(task)
           -- Convert to a key-value map
           extra_table = fun.tomap(
               fun.map(function(e) return e,1.0 end,
-              user_settings.bad_extensions))
+                  user_settings.bad_extensions))
         else
           extra_table = user_settings.bad_extensions
         end
@@ -306,66 +316,64 @@ local function check_mime_type(task)
     local function check_tables(e)
       if is_archive then
         return extra_archive_table[e] or settings.bad_archive_extensions[e] or
-          extra_table[e] or settings.bad_extensions[e]
+            extra_table[e] or settings.bad_extensions[e]
       end
 
       return extra_table[e] or settings.bad_extensions[e]
     end
 
-    if ext then
-      -- Also check for archive bad extension
-      if is_archive then
-        if ext2 then
-          local score1 = check_tables(ext)
-          local score2 = check_tables(ext2)
-          check_extension(score1, score2)
-        else
-          local score1 = check_tables(ext)
-          check_extension(score1, nil)
-        end
+    -- Also check for archive bad extension
+    if is_archive then
+      if ext2 then
+        local score1 = check_tables(ext)
+        local score2 = check_tables(ext2)
+        check_extension(score1, score2)
+      else
+        local score1 = check_tables(ext)
+        check_extension(score1, nil)
+      end
 
-        if settings['archive_extensions'][ext] then
-          -- Archive in archive
-          task:insert_result(settings['symbol_archive_in_archive'], 1.0, ext)
+      if settings['archive_extensions'][ext] then
+        -- Archive in archive
+        task:insert_result(settings['symbol_archive_in_archive'], 1.0, ext)
+        task:insert_result('MIME_TRACE', 0.0,
+            string.format("%s:%s", part:get_id(), '-'))
+      end
+    else
+      if ext2 then
+        local score1 = check_tables(ext)
+        local score2 = check_tables(ext2)
+        check_extension(score1, score2)
+        -- Check for archive cloaking like .zip.gz
+        if settings['archive_extensions'][ext2]
+            -- Exclude multipart archive extensions, e.g. .zip.001
+            and not string.match(ext, '^%d+$')
+        then
+          task:insert_result(settings['symbol_archive_in_archive'],
+              1.0, string.format(".%s.%s", ext2, ext))
           task:insert_result('MIME_TRACE', 0.0,
               string.format("%s:%s", part:get_id(), '-'))
         end
       else
-        if ext2 then
-          local score1 = check_tables(ext)
-          local score2 = check_tables(ext2)
-          check_extension(score1, score2)
-          -- Check for archive cloaking like .zip.gz
-          if settings['archive_extensions'][ext2]
-            -- Exclude multipart archive extensions, e.g. .zip.001
-            and not string.match(ext, '^%d+$')
-          then
-            task:insert_result(settings['symbol_archive_in_archive'],
-                1.0, string.format(".%s.%s", ext2, ext))
-            task:insert_result('MIME_TRACE', 0.0,
-                string.format("%s:%s", part:get_id(), '-'))
-          end
-        else
-          local score1 = check_tables(ext)
-          check_extension(score1, nil)
-        end
+        local score1 = check_tables(ext)
+        check_extension(score1, nil)
       end
+    end
 
-      local mt = settings['extension_map'][ext]
-      if mt and ct then
-        local found
-        local mult
-        for _,v in ipairs(mt) do
-          mult = v.mult
-          if ct == v.ct then
-            found = true
-            break
-          end
+    local mt = settings['extension_map'][ext]
+    if mt and ct then
+      local found
+      local mult
+      for _,v in ipairs(mt) do
+        mult = v.mult
+        if ct == v.ct then
+          found = true
+          break
         end
+      end
 
-        if not found  then
-          task:insert_result(settings['symbol_attachment'], mult, ext)
-        end
+      if not found  then
+        task:insert_result(settings['symbol_attachment'], mult, ext)
       end
     end
   end
@@ -375,7 +383,6 @@ local function check_mime_type(task)
   if parts then
     for _,p in ipairs(parts) do
       local mtype,subtype = p:get_type()
-      local dtype,dsubtype = p:get_detected_type()
 
       if not mtype then
         task:insert_result(settings['symbol_unknown'], 1.0, 'missing content type')
@@ -385,20 +392,24 @@ local function check_mime_type(task)
         -- Check for attachment
         local filename = p:get_filename()
         local ct = string.format('%s/%s', mtype, subtype):lower()
-        local detected_ct
-        if dtype and dsubtype then
-          detected_ct = string.format('%s/%s', dtype, dsubtype)
-        end
+        local detected_ext = p:get_detected_ext()
 
         if filename then
-          check_filename(filename, ct, false, p, detected_ct)
+          check_filename(filename, ct, false, p, detected_ext)
         end
 
         if p:is_archive() then
-
           local check = true
+          if detected_ext then
+            local detected_type = lua_magic_types[detected_ext]
 
-          if filename then
+            if detected_type.type ~= 'archive' then
+              logger.debugm("mime_types", task, "skip checking of %s as archive, %s is not archive but %s",
+                  filename, detected_type.type)
+              check = false
+            end
+          end
+          if check and filename then
             local ext = gen_extension(filename)
 
             if ext and settings.archive_exceptions[ext] then
@@ -436,7 +447,8 @@ local function check_mime_type(task)
               end
 
               if f['name'] then
-                check_filename(f['name'], nil, true, p, nil)
+                check_filename(f['name'], nil,
+                    true, p, nil)
               end
             end
 
@@ -463,8 +475,14 @@ local function check_mime_type(task)
         if map then
           local v = map:get_key(ct)
           local detected_different = false
-          if detected_ct and detected_ct ~= ct then
-            local v_detected = map:get_key(detected_ct)
+
+          local detected_type
+          if detected_ext then
+            detected_type = lua_magic_types[detected_ext]
+          end
+
+          if detected_type and detected_type.ct ~= ct then
+            local v_detected = map:get_key(detected_type.ct)
             if not v or v_detected and v_detected > v then v = v_detected end
             detected_different = true
           end
@@ -477,7 +495,7 @@ local function check_mime_type(task)
                   -- Penalize case
                   n = n * 1.5
                   task:insert_result(settings['symbol_bad'], n,
-                      string.format('%s:%s', ct, detected_ct))
+                      string.format('%s:%s', ct, detected_type.ct))
                 else
                   task:insert_result(settings['symbol_bad'], n, ct)
                 end


More information about the Commits mailing list