commit 0b979e1: [Project] Lua_magic: Adopt lua_magic stuff in mime_types
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Sep 9 16:14:05 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-09-09 16:54:31 +0100
URL: https://github.com/rspamd/rspamd/commit/0b979e1351d1d5ff6d42f42391469ce366c18154
[Project] Lua_magic: Adopt lua_magic stuff in mime_types
---
src/lua/lua_mimepart.c | 32 ++++++++-
src/plugins/lua/mime_types.lua | 146 +++++++++++++++++++++++------------------
2 files changed, 112 insertions(+), 66 deletions(-)
diff --git a/src/lua/lua_mimepart.c b/src/lua/lua_mimepart.c
index dfc4ee8fa..fdc793ad8 100644
--- a/src/lua/lua_mimepart.c
+++ b/src/lua/lua_mimepart.c
@@ -365,18 +365,25 @@ LUA_FUNCTION_DEF (mimepart, get_type_full);
/***
* @method mime_part:get_detected_type()
- * Extract content-type string of the mime part. Use libmagic detection
+ * Extract content-type string of the mime part. Use lua_magic detection
* @return {string,string} content type in form 'type','subtype'
*/
LUA_FUNCTION_DEF (mimepart, get_detected_type);
/***
* @method mime_part:get_detected_type_full()
- * Extract content-type string of the mime part with all attributes. Use libmagic detection
+ * Extract content-type string of the mime part with all attributes. Use lua_magic detection
* @return {string,string,table} content type in form 'type','subtype', {attrs}
*/
LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
+/***
+ * @method mime_part:get_detected_ext()
+ * Returns a msdos extension name according to lua_magic detection
+ * @return {string} detected extension (see lua_magic.types)
+ */
+LUA_FUNCTION_DEF (mimepart, get_detected_ext);
+
/***
* @method mime_part:get_cte()
* Extract content-transfer-encoding for a part
@@ -523,6 +530,7 @@ static const struct luaL_reg mimepartlib_m[] = {
LUA_INTERFACE_DEF (mimepart, get_type),
LUA_INTERFACE_DEF (mimepart, get_type_full),
LUA_INTERFACE_DEF (mimepart, get_detected_type),
+ LUA_INTERFACE_DEF (mimepart, get_detected_ext),
LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
LUA_INTERFACE_DEF (mimepart, get_cte),
LUA_INTERFACE_DEF (mimepart, get_filename),
@@ -1494,6 +1502,26 @@ lua_mimepart_get_detected_type_full (lua_State * L)
return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
}
+static gint
+lua_mimepart_get_detected_ext (lua_State * L)
+{ /* Lua method body: pushes part->detected_ext as a string, or nil when it is unset */
+ LUA_TRACE_POINT;
+ struct rspamd_mime_part *part = lua_check_mimepart (L); /* presumably takes the part from the Lua call arguments - confirm in lua_check_mimepart */
+
+ if (part == NULL) { /* no usable mime part was obtained */
+ return luaL_error (L, "invalid arguments"); /* luaL_error raises a Lua error; `return` is the usual idiom */
+ }
+
+ if (part->detected_ext) { /* an extension is recorded on the part */
+ lua_pushstring (L, part->detected_ext);
+ }
+ else {
+ lua_pushnil (L); /* nothing recorded: the caller receives nil */
+ }
+
+ return 1; /* exactly one value (string or nil) is returned to Lua */
+}
+
static gint
lua_mimepart_get_cte (lua_State * L)
{
diff --git a/src/plugins/lua/mime_types.lua b/src/plugins/lua/mime_types.lua
index de53b74a7..32d9d62f3 100644
--- a/src/plugins/lua/mime_types.lua
+++ b/src/plugins/lua/mime_types.lua
@@ -24,6 +24,7 @@ local lua_util = require "lua_util"
local rspamd_util = require "rspamd_util"
local lua_maps = require "lua_maps"
local lua_mime = require "lua_mime"
+local lua_magic_types = require "lua_magic/types"
local fun = require "fun"
local N = "mime_types"
@@ -210,7 +211,7 @@ local function check_mime_type(task)
return ext[1],ext[2],parts
end
- local function check_filename(fname, ct, is_archive, part, detected_ct)
+ local function check_filename(fname, ct, is_archive, part, detected_ext)
local has_bad_unicode, char, ch_pos = rspamd_util.has_obscured_unicode(fname)
if has_bad_unicode then
@@ -229,7 +230,7 @@ local function check_mime_type(task)
if settings.filename_whitelist and
settings.filename_whitelist:get_key(fname) then
logger.debugm("mime_types", task, "skip checking of %s - file is in filename whitelist",
- fname)
+ fname)
return
end
@@ -237,11 +238,20 @@ local function check_mime_type(task)
-- ext is the last extension, LOWERCASED
-- ext2 is the one before last extension LOWERCASED
- if not ext and detected_ct then
+ local detected
+
+ if not is_archive and detected_ext then
+ detected = lua_magic_types[detected_ext]
+ end
+
+ if not ext or (detected_ext and ext ~= detected_ext) then
-- Try to find extension by real content type
- ext = lua_mime.reversed_extensions_map[detected_ct]
+ check_filename('detected.' .. detected_ext, detected.ct,
+ false, part, nil)
end
+ if not ext then return end
+
local function check_extension(badness_mult, badness_mult2)
if not badness_mult and not badness_mult2 then return end
if #parts > 2 then
@@ -257,7 +267,7 @@ local function check_mime_type(task)
-- Double extension + bad extension == VERY bad
task:insert_result(settings['symbol_double_extension'], badness_mult,
- string.format(".%s.%s", ext2, ext))
+ string.format(".%s.%s", ext2, ext))
task:insert_result('MIME_TRACE', 0.0,
string.format("%s:%s", part:get_id(), '-'))
return
@@ -286,7 +296,7 @@ local function check_mime_type(task)
-- Convert to a key-value map
extra_table = fun.tomap(
fun.map(function(e) return e,1.0 end,
- user_settings.bad_extensions))
+ user_settings.bad_extensions))
else
extra_table = user_settings.bad_extensions
end
@@ -306,66 +316,64 @@ local function check_mime_type(task)
local function check_tables(e)
if is_archive then
return extra_archive_table[e] or settings.bad_archive_extensions[e] or
- extra_table[e] or settings.bad_extensions[e]
+ extra_table[e] or settings.bad_extensions[e]
end
return extra_table[e] or settings.bad_extensions[e]
end
- if ext then
- -- Also check for archive bad extension
- if is_archive then
- if ext2 then
- local score1 = check_tables(ext)
- local score2 = check_tables(ext2)
- check_extension(score1, score2)
- else
- local score1 = check_tables(ext)
- check_extension(score1, nil)
- end
+ -- Also check for archive bad extension
+ if is_archive then
+ if ext2 then
+ local score1 = check_tables(ext)
+ local score2 = check_tables(ext2)
+ check_extension(score1, score2)
+ else
+ local score1 = check_tables(ext)
+ check_extension(score1, nil)
+ end
- if settings['archive_extensions'][ext] then
- -- Archive in archive
- task:insert_result(settings['symbol_archive_in_archive'], 1.0, ext)
+ if settings['archive_extensions'][ext] then
+ -- Archive in archive
+ task:insert_result(settings['symbol_archive_in_archive'], 1.0, ext)
+ task:insert_result('MIME_TRACE', 0.0,
+ string.format("%s:%s", part:get_id(), '-'))
+ end
+ else
+ if ext2 then
+ local score1 = check_tables(ext)
+ local score2 = check_tables(ext2)
+ check_extension(score1, score2)
+ -- Check for archive cloaking like .zip.gz
+ if settings['archive_extensions'][ext2]
+ -- Exclude multipart archive extensions, e.g. .zip.001
+ and not string.match(ext, '^%d+$')
+ then
+ task:insert_result(settings['symbol_archive_in_archive'],
+ 1.0, string.format(".%s.%s", ext2, ext))
task:insert_result('MIME_TRACE', 0.0,
string.format("%s:%s", part:get_id(), '-'))
end
else
- if ext2 then
- local score1 = check_tables(ext)
- local score2 = check_tables(ext2)
- check_extension(score1, score2)
- -- Check for archive cloaking like .zip.gz
- if settings['archive_extensions'][ext2]
- -- Exclude multipart archive extensions, e.g. .zip.001
- and not string.match(ext, '^%d+$')
- then
- task:insert_result(settings['symbol_archive_in_archive'],
- 1.0, string.format(".%s.%s", ext2, ext))
- task:insert_result('MIME_TRACE', 0.0,
- string.format("%s:%s", part:get_id(), '-'))
- end
- else
- local score1 = check_tables(ext)
- check_extension(score1, nil)
- end
+ local score1 = check_tables(ext)
+ check_extension(score1, nil)
end
+ end
- local mt = settings['extension_map'][ext]
- if mt and ct then
- local found
- local mult
- for _,v in ipairs(mt) do
- mult = v.mult
- if ct == v.ct then
- found = true
- break
- end
+ local mt = settings['extension_map'][ext]
+ if mt and ct then
+ local found
+ local mult
+ for _,v in ipairs(mt) do
+ mult = v.mult
+ if ct == v.ct then
+ found = true
+ break
end
+ end
- if not found then
- task:insert_result(settings['symbol_attachment'], mult, ext)
- end
+ if not found then
+ task:insert_result(settings['symbol_attachment'], mult, ext)
end
end
end
@@ -375,7 +383,6 @@ local function check_mime_type(task)
if parts then
for _,p in ipairs(parts) do
local mtype,subtype = p:get_type()
- local dtype,dsubtype = p:get_detected_type()
if not mtype then
task:insert_result(settings['symbol_unknown'], 1.0, 'missing content type')
@@ -385,20 +392,24 @@ local function check_mime_type(task)
-- Check for attachment
local filename = p:get_filename()
local ct = string.format('%s/%s', mtype, subtype):lower()
- local detected_ct
- if dtype and dsubtype then
- detected_ct = string.format('%s/%s', dtype, dsubtype)
- end
+ local detected_ext = p:get_detected_ext()
if filename then
- check_filename(filename, ct, false, p, detected_ct)
+ check_filename(filename, ct, false, p, detected_ext)
end
if p:is_archive() then
-
local check = true
+ if detected_ext then
+ local detected_type = lua_magic_types[detected_ext]
- if filename then
+ if detected_type.type ~= 'archive' then
+ logger.debugm("mime_types", task, "skip checking of %s as archive, %s is not archive but %s",
+ filename, detected_type.type)
+ check = false
+ end
+ end
+ if check and filename then
local ext = gen_extension(filename)
if ext and settings.archive_exceptions[ext] then
@@ -436,7 +447,8 @@ local function check_mime_type(task)
end
if f['name'] then
- check_filename(f['name'], nil, true, p, nil)
+ check_filename(f['name'], nil,
+ true, p, nil)
end
end
@@ -463,8 +475,14 @@ local function check_mime_type(task)
if map then
local v = map:get_key(ct)
local detected_different = false
- if detected_ct and detected_ct ~= ct then
- local v_detected = map:get_key(detected_ct)
+
+ local detected_type
+ if detected_ext then
+ detected_type = lua_magic_types[detected_ext]
+ end
+
+ if detected_type and detected_type.ct ~= ct then
+ local v_detected = map:get_key(detected_type.ct)
if not v or v_detected and v_detected > v then v = v_detected end
detected_different = true
end
@@ -477,7 +495,7 @@ local function check_mime_type(task)
-- Penalize case
n = n * 1.5
task:insert_result(settings['symbol_bad'], n,
- string.format('%s:%s', ct, detected_ct))
+ string.format('%s:%s', ct, detected_type.ct))
else
task:insert_result(settings['symbol_bad'], n, ct)
end
More information about the Commits mailing list