commit 7cf7a88: [Project] Lua_magic: Add mime parts detection function
Vsevolod Stakhov
vsevolod at highsecure.ru
Sat Sep 7 15:42:03 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-09-07 16:37:58 +0100
URL: https://github.com/rspamd/rspamd/commit/7cf7a889a60efb651fdf2062b1773ad17c9eec7f (HEAD -> master)
[Project] Lua_magic: Add mime parts detection function
---
lualib/lua_magic/heuristics.lua | 26 ++++++++++++++++++++++++
lualib/lua_magic/init.lua | 11 +++++++++++
lualib/lua_magic/types.lua | 44 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 81 insertions(+)
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index d60c87162..6a407f5e9 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -52,6 +52,7 @@ local function compile_msoffice_trie(log_obj)
local strs = {}
for ext,pats in pairs(msoffice_patterns) do
for _,pat in ipairs(pats) do
+ -- These are utf16 strings in fact...
strs[#strs + 1] = '^' ..
table.concat(
fun.totable(
@@ -66,6 +67,7 @@ local function compile_msoffice_trie(log_obj)
strs = {}
for ext,pats in pairs(msoffice_clsids) do
for _,pat in ipairs(pats) do
+ -- Convert hex to re
local hex_table = {}
for i=1,#pat,2 do
local subc = pat:sub(i, i + 1)
@@ -163,6 +165,30 @@ local function detect_ole_format(input, log_obj)
until directory_offset >= inplen
end
+
exports.ole_format_heuristic = detect_ole_format
+exports.mime_part_heuristic = function(part) -- Guess an extension from the part's own flags; returns ext,weight or nil
+ if part:is_text() then -- text parts are reported as either 'html' or 'txt'
+ if part:get_text():is_html() then
+ return 'html',60
+ else
+ return 'txt',60
+ end
+ end
+ -- Image parts: the extension is the lowercased image type string
+ if part:is_image() then
+ local img = part:get_image()
+ return img:get_type():lower(),60
+ end
+ -- Archive parts: the extension is the lowercased archive type string
+ if part:is_archive() then
+ local arch = part:get_archive()
+ -- TODO: add files heuristics
+ return arch:get_type():lower(),60
+ end
+ -- None of the checks above matched, so no guess is offered
+ return nil
+end
+
return exports
\ No newline at end of file
diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua
index 59e2a6e36..8b5064bfe 100644
--- a/lualib/lua_magic/init.lua
+++ b/lualib/lua_magic/init.lua
@@ -21,6 +21,7 @@ limitations under the License.
local patterns = require "lua_magic/patterns"
local types = require "lua_magic/types"
+local heuristics = require "lua_magic/heuristics"
local fun = require "fun"
local lua_util = require "lua_util"
@@ -317,6 +318,16 @@ exports.detect = function(input, log_obj)
return nil
end
+exports.detect_mime_part = function(part, log_obj) -- Detect a part's type: heuristic guess first, exports.detect as fallback
+ local ext,weight = heuristics.mime_part_heuristic(part)
+ -- Accept the heuristic guess only when its weight exceeds 20
+ if ext and weight and weight > 20 then
+ return ext,types[ext] -- NOTE(review): types[ext] is nil when ext has no entry in lua_magic/types; confirm callers cope
+ end
+ -- No usable guess: run detection on the result of part:get_content()
+ return exports.detect(part:get_content(), log_obj)
+end
+
-- This parameter specifies how many bytes are checked in the input
-- Rspamd checks 2 chunks at start and 1 chunk at the end
exports.chunk_size = 32768
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index 8255af663..c8850cd18 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -168,6 +168,50 @@ local types = {
ct = 'application/x-uuencoded',
type = 'binary',
},
+ -- Types that are detected by Rspamd itself
+ -- Archives
+ zip = {
+ ct = 'application/zip',
+ type = 'archive',
+ },
+ rar = {
+ ct = 'application/x-rar',
+ type = 'archive',
+ },
+ ['7z'] = {
+ ct = 'application/x-7z-compressed', -- content type in full type/subtype form, like the other archive entries
+ type = 'archive',
+ },
+ gz = {
+ ct = 'application/gzip',
+ type = 'archive',
+ },
+ -- Images
+ png = {
+ ct = 'image/png',
+ type = 'image',
+ },
+ gif = {
+ ct = 'image/gif',
+ type = 'image',
+ },
+ jpg = {
+ ct = 'image/jpeg',
+ type = 'image',
+ },
+ bmp = {
+ type = 'image',
+ ct = 'image/bmp',
+ },
+ -- Text
+ txt = {
+ type = 'text',
+ ct = 'text/plain',
+ },
+ html = {
+ type = 'text',
+ ct = 'text/html',
+ },
}
return types
\ No newline at end of file
More information about the Commits
mailing list