commit 8748c90: [Project] Lua_magic: Start new magic detection library
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Sep 5 14:35:07 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-09-05 15:29:35 +0100
URL: https://github.com/rspamd/rspamd/commit/8748c908b407a787ed5aab23cd0cf78f8d1ae4be (HEAD -> master)
[Project] Lua_magic: Start new magic detection library
---
lualib/lua_magic/init.lua | 140 +++++++++++++++++++++
.../{lua_ffi/common.lua => lua_magic/patterns.lua} | 54 ++++----
.../maps.lua => lua_magic/types.lua} | 24 +++-
3 files changed, 190 insertions(+), 28 deletions(-)
diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua
new file mode 100644
index 000000000..464a10d0a
--- /dev/null
+++ b/lualib/lua_magic/init.lua
@@ -0,0 +1,140 @@
+--[[
+Copyright (c) 2019, Vsevolod Stakhov <vsevolod at highsecure.ru>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[[
+-- @module lua_magic
+-- This module contains file types detection logic
+--]]
+
+local patterns = require "lua_magic/patterns"
+local types = require "lua_magic/types"
+local fun = require "fun"
+local lua_util = require "lua_util"
+
+local rspamd_trie = require "rspamd_trie"
+
+local N = "lua_magic"
+local exports = {}
+-- trie object
+local compiled_patterns
+-- {<str>, <match_object>, <pattern_object>} indexed by pattern number
+local processed_patterns = {}
+
+local function process_patterns() -- Builds processed_patterns and the trie; does the work only while compiled_patterns is unset
+ if not compiled_patterns then -- nothing to do once the trie object exists
+ for _,pattern in ipairs(patterns) do
+ for _,match in ipairs(pattern.matches) do
+ if match.string then -- only matches carrying a `string` field are collected
+ processed_patterns[#processed_patterns + 1] = { -- append, so list order follows declaration order
+ match.string, match, pattern
+ }
+ end
+ end
+ end
+
+ compiled_patterns = rspamd_trie.create(fun.totable(
+ fun.map(function(t) return t[1] end, processed_patterns)), -- t[1] is the pattern string of each entry
+ rspamd_trie.flags.re -- NOTE(review): presumably makes the trie treat strings as regexps; confirm in rspamd_trie docs
+ )
+
+ lua_util.debugm(N, rspamd_config, 'compiled %s patterns',
+ #processed_patterns)
+ end
+end
+
+exports.detect = function(input, log_obj) -- Returns the highest-weight ext and its `types` entry, or nil when nothing scored
+ process_patterns() -- make sure the trie and processed_patterns are built
+ local res = {} -- accumulated weight keyed by pattern.ext
+ local matches = compiled_patterns:match(input) -- iterated below as pattern number -> list of matched positions
+
+ if not log_obj then log_obj = rspamd_config end -- fall back to rspamd_config for debug logging
+
+ local function add_result(match, pattern) -- adds match.weight (or 1 when unset) to res[pattern.ext]
+ if not res[pattern.ext] then
+ res[pattern.ext] = 0
+ end
+ if match.weight then
+ res[pattern.ext] = res[pattern.ext] + match.weight
+ else
+ res[pattern.ext] = res[pattern.ext] + 1
+ end
+
+ lua_util.debugm(N, log_obj,'add pattern for %s, weight %s, total weight %s',
+ pattern.ext, match.weight, res[pattern.ext])
+ end
+
+ for npat,matched_positions in pairs(matches) do -- npat is used as an index into processed_patterns
+ local pat_data = processed_patterns[npat]
+ local pattern = pat_data[3] -- third slot: the owning pattern object
+ local match = pat_data[2] -- second slot: the match object
+
+ local function match_position(pos, expected) -- true when pos satisfies expected (a value or an {op, value} table)
+ local cmp = function(a, b) return a == b end -- default comparison is equality
+ if type(expected) == 'table' then
+ -- Operator form, e.g. {'>', 0}: first element is the operator, second the value
+ if expected[1] == '>' then
+ cmp = function(a, b) return a > b end
+ elseif expected[1] == '>=' then
+ cmp = function(a, b) return a >= b end
+ elseif expected[1] == '<' then
+ cmp = function(a, b) return a < b end
+ elseif expected[1] == '<=' then
+ cmp = function(a, b) return a <= b end
+ elseif expected[1] == '!=' then
+ cmp = function(a, b) return a ~= b end
+ end -- any other operator string leaves the equality default in place
+ expected = expected[2] -- compare against the value part of the table
+ end
+
+ return cmp(pos, expected)
+ end
+ -- Single expected position (plain value or operator table)
+ if match.position then
+ local position = match.position
+
+ for _,pos in ipairs(matched_positions) do
+ if match_position(pos, position) then
+ add_result(match, pattern) -- weight is added once per matched position that satisfies the check
+ end
+ end
+ end
+ -- List of expected positions: each (expected, matched) pair that agrees adds a result
+ if match.positions then
+ for _,position in ipairs(match.positions) do
+ for _,pos in ipairs(matched_positions) do
+ if match_position(pos, position) then
+ add_result(match, pattern)
+ end
+ end
+ end
+ end
+ end
+
+ local extensions = lua_util.keys(res) -- presumably an array of the keys of res; verify against lua_util
+
+ if #extensions > 0 then
+ table.sort(extensions, function(ex1, ex2)
+ return res[ex1] > res[ex2] -- descending by accumulated weight
+ end)
+
+ return extensions[1],types[extensions[1]] -- best ext plus its entry in the types table (nil if no such entry)
+ end
+
+ -- Nothing found
+ return nil
+end
+
+return exports
\ No newline at end of file
diff --git a/lualib/lua_ffi/common.lua b/lualib/lua_magic/patterns.lua
similarity index 51%
copy from lualib/lua_ffi/common.lua
copy to lualib/lua_magic/patterns.lua
index 652b1dc6e..354f8ec61 100644
--- a/lualib/lua_ffi/common.lua
+++ b/lualib/lua_magic/patterns.lua
@@ -15,31 +15,33 @@ limitations under the License.
]]--
--[[[
--- @module lua_ffi/common
--- Common ffi definitions
+-- @module lua_magic/patterns
+-- This module contains most common patterns
--]]
-local ffi = require 'ffi'
-
-ffi.cdef[[
-struct GString {
- char *str;
- size_t len;
- size_t allocated_len;
-};
-struct GArray {
- char *data;
- unsigned len;
-};
-typedef void (*ref_dtor_cb_t)(void *data);
-struct ref_entry_s {
- unsigned int refcount;
- ref_dtor_cb_t dtor;
-};
-
-void g_string_free (struct GString *st, int free_data);
-void g_free (void *p);
-long rspamd_snprintf (char *buf, long max, const char *fmt, ...);
-]]
-
-return {}
\ No newline at end of file
+local patterns = {
+ {
+ -- MS-DOS style extension; it is the key used to look up the types table
+ ext = 'pdf',
+ -- These are alternative matches for the same ext
+ matches = {
+ {
+ string = [[%PDF-\d]],
+ position = 6, -- must be end of the match, as that's how hyperscan works
+ weight = 60,
+ },
+ {
+ string = [[\012%PDF-\d]],
+ position = 7, -- presumably 6 plus one byte for the leading \012; TODO confirm
+ weight = 60,
+ },
+ {
+ string = [[%FDF-\d]],
+ position = 6, -- end of match, same convention as the first entry
+ weight = 60,
+ },
+ },
+ }
+}
+
+return patterns
\ No newline at end of file
diff --git a/lualib/lua_selectors/maps.lua b/lualib/lua_magic/types.lua
similarity index 61%
copy from lualib/lua_selectors/maps.lua
copy to lualib/lua_magic/types.lua
index 175e23960..746c87400 100644
--- a/lualib/lua_selectors/maps.lua
+++ b/lualib/lua_magic/types.lua
@@ -14,6 +14,26 @@ See the License for the specific language governing permissions and
limitations under the License.
]]--
-local maps = {} -- Shared within selectors, indexed by name
+--[[[
+-- @module lua_magic/types
+-- This module contains types definitions
+--]]
-return maps
\ No newline at end of file
+-- This table is indexed by msdos extension for convenience
+
+local types = { -- each entry has a `ct` string and a `type` string
+ pdf = {
+ ct = 'application/pdf', -- NOTE(review): `ct` looks like a MIME content type; confirm with consumers
+ type = 'binary',
+ },
+ exe = {
+ ct = 'application/x-ms-application',
+ type = 'executable',
+ },
+ tiff = {
+ ct = 'image/tiff',
+ type = 'image',
+ }
+}
+
+return types
\ No newline at end of file
More information about the Commits
mailing list