commit 8748c90: [Project] Lua_magic: Start new magic detection library

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Sep 5 14:35:07 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-09-05 15:29:35 +0100
URL: https://github.com/rspamd/rspamd/commit/8748c908b407a787ed5aab23cd0cf78f8d1ae4be (HEAD -> master)

[Project] Lua_magic: Start new magic detection library

---
 lualib/lua_magic/init.lua                          | 140 +++++++++++++++++++++
 .../{lua_ffi/common.lua => lua_magic/patterns.lua} |  54 ++++----
 .../maps.lua => lua_magic/types.lua}               |  24 +++-
 3 files changed, 190 insertions(+), 28 deletions(-)

diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua
new file mode 100644
index 000000000..464a10d0a
--- /dev/null
+++ b/lualib/lua_magic/init.lua
@@ -0,0 +1,140 @@
+--[[
+Copyright (c) 2019, Vsevolod Stakhov <vsevolod at highsecure.ru>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]--
+
+--[[[
+-- @module lua_magic
+-- This module contains file types detection logic
+--]]
+
+local patterns = require "lua_magic/patterns"
+local types = require "lua_magic/types"
+local fun = require "fun"
+local lua_util = require "lua_util"
+
+local rspamd_trie = require "rspamd_trie"
+
+local N = "lua_magic"
+local exports = {}
+-- trie object
+local compiled_patterns
+-- {<str>, <match_object>, <pattern_object>} indexed by pattern number
+local processed_patterns = {}
+
+local function process_patterns() -- builds the shared trie on first use; later calls do nothing
+  if not compiled_patterns then -- trie has not been created yet
+    for _,pattern in ipairs(patterns) do
+      for _,match in ipairs(pattern.matches) do
+        if match.string then -- matches without a `string` field are not indexed
+          processed_patterns[#processed_patterns + 1] = { -- slot index is later used as the pattern number
+            match.string, match, pattern
+          }
+        end
+      end
+    end
+
+    compiled_patterns = rspamd_trie.create(fun.totable( -- feed only the pattern strings (t[1]), in slot order
+        fun.map(function(t) return t[1] end, processed_patterns)),
+        rspamd_trie.flags.re -- presumably selects regexp syntax for the strings; confirm in rspamd_trie docs
+    )
+
+    lua_util.debugm(N, rspamd_config, 'compiled %s patterns',
+        #processed_patterns)
+  end
+end
+
+exports.detect = function(input, log_obj) -- returns best-scoring extension and its `types` entry, or nil
+  process_patterns() -- make sure the trie has been compiled
+  local res = {} -- extension -> accumulated weight
+  local matches = compiled_patterns:match(input) -- iterated below as pattern number -> list of positions
+
+  if not log_obj then log_obj = rspamd_config end -- log against rspamd_config when no object is given
+
+  local function add_result(match, pattern) -- adds match.weight (1 when unset) to the score of pattern.ext
+    if not res[pattern.ext] then
+      res[pattern.ext] = 0
+    end
+    if match.weight then
+      res[pattern.ext] = res[pattern.ext] + match.weight
+    else
+      res[pattern.ext] = res[pattern.ext] + 1
+    end
+
+    lua_util.debugm(N, log_obj,'add pattern for %s, weight %s, total weight %s',
+        pattern.ext, match.weight, res[pattern.ext])
+  end
+
+  for npat,matched_positions in pairs(matches) do
+    local pat_data = processed_patterns[npat] -- {<str>, <match_object>, <pattern_object>}
+    local pattern = pat_data[3]
+    local match = pat_data[2]
+
+    local function match_position(pos, expected) -- true when `pos` satisfies `expected`
+      local cmp = function(a, b) return a == b end -- a plain (non-table) `expected` means equality
+      if type(expected) == 'table' then
+        -- Something like {'>', 0}: first item is the operator, second the operand
+        if expected[1] == '>' then
+          cmp = function(a, b) return a > b end
+        elseif expected[1] == '>=' then
+          cmp = function(a, b) return a >= b end
+        elseif expected[1] == '<' then
+          cmp = function(a, b) return a < b end
+        elseif expected[1] == '<=' then
+          cmp = function(a, b) return a <= b end
+        elseif expected[1] == '!=' then
+          cmp = function(a, b) return a ~= b end
+        end -- any other operator string keeps the equality check
+        expected = expected[2]
+      end
+
+      return cmp(pos, expected)
+    end
+    -- Single position: every reported offset that satisfies it adds the weight
+    if match.position then
+      local position = match.position
+
+      for _,pos in ipairs(matched_positions) do
+        if match_position(pos, position) then
+          add_result(match, pattern)
+        end
+      end
+    end
+    -- List of positions: each (condition, offset) pair that matches adds the weight
+    if match.positions then
+      for _,position in ipairs(match.positions) do
+        for _,pos in ipairs(matched_positions) do
+          if match_position(pos, position) then
+            add_result(match, pattern)
+          end
+        end
+      end
+    end
+  end
+
+  local extensions = lua_util.keys(res) -- presumably the list of keys of `res`, i.e. candidate extensions
+
+  if #extensions > 0 then
+    table.sort(extensions, function(ex1, ex2) -- highest total weight first
+      return res[ex1] > res[ex2]
+    end)
+
+    return extensions[1],types[extensions[1]] -- second value is nil if `types` lacks this extension
+  end
+
+  -- Nothing found
+  return nil
+end
+
+return exports
\ No newline at end of file
diff --git a/lualib/lua_ffi/common.lua b/lualib/lua_magic/patterns.lua
similarity index 51%
copy from lualib/lua_ffi/common.lua
copy to lualib/lua_magic/patterns.lua
index 652b1dc6e..354f8ec61 100644
--- a/lualib/lua_ffi/common.lua
+++ b/lualib/lua_magic/patterns.lua
@@ -15,31 +15,33 @@ limitations under the License.
 ]]--
 
 --[[[
--- @module lua_ffi/common
--- Common ffi definitions
+-- @module lua_magic/patterns
+-- This module contains most common patterns
 --]]
 
-local ffi = require 'ffi'
-
-ffi.cdef[[
-struct GString {
-  char  *str;
-  size_t len;
-  size_t allocated_len;
-};
-struct GArray {
-  char *data;
-  unsigned len;
-};
-typedef void (*ref_dtor_cb_t)(void *data);
-struct ref_entry_s {
-	unsigned int refcount;
-	ref_dtor_cb_t dtor;
-};
-
-void g_string_free (struct GString *st, int free_data);
-void g_free (void *p);
-long rspamd_snprintf (char *buf, long max, const char *fmt, ...);
-]]
-
-return {}
\ No newline at end of file
+local patterns = {
+  {
+    -- MSDOS extension; lua_magic uses it as the key into the types table
+    ext = 'pdf',
+    -- These are alternatives: each one that matches adds its weight to `ext`
+    matches = {
+      {
+        string = [[%PDF-\d]],
+        position = 6, -- must be end of the match, as that's how hyperscan works
+        weight = 60,
+      },
+      {
+        string = [[\012%PDF-\d]], -- long string: the backslash escapes reach the matcher verbatim
+        position = 7, -- one more than above because of the extra leading character
+        weight = 60,
+      },
+      {
+        string = [[%FDF-\d]],
+        position = 6, -- end of the match, same as for %PDF-
+        weight = 60,
+      },
+    },
+  }
+}
+
+return patterns
\ No newline at end of file
diff --git a/lualib/lua_selectors/maps.lua b/lualib/lua_magic/types.lua
similarity index 61%
copy from lualib/lua_selectors/maps.lua
copy to lualib/lua_magic/types.lua
index 175e23960..746c87400 100644
--- a/lualib/lua_selectors/maps.lua
+++ b/lualib/lua_magic/types.lua
@@ -14,6 +14,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ]]--
 
-local maps = {} -- Shared within selectors, indexed by name
+--[[[
+-- @module lua_magic/types
+-- This module contains types definitions
+--]]
 
-return maps
\ No newline at end of file
+-- This table is indexed by msdos extension for convenience
+
+local types = {
+  pdf = { -- the only extension that has patterns in this commit
+    ct = 'application/pdf', -- presumably the MIME content type
+    type = 'binary', -- broad category of the file
+  },
+  exe = {
+    ct = 'application/x-ms-application',
+    type = 'executable',
+  },
+  tiff = {
+    ct = 'image/tiff',
+    type = 'image',
+  }
+}
+
+return types
\ No newline at end of file


More information about the Commits mailing list