commit 603ae65: [Project] Lua_magic: Add heuristics support

Vsevolod Stakhov vsevolod at highsecure.ru
Sat Sep 7 15:14:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-09-07 14:24:06 +0100
URL: https://github.com/rspamd/rspamd/commit/603ae65cd4808e35822a52f63a52e17b3a5e1c4c

[Project] Lua_magic: Add heuristics support

---
 lualib/lua_magic/init.lua | 106 +++++++++++++++++++++++++++-------------------
 1 file changed, 63 insertions(+), 43 deletions(-)

diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua
index 5a4154c79..98d4845eb 100644
--- a/lualib/lua_magic/init.lua
+++ b/lualib/lua_magic/init.lua
@@ -131,23 +131,48 @@ local function process_patterns(log_obj)
   end
 end
 
-local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, res)
-  local matches = trie:match(input)
+local function match_chunk(chunk, input, tlen, offset, trie, processed_tbl, log_obj, res)
+  local matches = trie:match(chunk)
 
   local last = tlen
 
-  local function add_result(match, pattern)
-    if not res[pattern.ext] then
-      res[pattern.ext] = 0
+  local function add_result(weight, ext)
+    if not res[ext] then
+      res[ext] = 0
     end
     if match.weight then
-      res[pattern.ext] = res[pattern.ext] + match.weight
+      res[ext] = res[ext] + weight
     else
-      res[pattern.ext] = res[pattern.ext] + 1
+      res[ext] = res[ext] + 1
     end
 
     lua_util.debugm(N, log_obj,'add pattern for %s, weight %s, total weight %s',
-        pattern.ext, match.weight, res[pattern.ext])
+        ext, weight, res[ext])
+  end
+
+  local function match_position(pos, expected)
+    local cmp = function(a, b) return a == b end
+    if type(expected) == 'table' then
+      -- Something like {'>', 0}
+      if expected[1] == '>' then
+        cmp = function(a, b) return a > b end
+      elseif expected[1] == '>=' then
+        cmp = function(a, b) return a >= b end
+      elseif expected[1] == '<' then
+        cmp = function(a, b) return a < b end
+      elseif expected[1] == '<=' then
+        cmp = function(a, b) return a <= b end
+      elseif expected[1] == '!=' then
+        cmp = function(a, b) return a ~= b end
+      end
+      expected = expected[2]
+    end
+
+    -- Tail match
+    if expected < 0 then
+      expected = last + expected + 1
+    end
+    return cmp(pos, expected)
   end
 
   for npat,matched_positions in pairs(matches) do
@@ -155,30 +180,6 @@ local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, re
     local pattern = pat_data[3]
     local match = pat_data[2]
 
-    local function match_position(pos, expected)
-      local cmp = function(a, b) return a == b end
-      if type(expected) == 'table' then
-        -- Something like {'>', 0}
-        if expected[1] == '>' then
-          cmp = function(a, b) return a > b end
-        elseif expected[1] == '>=' then
-          cmp = function(a, b) return a >= b end
-        elseif expected[1] == '<' then
-          cmp = function(a, b) return a < b end
-        elseif expected[1] == '<=' then
-          cmp = function(a, b) return a <= b end
-        elseif expected[1] == '!=' then
-          cmp = function(a, b) return a ~= b end
-        end
-        expected = expected[2]
-      end
-
-      -- Tail match
-      if expected < 0 then
-        expected = last + expected + 1
-      end
-      return cmp(pos, expected)
-    end
     -- Single position
     if match.position then
       local position = match.position
@@ -187,13 +188,21 @@ local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, re
         lua_util.debugm(N, log_obj, 'found match %s at offset %s(from %s)',
             pattern.ext, pos, offset)
         if match_position(pos + offset, position) then
-          add_result(match, pattern)
-          break
+          if match.heuristic then
+            local ext,weight = match.heuristic(input, log_obj)
+
+            if ext then
+              add_result(weight, ext)
+              break
+            end
+          else
+            add_result(match.weight, pattern.ext)
+            break
+          end
         end
       end
-    end
-    -- Match all positions
-    if match.positions then
+    elseif match.positions then
+      -- Match all positions
       local all_right = true
       for _,position in ipairs(match.positions) do
         local matched = false
@@ -210,10 +219,21 @@ local function match_chunk(input, tlen, offset, trie, processed_tbl, log_obj, re
       end
 
       if all_right then
-        add_result(match, pattern)
+        if match.heuristic then
+          local ext,weight = match.heuristic(input, log_obj)
+
+          if ext then
+            add_result(weight, ext)
+            break
+          end
+        else
+          add_result(match.weight, pattern.ext)
+          break
+        end
       end
     end
   end
+
 end
 
 local function process_detected(res)
@@ -248,13 +268,13 @@ exports.detect = function(input, log_obj)
     -- Check tail matches
     if inplen > min_tail_offset then
       local tail = input:span(inplen - min_tail_offset, min_tail_offset)
-      match_chunk(tail, inplen, inplen - min_tail_offset,
+      match_chunk(tail, input, inplen, inplen - min_tail_offset,
           compiled_tail_patterns, tail_patterns, log_obj, res)
     end
 
     -- Try short match
     local head = input:span(1, math.min(max_short_offset, inplen))
-    match_chunk(head, inplen, 0,
+    match_chunk(head, input, inplen, 0,
         compiled_short_patterns, short_patterns, log_obj, res)
 
     -- Check if we have enough data or go to long patterns
@@ -273,13 +293,13 @@ exports.detect = function(input, log_obj)
       input:span(inplen - exports.chunk_size, exports.chunk_size)
       local offset1, offset2 = 0, inplen - exports.chunk_size
 
-      match_chunk(chunk1, inplen,
+      match_chunk(chunk1, input, inplen,
           offset1, compiled_patterns, processed_patterns, log_obj, res)
-      match_chunk(chunk2, inplen,
+      match_chunk(chunk2, input, inplen,
           offset2, compiled_patterns, processed_patterns, log_obj, res)
     else
       -- Input is short enough to match it at all
-      match_chunk(input, inplen, 0,
+      match_chunk(input, input, inplen, 0,
           compiled_patterns, processed_patterns, log_obj, res)
     end
   else


More information about the Commits mailing list