commit 786faec: [Project] Lua_magic: Implement chunks based scan

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Sep 6 17:49:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-09-06 12:28:39 +0100
URL: https://github.com/rspamd/rspamd/commit/786faec3794563dd8a1fb503695d50797cc2bffa

[Project] Lua_magic: Implement chunks based scan

---
 lualib/lua_magic/init.lua | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/lualib/lua_magic/init.lua b/lualib/lua_magic/init.lua
index 464a10d0a..e8629eeda 100644
--- a/lualib/lua_magic/init.lua
+++ b/lualib/lua_magic/init.lua
@@ -24,6 +24,7 @@ local types = require "lua_magic/types"
 local fun = require "fun"
 local lua_util = require "lua_util"
 
+local rspamd_text = require "rspamd_text"
 local rspamd_trie = require "rspamd_trie"
 
 local N = "lua_magic"
@@ -55,9 +56,7 @@ local function process_patterns()
   end
 end
 
-exports.detect = function(input, log_obj)
-  process_patterns()
-  local res = {}
+local function match_chunk(input, offset, log_obj, res)
   local matches = compiled_patterns:match(input)
 
   if not log_obj then log_obj = rspamd_config end
@@ -106,7 +105,7 @@ exports.detect = function(input, log_obj)
       local position = match.position
 
       for _,pos in ipairs(matched_positions) do
-        if match_position(pos, position) then
+        if match_position(pos + offset, position) then
           add_result(match, pattern)
         end
       end
@@ -122,6 +121,30 @@ exports.detect = function(input, log_obj)
       end
     end
   end
+end
+exports.detect = function(input, log_obj)
+  process_patterns()
+  local res = {}
+
+  if type(input) == 'string' then
+    -- Convert to rspamd_text
+    input = rspamd_text.fromstring(input)
+  end
+
+  if type(input) == 'userdata' and #input > exports.chunk_size * 3 then
+    -- Split by chunks
+    local chunk1, chunk2, chunk3 =
+    input:span(1, exports.chunk_size),
+    input:span(exports.chunk_size, exports.chunk_size),
+    input:span(#input - exports.chunk_size, exports.chunk_size)
+    local offset1, offset2, offset3 = 0, exports.chunk_size, #input - exports.chunk_size
+
+    match_chunk(chunk1, offset1, log_obj, res)
+    match_chunk(chunk2, offset2, log_obj, res)
+    match_chunk(chunk3, offset3, log_obj, res)
+  else
+    match_chunk(input, 0, log_obj, res)
+  end
 
   local extensions = lua_util.keys(res)
 
@@ -137,4 +160,8 @@ exports.detect = function(input, log_obj)
   return nil
 end
 
+-- This parameter specifies the size, in bytes, of each chunk checked in the input.
+-- For inputs longer than 3 chunks, Rspamd checks 2 chunks at the start and 1 chunk at the end
+exports.chunk_size = 16384
+
 return exports
\ No newline at end of file


More information about the Commits mailing list