commit f4d5f9c: [Minor] Lua_magic: Restore the UTF-8 check, as it is useful in many cases

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Apr 22 13:28:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-04-22 14:22:52 +0100
URL: https://github.com/rspamd/rspamd/commit/f4d5f9c4e45952d37d3aa2549f8a8cd133d09181 (HEAD -> master)

[Minor] Lua_magic: Restore the UTF-8 check, as it is useful in many cases

---
 lualib/lua_magic/heuristics.lua | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index aa8e9e819..66e186906 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -334,6 +334,21 @@ exports.text_part_heuristic = function(part, log_obj, _)
       local n8bit = 0
 
       while b >= 127 and n8bit < remain do
+        -- utf8 part
+        if bit.band(b, 0xe0) == 0xc0 and remain > 1 and
+                bit.band(bytes[idx + 1], 0xc0) == 0x80 then
+          return true,1
+        elseif bit.band(b, 0xf0) == 0xe0 and remain > 2 and
+                bit.band(bytes[idx + 1], 0xc0) == 0x80 and
+                bit.band(bytes[idx + 2], 0xc0) == 0x80 then
+          return true,2
+        elseif bit.band(b, 0xf8) == 0xf0 and remain > 3 and
+                bit.band(bytes[idx + 1], 0xc0) == 0x80 and
+                bit.band(bytes[idx + 2], 0xc0) == 0x80 and
+                bit.band(bytes[idx + 3], 0xc0) == 0x80 then
+          return true,3
+        end
+
         n8bit = n8bit + 1
         idx = idx + 1
         b = bytes[idx]


More information about the Commits mailing list