commit 1108444: [Minor] Pdf: Do not fire PDF_SUSPICIOUS on legit escapes

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Jan 11 12:56:07 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-01-11 12:52:28 +0000
URL: https://github.com/rspamd/rspamd/commit/11084440ff7469df2ca63c7b4f831bba992f50e7 (HEAD -> master)

[Minor] Pdf: Do not fire PDF_SUSPICIOUS on legit escapes

---
 lualib/lua_content/pdf.lua | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index 4e199b8b6..a5e23d11f 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -38,7 +38,7 @@ local pdf_patterns = {
     patterns = {
       [[netsh\s]],
       [[echo\s]],
-      [[\/[A-Za-z]*#\d\d(?:[#A-Za-z<>/\s])]], -- Hex encode obfuscation
+      [=[\/[A-Za-z]*#\d\d[#A-Za-z<>/\s]]=], -- Hex encode obfuscation
     }
   },
   start_object = {
@@ -1326,16 +1326,33 @@ processors.suspicious = function(input, task, positions, pdf_object, pdf_output)
       suspicious_factor = suspicious_factor + 0.5
     elseif match[2] == 2 then
       nexec = nexec + 1
-    else
-      nencoded = nencoded + 1
+    elseif match[2] == 3 then
+      local enc_data = input:sub(match[1] - 2, match[1] - 1)
+      local legal_escape = false
+
+      if enc_data then
+        enc_data = enc_data:strtoul()
 
-      if last_encoded then
-        if match[1] - last_encoded < 8 then
-          -- likely consecutive encoded chars, increase factor
-          close_encoded = close_encoded + 1
+        if enc_data then
+          -- Legit encode cases are non printable characters (e.g. spaces)
+          if enc_data < 0x21 or enc_data >= 0x7f then
+            legal_escape = true
+          end
         end
       end
-      last_encoded = match[1]
+
+      if not legal_escape then
+        nencoded = nencoded + 1
+
+        if last_encoded then
+          if match[1] - last_encoded < 8 then
+            -- likely consecutive encoded chars, increase factor
+            close_encoded = close_encoded + 1
+          end
+        end
+        last_encoded = match[1]
+
+      end
     end
   end
 


More information about the Commits mailing list