commit 2ded1a6: [Rules] Improve html images detection logic

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Feb 21 12:07:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-02-21 12:01:37 +0000
URL: https://github.com/rspamd/rspamd/commit/2ded1a6c4556a6ee20620df1416ca53253cdc773 (HEAD -> master)

[Rules] Improve html images detection logic

---
 rules/html.lua | 45 +++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/rules/html.lua b/rules/html.lua
index da4ef1d13..fb32e9179 100644
--- a/rules/html.lua
+++ b/rules/html.lua
@@ -23,6 +23,20 @@ reconf['MIME_HTML_ONLY'] = {
   group = 'headers'
 }
 
+-- Walk up the chain of enclosing tags, starting from the parent of `tag`,
+-- and report whether any of them has the type 'a'.
+local function has_anchor_parent(tag)
+  local node = tag:get_parent()
+  while node do
+    if node:get_type() == 'a' then
+      return true
+    end
+    node = node:get_parent()
+  end
+
+  return false
+end
+
 local function check_html_image(task, min, max)
   local tp = task:get_text_parts()
 
@@ -38,13 +52,10 @@ local function check_html_image(task, min, max)
           for _,i in ipairs(images) do
             local tag = i['tag']
             if tag then
-              local parent = tag:get_parent()
-              if parent then
-                if parent:get_type() == 'a' then
-                  -- do not trigger on small and unknown size images
-                  if i['height'] + i['width'] >= 210 or not i['embedded'] then
-                    return true
-                  end
+              if has_anchor_parent(tag) then
+                -- do not trigger on small and unknown size images
+                if i['height'] + i['width'] >= 210 or not i['embedded'] then
+                  return true
                 end
               end
             end
@@ -81,6 +92,7 @@ rspamd_config.HTML_SHORT_LINK_IMG_3 = {
   group = 'html',
   description = 'Short html part (1.5K..2K) with a link to an image'
 }
+
 rspamd_config.R_EMPTY_IMAGE = {
   callback = function(task)
     local tp = task:get_text_parts() -- get text parts in a message
@@ -98,11 +110,8 @@ rspamd_config.R_EMPTY_IMAGE = {
               if i['height'] + i['width'] >= 400 then -- if we have a large image
                 local tag = i['tag']
                 if tag then
-                  local parent = tag:get_parent()
-                  if parent then
-                    if parent:get_type() ~= 'a' then
-                      return true
-                    end
+                  if not has_anchor_parent(tag) then
+                    return true
                   end
                 end
               end
@@ -136,14 +145,10 @@ rspamd_config.R_SUSPICIOUS_IMAGES = {
             local tag = i['tag']
 
             if tag then
-              local parent = tag:get_parent()
-              if parent then
-                if parent:get_type() == 'a' then
-                  -- do not trigger on small and large images
-                  if dim > 100 and dim < 3000 then
-                    -- We assume that a single picture 100x200 contains approx 3 words of text
-                    pic_words = pic_words + dim / 100
-                  end
+              if has_anchor_parent(tag) then
+                if dim > 100 and dim < 3000 then
+                  -- We assume that a single picture 100x200 contains approx 3 words of text
+                  pic_words = pic_words + dim / 100
                 end
               end
             end


More information about the Commits mailing list