commit e2d4415: [Rules] Fix HTTP_TO_HTTPS rule

Vsevolod Stakhov vsevolod at highsecure.ru
Sat Mar 13 11:07:05 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-03-13 10:43:10 +0000
URL: https://github.com/rspamd/rspamd/commit/e2d44157b31eff46aeb1645b73e5b576b5afa897

[Rules] Fix HTTP_TO_HTTPS rule

---
 rules/html.lua | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/rules/html.lua b/rules/html.lua
index 1891cfefa..5430f49b7 100644
--- a/rules/html.lua
+++ b/rules/html.lua
@@ -15,6 +15,8 @@
 
 local reconf = config['regexp']
 
+local rspamd_regexp = require "rspamd_regexp"
+
 -- Messages that have only HTML part
 reconf['MIME_HTML_ONLY'] = {
   re = 'has_only_html_part()',
@@ -368,34 +370,44 @@ rspamd_config.EXT_CSS = {
 
 rspamd_config.HTTP_TO_HTTPS = {
   callback = function(task)
-    local tp = task:get_text_parts()
-    if (not tp) then return false end
+    local http_re = rspamd_regexp.create_cached('/^http[^s]/i')
+    local https_re = rspamd_regexp.create_cached('/^https:/i')
+    local found_opts
+    local tp = task:get_text_parts() or {}
+
     for _,p in ipairs(tp) do
       if p:is_html() then
         local hc = p:get_html()
         if (not hc) then return false end
+
         local found = false
+
         hc:foreach_tag('a', function (tag, length)
           -- Skip this loop if we already have a match
           if (found) then return true end
+
           local c = tag:get_content()
           if (c) then
-            c = tostring(c):lower()
-            if (not c:match('^http')) then return false end
+            if (not http_re:match(c)) then return false end
+
             local u = tag:get_extra()
             if (not u) then return false end
-            u = tostring(u):lower()
-            if (not u:match('^http')) then return false end
-            if ((c:match('^http:') and u:match('^https:')) or
-                (c:match('^https:') and u:match('^http:')))
-            then
+            local url_proto = u:get_protocol()
+            if (not url_proto:match('^http')) then return false end
+            -- Capture matches for http in href to https in visible part only
+            if ((https_re:match(c) and url_proto == 'http')) then
               found = true
+              found_opts = u:get_host()
               return true
             end
           end
           return false
         end)
-        if (found) then return true end
+
+        if (found) then
+          return true,1.0,found_opts
+        end
+
         return false
       end
     end


More information about the Commits mailing list