commit c3d9d44: [Minor] Allow filtering of redirected URLs

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Aug 19 15:56:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-08-19 14:35:36 +0100
URL: https://github.com/rspamd/rspamd/commit/c3d9d449aadb4cd6853218b10de458c3227372bb

[Minor] Allow filtering of redirected URLs

---
 lualib/lua_util.lua | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index fe3d0d5e4..682a33bf5 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -671,9 +671,19 @@ exports.filter_specific_urls = function (urls, params)
 
   local res = {}
 
-  for _,u in ipairs(urls) do
+  local function process_single_url(u)
     local esld = u:get_tld()
 
+    if params.ignore_redirected and u:is_redirected() then
+      local redir = u:get_redirected() -- get the real url
+      local redir_tld = redir:get_tld()
+
+      if redir_tld then
+        -- Ignore redirected as it should also be in the hash
+        return
+      end
+    end
+
     if esld then
       if not eslds[esld] then
         eslds[esld] = {u}
@@ -709,6 +719,10 @@ exports.filter_specific_urls = function (urls, params)
     end
   end
 
+  for _,u in ipairs(urls) do
+    process_single_url(u)
+  end
+
   local limit = params.limit
   limit = limit - #res
   if limit <= 0 then limit = 1 end
@@ -788,6 +802,7 @@ end
 - - need_emails <bool> (default = false)
 - - filter <callback> (default = nil)
 - - prefix <string> cache prefix (default = nil)
+- - ignore_redirected <bool> (default = false)
 -- }
 -- Apply heuristic in extracting of urls from task, this function
 -- tries its best to extract specific number of urls from a task based on
@@ -800,7 +815,8 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
     esld_limit = 9999,
     need_emails = false,
     filter = nil,
-    prefix = nil
+    prefix = nil,
+    ignore_redirected = false,
   }
 
   local params


More information about the Commits mailing list