commit f3423dd: [Minor] Allow to get content urls in extract_specific routine

Vsevolod Stakhov vsevolod at highsecure.ru
Tue May 5 14:56:10 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-05-05 15:48:06 +0100
URL: https://github.com/rspamd/rspamd/commit/f3423dd01487a6ac22512cf874368d76d47139f8

[Minor] Allow to get content urls in extract_specific routine

---
 lualib/lua_util.lua | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index 89a4016b2..5b0950419 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -672,9 +672,10 @@ exports.filter_specific_urls = function (urls, params)
     if params.prefix then
       cache_key = params.prefix
     else
-      cache_key = string.format('sp_urls_%d%s%s', params.limit,
+      cache_key = string.format('sp_urls_%d%s%s%s', params.limit,
           tostring(params.need_emails or false),
-          tostring(params.need_images or false))
+          tostring(params.need_images or false),
+          tostring(params.need_content or false))
     end
     local cached = params.task:cache_get(cache_key)
 
@@ -879,6 +880,7 @@ end
 - - prefix <string> cache prefix (default = nil)
 - - ignore_redirected <bool> (default = false)
 - - need_images <bool> (default = false)
+- - need_content <bool> (default = false)
 -- }
 -- Apply heuristic in extracting of urls from task, this function
 -- tries its best to extract specific number of urls from a task based on
@@ -891,6 +893,7 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
     esld_limit = 9999,
     need_emails = false,
     need_images = false,
+    need_content = false,
     filter = nil,
     prefix = nil,
     ignore_ip = false,
@@ -914,8 +917,32 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
   for k,v in pairs(default_params) do
     if type(params[k]) == 'nil' and v ~= nil then params[k] = v end
   end
+  local url_params = {
+    emails = params.need_emails,
+    images = params.need_images,
+    content = params.need_content,
+  }
+
+  -- Shortcut for cached stuff
+  if params.task and not params.no_cache then
+    local cache_key
+    if params.prefix then
+      cache_key = params.prefix
+    else
+      cache_key = string.format('sp_urls_%d%s%s%s', params.limit,
+          tostring(params.need_emails or false),
+          tostring(params.need_images or false),
+          tostring(params.need_content or false))
+    end
+    local cached = params.task:cache_get(cache_key)
+
+    if cached then
+      return cached
+    end
+  end
 
-  local urls = params.task:get_urls(params.need_emails, params.need_images)
+  -- No cache version
+  local urls = params.task:get_urls(url_params)
 
   return exports.filter_specific_urls(urls, params)
 end


More information about the Commits mailing list