commit aa0cf9e: [Feature] Lua_util: Add filter_specific_url function

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Aug 19 15:56:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-08-19 14:28:35 +0100
URL: https://github.com/rspamd/rspamd/commit/aa0cf9e2c140ea73b849482fc1c1e76f4d4bb901

[Feature] Lua_util: Add filter_specific_url function

---
 lualib/lua_util.lua | 104 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 39 deletions(-)

diff --git a/lualib/lua_util.lua b/lualib/lua_util.lua
index b8420c7a4..fe3d0d5e4 100644
--- a/lualib/lua_util.lua
+++ b/lualib/lua_util.lua
@@ -619,9 +619,9 @@ end
 exports.override_defaults = override_defaults
 
 --[[[
--- @function lua_util.extract_specific_urls(params)
+-- @function lua_util.filter_specific_urls(urls, params)
 -- params: {
-- - task
+- - task - optional, only needed to cache the result
 - - limit <int> (default = 9999)
 - - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
                                       works only if number of unique eSLD less than `limit`
@@ -629,62 +629,38 @@ exports.override_defaults = override_defaults
 - - filter <callback> (default = nil)
 - - prefix <string> cache prefix (default = nil)
 -- }
--- Apply heuristic in extracting of urls from task, this function
+-- Apply a heuristic when extracting urls from the `urls` table; this function
 -- tries its best to extract specific number of urls from a task based on
 -- their characteristics
 --]]
--- exports.extract_specific_urls = function(params_or_task, limit, need_emails, filter, prefix)
-exports.extract_specific_urls = function(params_or_task, lim, need_emails, filter, prefix)
-  local default_params = {
-    limit = 9999,
-    esld_limit = 9999,
-    need_emails = false,
-    filter = nil,
-    prefix = nil
-  }
-
-  local params
-  if type(params_or_task) == 'table' and type(lim) == 'nil' then
-    params = params_or_task
-  else
-    -- Deprecated call
-    params = {
-      task = params_or_task,
-      limit = lim,
-      need_emails = need_emails,
-      filter = filter,
-      prefix = prefix
-    }
-  end
-  for k,v in pairs(default_params) do
-    if not params[k] then params[k] = v end
-  end
-
-
+exports.filter_specific_urls = function (urls, params)
   local cache_key
 
   if params.prefix then
     cache_key = params.prefix
   else
     cache_key = string.format('sp_urls_%d%s', params.limit,
-        tostring(params.need_emails))
+        tostring(params.need_emails or false))
   end
 
+  if params.task then
+    local cached = params.task:cache_get(cache_key)
 
-  local cached = params.task:cache_get(cache_key)
+    if cached then
+      return cached
+    end
 
-  if cached then
-    return cached
   end
 
-  local urls = params.task:get_urls(params.need_emails)
-
   if not urls then return {} end
 
   if params.filter then urls = fun.totable(fun.filter(params.filter, urls)) end
 
   if #urls <= params.limit and #urls <= params.esld_limit then
-    params.task:cache_set(cache_key, urls)
+    if params.task then
+      params.task:cache_set(cache_key, urls)
+    end
+
     return urls
   end
 
@@ -795,10 +771,60 @@ exports.extract_specific_urls = function(params_or_task, lim, need_emails, filte
     end
   end
 
-  params.task:cache_set(cache_key, urls)
+  if params.task then
+    params.task:cache_set(cache_key, urls)
+  end
+
   return res
 end
 
+--[[[
+-- @function lua_util.extract_specific_urls(params)
+-- params: {
+- - task
+- - limit <int> (default = 9999)
+- - esld_limit <int> (default = 9999) n domains per eSLD (effective second level domain)
+                                      works only if number of unique eSLD less than `limit`
+- - need_emails <bool> (default = false)
+- - filter <callback> (default = nil)
+- - prefix <string> cache prefix (default = nil)
+-- }
+-- Apply heuristic in extracting of urls from task, this function
+-- tries its best to extract specific number of urls from a task based on
+-- their characteristics
+--]]
+-- exports.extract_specific_urls = function(params_or_task, limit, need_emails, filter, prefix)
+exports.extract_specific_urls = function(params_or_task, lim, need_emails, filter, prefix)
+  local default_params = {
+    limit = 9999,
+    esld_limit = 9999,
+    need_emails = false,
+    filter = nil,
+    prefix = nil
+  }
+
+  local params
+  if type(params_or_task) == 'table' and type(lim) == 'nil' then
+    params = params_or_task
+  else
+    -- Deprecated call
+    params = {
+      task = params_or_task,
+      limit = lim,
+      need_emails = need_emails,
+      filter = filter,
+      prefix = prefix
+    }
+  end
+  for k,v in pairs(default_params) do
+    if not params[k] then params[k] = v end
+  end
+
+  local urls = params.task:get_urls(params.need_emails)
+
+  return exports.filter_specific_urls(urls, params)
+end
+
 --[[[
 -- @function lua_util.deepcopy(table)
 -- params: {


More information about the Commits mailing list