commit 890c565: [Minor] Optimize task:get_urls calls

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Jul 17 11:28:05 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-07-17 12:11:06 +0100
URL: https://github.com/rspamd/rspamd/commit/890c5658b9551733f9f12ed8504346ead63a1af3 (HEAD -> master)

[Minor] Optimize task:get_urls calls

---
 src/plugins/lua/clickhouse.lua | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index fb4db3fc9..200fcaeca 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -633,9 +633,9 @@ local function clickhouse_collect(task)
   end
 
   local nurls = 0
-  if task:has_urls(true) then
-    nurls = #task:get_urls(true)
-  end
+  local task_urls = task:get_urls(true) or {}
+
+  nurls = #task_urls
 
   local timestamp = math.floor(task:get_date({
     format = 'connect',
@@ -757,27 +757,22 @@ local function clickhouse_collect(task)
 
   -- Urls step
   local urls_urls = {}
-  if task:has_urls(false) then
 
-    for _,u in ipairs(task:get_urls(false)) do
-      if settings['full_urls'] then
-        urls_urls[u:get_text()] = u
-      else
-        urls_urls[u:get_host()] = u
-      end
+  for _,u in ipairs(task_urls) do
+    if settings['full_urls'] then
+      urls_urls[u:get_text()] = u
+    else
+      urls_urls[u:get_host()] = u
     end
-
-    -- Get tlds
-    table.insert(row, flatten_urls(function(_, u)
-      return u:get_tld() or u:get_host()
-    end, urls_urls))
-    -- Get hosts/full urls
-    table.insert(row, flatten_urls(function(k, _) return k end, urls_urls))
-  else
-    table.insert(row, {})
-    table.insert(row, {})
   end
 
+  -- Get tlds
+  table.insert(row, flatten_urls(function(_, u)
+    return u:get_tld() or u:get_host()
+  end, urls_urls))
+  -- Get hosts/full urls
+  table.insert(row, flatten_urls(function(k, _) return k end, urls_urls))
+
   -- Emails step
   if task:has_urls(true) then
     table.insert(row, flatten_urls(function(k, _) return k end,


More information about the Commits mailing list