commit 2fd8ae4: [Rework] Do not lowercase all data sent to ClickHouse

Anton Yuzhaninov citrin+github at citrin.ru
Wed Jun 26 10:49:05 UTC 2019


Author: Anton Yuzhaninov
Date: 2019-06-26 11:25:40 +0100
URL: https://github.com/rspamd/rspamd/commit/2fd8ae45023bc225bdb2970581452a9c700555db (refs/pull/2939/head)

[Rework] Do not lowercase all data sent to ClickHouse
A lot of strings stored in ClickHouse are case sensitive according to
standards - store them in their original case. We can always use
'lower(field)' in a ClickHouse query, but if a string was lowercased by
Rspamd, nothing can be done to recover the lost information.

Lowercase domain parts of addresses - domains are not case sensitive and
storing them in lower case will simplify queries.

---
 lualib/lua_clickhouse.lua      |  4 ++--
 src/plugins/lua/clickhouse.lua | 12 +++++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/lualib/lua_clickhouse.lua b/lualib/lua_clickhouse.lua
index ad5b51dce..4a57afd3f 100644
--- a/lualib/lua_clickhouse.lua
+++ b/lualib/lua_clickhouse.lua
@@ -49,7 +49,7 @@ local function clickhouse_quote(str)
       ['\\'] = [[\\]],
       ['\n'] = [[\n]],
       ['\t'] = [[\t]],
-    }):lower()
+    })
   end
 
   return ''
@@ -503,4 +503,4 @@ exports.generic_sync = function (upstream, settings, params, query)
   end
 end
 
-return exports
\ No newline at end of file
+return exports
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 9c8f7b631..f62bda2c6 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -426,7 +426,7 @@ local function clickhouse_collect(task)
     local from = task:get_from('smtp')[1]
 
     if from then
-      from_domain = from['domain']
+      from_domain = from['domain']:lower()
       from_user = from['user']
     end
 
@@ -446,15 +446,17 @@ local function clickhouse_collect(task)
   if task:has_from('mime') then
     local from = task:get_from({'mime','orig'})[1]
     if from then
-      mime_domain = from['domain']
+      mime_domain = from['domain']:lower()
       mime_user = from['user']
     end
   end
 
   local mime_rcpt = {}
   if task:has_recipients('mime') then
-    local from = task:get_recipients({'mime','orig'})
-    mime_rcpt = fun.totable(fun.map(function (f) return f.addr or '' end, from))
+    local recipients = task:get_recipients({'mime','orig'})
+    for _, rcpt in ipairs(recipients) do
+      table.insert(mime_rcpt, rcpt['user'] .. '@' .. rcpt['domain']:lower())
+    end
   end
 
   local ip_str = 'undefined'
@@ -474,7 +476,7 @@ local function clickhouse_collect(task)
   if task:has_recipients('smtp') then
     local rcpt = task:get_recipients('smtp')[1]
     rcpt_user = rcpt['user']
-    rcpt_domain = rcpt['domain']
+    rcpt_domain = rcpt['domain']:lower()
   end
 
   local list_id = task:get_header('List-Id') or ''


More information about the Commits mailing list