commit 2fd8ae4: [Rework] Do not lowercase all data send to ClickHouse
Anton Yuzhaninov
citrin+github at citrin.ru
Wed Jun 26 10:49:05 UTC 2019
Author: Anton Yuzhaninov
Date: 2019-06-26 11:25:40 +0100
URL: https://github.com/rspamd/rspamd/commit/2fd8ae45023bc225bdb2970581452a9c700555db (refs/pull/2939/head)
[Rework] Do not lowercase all data send to ClickHouse
A lot of strings stored in ClickHouse are case sensitive according to
standards - store them in their original case. We can always use
'lower(field)' in a ClickHouse query, but if a string was lowercased by
Rspamd, nothing can be done to recover the lost information.
Lowercase domain parts of addresses - domains are not case sensitive and
storing them in lower case will simplify queries.
---
lualib/lua_clickhouse.lua | 4 ++--
src/plugins/lua/clickhouse.lua | 12 +++++++-----
2 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/lualib/lua_clickhouse.lua b/lualib/lua_clickhouse.lua
index ad5b51dce..4a57afd3f 100644
--- a/lualib/lua_clickhouse.lua
+++ b/lualib/lua_clickhouse.lua
@@ -49,7 +49,7 @@ local function clickhouse_quote(str)
['\\'] = [[\\]],
['\n'] = [[\n]],
['\t'] = [[\t]],
- }):lower()
+ })
end
return ''
@@ -503,4 +503,4 @@ exports.generic_sync = function (upstream, settings, params, query)
end
end
-return exports
\ No newline at end of file
+return exports
diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 9c8f7b631..f62bda2c6 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -426,7 +426,7 @@ local function clickhouse_collect(task)
local from = task:get_from('smtp')[1]
if from then
- from_domain = from['domain']
+ from_domain = from['domain']:lower()
from_user = from['user']
end
@@ -446,15 +446,17 @@ local function clickhouse_collect(task)
if task:has_from('mime') then
local from = task:get_from({'mime','orig'})[1]
if from then
- mime_domain = from['domain']
+ mime_domain = from['domain']:lower()
mime_user = from['user']
end
end
local mime_rcpt = {}
if task:has_recipients('mime') then
- local from = task:get_recipients({'mime','orig'})
- mime_rcpt = fun.totable(fun.map(function (f) return f.addr or '' end, from))
+ local recipients = task:get_recipients({'mime','orig'})
+ for _, rcpt in ipairs(recipients) do
+ table.insert(mime_rcpt, rcpt['user'] .. '@' .. rcpt['domain']:lower())
+ end
end
local ip_str = 'undefined'
@@ -474,7 +476,7 @@ local function clickhouse_collect(task)
if task:has_recipients('smtp') then
local rcpt = task:get_recipients('smtp')[1]
rcpt_user = rcpt['user']
- rcpt_domain = rcpt['domain']
+ rcpt_domain = rcpt['domain']:lower()
end
local list_id = task:get_header('List-Id') or ''
More information about the Commits
mailing list