commit 0d9c79e: [Minor] rspamadm clickhouse: batching; query parameters
Andrew Lewis
nerf at judo.za.org
Mon Nov 30 16:56:07 UTC 2020
Author: Andrew Lewis
Date: 2020-11-30 17:15:39 +0200
URL: https://github.com/rspamd/rspamd/commit/0d9c79e1f8a8edda83d3efabacf7d87fc5c497b3 (refs/pull/3564/head)
[Minor] rspamadm clickhouse: batching; query parameters
---
lualib/rspamadm/clickhouse.lua | 61 ++++++++++++++++++++++++++++++++----------
1 file changed, 47 insertions(+), 14 deletions(-)
diff --git a/lualib/rspamadm/clickhouse.lua b/lualib/rspamadm/clickhouse.lua
index 4388b8ce0..2ca4eab18 100644
--- a/lualib/rspamadm/clickhouse.lua
+++ b/lualib/rspamadm/clickhouse.lua
@@ -65,9 +65,20 @@ local neural_profile = parser:command 'neural_profile'
neural_profile:option '-w --where'
:description 'WHERE clause for Clickhouse query'
:argname('where')
-parser:flag '-j --json'
+neural_profile:flag '-j --json'
:description 'Write output as JSON'
:argname('json')
+neural_profile:option '--days'
+ :description 'Number of days to collect stats for'
+ :argname('days')
+ :default('7')
+neural_profile:option '--limit -l'
+ :description 'Maximum rows to fetch per day'
+ :argname('limit')
+neural_profile:option '--settings-id'
+ :description 'Settings ID to query'
+ :argname('settings_id')
+ :default('')
local http_params = {
config = rspamd_config,
@@ -130,15 +141,9 @@ local function get_excluded_symbols(known_symbols, correlations, seen_total)
end
local function handle_neural_profile(args)
- if args.where then
- args.where = 'WHERE ' .. args.where
- end
- local query = string.format(
- "SELECT Action, Symbols.Names FROM rspamd %s", args.where or '')
- local upstream = args.upstream:get_upstream_round_robin()
- local known_symbols = {}
- local symbols_count, seen_total = 1, 0
- local correlations = {}
+
+ local known_symbols, correlations = {}, {}
+ local symbols_count, seen_total = 0, 0
local function process_row(r)
local is_spam = true
@@ -197,10 +202,38 @@ local function handle_neural_profile(args)
end
end
- local err, _ = lua_clickhouse.select_sync(upstream, args, http_params, query, process_row)
- if err ~= nil then
- io.stderr:write(string.format('Error querying Clickhouse: %s\n', err))
- os.exit(1)
+ -- Create list of days to query starting with yesterday
+ local query_days = {}
+ local previous_date = os.time() - 86400
+ local num_days = tonumber(args.days)
+ for _ = 1, num_days do
+ table.insert(query_days, os.date('%Y-%m-%d', previous_date))
+ previous_date = previous_date - 86400
+ end
+
+ local conditions = {}
+ table.insert(conditions, string.format("SettingsId = '%s'", args.settings_id))
+ local limit = ''
+ local num_limit = tonumber(args.limit)
+ if num_limit then
+ limit = string.format(' LIMIT %d', num_limit) -- Contains leading space
+ end
+ if args.where then
+ table.insert(conditions, args.where)
+ end
+
+ local query_fmt = 'SELECT Action, Symbols.Names FROM rspamd WHERE %s%s'
+ for _, query_day in ipairs(query_days) do
+ -- Date should be the last condition
+ table.insert(conditions, string.format("Date = '%s'", query_day))
+ local query = string.format(query_fmt, table.concat(conditions, ' AND '), limit)
+ local upstream = args.upstream:get_upstream_round_robin()
+ local err = lua_clickhouse.select_sync(upstream, args, http_params, query, process_row)
+ if err ~= nil then
+ io.stderr:write(string.format('Error querying Clickhouse: %s\n', err))
+ os.exit(1)
+ end
+ conditions[#conditions] = nil -- remove Date condition
end
local remove = get_excluded_symbols(known_symbols, correlations, seen_total)
More information about the Commits mailing list