commit ade933c: [Feature] Clickhouse: Slashing - add new fields to CH

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Apr 15 19:28:12 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-04-15 20:18:56 +0100
URL: https://github.com/rspamd/rspamd/commit/ade933cf634f9ae003533ffc3d25ed3639f5b201 (HEAD -> master)

[Feature] Clickhouse: Slashing - add new fields to CH
* IsSpf
* MimeRecipients
* MessageId
* ScanTimeReal
* ScanTimeVirtual
* SMTPFrom - alias
* SMTPRcpt - alias
* MIMEFrom - alias
* MIMERcpt - alias

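Added new enum values to the existing DKIM and DMARC checks to distinguish different failure modes ('dnsfail' and 'na' for DKIM; 'softfail', 'na' and 'quarantine' for DMARC).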

---
 src/plugins/lua/clickhouse.lua | 149 +++++++++++++++++++++++++++++++++++------
 1 file changed, 129 insertions(+), 20 deletions(-)

diff --git a/src/plugins/lua/clickhouse.lua b/src/plugins/lua/clickhouse.lua
index 28d80de27..51cba56fd 100644
--- a/src/plugins/lua/clickhouse.lua
+++ b/src/plugins/lua/clickhouse.lua
@@ -30,7 +30,7 @@ end
 local data_rows = {}
 local custom_rows = {}
 local nrows = 0
-local schema_version = 3 -- Current schema version
+local schema_version = 4 -- Current schema version
 
 local settings = {
   limit = 1000,
@@ -42,8 +42,18 @@ local settings = {
   whitelist_symbols = {'WHITELIST_DKIM', 'WHITELIST_SPF_DKIM', 'WHITELIST_DMARC'},
   dkim_allow_symbols = {'R_DKIM_ALLOW'},
   dkim_reject_symbols = {'R_DKIM_REJECT'},
+  dkim_dnsfail_symbols = {'R_DKIM_TEMPFAIL', 'R_DKIM_PERMFAIL'},
+  dkim_na_symbol = {'R_DKIM_NA'},
   dmarc_allow_symbols = {'DMARC_POLICY_ALLOW'},
-  dmarc_reject_symbols = {'DMARC_POLICY_REJECT', 'DMARC_POLICY_QUARANTINE'},
+  dmarc_reject_symbols = {'DMARC_POLICY_REJECT'},
+  dmarc_quarantine_symbols = {'DMARC_POLICY_QUARANTINE'},
+  dmarc_softfail_symbols = {'DMARC_POLICY_SOFTFAIL'},
+  dmarc_dnsfail_symbols = {'DMARC_FAIL'},
+  spf_allow_symbols = {'R_SPF_ALLOW'},
+  spf_reject_symbols = {'R_SPF_FAIL'},
+  spf_dnsfail_symbols = {'R_SPF_DNSFAIL', 'R_SPF_PERMFAIL'},
+  spf_neutral_symbols = {'R_DKIM_TEMPFAIL', 'R_DKIM_PERMFAIL'},
+  spf_na_symbol = {'R_SPF_NA'},
   stop_symbols = {},
   ipmask = 19,
   ipmask6 = 48,
@@ -84,18 +94,21 @@ CREATE TABLE rspamd
     Score Float64,
     NRcpt UInt8,
     Size UInt32,
-    IsWhitelist Enum8('blacklist' = 0, 'whitelist' = 1, 'unknown' = 2) DEFAULT CAST('unknown' AS Enum8('blacklist' = 0, 'whitelist' = 1, 'unknown' = 2)),
-    IsBayes Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2) DEFAULT CAST('unknown' AS Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2)),
-    IsFuzzy Enum8('whitelist' = 0, 'deny' = 1, 'unknown' = 2) DEFAULT CAST('unknown' AS Enum8('whitelist' = 0, 'deny' = 1, 'unknown' = 2)),
-    IsFann Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2) DEFAULT CAST('unknown' AS Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2)),
-    IsDkim Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2) DEFAULT CAST('unknown' AS Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2)),
-    IsDmarc Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2) DEFAULT CAST('unknown' AS Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2)),
+    IsWhitelist Enum8('blacklist' = 0, 'whitelist' = 1, 'unknown' = 2) DEFAULT 'unknown',
+    IsBayes Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2) DEFAULT 'unknown',
+    IsFuzzy Enum8('whitelist' = 0, 'deny' = 1, 'unknown' = 2) DEFAULT 'unknown',
+    IsFann Enum8('ham' = 0, 'spam' = 1, 'unknown' = 2) DEFAULT 'unknown',
+    IsDkim Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2, 'dnsfail' = 3, 'na' = 4) DEFAULT 'unknown',
+    IsDmarc Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2, 'softfail' = 3, 'na' = 4, 'quarantine' = 5) DEFAULT 'unknown',
+    IsSpf Enum8('reject' = 0, 'allow' = 1, 'neutral' = 2, 'dnsfail' = 3, 'na' = 4, 'unknown' = 5) DEFAULT 'unknown',
     NUrls Int32,
-    Action Enum8('reject' = 0, 'rewrite subject' = 1, 'add header' = 2, 'greylist' = 3, 'no action' = 4, 'soft reject' = 5) DEFAULT CAST('no action' AS Enum8('reject' = 0, 'rewrite subject' = 1, 'add header' = 2, 'greylist' = 3, 'no action' = 4, 'soft reject' = 5)),
+    Action Enum8('reject' = 0, 'rewrite subject' = 1, 'add header' = 2, 'greylist' = 3, 'no action' = 4, 'soft reject' = 5) DEFAULT 'no action',
     FromUser String,
     MimeUser String,
     RcptUser String,
     RcptDomain String,
+    MimeRecipients Array(String),
+    MessageId String,
     ListId String,
     Subject String,
     `Attachments.FileName` Array(String),
@@ -111,7 +124,13 @@ CREATE TABLE rspamd
     `Symbols.Names` Array(String),
     `Symbols.Scores` Array(Float64),
     `Symbols.Options` Array(String),
-    Digest FixedString(32)
+    ScanTimeReal UInt32,
+    ScanTimeVirtual UInt32,
+    Digest FixedString(32),
+    SMTPFrom ALIAS if(From = '', '', concat(FromUser, '@', From)),
+    SMTPRcpt ALIAS if(RcptDomain = '', '', concat(RcptUser, '@', RcptDomain)),
+    MIMEFrom ALIAS if(MimeFrom = '', '', concat(MimeUser, '@', MimeFrom)),
+    MIMERcpt ALIAS MimeRecipients[1]
 ) ENGINE = MergeTree(Date, (TS, From), 8192)
 ]],
 [[CREATE TABLE rspamd_version ( Version UInt32) ENGINE = TinyLog]],
@@ -146,7 +165,26 @@ local migrations = {
       ADD COLUMN Subject String AFTER ListId]],
     -- New version
     [[INSERT INTO rspamd_version (Version) Values (3)]],
-  }
+  },
+  [3] = {
+    [[ALTER TABLE rspamd
+      ADD COLUMN IsSpf Enum8('reject' = 0, 'allow' = 1, 'neutral' = 2, 'dnsfail' = 3, 'na' = 4, 'unknown' = 5) DEFAULT 'unknown' AFTER IsDmarc,
+      MODIFY COLUMN IsDkim Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2, 'dnsfail' = 3, 'na' = 4) DEFAULT 'unknown',
+      MODIFY COLUMN IsDmarc Enum8('reject' = 0, 'allow' = 1, 'unknown' = 2, 'softfail' = 3, 'na' = 4, 'quarantine' = 5) DEFAULT 'unknown',
+      ADD COLUMN MimeRecipients Array(String) AFTER RcptDomain,
+      ADD COLUMN MessageId String AFTER MimeRecipients,
+      ADD COLUMN ScanTimeReal UInt32 AFTER `Symbols.Options`,
+      ADD COLUMN ScanTimeVirtual UInt32 AFTER ScanTimeReal]],
+    -- Add aliases
+    [[ALTER TABLE rspamd
+      ADD COLUMN SMTPFrom ALIAS if(From = '', '', concat(FromUser, '@', From)),
+      ADD COLUMN SMTPRcpt ALIAS if(RcptDomain = '', '', concat(RcptUser, '@', RcptDomain)),
+      ADD COLUMN MIMEFrom ALIAS if(MimeFrom = '', '', concat(MimeUser, '@', MimeFrom)),
+      ADD COLUMN MIMERcpt ALIAS MimeRecipients[1]
+    ]],
+    -- New version
+    [[INSERT INTO rspamd_version (Version) Values (4)]],
+  },
 }
 
 
@@ -175,6 +213,12 @@ local function clickhouse_main_row(res)
     'ListId',
     'Subject',
     'Digest',
+    -- 1.9.2 +
+    'IsSpf',
+    'MimeRecipients',
+    'MessageId',
+    'ScanTimeReal',
+    'ScanTimeVirtual',
   }
 
   for _,v in ipairs(fields) do table.insert(res, v) end
@@ -347,6 +391,12 @@ local function clickhouse_collect(task)
     end
   end
 
+  local mime_rcpt = {}
+  if task:has_recipients('mime') then
+    local from = task:get_recipients({'mime','orig'})
+    mime_rcpt = fun.totable(fun.map(function (f) return f.addr or '' end, from))
+  end
+
   local ip_str = 'undefined'
   local ip = task:get_from_ip()
   if ip and ip:is_valid() then
@@ -367,11 +417,8 @@ local function clickhouse_collect(task)
     rcpt_domain = rcpt['domain']
   end
 
-  local list_id = ''
-  local lh = task:get_header('List-Id')
-  if lh then
-    list_id = lh
-  end
+  local list_id = task:get_header('List-Id') or ''
+  local message_id = task:get_message_id() or ''
 
   local score = task:get_metric_score('default')[1];
   local bayes = 'unknown';
@@ -380,6 +427,7 @@ local function clickhouse_collect(task)
   local whitelist = 'unknown';
   local dkim = 'unknown';
   local dmarc = 'unknown';
+  local spf = 'unknown'
 
   local ret
 
@@ -388,7 +436,7 @@ local function clickhouse_collect(task)
     bayes = 'spam'
   end
 
-  ret = clickhouse_check_symbol(task, settings['bayes_ham_symbols'], false)
+  ret = ret or clickhouse_check_symbol(task, settings['bayes_ham_symbols'], false)
   if ret then
     bayes = 'ham'
   end
@@ -422,21 +470,74 @@ local function clickhouse_collect(task)
     dkim = 'allow'
   end
 
-  ret = clickhouse_check_symbol(task, settings['dkim_reject_symbols'], false)
+  ret = ret or
+      clickhouse_check_symbol(task, settings['dkim_reject_symbols'], false)
   if ret then
     dkim = 'reject'
   end
 
+  ret = ret or
+      clickhouse_check_symbol(task, settings.dkim_dnsfail_symbols, false)
+  if ret then
+    dkim = 'dnsfail'
+  end
+
+  ret = ret or
+      clickhouse_check_symbol(task, settings.dkim_na_symbols, false)
+  if ret then
+    dkim = 'na'
+  end
+
   ret = clickhouse_check_symbol(task, settings['dmarc_allow_symbols'], false)
   if ret then
     dmarc = 'allow'
   end
 
-  ret = clickhouse_check_symbol(task, settings['dmarc_reject_symbols'], false)
+  ret = ret or clickhouse_check_symbol(task, settings['dmarc_reject_symbols'], false)
   if ret then
     dmarc = 'reject'
   end
 
+  ret = ret or clickhouse_check_symbol(task, settings.dmarc_quarantine_symbols, false)
+  if ret then
+    dmarc = 'quarantine'
+  end
+
+  ret = ret or clickhouse_check_symbol(task, settings.dmarc_softfail_symbols, false)
+  if ret then
+    dmarc = 'softfail'
+  end
+
+  ret = ret or clickhouse_check_symbol(task, settings.dmarc_dnsfail_symbols, false)
+  if ret then
+    dmarc = 'dnsfail'
+  end
+
+  ret = clickhouse_check_symbol(task, settings.spf_allow_symbols, false)
+  if ret then
+    spf = 'allow'
+  end
+
+  ret = ret or clickhouse_check_symbol(task, settings.spf_reject_symbols, false)
+  if ret then
+    spf = 'reject'
+  end
+
+  ret = ret or clickhouse_check_symbol(task, settings.spf_neutral_symbols, false)
+  if ret then
+    spf = 'neutral'
+  end
+
+  ret = ret or clickhouse_check_symbol(task, settings.spf_dnsfail_symbols, false)
+  if ret then
+    spf = 'dnsfail'
+  end
+
+  ret = ret or clickhouse_check_symbol(task, settings.spf_na_symbols, false)
+  if ret then
+    spf = 'na'
+  end
+
   local nrcpts = 0
   if task:has_recipients('smtp') then
     nrcpts = #task:get_recipients('smtp')
@@ -460,6 +561,9 @@ local function clickhouse_collect(task)
     subject = lua_util.maybe_obfuscate_subject(task:get_subject() or '', settings)
   end
 
+  local scan_real,scan_virtual = task:get_scan_time()
+  scan_real,scan_virtual = math.floor(scan_real * 1000), math.floor(scan_virtual * 1000)
+
   local row = {
     today(timestamp),
     timestamp,
@@ -483,7 +587,12 @@ local function clickhouse_collect(task)
     rcpt_domain,
     list_id,
     subject,
-    digest
+    digest,
+    spf,
+    mime_rcpt,
+    message_id,
+    scan_real,
+    scan_virtual
   }
 
   -- Attachments step


More information about the Commits mailing list