commit d90b056: [Project] Some rework about specific data that is now tagged

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Nov 26 17:35:09 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-11-26 17:29:42 +0000
URL: https://github.com/rspamd/rspamd/commit/d90b05640bcea0544ea698ccec35abbfd2e313b0 (HEAD -> master)

[Project] Some rework about specific data that is now tagged

---
 lualib/lua_content/ical.lua | 31 ++++++++++++++++++++++++-------
 lualib/lua_content/pdf.lua  |  9 ++++++++-
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/lualib/lua_content/ical.lua b/lualib/lua_content/ical.lua
index 8052f04b9..bb2f52771 100644
--- a/lualib/lua_content/ical.lua
+++ b/lualib/lua_content/ical.lua
@@ -15,7 +15,8 @@ limitations under the License.
 ]]--
 
 local l = require 'lpeg'
-local rspamd_text = require "rspamd_text"
+local lua_util = require "lua_util"
+local N = "lua_content"
 
 local ical_grammar
 
@@ -34,28 +35,44 @@ end
 
 local exports = {}
 
+local function extract_text_data(specific)
+  local fun = require "fun"
+
+  local tbl = fun.totable(fun.map(function(e) return e[2]:lower() end, specific.elts))
+  return table.concat(tbl, '\n')
+end
+
 local function process_ical(input, _, task)
-  local control={n='\n', r='\r'}
+  local control={n='\n', r=''}
   local rspamd_url = require "rspamd_url"
-  local escaper = l.Ct((gen_grammar() / function(_, value)
+  local escaper = l.Ct((gen_grammar() / function(key, value)
     value = value:gsub("\\(.)", control)
+    key = key:lower()
     local local_urls = rspamd_url.all(task:get_mempool(), value)
 
     if local_urls and #local_urls > 0 then
       for _,u in ipairs(local_urls) do
+        lua_util.debugm(N, task, 'ical: found URL in ical %s',
+            tostring(u))
         task:inject_url(u)
       end
     end
-    return value
+    lua_util.debugm(N, task, 'ical: ical key %s = "%s"',
+        key, value)
+    return {key, value}
   end)^1)
 
-  local values = escaper:match(input)
+  local elts = escaper:match(input)
 
-  if not values then
+  if not elts then
     return nil
   end
 
-  return rspamd_text.fromtable(values, "\n")
+  return {
+    tag = 'ical',
+    extract_text = extract_text_data,
+    elts = elts
+  }
 end
 
 --[[[
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index 9ff3d0260..3ff9f0f64 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -71,11 +71,18 @@ end
 -- Call immediately on require
 compile_tries()
 
+local function extract_text_data(specific)
+  return nil -- NYI
+end
+
 local function process_pdf(input, _, task)
   local matches = pdf_trie:match(input)
-  local pdf_output = {}
 
   if matches then
+    local pdf_output = {
+      tag = 'pdf',
+      extract_text = extract_text_data,
+    }
     local grouped_processors = {}
     for npat,matched_positions in pairs(matches) do
       local index = pdf_indexes[npat]


More information about the Commits mailing list