commit 0de3dee: [Feature] Lua_selectors: Add `words` selector

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jul 23 17:07:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-07-23 17:51:19 +0100
URL: https://github.com/rspamd/rspamd/commit/0de3deeadb49872e4b2eecf74faa2741d45a2bab

[Feature] Lua_selectors: Add `words` selector

---
 lualib/lua_selectors.lua    | 30 +++++++++++++++++++++++++++++-
 test/lua/unit/selectors.lua |  8 ++++++--
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/lualib/lua_selectors.lua b/lualib/lua_selectors.lua
index 2a4782e8b..efe332878 100644
--- a/lualib/lua_selectors.lua
+++ b/lualib/lua_selectors.lua
@@ -358,7 +358,35 @@ The first argument must be header name.]],
   The second argument is optional time format, see [os.date](http://pgl.yoyo.org/luai/i/os.date) description]],
     ['args_schema'] = {ts.one_of{'connect', 'message'}:is_optional(),
                        ts.string:is_optional()}
-  }
+  },
+  -- Get text words from a message
+  ['words'] = {
+    ['get_value'] = function(task, args)
+      local how = args[1] or 'stem'
+      local tp = task:get_text_parts()
+
+      if tp then
+        local rtype = 'string_list'
+        if how == 'full' then
+          rtype = 'table_list'
+        end
+
+        return lua_util.flatten(
+            fun.map(function(p)
+              return p:get_words(how)
+            end, tp)), rtype
+      end
+
+      return nil
+    end,
+    ['description'] = [[Get words from text parts
+  - `stem`: stemmed words (default)
+  - `raw`: raw words
+  - `norm`: normalised words (lowercased)
+  - `full`: list of tables
+  ]],
+    ['args_schema'] = { ts.one_of { 'stem', 'raw', 'norm', 'full' }:is_optional()},
+  },
 }
 
 local function pure_type(ltype)
diff --git a/test/lua/unit/selectors.lua b/test/lua/unit/selectors.lua
index b5152e347..2fb839be1 100644
--- a/test/lua/unit/selectors.lua
+++ b/test/lua/unit/selectors.lua
@@ -69,7 +69,7 @@ context("Selectors test", function()
 
     ["digest"] = {
                 selector = "digest",
-                expect = {"1649c0cbbd127660095d4f44e15e8b60"}
+                expect = {"f46ccafe448fe4d7b46076938749695e"}
     },
 
     ["user"] = {
@@ -267,6 +267,10 @@ context("Selectors test", function()
       selector = "list('key', 'key1', 'key2', 'key3').apply_map(test_map).uniq",
       expect = {{'value1', 'value'}}
     },
+    ["words"] = {
+      selector = "words('norm')",
+      expect = {{'hello', 'world', 'mail', 'me'}}
+    },
   }
 
   for case_name, case in pairs(cases) do
@@ -295,7 +299,7 @@ Content-Type: multipart/alternative;
 
 --_000_6be055295eab48a5af7ad4022f33e2d0_
 Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: base64
+Content-Transfer-Encoding: 7bit
 
 Hello world
 


More information about the Commits mailing list