commit 3c0a34c: [Minor] Selectors: further fixes for dealing with specific URLs

Andrew Lewis nerf at judo.za.org
Tue Nov 24 11:14:06 UTC 2020


Author: Andrew Lewis
Date: 2020-11-24 13:02:26 +0200
URL: https://github.com/rspamd/rspamd/commit/3c0a34cc0f6505046e94dfebc8ef2af53628d475 (refs/pull/3559/head)

[Minor] Selectors: further fixes for dealing with specific URLs

---
 lualib/lua_selectors/extractors.lua | 66 +++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 25 deletions(-)

diff --git a/lualib/lua_selectors/extractors.lua b/lualib/lua_selectors/extractors.lua
index 770887ff6..ce88445e8 100644
--- a/lualib/lua_selectors/extractors.lua
+++ b/lualib/lua_selectors/extractors.lua
@@ -21,6 +21,42 @@ local common = require "lua_selectors/common"
 local ts = require("tableshape").types
 local E = {}
 
+local url_flags_ts = ts.array_of(ts.one_of{
+    'content',
+    'has_port',
+    'has_user',
+    'host_encoded',
+    'html_displayed',
+    'idn',
+    'image',
+    'missing_slahes', -- sic
+    'no_tld',
+    'numeric',
+    'obscured',
+    'path_encoded',
+    'phished',
+    'query',
+    'query_encoded',
+    'redirected',
+    'schema_encoded',
+    'schemaless',
+    'subject',
+    'text',
+    'unnormalised',
+    'url_displayed',
+    'zw_spaces',
+    }):is_optional()
+
+local function gen_exclude_flags_filter(exclude_flags)
+  return function(u)
+    local got_flags = u:get_flags()
+    for _, flag in ipairs(exclude_flags) do
+      if got_flags[flag] then return false end
+    end
+    return true
+  end
+end
+
 local extractors = {
   -- Plain id function
   ['id'] = {
@@ -277,6 +313,9 @@ e.g. `get_tld`]],
       local params = args[1] or {}
       params.task = task
       params.no_cache = true
+      if params.exclude_flags then
+        params.filter = gen_exclude_flags_filter(params.exclude_flags)
+      end
       local urls = lua_util.extract_specific_urls(params)
       if not urls[1] then
         return nil
@@ -287,31 +326,8 @@ e.g. `get_tld`]],
     ['args_schema'] = {ts.shape{
       limit = ts.number + ts.string / tonumber,
       esld_limit = (ts.number + ts.string / tonumber):is_optional(),
-      flags = ts.array_of(ts.one_of{
-          'content',
-          'has_port',
-          'has_user',
-          'host_encoded',
-          'html_displayed',
-          'idn',
-          'image',
-          'missing_slahes', -- sic
-          'no_tld',
-          'numeric',
-          'obscured',
-          'path_encoded',
-          'query',
-          'query_encoded',
-          'redirected',
-          'schema_encoded',
-          'schemaless',
-          'subject',
-          'text',
-          'unnormalised',
-          'url_displayed',
-          'zw_spaces',
-          'phished',
-          }):is_optional(),
+      exclude_flags = url_flags_ts,
+      flags = url_flags_ts,
       flags_mode = ts.one_of{'explicit'}:is_optional(),
       prefix = ts.string:is_optional(),
       need_content = (ts.boolean + ts.string / lua_util.toboolean):is_optional(),


More information about the Commits mailing list