commit ec8a472: [Feature] Add dedicated ZW spaces detection for URLs

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Jan 21 12:42:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-01-21 12:41:03 +0000
URL: https://github.com/rspamd/rspamd/commit/ec8a472f21d83d3bb93a200acdd0d5b257e4e88c (HEAD -> master)

[Feature] Add dedicated ZW spaces detection for URLs
Issue: #2725

---
 rules/misc.lua       | 20 ++++++++++++++++++--
 src/libserver/html.c |  4 ++++
 src/libserver/url.h  |  1 +
 src/lua/lua_url.c    |  2 ++
 4 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/rules/misc.lua b/rules/misc.lua
index 26f2a1357..05d4af5d5 100644
--- a/rules/misc.lua
+++ b/rules/misc.lua
@@ -101,25 +101,41 @@ rspamd_config.DATE_IN_PAST = {
   type = 'mime',
 }
 
-rspamd_config.R_SUSPICIOUS_URL = {
+local obscured_id = rspamd_config:register_symbol{
   callback = function(task)
     local urls = task:get_urls()
 
     if urls then
       for _,u in ipairs(urls) do
-        if u:is_obscured() then
+        local fl = u:get_flags()
+        if fl.obscured then
           task:insert_result('R_SUSPICIOUS_URL', 1.0, u:get_host())
         end
+        if fl.zw_spaces then
+          task:insert_result('ZERO_WIDTH_SPACE_URL', 1.0, u:get_host())
+        end
       end
     end
+
     return false
   end,
+  name = 'R_SUSPICIOUS_URL',
   score = 5.0,
   one_shot = true,
   description = 'Obfusicated or suspicious URL has been found in a message',
   group = 'url'
 }
 
+rspamd_config:register_symbol{
+  type = 'virtual',
+  name = 'ZERO_WIDTH_SPACE_URL',
+  score = 7.0,
+  one_shot = true,
+  description = 'Zero width space in url',
+  group = 'url',
+  parent = obscured_id,
+}
+
 
 rspamd_config.ENVFROM_PRVS = {
   callback = function (task)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index e97a010fe..afaeae4c5 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1351,6 +1351,10 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
 
 	if (norm_res & (RSPAMD_UNICODE_NORM_ZERO_SPACES|RSPAMD_UNICODE_NORM_ERROR)) {
 		saved_flags |= RSPAMD_URL_FLAG_OBSCURED;
+
+		if (norm_res & RSPAMD_UNICODE_NORM_ZERO_SPACES) {
+			saved_flags |= RSPAMD_URL_FLAG_ZW_SPACES;
+		}
 	}
 
 	rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
diff --git a/src/libserver/url.h b/src/libserver/url.h
index a9eda71de..12a649ec7 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -27,6 +27,7 @@ enum rspamd_url_flags {
 	RSPAMD_URL_FLAG_HAS_USER = 1 << 14,
 	RSPAMD_URL_FLAG_SCHEMALESS = 1 << 15,
 	RSPAMD_URL_FLAG_UNNORMALISED = 1 << 16,
+	RSPAMD_URL_FLAG_ZW_SPACES = 1 << 17,
 };
 
 struct rspamd_url_tag {
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 8b18c7c3d..8bc0cf657 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -875,6 +875,7 @@ lua_url_all (lua_State *L)
  * - `has_user`: URL has user part
  * - `schemaless`: URL has no schema
  * - `unnormalised`: URL has some unicode unnormalities
+ * - `zw_spaces`: URL has some zero width spaces
  * @return {table} URL flags
  */
 #define PUSH_FLAG(fl, name) do { \
@@ -914,6 +915,7 @@ lua_url_get_flags (lua_State *L)
 		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
 		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
 		PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
+		PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
 	}
 	else {
 		return luaL_error (L, "invalid arguments");


More information about the Commits mailing list