commit ac58581: [Feature] Mime_types: Add MIME_BAD_UNICODE rule

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Jan 18 17:21:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-01-18 17:18:27 +0000
URL: https://github.com/rspamd/rspamd/commit/ac585813528a7ca13e63bd53704b0bbfb2d3b773 (HEAD -> master)

[Feature] Mime_types: Add MIME_BAD_UNICODE rule

---
 conf/scores.d/mime_types_group.conf |  5 +++++
 src/lua/lua_util.c                  | 15 ++++++++-------
 src/plugins/lua/mime_types.lua      | 25 +++++++++++++++++++------
 3 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/conf/scores.d/mime_types_group.conf b/conf/scores.d/mime_types_group.conf
index 10cb1ba93..7a2847b1e 100644
--- a/conf/scores.d/mime_types_group.conf
+++ b/conf/scores.d/mime_types_group.conf
@@ -56,4 +56,9 @@ symbols = {
         description = "Bad extension";
         one_shot = true;
     }
+    "MIME_BAD_UNICODE" {
+        weight = 8.0;
+        description = "Filename with known obscured unicode characters";
+        one_shot = true;
+    }
 }
\ No newline at end of file
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 94554faa1..94f3bb783 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -400,11 +400,11 @@ LUA_FUNCTION_DEF (util, is_utf_spoofed);
 LUA_FUNCTION_DEF (util, is_valid_utf8);
 
 /***
- * @function util.has_obscured_utf(str)
+ * @function util.has_obscured_unicode(str)
  * Returns true if a string has obscure UTF symbols (zero width spaces, order marks), ignores invalid utf characters
- * @return {boolean} true if a has obscured utf characters
+ * @return {boolean} true if the string has obscured unicode characters (plus the character and its offset if found)
  */
-LUA_FUNCTION_DEF (util, has_obscured_utf);
+LUA_FUNCTION_DEF (util, has_obscured_unicode);
 
 /***
  * @function util.readline([prompt])
@@ -616,7 +616,7 @@ static const struct luaL_reg utillib_f[] = {
 	LUA_INTERFACE_DEF (util, caseless_hash_fast),
 	LUA_INTERFACE_DEF (util, is_utf_spoofed),
 	LUA_INTERFACE_DEF (util, is_valid_utf8),
-	LUA_INTERFACE_DEF (util, has_obscured_utf),
+	LUA_INTERFACE_DEF (util, has_obscured_unicode),
 	LUA_INTERFACE_DEF (util, readline),
 	LUA_INTERFACE_DEF (util, readpassphrase),
 	LUA_INTERFACE_DEF (util, file_exists),
@@ -2618,24 +2618,25 @@ lua_util_is_valid_utf8 (lua_State *L)
 }
 
 static gint
-lua_util_has_obscured_utf (lua_State *L)
+lua_util_has_obscured_unicode (lua_State *L)
 {
 	LUA_TRACE_POINT;
 	const gchar *str;
 	gsize len;
-	gint32 i = 0;
+	gint32 i = 0, prev_i;
 	UChar32 uc;
 
 	str = lua_tolstring (L, 1, &len);
 
 	while (i < len) {
+		prev_i = i;
 		U8_NEXT (str, i, len, uc);
 
 		if (uc > 0) {
 			if (IS_OBSCURED_CHAR (uc)) {
 				lua_pushboolean (L, true);
 				lua_pushnumber (L, uc); /* Character */
-				lua_pushnumber (L, i); /* Offset */
+				lua_pushnumber (L, prev_i); /* Offset */
 
 				return 3;
 			}
diff --git a/src/plugins/lua/mime_types.lua b/src/plugins/lua/mime_types.lua
index e9a10bc3c..a7a859e53 100644
--- a/src/plugins/lua/mime_types.lua
+++ b/src/plugins/lua/mime_types.lua
@@ -21,6 +21,7 @@ end
 -- This plugin implements mime types checks for mail messages
 local logger = require "rspamd_logger"
 local lua_util = require "lua_util"
+local rspamd_util = require "rspamd_util"
 local N = "mime_types"
 local settings = {
   file = '',
@@ -32,6 +33,7 @@ local settings = {
   symbol_archive_in_archive = 'MIME_ARCHIVE_IN_ARCHIVE',
   symbol_double_extension = 'MIME_DOUBLE_BAD_EXTENSION',
   symbol_bad_extension = 'MIME_BAD_EXTENSION',
+  symbol_bad_unicode = 'MIME_BAD_UNICODE',
   regexp = false,
   extension_map = { -- extension -> mime_type
     html = 'text/html',
@@ -832,6 +834,17 @@ local function check_mime_type(task)
   end
 
   local function check_filename(fname, ct, is_archive, part)
+
+    local has_bad_unicode, char, ch_pos = rspamd_util.has_obscured_unicode(fname)
+    if has_bad_unicode then
+      task:insert_result(settings.symbol_bad_unicode, 1.0,
+          string.format("0x%xd after %s", char,
+              fname:sub(1, ch_pos)))
+    end
+
+    -- Replace potentially bad characters with '?'
+    fname = fname:gsub('[^%s%g]', '?')
+
     local ext,ext2,parts = gen_extension(fname)
     -- ext is the last extension, LOWERCASED
     -- ext2 is the one before last extension LOWERCASED
@@ -945,7 +958,6 @@ local function check_mime_type(task)
         end
 
         if filename then
-          filename = filename:gsub('[^%s%g]', '?')
           check_filename(filename, ct, false, p)
         end
 
@@ -976,11 +988,6 @@ local function check_mime_type(task)
             local nfiles = #fl
 
             for _,f in ipairs(fl) do
-              -- Strip bad characters
-              if f['name'] then
-                f['name'] = f['name']:gsub('[\128-\255%s%G]', '?')
-              end
-
               if f['encrypted'] then
                 task:insert_result(settings['symbol_encrypted_archive'],
                     1.0, f['name'])
@@ -1158,6 +1165,12 @@ if opts then
       parent = id,
       group = 'mime_types',
     })
+    rspamd_config:register_symbol({
+      type = 'virtual',
+      name = settings['symbol_bad_unicode'],
+      parent = id,
+      group = 'mime_types',
+    })
     rspamd_config:register_symbol({
       type = 'virtual,nostat',
       name = 'MIME_TRACE',


More information about the Commits mailing list