commit afa9755: [Minor] Core: Add lua utility to find some obscured unicode symbols

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Jan 18 17:21:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-01-18 16:48:29 +0000
URL: https://github.com/rspamd/rspamd/commit/afa975523e60d36be65d5b79d650a1c98e0a3b0d

[Minor] Core: Add lua utility to find some obscured unicode symbols

---
 src/libutil/str_util.h |  3 +++
 src/lua/lua_util.c     | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index a2669d1a0..f95932547 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -455,5 +455,8 @@ gchar * rspamd_str_make_utf_valid (const gchar *src, gsize slen, gsize *dstlen);
 gsize rspamd_gstring_strip (GString *s, const gchar *strip_chars);
 
 #define IS_ZERO_WIDTH_SPACE(uc) ((uc) == 0x200b || (uc) == 0x200c)
+#define IS_OBSCURED_CHAR(uc) (((uc) >= 0x200B && (uc) <= 0x200F) || /* zero-width chars, LRM/RLM marks */ \
+								((uc) >= 0x2028 && (uc) <= 0x202F) || /* line/paragraph separators, bidi embeddings/overrides, NNBSP */ \
+								((uc) >= 0x205F && (uc) <= 0x206F)) /* MMSP, word joiner, invisible operators, bidi isolates; NB: evaluates uc several times */
 
 #endif /* SRC_LIBUTIL_STR_UTIL_H_ */
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 81b44bd28..94554faa1 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -399,6 +399,13 @@ LUA_FUNCTION_DEF (util, is_utf_spoofed);
  */
 LUA_FUNCTION_DEF (util, is_valid_utf8);
 
+/***
+ * @function util.has_obscured_utf(str)
+ * Returns true if a string has obscured UTF symbols (zero width spaces, order marks); invalid UTF-8 sequences are ignored
+ * @return {boolean} true if the string has obscured UTF characters (followed by the code point and a byte offset when true)
+ */
+LUA_FUNCTION_DEF (util, has_obscured_utf);
+
 /***
  * @function util.readline([prompt])
  * Returns string read from stdin with history and editing support
@@ -609,6 +616,7 @@ static const struct luaL_reg utillib_f[] = {
 	LUA_INTERFACE_DEF (util, caseless_hash_fast),
 	LUA_INTERFACE_DEF (util, is_utf_spoofed),
 	LUA_INTERFACE_DEF (util, is_valid_utf8),
+	LUA_INTERFACE_DEF (util, has_obscured_utf),
 	LUA_INTERFACE_DEF (util, readline),
 	LUA_INTERFACE_DEF (util, readpassphrase),
 	LUA_INTERFACE_DEF (util, file_exists),
@@ -2609,6 +2617,36 @@ lua_util_is_valid_utf8 (lua_State *L)
 	return 1;
 }
 
+/* Lua util.has_obscured_utf(str): false, or true + code point + byte offset just past it */
+static gint
+lua_util_has_obscured_utf (lua_State *L)
+{
+	LUA_TRACE_POINT;
+	gsize len = 0; /* stays 0 if lua_tolstring () fails without storing a length */
+	const gchar *str = lua_tolstring (L, 1, &len);
+	gint32 i = 0, slen;
+	UChar32 uc;
+
+	if (str == NULL) { /* argument is not a string or number: nothing to scan */
+		lua_pushboolean (L, false);
+		return 1;
+	}
+
+	/* ICU offsets are 32-bit, so scan at most G_MAXINT32 bytes */
+	for (slen = (gint32)MIN (len, (gsize)G_MAXINT32); i < slen; ) {
+		U8_NEXT (str, i, slen, uc); /* moves i past the sequence; uc < 0 if malformed */
+		if (uc > 0 && IS_OBSCURED_CHAR (uc)) {
+			lua_pushboolean (L, true);
+			lua_pushnumber (L, uc); /* Character */
+			lua_pushnumber (L, i); /* Offset just past the character */
+			return 3;
+		}
+	}
+
+	lua_pushboolean (L, false);
+	return 1;
+}
+
 static gint
 lua_util_readline (lua_State *L)
 {


More information about the Commits mailing list