commit c5c2eaf: [Rework] Rework url flags handling API

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Apr 21 15:21:11 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-04-21 14:27:20 +0100
URL: https://github.com/rspamd/rspamd/commit/c5c2eaf6b05640a9a0934042a099e818719357aa

[Rework] Rework url flags handling API

---
 src/libserver/url.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/libserver/url.h | 16 +++++++++++
 src/lua/lua_url.c   | 66 +++++++++++++++++++++++++++++---------------
 3 files changed, 139 insertions(+), 22 deletions(-)

diff --git a/src/libserver/url.c b/src/libserver/url.c
index db89073f5..195727c13 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -214,6 +214,35 @@ struct url_matcher static_matchers[] = {
 				0}
 };
 
+/*
+ * Table tying each url flag bit to its textual name.
+ * `hash` is initialised to -1 here and overwritten with the hash of `name`
+ * by the "Generate hashes for flags" loop in rspamd_url_init ().
+ * Entry order matters: rspamd_url_flag_to_string () returns the first match.
+ */
+struct rspamd_url_flag_name {
+	const gchar *name; /* textual flag name */
+	gint flag; /* RSPAMD_URL_FLAG_* bit */
+	gint hash; /* hash of `name`, -1 until computed */
+};
+
+struct rspamd_url_flag_name url_flag_names[] = {
+		{.name = "phished", .flag = RSPAMD_URL_FLAG_PHISHED, .hash = -1},
+		{.name = "numeric", .flag = RSPAMD_URL_FLAG_NUMERIC, .hash = -1},
+		{.name = "obscured", .flag = RSPAMD_URL_FLAG_OBSCURED, .hash = -1},
+		{.name = "redirected", .flag = RSPAMD_URL_FLAG_REDIRECTED, .hash = -1},
+		{.name = "html_displayed", .flag = RSPAMD_URL_FLAG_HTML_DISPLAYED, .hash = -1},
+		{.name = "text", .flag = RSPAMD_URL_FLAG_FROM_TEXT, .hash = -1},
+		{.name = "subject", .flag = RSPAMD_URL_FLAG_SUBJECT, .hash = -1},
+		{.name = "host_encoded", .flag = RSPAMD_URL_FLAG_HOSTENCODED, .hash = -1},
+		{.name = "schema_encoded", .flag = RSPAMD_URL_FLAG_SCHEMAENCODED, .hash = -1},
+		{.name = "path_encoded", .flag = RSPAMD_URL_FLAG_PATHENCODED, .hash = -1},
+		{.name = "query_encoded", .flag = RSPAMD_URL_FLAG_QUERYENCODED, .hash = -1},
+		/* sic: same spelling as the name previously hard-coded for Lua */
+		{.name = "missing_slahes", .flag = RSPAMD_URL_FLAG_MISSINGSLASHES, .hash = -1},
+		{.name = "idn", .flag = RSPAMD_URL_FLAG_IDN, .hash = -1},
+		{.name = "has_port", .flag = RSPAMD_URL_FLAG_HAS_PORT, .hash = -1},
+		{.name = "has_user", .flag = RSPAMD_URL_FLAG_HAS_USER, .hash = -1},
+		{.name = "schemaless", .flag = RSPAMD_URL_FLAG_SCHEMALESS, .hash = -1},
+		{.name = "unnormalised", .flag = RSPAMD_URL_FLAG_UNNORMALISED, .hash = -1},
+		{.name = "zw_spaces", .flag = RSPAMD_URL_FLAG_ZW_SPACES, .hash = -1},
+		{.name = "url_displayed", .flag = RSPAMD_URL_FLAG_DISPLAY_URL, .hash = -1},
+		{.name = "image", .flag = RSPAMD_URL_FLAG_IMAGE, .hash = -1},
+		{.name = "query", .flag = RSPAMD_URL_FLAG_QUERY, .hash = -1},
+		{.name = "content", .flag = RSPAMD_URL_FLAG_CONTENT, .hash = -1}
+};
+
 
 static inline khint_t rspamd_url_hash (struct rspamd_url *u);
 
@@ -610,6 +639,26 @@ rspamd_url_init (const gchar *tld_file)
 					url_scanner->matchers_strict->len);
 		}
 	}
+
+	/* Generate hashes for flags */
+	for (gint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+		url_flag_names[i].hash =
+				rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+						url_flag_names[i].name,
+						strlen (url_flag_names[i].name), 0);
+	}
+	/* Ensure that there are no hash collisions; the check is O(N^2), but this array is small */
+	for (gint i = 0; i < G_N_ELEMENTS (url_flag_names) - 1; i ++) {
+		for (gint j = i + 1; j < G_N_ELEMENTS (url_flag_names); j ++) {
+			if (url_flag_names[i].hash == url_flag_names[j].hash) {
+				msg_err ("collision: both %s and %s map to %d",
+						url_flag_names[i].name, url_flag_names[j].name,
+						url_flag_names[i].hash);
+				abort ();
+			}
+		}
+	}
+
 }
 
 #define SET_U(u, field) do {                                                \
@@ -3991,3 +4040,33 @@ rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url
 
 	return false;
 }
+
+/*
+ * Looks a flag up by its textual name and ORs the matching bit into *flag.
+ * Returns true when the name is known; otherwise returns false and leaves
+ * *flag untouched. A NULL name is treated as unknown.
+ */
+bool
+rspamd_url_flag_from_string (const gchar *str, gint *flag)
+{
+	gint h;
+
+	if (str == NULL) {
+		/*
+		 * The Lua binding passes lua_tostring () output here, which is NULL
+		 * for values that are neither strings nor numbers.
+		 */
+		return false;
+	}
+
+	/* Converted to gint in the same way as the hashes stored in the table */
+	h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+			str, strlen (str), 0);
+
+	for (gsize i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+		/*
+		 * The hash is only a cheap prefilter: an unrelated string may share
+		 * the same gint hash with a flag name, so confirm the match by name.
+		 */
+		if (url_flag_names[i].hash == h &&
+				strcmp (url_flag_names[i].name, str) == 0) {
+			*flag |= url_flag_names[i].flag;
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
+/*
+ * Returns the name of the first entry of url_flag_names that shares at least
+ * one bit with `flag`, or NULL when no entry does.
+ */
+const gchar *
+rspamd_url_flag_to_string (int flag)
+{
+	const struct rspamd_url_flag_name *cur = url_flag_names;
+	const struct rspamd_url_flag_name *end =
+			url_flag_names + G_N_ELEMENTS (url_flag_names);
+
+	/* Walk the table in declaration order; the first hit wins */
+	for (; cur != end; cur ++) {
+		if ((cur->flag & flag) != 0) {
+			return cur->name;
+		}
+	}
+
+	return NULL;
+}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index bb9c57399..2a5892fc5 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -36,6 +36,7 @@ enum rspamd_url_flags {
 	RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
 	RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
 	RSPAMD_URL_FLAG_QUERY = 1u << 20u,
+	RSPAMD_URL_FLAG_CONTENT = 1u << 21u,
 };
 
 struct rspamd_url_tag {
@@ -268,6 +269,21 @@ const gchar *rspamd_url_protocol_name (enum rspamd_url_protocol proto);
  */
 enum rspamd_url_protocol rspamd_url_protocol_from_string (const gchar *str);
 
+/**
+ * Converts string to a url flag
+ * @param str NUL-terminated flag name (e.g. "phished")
+ * @param flag on success the matching flag bit is OR-ed into `*flag`; bits already set are preserved
+ * @return true if `str` was recognised as a flag name, false otherwise (`*flag` is then left untouched)
+ */
+bool rspamd_url_flag_from_string (const gchar *str, gint *flag);
+
+/**
+ * Converts url flag to a string
+ * @param flag flag value; if several bits are set, the first matching entry of the internal name table is used
+ * @return name taken from the internal table (a string literal, not to be freed), or NULL if no known flag bit is set
+ */
+const gchar * rspamd_url_flag_to_string (int flag);
+
 /* Defines sets of urls indexed by url as is */
 KHASH_DECLARE (rspamd_url_hash, struct rspamd_url *, char);
 KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 6540919ea..94cb51dbd 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -728,6 +728,7 @@ lua_url_create (lua_State *L)
 	const gchar *text;
 	size_t length;
 	gboolean own_pool = FALSE;
+	struct rspamd_lua_url *u;
 
 	if (lua_type (L, 1) == LUA_TUSERDATA) {
 		pool = rspamd_lua_check_mempool (L, 1);
@@ -753,6 +754,26 @@ lua_url_create (lua_State *L)
 		if (lua_type (L, -1) != LUA_TUSERDATA) {
 			/* URL is actually not found */
 			lua_pushnil (L);
+
+			return 1;
+		}
+
+		u = (struct rspamd_lua_url *)lua_touserdata (L, -1);
+
+		if (lua_type (L, 3) == LUA_TTABLE) {
+			/* Add flags */
+			for (lua_pushnil (L); lua_next (L, 3); lua_pop (L, 1)) {
+				int nmask = 0;
+				const gchar *fname = lua_tostring (L, -1);
+
+				if (rspamd_url_flag_from_string (fname, &nmask)) {
+					u->url->flags |= nmask;
+				}
+				else {
+					lua_pop (L, 1);
+					return luaL_error (L, "invalid flag: %s", fname);
+				}
+			}
 		}
 	}
 
@@ -854,9 +875,9 @@ lua_url_all (lua_State *L)
  * - `image`: URL is from src attribute of img HTML tag
  * @return {table} URL flags
  */
-#define PUSH_FLAG(fl, name) do { \
+#define PUSH_FLAG(fl) do { \
 	if (flags & (fl)) { \
-		lua_pushstring (L, (name)); \
+		lua_pushstring (L, rspamd_url_flag_to_string (fl)); \
 		lua_pushboolean (L, true); \
 		lua_settable (L, -3); \
 	} \
@@ -874,26 +895,27 @@ lua_url_get_flags (lua_State *L)
 
 		lua_createtable (L, 0, 4);
 
-		PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished");
-		PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric");
-		PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured");
-		PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected");
-		PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed");
-		PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text");
-		PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject");
-		PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded");
-		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded");
-		PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded");
-		PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded");
-		PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes");
-		PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn");
-		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port");
-		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
-		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
-		PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
-		PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
-		PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
-		PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
+		PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC);
+		PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT);
+		PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT);
+		PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES);
+		PUSH_FLAG (RSPAMD_URL_FLAG_IDN);
+		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT);
+		PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER);
+		PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS);
+		PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED);
+		PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES);
+		PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL);
+		PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE);
+		PUSH_FLAG (RSPAMD_URL_FLAG_CONTENT);
 	}
 	else {
 		return luaL_error (L, "invalid arguments");


More information about the Commits mailing list