commit c5c2eaf: [Rework] Rework url flags handling API
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Apr 21 15:21:11 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-04-21 14:27:20 +0100
URL: https://github.com/rspamd/rspamd/commit/c5c2eaf6b05640a9a0934042a099e818719357aa
[Rework] Rework url flags handling API
---
src/libserver/url.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++
src/libserver/url.h | 16 +++++++++++
src/lua/lua_url.c | 66 +++++++++++++++++++++++++++++---------------
3 files changed, 139 insertions(+), 22 deletions(-)
diff --git a/src/libserver/url.c b/src/libserver/url.c
index db89073f5..195727c13 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -214,6 +214,35 @@ struct url_matcher static_matchers[] = {
0}
};
+/*
+ * One entry of the url flag name table: the textual name of a flag, the
+ * RSPAMD_URL_FLAG_* bit it stands for and a hash of the name.
+ */
+struct rspamd_url_flag_name {
+	const gchar *name;
+	gint flag;
+	gint hash;
+};
+
+/*
+ * Name <-> flag table. Every hash starts as -1 and is overwritten with the
+ * hash of `name` in rspamd_url_init (). The names are runtime strings, so
+ * their spelling (including "missing_slahes") is kept exactly as it is.
+ */
+struct rspamd_url_flag_name url_flag_names[] = {
+	{.name = "phished", .flag = RSPAMD_URL_FLAG_PHISHED, .hash = -1},
+	{.name = "numeric", .flag = RSPAMD_URL_FLAG_NUMERIC, .hash = -1},
+	{.name = "obscured", .flag = RSPAMD_URL_FLAG_OBSCURED, .hash = -1},
+	{.name = "redirected", .flag = RSPAMD_URL_FLAG_REDIRECTED, .hash = -1},
+	{.name = "html_displayed", .flag = RSPAMD_URL_FLAG_HTML_DISPLAYED, .hash = -1},
+	{.name = "text", .flag = RSPAMD_URL_FLAG_FROM_TEXT, .hash = -1},
+	{.name = "subject", .flag = RSPAMD_URL_FLAG_SUBJECT, .hash = -1},
+	{.name = "host_encoded", .flag = RSPAMD_URL_FLAG_HOSTENCODED, .hash = -1},
+	{.name = "schema_encoded", .flag = RSPAMD_URL_FLAG_SCHEMAENCODED, .hash = -1},
+	{.name = "path_encoded", .flag = RSPAMD_URL_FLAG_PATHENCODED, .hash = -1},
+	{.name = "query_encoded", .flag = RSPAMD_URL_FLAG_QUERYENCODED, .hash = -1},
+	{.name = "missing_slahes", .flag = RSPAMD_URL_FLAG_MISSINGSLASHES, .hash = -1},
+	{.name = "idn", .flag = RSPAMD_URL_FLAG_IDN, .hash = -1},
+	{.name = "has_port", .flag = RSPAMD_URL_FLAG_HAS_PORT, .hash = -1},
+	{.name = "has_user", .flag = RSPAMD_URL_FLAG_HAS_USER, .hash = -1},
+	{.name = "schemaless", .flag = RSPAMD_URL_FLAG_SCHEMALESS, .hash = -1},
+	{.name = "unnormalised", .flag = RSPAMD_URL_FLAG_UNNORMALISED, .hash = -1},
+	{.name = "zw_spaces", .flag = RSPAMD_URL_FLAG_ZW_SPACES, .hash = -1},
+	{.name = "url_displayed", .flag = RSPAMD_URL_FLAG_DISPLAY_URL, .hash = -1},
+	{.name = "image", .flag = RSPAMD_URL_FLAG_IMAGE, .hash = -1},
+	{.name = "query", .flag = RSPAMD_URL_FLAG_QUERY, .hash = -1},
+	{.name = "content", .flag = RSPAMD_URL_FLAG_CONTENT, .hash = -1}
+};
+
static inline khint_t rspamd_url_hash (struct rspamd_url *u);
@@ -610,6 +639,26 @@ rspamd_url_init (const gchar *tld_file)
url_scanner->matchers_strict->len);
}
}
+
+ /* Generate hashes for flags */
+ for (gint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+ url_flag_names[i].hash =
+ rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+ url_flag_names[i].name,
+ strlen (url_flag_names[i].name), 0);
+ }
+ /* Ensure that we have no hash collisions: O(N^2), but this array is small */
+ for (gint i = 0; i < G_N_ELEMENTS (url_flag_names) - 1; i ++) {
+ for (gint j = i + 1; j < G_N_ELEMENTS (url_flag_names); j ++) {
+ if (url_flag_names[i].hash == url_flag_names[j].hash) {
+ msg_err ("collision: both %s and %s map to %d",
+ url_flag_names[i].name, url_flag_names[j].name,
+ url_flag_names[i].hash);
+ abort ();
+ }
+ }
+ }
+
}
#define SET_U(u, field) do { \
@@ -3991,3 +4040,33 @@ rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url
return false;
}
+
+/**
+ * Resolves a textual flag name into its RSPAMD_URL_FLAG_* bit.
+ *
+ * The hashes stored in url_flag_names are used only as a cheap prefilter:
+ * a candidate is accepted only when its name also compares equal to `str`,
+ * so an unrelated string whose hash happens to match is not accepted.
+ *
+ * @param str NUL-terminated flag name; NULL is treated as an unknown name
+ * @param flag output mask, the matching bit is OR-ed into the current value
+ * @return true if `str` names a known flag, false otherwise (`*flag` untouched)
+ */
+bool
+rspamd_url_flag_from_string (const gchar *str, gint *flag)
+{
+	if (str == NULL) {
+		/*
+		 * Callers may hand over the result of lua_tostring (), which is NULL
+		 * for values that are neither strings nor numbers; strlen (NULL)
+		 * would be undefined behaviour.
+		 */
+		return false;
+	}
+
+	/* Kept in a gint, the same type as the table's hash field */
+	gint h = rspamd_cryptobox_fast_hash_specific (RSPAMD_CRYPTOBOX_HASHFAST_INDEPENDENT,
+			str, strlen (str), 0);
+
+	for (guint i = 0; i < G_N_ELEMENTS (url_flag_names); i ++) {
+		if (url_flag_names[i].hash == h &&
+				strcmp (url_flag_names[i].name, str) == 0) {
+			*flag |= url_flag_names[i].flag;
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
+/**
+ * Finds the symbolic name of a url flag.
+ *
+ * Walks url_flag_names in declaration order and yields the name of the first
+ * entry that shares at least one bit with `flag`.
+ *
+ * @param flag flag bit(s) to look up
+ * @return name stored in the table, or NULL when no entry matches
+ */
+const gchar *
+rspamd_url_flag_to_string (int flag)
+{
+	const struct rspamd_url_flag_name *cur = url_flag_names;
+	const struct rspamd_url_flag_name *end = url_flag_names + G_N_ELEMENTS (url_flag_names);
+
+	for (; cur != end; cur ++) {
+		if ((cur->flag & flag) != 0) {
+			return cur->name;
+		}
+	}
+
+	return NULL;
+}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index bb9c57399..2a5892fc5 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -36,6 +36,7 @@ enum rspamd_url_flags {
RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
RSPAMD_URL_FLAG_QUERY = 1u << 20u,
+ RSPAMD_URL_FLAG_CONTENT = 1u << 21u,
};
struct rspamd_url_tag {
@@ -268,6 +269,21 @@ const gchar *rspamd_url_protocol_name (enum rspamd_url_protocol proto);
*/
enum rspamd_url_protocol rspamd_url_protocol_from_string (const gchar *str);
+/**
+ * Converts a flag name (e.g. "phished") to the corresponding url flag
+ * @param str NUL-terminated flag name to look up
+ * @param flag output mask; the matching flag bit is OR-ed into the existing
+ * value rather than assigned, and it is left untouched when nothing matches
+ * @return true if `str` was recognised, false otherwise
+ */
+bool rspamd_url_flag_from_string (const gchar *str, gint *flag);
+
+/**
+ * Converts url flag to a string
+ * @param flag flag value to look up; if several bits are set, the name of the
+ * first matching entry of the internal name table is reported
+ * @return flag name, or NULL if no known flag bit is set in `flag`
+ */
+const gchar * rspamd_url_flag_to_string (int flag);
+
/* Defines sets of urls indexed by url as is */
KHASH_DECLARE (rspamd_url_hash, struct rspamd_url *, char);
KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 6540919ea..94cb51dbd 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -728,6 +728,7 @@ lua_url_create (lua_State *L)
const gchar *text;
size_t length;
gboolean own_pool = FALSE;
+ struct rspamd_lua_url *u;
if (lua_type (L, 1) == LUA_TUSERDATA) {
pool = rspamd_lua_check_mempool (L, 1);
@@ -753,6 +754,26 @@ lua_url_create (lua_State *L)
if (lua_type (L, -1) != LUA_TUSERDATA) {
/* URL is actually not found */
lua_pushnil (L);
+
+ return 1;
+ }
+
+ u = (struct rspamd_lua_url *)lua_touserdata (L, -1);
+
+ if (lua_type (L, 3) == LUA_TTABLE) {
+ /* Add flags */
+ for (lua_pushnil (L); lua_next (L, 3); lua_pop (L, 1)) {
+ int nmask = 0;
+ const gchar *fname = lua_tostring (L, -1);
+
+ if (rspamd_url_flag_from_string (fname, &nmask)) {
+ u->url->flags |= nmask;
+ }
+ else {
+ lua_pop (L, 1);
+ return luaL_error (L, "invalid flag: %s", fname);
+ }
+ }
}
}
@@ -854,9 +875,9 @@ lua_url_all (lua_State *L)
* - `image`: URL is from src attribute of img HTML tag
* @return {table} URL flags
*/
-#define PUSH_FLAG(fl, name) do { \
+#define PUSH_FLAG(fl) do { \
if (flags & (fl)) { \
- lua_pushstring (L, (name)); \
+ lua_pushstring (L, rspamd_url_flag_to_string (fl)); \
lua_pushboolean (L, true); \
lua_settable (L, -3); \
} \
@@ -874,26 +895,27 @@ lua_url_get_flags (lua_State *L)
lua_createtable (L, 0, 4);
- PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED, "phished");
- PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC, "numeric");
- PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED, "obscured");
- PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED, "redirected");
- PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED, "html_displayed");
- PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT, "text");
- PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT, "subject");
- PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED, "host_encoded");
- PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED, "schema_encoded");
- PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED, "path_encoded");
- PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED, "query_encoded");
- PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES, "missing_slahes");
- PUSH_FLAG (RSPAMD_URL_FLAG_IDN, "idn");
- PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT, "has_port");
- PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER, "has_user");
- PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS, "schemaless");
- PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
- PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
- PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
- PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
+ PUSH_FLAG (RSPAMD_URL_FLAG_PHISHED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_NUMERIC);
+ PUSH_FLAG (RSPAMD_URL_FLAG_OBSCURED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_REDIRECTED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_HTML_DISPLAYED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_FROM_TEXT);
+ PUSH_FLAG (RSPAMD_URL_FLAG_SUBJECT);
+ PUSH_FLAG (RSPAMD_URL_FLAG_HOSTENCODED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMAENCODED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_PATHENCODED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_QUERYENCODED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_MISSINGSLASHES);
+ PUSH_FLAG (RSPAMD_URL_FLAG_IDN);
+ PUSH_FLAG (RSPAMD_URL_FLAG_HAS_PORT);
+ PUSH_FLAG (RSPAMD_URL_FLAG_HAS_USER);
+ PUSH_FLAG (RSPAMD_URL_FLAG_SCHEMALESS);
+ PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED);
+ PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES);
+ PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL);
+ PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE);
+ PUSH_FLAG (RSPAMD_URL_FLAG_CONTENT);
}
else {
return luaL_error (L, "invalid arguments");
More information about the Commits
mailing list