commit 6b67691: [Rework] Drop url tags

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Aug 21 17:00:07 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-08-21 15:52:43 +0100
URL: https://github.com/rspamd/rspamd/commit/6b676918bbb037c111fa2616f1709ead9ac3c788

[Rework] Drop url tags

---
 src/libserver/html.c             |   3 -
 src/libserver/url.c              |  28 ---------
 src/libserver/url.h              |  11 ----
 src/lua/lua_url.c                | 130 +--------------------------------------
 src/plugins/surbl.c              |  62 -------------------
 test/functional/lua/url_tags.lua |  56 -----------------
 6 files changed, 1 insertion(+), 289 deletions(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index 4ff310f1c..fa33ffdfb 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -721,9 +721,6 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool,
 						href_url->phished_url = text_url;
 						phished_tld.begin = href_tok.begin;
 						phished_tld.len = href_tok.len;
-						rspamd_url_add_tag (text_url, "phishing",
-								rspamd_mempool_ftokdup (pool, &phished_tld),
-								pool);
 						text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
 					}
 				}
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 9314ce2bb..d770b2191 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -3176,34 +3176,6 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
 	}
 }
 
-void
-rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag,
-		const gchar *value,
-		rspamd_mempool_t *pool)
-{
-	struct rspamd_url_tag *found, *ntag;
-
-	g_assert (url != NULL && tag != NULL && value != NULL);
-
-	if (url->tags == NULL) {
-		url->tags = g_hash_table_new (rspamd_strcase_hash, rspamd_strcase_equal);
-		rspamd_mempool_add_destructor (pool,
-				(rspamd_mempool_destruct_t)g_hash_table_unref, url->tags);
-	}
-
-	found = g_hash_table_lookup (url->tags, tag);
-
-	ntag = rspamd_mempool_alloc0 (pool, sizeof (*ntag));
-	ntag->data = rspamd_mempool_strdup (pool, value);
-
-	if (found == NULL) {
-		g_hash_table_insert (url->tags, rspamd_mempool_strdup (pool, tag),
-				ntag);
-	}
-
-	DL_APPEND (found, ntag);
-}
-
 guint
 rspamd_url_hash (gconstpointer u)
 {
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 6f1ccf59f..d9e15e212 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -70,7 +70,6 @@ struct rspamd_url {
 
 	enum rspamd_url_flags flags;
 	guint count;
-	GHashTable *tags;
 };
 
 enum uri_errno {
@@ -214,16 +213,6 @@ void rspamd_url_task_subject_callback (struct rspamd_url *url,
 									   gsize start_offset,
 									   gsize end_offset, gpointer ud);
 
-/**
- * Adds a tag for url
- * @param url
- * @param tag
- * @param pool
- */
-void rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag,
-						 const gchar *value,
-						 rspamd_mempool_t *pool);
-
 guint rspamd_url_hash (gconstpointer u);
 
 guint rspamd_email_hash (gconstpointer u);
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index a0f8c4648..648de08d8 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -59,10 +59,7 @@ LUA_FUNCTION_DEF (url, is_obscured);
 LUA_FUNCTION_DEF (url, is_html_displayed);
 LUA_FUNCTION_DEF (url, is_subject);
 LUA_FUNCTION_DEF (url, get_phished);
-LUA_FUNCTION_DEF (url, get_tag);
 LUA_FUNCTION_DEF (url, get_count);
-LUA_FUNCTION_DEF (url, get_tags);
-LUA_FUNCTION_DEF (url, add_tag);
 LUA_FUNCTION_DEF (url, get_visible);
 LUA_FUNCTION_DEF (url, create);
 LUA_FUNCTION_DEF (url, init);
@@ -87,9 +84,7 @@ static const struct luaL_reg urllib_m[] = {
 	LUA_INTERFACE_DEF (url, is_html_displayed),
 	LUA_INTERFACE_DEF (url, is_subject),
 	LUA_INTERFACE_DEF (url, get_phished),
-	LUA_INTERFACE_DEF (url, get_tag),
-	LUA_INTERFACE_DEF (url, get_tags),
-	LUA_INTERFACE_DEF (url, add_tag),
+
 	LUA_INTERFACE_DEF (url, get_visible),
 	LUA_INTERFACE_DEF (url, get_count),
 	LUA_INTERFACE_DEF (url, get_flags),
@@ -438,129 +433,6 @@ lua_url_is_subject (lua_State *L)
 	return 1;
 }
 
-/***
- * @method url:get_tag(tag)
- * Returns list of string for a specific tagname for an url
- * @return {table/strings} list of tags for an url
- */
-static gint
-lua_url_get_tag (lua_State *L)
-{
-	LUA_TRACE_POINT;
-	struct rspamd_lua_url *url = lua_check_url (L, 1);
-	guint i;
-	const gchar *tag = luaL_checkstring (L, 2);
-	struct rspamd_url_tag *tval, *cur;
-
-	if (url != NULL && tag != NULL) {
-
-		if (url->url->tags == NULL) {
-			lua_createtable (L, 0, 0);
-		}
-		else {
-			tval = g_hash_table_lookup (url->url->tags, tag);
-
-			if (tval) {
-				lua_newtable (L);
-				i = 1;
-
-				DL_FOREACH (tval, cur) {
-					lua_pushstring (L, cur->data);
-					lua_rawseti (L, -2, i ++);
-				}
-
-				lua_settable (L, -3);
-			}
-			else {
-				lua_createtable (L, 0, 0);
-			}
-		}
-	}
-	else {
-		lua_pushnil (L);
-	}
-
-	return 1;
-}
-
-
-/***
- * @method url:get_tags()
- * Returns list of string tags for an url
- * @return {table/strings} list of tags for an url
- */
-static gint
-lua_url_get_tags (lua_State *L)
-{
-	LUA_TRACE_POINT;
-	struct rspamd_lua_url *url = lua_check_url (L, 1);
-	guint i;
-	GHashTableIter it;
-	struct rspamd_url_tag *tval, *cur;
-	gpointer k, v;
-
-	if (url != NULL) {
-		if (url->url->tags == NULL) {
-			lua_createtable (L, 0, 0);
-		}
-		else {
-			lua_createtable (L, 0, g_hash_table_size (url->url->tags));
-			g_hash_table_iter_init (&it, url->url->tags);
-
-			while (g_hash_table_iter_next (&it, &k, &v)) {
-				tval = v;
-				lua_pushstring (L, (const gchar *)k);
-				lua_newtable (L);
-				i = 1;
-
-				DL_FOREACH (tval, cur) {
-					lua_pushstring (L, cur->data);
-					lua_rawseti (L, -2, i ++);
-				}
-
-				lua_settable (L, -3);
-			}
-		}
-	}
-	else {
-		lua_pushnil (L);
-	}
-
-	return 1;
-}
-
-/***
- * @method url:add_tag(tag, mempool)
- * Adds a new tag for url
- * @param {string} tag new tag to add
- * @param {mempool} mempool memory pool (e.g. `task:get_pool()`)
- */
-static gint
-lua_url_add_tag (lua_State *L)
-{
-	LUA_TRACE_POINT;
-	struct rspamd_lua_url *url = lua_check_url (L, 1);
-	rspamd_mempool_t *mempool = rspamd_lua_check_mempool (L, 4);
-	const gchar *tag = luaL_checkstring (L, 2);
-	const gchar *value;
-
-	if (lua_type (L, 3) == LUA_TSTRING) {
-		value = lua_tostring (L, 3);
-	}
-	else {
-		value = "1"; /* Some stupid placeholder */
-	}
-
-	if (url != NULL && mempool != NULL && tag != NULL) {
-		rspamd_url_add_tag (url->url, tag, value, mempool);
-	}
-	else {
-		return luaL_error (L, "invalid arguments");
-	}
-
-	return 0;
-}
-
 /***
  * @method url:get_phished()
  * Get another URL that pretends to be this URL (e.g. used in phishing)
diff --git a/src/plugins/surbl.c b/src/plugins/surbl.c
index 0d03f9516..4113130db 100644
--- a/src/plugins/surbl.c
+++ b/src/plugins/surbl.c
@@ -1584,9 +1584,6 @@ process_dns_results (struct rspamd_task *task,
 					bit->bit);
 			rspamd_task_insert_result (task, bit->symbol, 1, resolved_name);
 
-			if (surbl_module_ctx->use_tags) {
-				rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool);
-			}
 			got_result = TRUE;
 		}
 	}
@@ -1605,10 +1602,6 @@ process_dns_results (struct rspamd_task *task,
 						resolved_name, suffix->suffix,
 						bit->bit);
 				rspamd_task_insert_result (task, bit->symbol, 1, resolved_name);
-
-				if (surbl_module_ctx->use_tags) {
-					rspamd_url_add_tag (uri, "surbl", bit->symbol, task->task_pool);
-				}
 			}
 		}
 	}
@@ -1618,10 +1611,6 @@ process_dns_results (struct rspamd_task *task,
 			msg_info_surbl ("domain [%s] is in surbl %s",
 					resolved_name, suffix->suffix);
 			rspamd_task_insert_result (task, suffix->symbol, 1, resolved_name);
-
-			if (surbl_module_ctx->use_tags) {
-				rspamd_url_add_tag (uri, "surbl", suffix->symbol, task->task_pool);
-			}
 		}
 		else {
 			ina.s_addr = addr;
@@ -1795,11 +1784,6 @@ surbl_redirector_finish (struct rspamd_http_connection *conn,
 				else {
 					existing->count ++;
 				}
-
-				if (surbl_module_ctx->use_tags) {
-					rspamd_url_add_tag (param->url, "redirector", urlstr,
-							task->task_pool);
-				}
 			}
 			else {
 				msg_info_surbl ("cannot parse redirector reply: %s", urlstr);
@@ -1879,38 +1863,6 @@ register_redirector_call (struct rspamd_url *url, struct rspamd_task *task,
 	}
 }
 
-static gboolean
-surbl_test_tags (struct rspamd_task *task, struct redirector_param *param,
-		struct rspamd_url *url)
-{
-	struct rspamd_url_tag *tag = NULL, *cur;
-	gchar *ftld = NULL;
-	rspamd_ftok_t tld;
-	gboolean processed = FALSE;
-
-	if (url->tags) {
-		tag = g_hash_table_lookup (url->tags, "surbl");
-	}
-
-	if (tag) {
-		tld.begin = url->tld;
-		tld.len = url->tldlen;
-
-		ftld = rspamd_mempool_ftokdup (task->task_pool, &tld);
-		/* We know results for this URL */
-
-		DL_FOREACH (tag, cur) {
-			msg_info_surbl ("domain [%s] is in surbl %s (tags)",
-					ftld, cur->data);
-			rspamd_task_insert_result (task, cur->data, 1, ftld);
-		}
-
-		processed = TRUE;
-	}
-
-	return processed;
-}
-
 static void
 surbl_tree_redirector_callback (gpointer key, gpointer value, void *data)
 {
@@ -2026,15 +1978,6 @@ surbl_tree_url_callback (gpointer key, gpointer value, void *data)
 	msg_debug_surbl ("check url %*s in %s", url->urllen, url->string,
 			param->suffix->suffix);
 
-	if (surbl_module_ctx->use_tags && surbl_test_tags (param->task, param, url)) {
-		return;
-	}
-
-	if (url->tags && g_hash_table_lookup (url->tags, "redirector")) {
-		/* URL is redirected, skip from checks */
-		return;
-	}
-
 	make_surbl_requests (url, param->task, param->item, param->suffix, FALSE,
 			param->tree, surbl_module_ctx);
 }
@@ -2324,11 +2267,6 @@ surbl_continue_process_handler (lua_State *L)
 					redirected_url->phished_url = param->url;
 					redirected_url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
 				}
-
-				if (surbl_module_ctx->use_tags) {
-					rspamd_url_add_tag (param->url, "redirector", urlstr,
-							task->task_pool);
-				}
 			}
 			else {
 				msg_info_surbl ("could not resolve '%*s' on redirector",
diff --git a/test/functional/lua/url_tags.lua b/test/functional/lua/url_tags.lua
deleted file mode 100644
index 9dc38ac72..000000000
--- a/test/functional/lua/url_tags.lua
+++ /dev/null
@@ -1,56 +0,0 @@
-
-local rspamd_logger = require 'rspamd_logger'
-
-rspamd_config:register_symbol({
-  name = 'ADDED_TAGS',
-  score = 1.0,
-  callback = function(task)
-    if not task:get_request_header('addtags') then
-      return true, 'nope! not requested'
-    end
-    local urls = task:get_urls()
-    if not (urls and urls[1]) then
-      return true, 'nope! found no urls'
-    end
-    local mpool = task:get_mempool()
-    for _, u in ipairs(urls) do
-      u:add_tag('test1', 'meta1', mpool)
-      u:add_tag('test1', 'meta2', mpool)
-      u:add_tag('test2', 'http://www.example.com', mpool)
-    end
-    return true, 'no worry'
-  end
-})
-
-rspamd_config:register_symbol({
-  name = 'FOUND_TAGS',
-  score = 1.0,
-  callback = function(task)
-    local urls = task:get_urls()
-    if not (urls and urls[1]) then
-      return true, 'nope! found no urls'
-    end
-    for _, u in ipairs(urls) do
-      local tags = u:get_tags()
-      rspamd_logger.debugx(task, 'tags: %1', tags)
-      if not tags['test1'] then
-        return true, 'no key - test1'
-      end
-      local found1, found2 = false, false
-      for _, e in ipairs(tags['test1']) do
-        if e == 'meta1' then found1 = true end
-        if e == 'meta2' then found2 = true end
-      end
-      if not (found1 and found2) then
-        return true, 'missing metatags in test1'
-      end
-      if not tags['test2'] then
-        return true, 'no key - test2'
-      end
-      if not tags['test2'][1] == 'http://www.example.com' then
-        return true, 'wrong value in test2 metatag: ' .. tags['test2'][1]
-      end
-    end
-    return true, 'no worry'
-  end
-})


More information about the Commits mailing list