commit 50a043a: [Rework] Urls: more rework of the urls sets

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Mar 9 10:49:13 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-03-07 12:15:51 +0000
URL: https://github.com/rspamd/rspamd/commit/50a043a7cbce8142d81b7887d263a9573ff568eb

[Rework] Urls: more rework of the urls sets

---
 src/libmime/message.c    |   8 +-
 src/libmime/message.h    |   4 +-
 src/libserver/html.h     |   3 +-
 src/libserver/protocol.c |  46 +++++++-----
 src/libserver/re_cache.c |  24 +++---
 src/libserver/url.c      | 191 +++++++++++++++++++++--------------------------
 src/libserver/url.h      |  24 +++---
 7 files changed, 137 insertions(+), 163 deletions(-)

diff --git a/src/libmime/message.c b/src/libmime/message.c
index a43e109b5..40b7fe8bc 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1048,8 +1048,7 @@ rspamd_message_dtor (struct rspamd_message *msg)
 	g_ptr_array_unref (msg->text_parts);
 	g_ptr_array_unref (msg->parts);
 
-	g_hash_table_unref (msg->urls);
-	g_hash_table_unref (msg->emails);
+	kh_destroy (rspamd_url_hash, msg->urls);
 }
 
 struct rspamd_message*
@@ -1060,10 +1059,7 @@ rspamd_message_new (struct rspamd_task *task)
 	msg = rspamd_mempool_alloc0 (task->task_pool, sizeof (*msg));
 
 	msg->raw_headers = rspamd_message_headers_new ();
-
-	msg->emails = g_hash_table_new (rspamd_email_hash, rspamd_emails_cmp);
-	msg->urls = g_hash_table_new (rspamd_url_hash, rspamd_urls_cmp);
-
+	msg->urls = kh_init (rspamd_url_hash);
 	msg->parts = g_ptr_array_sized_new (4);
 	msg->text_parts = g_ptr_array_sized_new (2);
 	msg->task = task;
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 91d6e13d4..96ed9d5d4 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -13,6 +13,7 @@
 #include "libcryptobox/cryptobox.h"
 #include "libmime/mime_headers.h"
 #include "libmime/content_type.h"
+#include "libserver/url.h"
 #include "libutil/ref.h"
 #include "libutil/str_util.h"
 
@@ -175,8 +176,7 @@ struct rspamd_message {
 	GPtrArray *text_parts;			/**< list of text parts								*/
 	struct rspamd_message_raw_headers_content raw_headers_content;
 	struct rspamd_received_header *received;	/**< list of received headers						*/
-	GHashTable *urls;							/**< list of parsed urls							*/
-	GHashTable *emails;							/**< list of parsed emails							*/
+	khash_t (rspamd_url_hash) *urls;
 	struct rspamd_mime_headers_table *raw_headers;	/**< list of raw headers						*/
 	struct rspamd_mime_header *headers_order;	/**< order of raw headers							*/
 	struct rspamd_task *task;
diff --git a/src/libserver/html.h b/src/libserver/html.h
index b369bd890..ee5c242cb 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -141,7 +141,8 @@ GByteArray *rspamd_html_process_part (rspamd_mempool_t *pool,
 
 GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool,
 										   struct html_content *hc,
-										   GByteArray *in, GList **exceptions, GHashTable *urls, GHashTable *emails);
+										   GByteArray *in, GList **exceptions,
+										   GHashTable *urls, GHashTable *emails);
 
 /*
  * Returns true if a specified tag has been seen in a part
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index 739d3b950..35d50b909 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -861,7 +861,7 @@ rspamd_protocol_handle_request (struct rspamd_task *task,
 /* Structure for writing tree data */
 struct tree_cb_data {
 	ucl_object_t *top;
-	GHashTable *seen;
+	khash_t (rspamd_url_host_hash) *seen;
 	struct rspamd_task *task;
 };
 
@@ -908,10 +908,8 @@ rspamd_protocol_extended_url (struct rspamd_task *task,
  * Callback for writing urls
  */
 static void
-urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
+urls_protocol_cb (struct rspamd_url *url, struct tree_cb_data *cb)
 {
-	struct tree_cb_data *cb = ud;
-	struct rspamd_url *url = value;
 	ucl_object_t *obj;
 	struct rspamd_task *task = cb->task;
 	const gchar *user_field = "unknown", *encoded = NULL;
@@ -921,7 +919,7 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
 
 	if (!(task->protocol_flags & RSPAMD_TASK_PROTOCOL_FLAG_EXT_URLS)) {
 		if (url->hostlen > 0) {
-			if (g_hash_table_lookup (cb->seen, url)) {
+			if (rspamd_url_host_set_has (cb->seen, url)) {
 				return;
 			}
 
@@ -941,7 +939,7 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
 			return;
 		}
 
-		g_hash_table_insert (cb->seen, url, url);
+		rspamd_url_host_set_add (cb->seen, url);
 	}
 	else {
 		encoded = rspamd_url_encode (url, &enclen, task->task_pool);
@@ -975,28 +973,32 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
 }
 
 static ucl_object_t *
-rspamd_urls_tree_ucl (GHashTable *input, struct rspamd_task *task)
+rspamd_urls_tree_ucl (khash_t (rspamd_url_hash) *set,
+		struct rspamd_task *task)
 {
 	struct tree_cb_data cb;
 	ucl_object_t *obj;
+	struct rspamd_url *u;
 
 	obj = ucl_object_typed_new (UCL_ARRAY);
 	cb.top = obj;
 	cb.task = task;
-	cb.seen = g_hash_table_new (rspamd_url_host_hash, rspamd_urls_host_cmp);
+	cb.seen = kh_init (rspamd_url_host_hash);
 
-	g_hash_table_foreach (input, urls_protocol_cb, &cb);
+	kh_foreach_key (set, u, {
+		if (!(u->protocol & PROTOCOL_MAILTO)) {
+			urls_protocol_cb (u, &cb);
+		}
+	});
 
-	g_hash_table_unref (cb.seen);
+	kh_destroy (rspamd_url_host_hash, cb.seen);
 
 	return obj;
 }
 
 static void
-emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
+emails_protocol_cb (struct rspamd_url *url, struct tree_cb_data *cb)
 {
-	struct tree_cb_data *cb = ud;
-	struct rspamd_url *url = value;
 	ucl_object_t *obj;
 
 	if (url->userlen > 0 && url->hostlen > 0) {
@@ -1007,16 +1009,23 @@ emails_protocol_cb (gpointer key, gpointer value, gpointer ud)
 }
 
 static ucl_object_t *
-rspamd_emails_tree_ucl (GHashTable *input, struct rspamd_task *task)
+rspamd_emails_tree_ucl (khash_t (rspamd_url_hash) *set,
+						struct rspamd_task *task)
 {
 	struct tree_cb_data cb;
 	ucl_object_t *obj;
+	struct rspamd_url *u;
 
 	obj = ucl_object_typed_new (UCL_ARRAY);
 	cb.top = obj;
 	cb.task = task;
 
-	g_hash_table_foreach (input, emails_protocol_cb, &cb);
+	kh_foreach_key (set, u, {
+		if ((u->protocol & PROTOCOL_MAILTO)) {
+			emails_protocol_cb (u, &cb);
+		}
+	});
+
 
 	return obj;
 }
@@ -1446,15 +1455,12 @@ rspamd_protocol_write_ucl (struct rspamd_task *task,
 	}
 
 	if (flags & RSPAMD_PROTOCOL_URLS && task->message) {
-		if (g_hash_table_size (MESSAGE_FIELD (task, urls)) > 0) {
+		if (kh_size (MESSAGE_FIELD (task, urls)) > 0) {
 			ucl_object_insert_key (top,
 					rspamd_urls_tree_ucl (MESSAGE_FIELD (task, urls), task),
 					"urls", 0, false);
-		}
-
-		if (g_hash_table_size (MESSAGE_FIELD (task, emails)) > 0) {
 			ucl_object_insert_key (top,
-					rspamd_emails_tree_ucl (MESSAGE_FIELD (task, emails), task),
+					rspamd_emails_tree_ucl (MESSAGE_FIELD (task, urls), task),
 					"emails", 0, false);
 		}
 	}
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 995af8ddf..257428720 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -1053,7 +1053,6 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 		gboolean is_strong)
 {
 	guint ret = 0, i, re_id;
-	GHashTableIter it;
 	struct rspamd_mime_header *rh;
 	const gchar *in;
 	const guchar **scvec;
@@ -1062,7 +1061,6 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 	struct rspamd_mime_text_part *text_part;
 	struct rspamd_mime_part *mime_part;
 	struct rspamd_url *url;
-	gpointer k, v;
 	guint len, cnt;
 	const gchar *class_name;
 
@@ -1164,17 +1162,18 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 		}
 		break;
 	case RSPAMD_RE_URL:
-		cnt = g_hash_table_size (MESSAGE_FIELD (task, urls));
+		cnt = kh_size (MESSAGE_FIELD (task, urls));
 
 		if (cnt > 0) {
 			scvec = g_malloc (sizeof (*scvec) * cnt);
 			lenvec = g_malloc (sizeof (*lenvec) * cnt);
-			g_hash_table_iter_init (&it, MESSAGE_FIELD (task, urls));
 			i = 0;
 			raw = FALSE;
 
-			while (g_hash_table_iter_next (&it, &k, &v)) {
-				url = v;
+			kh_foreach_key (MESSAGE_FIELD (task, urls), url, {
+				if ((url->protocol & PROTOCOL_MAILTO)) {
+					continue;
+				}
 				in = url->string;
 				len = url->urllen;
 
@@ -1182,7 +1181,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 					scvec[i] = (guchar *) in;
 					lenvec[i++] = len;
 				}
-			}
+			});
 
 #if 0
 			g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails));
@@ -1207,18 +1206,19 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 		}
 		break;
 	case RSPAMD_RE_EMAIL:
-		cnt = g_hash_table_size (MESSAGE_FIELD (task, emails));
+		cnt = kh_size (MESSAGE_FIELD (task, urls));
 
 		if (cnt > 0) {
 			scvec = g_malloc (sizeof (*scvec) * cnt);
 			lenvec = g_malloc (sizeof (*lenvec) * cnt);
-			g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails));
 			i = 0;
 			raw = FALSE;
 
-			while (g_hash_table_iter_next (&it, &k, &v)) {
-				url = v;
+			kh_foreach_key (MESSAGE_FIELD (task, urls), url, {
 
+				if (!(url->protocol & PROTOCOL_MAILTO)) {
+					continue;
+				}
 				if (url->userlen == 0 || url->hostlen == 0) {
 					continue;
 				}
@@ -1227,7 +1227,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 				len = url->userlen + 1 + url->hostlen;
 				scvec[i] = (guchar *) in;
 				lenvec[i++] = len;
-			}
+			});
 
 			ret = rspamd_re_cache_process_regexp_data (rt, re,
 					task, scvec, lenvec, i, raw, &processed_hyperscan);
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 3449310b2..505d1d150 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -214,6 +214,13 @@ struct url_matcher static_matchers[] = {
 				URL_FLAG_NOHTML}
 };
 
+
+static inline khint_t rspamd_url_hash (struct rspamd_url *u);
+
+static inline khint_t rspamd_url_host_hash (struct rspamd_url * u);
+static inline bool rspamd_urls_cmp (struct rspamd_url *a, struct rspamd_url *b);
+static inline bool rspamd_urls_host_cmp (struct rspamd_url *a, struct rspamd_url *b);
+
 /* Hash table implementation */
 __KHASH_IMPL (rspamd_url_hash, kh_inline,struct rspamd_url *, char, false,
 		rspamd_url_hash, rspamd_urls_cmp);
@@ -3116,7 +3123,6 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 	struct rspamd_task *task;
 	gchar *url_str = NULL;
 	struct rspamd_url *query_url, *existing;
-	GHashTable *target_tbl = NULL;
 	gint rc;
 	gboolean prefix_added;
 
@@ -3141,36 +3147,23 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 	}
 
 	if (url->protocol == PROTOCOL_MAILTO) {
-		if (url->userlen > 0) {
-			target_tbl = MESSAGE_FIELD (task, emails);
+		if (url->userlen == 0) {
+			return FALSE;
 		}
 	}
-	else {
-		target_tbl = MESSAGE_FIELD (task, urls);
-	}
-
-	if (target_tbl) {
-		/* Also check max urls */
-		if (cbd->task->cfg && cbd->task->cfg->max_urls > 0) {
-			if (g_hash_table_size (target_tbl) > cbd->task->cfg->max_urls) {
-				msg_err_task ("part has too many URLs, we cannot process more: "
-							  "%d urls extracted ",
-						(guint)g_hash_table_size (target_tbl));
-
-				return FALSE;
-			}
-		}
+	/* Also check max urls */
+	if (cbd->task->cfg && cbd->task->cfg->max_urls > 0) {
+		if (kh_size (MESSAGE_FIELD (task, urls)) > cbd->task->cfg->max_urls) {
+			msg_err_task ("part has too many URLs, we cannot process more: "
+						  "%d urls extracted ",
+					(guint)kh_size (MESSAGE_FIELD (task, urls)));
 
-		if ((existing = g_hash_table_lookup (target_tbl, url)) == NULL) {
-			url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-			g_hash_table_insert (target_tbl, url, url);
-		}
-		else {
-			existing->count++;
+			return FALSE;
 		}
 	}
 
-	target_tbl = NULL;
+	url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
+	rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
 
 	cbd->part->exceptions = g_list_prepend (
 			cbd->part->exceptions,
@@ -3178,7 +3171,8 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 
 	/* We also search the query for additional url inside */
 	if (url->querylen > 0) {
-		if (rspamd_url_find (task->task_pool, rspamd_url_query_unsafe (url), url->querylen,
+		if (rspamd_url_find (task->task_pool,
+				rspamd_url_query_unsafe (url), url->querylen,
 				&url_str, RSPAMD_URL_FIND_ALL, NULL, &prefix_added)) {
 			query_url = rspamd_mempool_alloc0 (task->task_pool,
 					sizeof (struct rspamd_url));
@@ -3198,23 +3192,13 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
 				}
 
 				if (query_url->protocol == PROTOCOL_MAILTO) {
-					if (query_url->userlen > 0) {
-						target_tbl = MESSAGE_FIELD (task, emails);
+					if (query_url->userlen == 0) {
+						return TRUE;
 					}
 				}
-				else {
-					target_tbl = MESSAGE_FIELD (task, urls);
-				}
 
-				if (target_tbl) {
-					if ((existing = g_hash_table_lookup (target_tbl, query_url)) == NULL) {
-						url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
-						g_hash_table_insert (target_tbl, query_url, query_url);
-					}
-					else {
-						existing->count++;
-					}
-				}
+				query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
+				rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), query_url);
 			}
 		}
 	}
@@ -3321,27 +3305,13 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
 	url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED|RSPAMD_URL_FLAG_SUBJECT;
 
 	if (url->protocol == PROTOCOL_MAILTO) {
-		if (url->userlen > 0 && url->hostlen > 0) {
-			if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, emails),
-					url)) == NULL) {
-				g_hash_table_insert (MESSAGE_FIELD (task, emails), url,
-						url);
-			}
-			else {
-				existing->count ++;
-			}
-		}
-	}
-	else {
-		if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls),
-				url)) == NULL) {
-			g_hash_table_insert (MESSAGE_FIELD (task, urls), url, url);
-		}
-		else {
-			existing->count ++;
+		if (url->userlen == 0) {
+			return FALSE;
 		}
 	}
 
+	rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+
 	/* We also search the query for additional url inside */
 	if (url->querylen > 0) {
 		if (rspamd_url_find (task->task_pool, rspamd_url_query_unsafe (url), url->querylen,
@@ -3364,15 +3334,14 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
 					query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
 				}
 
-				if ((existing = g_hash_table_lookup (MESSAGE_FIELD (task, urls),
-						query_url)) == NULL) {
-					g_hash_table_insert (MESSAGE_FIELD (task, urls),
-							query_url,
-							query_url);
-				}
-				else {
-					existing->count ++;
+				if (query_url->protocol == PROTOCOL_MAILTO) {
+					if (query_url->userlen == 0) {
+						return TRUE;
+					}
 				}
+
+				rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls),
+						query_url);
 			}
 		}
 	}
@@ -3380,26 +3349,22 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
 	return TRUE;
 }
 
-inline guint
-rspamd_url_hash (gconstpointer u)
+static inline khint_t
+rspamd_url_hash (struct rspamd_url *url)
 {
-	const struct rspamd_url *url = u;
-
 	if (url->urllen > 0) {
-		return (guint)rspamd_cryptobox_fast_hash (url->string, url->urllen,
+		return (khint_t)rspamd_cryptobox_fast_hash (url->string, url->urllen,
 				rspamd_hash_seed ());
 	}
 
 	return 0;
 }
 
-inline guint
-rspamd_url_host_hash (gconstpointer u)
+static inline khint_t
+rspamd_url_host_hash (struct rspamd_url *url)
 {
-	const struct rspamd_url *url = u;
-
 	if (url->hostlen > 0) {
-		return (guint)rspamd_cryptobox_fast_hash (rspamd_url_host_unsafe (url),
+		return (khint_t)rspamd_cryptobox_fast_hash (rspamd_url_host_unsafe (url),
 				url->hostlen,
 				rspamd_hash_seed ());
 	}
@@ -3407,30 +3372,10 @@ rspamd_url_host_hash (gconstpointer u)
 	return 0;
 }
 
-inline guint
-rspamd_email_hash (gconstpointer u)
-{
-	const struct rspamd_url *url = u;
-	rspamd_cryptobox_fast_hash_state_t st;
-
-	rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
-
-	if (url->hostlen > 0) {
-		rspamd_cryptobox_fast_hash_update (&st, rspamd_url_host_unsafe (url), url->hostlen);
-	}
-
-	if (url->userlen > 0) {
-		rspamd_cryptobox_fast_hash_update (&st, rspamd_url_user_unsafe(url), url->userlen);
-	}
-
-	return (guint)rspamd_cryptobox_fast_hash_final (&st);
-}
-
 /* Compare two emails for building emails tree */
-inline gboolean
-rspamd_emails_cmp (gconstpointer a, gconstpointer b)
+static inline bool
+rspamd_emails_cmp (struct rspamd_url *u1, struct rspamd_url *u2)
 {
-	const struct rspamd_url *u1 = a, *u2 = b;
 	gint r;
 
 	if (u1->hostlen != u2->hostlen || u1->hostlen == 0) {
@@ -3456,30 +3401,32 @@ rspamd_emails_cmp (gconstpointer a, gconstpointer b)
 	return FALSE;
 }
 
-inline gboolean
-rspamd_urls_cmp (gconstpointer a, gconstpointer b)
+static inline bool
+rspamd_urls_cmp (struct rspamd_url *u1, struct rspamd_url *u2)
 {
-	const struct rspamd_url *u1 = a, *u2 = b;
 	int r = 0;
 
-	if (u1->urllen != u2->urllen) {
-		return FALSE;
+	if (u1->protocol != u2->protocol || u1->urllen != u2->urllen) {
+		return false;
 	}
 	else {
+		if (u1->protocol & PROTOCOL_MAILTO) {
+			return rspamd_emails_cmp (u1, u2);
+		}
+
 		r = memcmp (u1->string, u2->string, u1->urllen);
 	}
 
 	return r == 0;
 }
 
-inline gboolean
-rspamd_urls_host_cmp (gconstpointer a, gconstpointer b)
+static inline bool
+rspamd_urls_host_cmp (struct rspamd_url *u1, struct rspamd_url *u2)
 {
-	const struct rspamd_url *u1 = a, *u2 = b;
 	int r = 0;
 
 	if (u1->hostlen != u2->hostlen) {
-		return FALSE;
+		return false;
 	}
 	else {
 		r = memcmp (rspamd_url_host_unsafe (u1), rspamd_url_host_unsafe (u2),
@@ -3834,6 +3781,22 @@ rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
 	return true;
 }
 
+bool
+rspamd_url_host_set_add (khash_t (rspamd_url_host_hash) *set,
+								struct rspamd_url *u)
+{
+	gint r;
+
+	/* kh_put stores 0 in r when the key is already in the table */
+	(void) kh_put (rspamd_url_host_hash, set, u, &r);
+
+	if (r == 0) {
+		return false;
+	}
+
+	return true;
+}
+
 bool
 rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u)
 {
@@ -3845,5 +3808,19 @@ rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u)
 		return false;
 	}
 
+	return true;
+}
+
+bool
+rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url *u)
+{
+	khiter_t k;
+
+	/* `set` is a rspamd_url_host_hash table, so look up with that hash name */
+	k = kh_get (rspamd_url_host_hash, set, u);
+	if (k == kh_end (set)) {
+		return false;
+	}
+
+	return true;
 }
\ No newline at end of file
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 358c61e16..aff7ccf5f 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -225,21 +225,6 @@ gboolean rspamd_url_task_subject_callback (struct rspamd_url *url,
 									   gsize start_offset,
 									   gsize end_offset, gpointer ud);
 
-guint rspamd_url_hash (gconstpointer u);
-
-guint rspamd_email_hash (gconstpointer u);
-
-guint rspamd_url_host_hash (gconstpointer u);
-
-
-/* Compare two emails for building emails hash */
-gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
-
-/* Compare two urls for building emails hash */
-gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
-
-gboolean rspamd_urls_host_cmp (gconstpointer a, gconstpointer b);
-
 /**
  * Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated
  * @param dst
@@ -295,6 +280,14 @@ KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
  */
 bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
 		struct rspamd_url *u);
+/**
+ * Helper for url host set
+ * @param set
+ * @param u
+ * @return
+ */
+bool rspamd_url_host_set_add (khash_t (rspamd_url_host_hash) *set,
+									 struct rspamd_url *u);
 /**
  * Checks if a url is in set
  * @param set
@@ -302,6 +295,7 @@ bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
  * @return
  */
 bool rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u);
+bool rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url *u);
 
 #ifdef  __cplusplus
 }


More information about the Commits mailing list