commit c399a60: [Rework] Urls: rework urls hash structure

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Mar 9 10:49:12 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-03-06 15:58:22 +0000
URL: https://github.com/rspamd/rspamd/commit/c399a6013b8522fc28ed11839fae6cbe7062278a

[Rework] Urls: rework urls hash structure

---
 src/libserver/url.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------
 src/libserver/url.h | 22 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/src/libserver/url.c b/src/libserver/url.c
index 043f523f0..3449310b2 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -214,6 +214,12 @@ struct url_matcher static_matchers[] = {
 				URL_FLAG_NOHTML}
 };
 
+/* Hash table implementation for the url sets declared with KHASH_DECLARE in url.h */
+__KHASH_IMPL (rspamd_url_hash, kh_inline,struct rspamd_url *, char, false,
+		rspamd_url_hash, rspamd_urls_cmp);
+__KHASH_IMPL (rspamd_url_host_hash, kh_inline,struct rspamd_url *, char, false,
+		rspamd_url_host_hash, rspamd_urls_host_cmp);
+
 struct url_callback_data {
 	const gchar *begin;
 	gchar *url_str;
@@ -3374,7 +3380,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
 	return TRUE;
 }
 
-guint
+inline guint
 rspamd_url_hash (gconstpointer u)
 {
 	const struct rspamd_url *url = u;
@@ -3387,7 +3393,7 @@ rspamd_url_hash (gconstpointer u)
 	return 0;
 }
 
-guint
+inline guint
 rspamd_url_host_hash (gconstpointer u)
 {
 	const struct rspamd_url *url = u;
@@ -3401,7 +3407,7 @@ rspamd_url_host_hash (gconstpointer u)
 	return 0;
 }
 
-guint
+inline guint
 rspamd_email_hash (gconstpointer u)
 {
 	const struct rspamd_url *url = u;
@@ -3421,7 +3427,7 @@ rspamd_email_hash (gconstpointer u)
 }
 
 /* Compare two emails for building emails tree */
-gboolean
+inline gboolean
 rspamd_emails_cmp (gconstpointer a, gconstpointer b)
 {
 	const struct rspamd_url *u1 = a, *u2 = b;
@@ -3450,7 +3456,7 @@ rspamd_emails_cmp (gconstpointer a, gconstpointer b)
 	return FALSE;
 }
 
-gboolean
+inline gboolean
 rspamd_urls_cmp (gconstpointer a, gconstpointer b)
 {
 	const struct rspamd_url *u1 = a, *u2 = b;
@@ -3466,7 +3472,7 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b)
 	return r == 0;
 }
 
-gboolean
+inline gboolean
 rspamd_urls_host_cmp (gconstpointer a, gconstpointer b)
 {
 	const struct rspamd_url *u1 = a, *u2 = b;
@@ -3806,3 +3812,38 @@ rspamd_url_protocol_from_string (const gchar *str)
 
 	return ret;
 }
+
+
+bool
+rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
+									 struct rspamd_url *u)
+{
+	khiter_t k;
+	gint r;
+
+	/* r receives the kh_put status: 0 when an equal key is already stored */
+	k = kh_put (rspamd_url_hash, set, u, &r);
+
+	if (r == 0) {
+		/* Duplicate: u is not inserted, the stored url is counted instead */
+		struct rspamd_url *ex = kh_key (set, k);
+		ex->count ++;
+		return false;
+	}
+	/* r < 0 means kh_put failed and nothing was stored, so report false */
+	return r > 0;
+}
+
+bool
+rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u)
+{
+	/*
+	 * Membership test: a lookup result equal to kh_end (set) is
+	 * reported as "not in set", anything else as "in set".
+	 * NOTE: set is used without a NULL check here.
+	 */
+	khiter_t pos = kh_get (rspamd_url_hash, set, u);
+	bool found = (pos != kh_end (set));
+
+	return found;
+}
\ No newline at end of file
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 00f09ac30..358c61e16 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -4,6 +4,7 @@
 
 #include "config.h"
 #include "mem_pool.h"
+#include "khash.h"
 #include "fstring.h"
 
 #ifdef  __cplusplus
@@ -281,6 +282,27 @@ const gchar *rspamd_url_protocol_name (enum rspamd_url_protocol proto);
  */
 enum rspamd_url_protocol rspamd_url_protocol_from_string (const gchar *str);
 
+/* Defines sets of urls indexed by url as is (keys are struct rspamd_url *); implemented in url.c */
+KHASH_DECLARE (rspamd_url_hash, struct rspamd_url *, char);
+KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
+
+/* Convenience functions for url sets */
+/**
+ * Add a url to the set or, if an equal url is already stored, increase that url's count
+ * @param set url set to update
+ * @param u url to add
+ * @return true if a new url has been added
+ */
+bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
+		struct rspamd_url *u);
+/**
+ * Checks if a url is in the set
+ * @param set url set to search
+ * @param u url to look up
+ * @return true if the url is found in the set, false otherwise
+ */
+bool rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u);
+
 #ifdef  __cplusplus
 }
 #endif


More information about the Commits mailing list