commit c399a60: [Rework] Urls: rework urls hash structure
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Mar 9 10:49:12 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-03-06 15:58:22 +0000
URL: https://github.com/rspamd/rspamd/commit/c399a6013b8522fc28ed11839fae6cbe7062278a
[Rework] Urls: rework urls hash structure
---
src/libserver/url.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------
src/libserver/url.h | 22 ++++++++++++++++++++++
2 files changed, 69 insertions(+), 6 deletions(-)
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 043f523f0..3449310b2 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -214,6 +214,12 @@ struct url_matcher static_matchers[] = {
URL_FLAG_NOHTML}
};
+/* Instantiate the khash url sets declared in url.h: one keyed by the whole url, one keyed by host */
+__KHASH_IMPL (rspamd_url_hash, kh_inline,struct rspamd_url *, char, false,
+ rspamd_url_hash, rspamd_urls_cmp);
+__KHASH_IMPL (rspamd_url_host_hash, kh_inline,struct rspamd_url *, char, false,
+ rspamd_url_host_hash, rspamd_urls_host_cmp);
+
struct url_callback_data {
const gchar *begin;
gchar *url_str;
@@ -3374,7 +3380,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
return TRUE;
}
-guint
+inline guint
rspamd_url_hash (gconstpointer u)
{
const struct rspamd_url *url = u;
@@ -3387,7 +3393,7 @@ rspamd_url_hash (gconstpointer u)
return 0;
}
-guint
+inline guint
rspamd_url_host_hash (gconstpointer u)
{
const struct rspamd_url *url = u;
@@ -3401,7 +3407,7 @@ rspamd_url_host_hash (gconstpointer u)
return 0;
}
-guint
+inline guint
rspamd_email_hash (gconstpointer u)
{
const struct rspamd_url *url = u;
@@ -3421,7 +3427,7 @@ rspamd_email_hash (gconstpointer u)
}
/* Compare two emails for building emails tree */
-gboolean
+inline gboolean
rspamd_emails_cmp (gconstpointer a, gconstpointer b)
{
const struct rspamd_url *u1 = a, *u2 = b;
@@ -3450,7 +3456,7 @@ rspamd_emails_cmp (gconstpointer a, gconstpointer b)
return FALSE;
}
-gboolean
+inline gboolean
rspamd_urls_cmp (gconstpointer a, gconstpointer b)
{
const struct rspamd_url *u1 = a, *u2 = b;
@@ -3466,7 +3472,7 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b)
return r == 0;
}
-gboolean
+inline gboolean
rspamd_urls_host_cmp (gconstpointer a, gconstpointer b)
{
const struct rspamd_url *u1 = a, *u2 = b;
@@ -3806,3 +3812,38 @@ rspamd_url_protocol_from_string (const gchar *str)
return ret;
}
+
+
+bool
+rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
+ struct rspamd_url *u)
+{
+ khiter_t k;
+ gint r;
+ /* kh_put sets r: 0 = an equal url is already stored, > 0 = u inserted, < 0 = failure */
+ k = kh_put (rspamd_url_hash, set, u, &r);
+
+ if (r == 0) {
+ struct rspamd_url *ex = kh_key (set, k);
+ /* Duplicate: only the counter of the entry already in the set grows */
+ ex->count ++;
+
+ return false;
+ }
+ /* Report an insertion only when one happened; a failed kh_put (r < 0) stored nothing */
+ return r > 0;
+}
+
+bool
+rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u)
+{
+ khiter_t pos;
+ bool found;
+
+ /* Look for a stored url that hashes and compares equal to u */
+ pos = kh_get (rspamd_url_hash, set, u);
+ /* kh_get hands back kh_end (set) when there is no such entry */
+ found = (pos != kh_end (set));
+
+ return found;
+}
\ No newline at end of file
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 00f09ac30..358c61e16 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -4,6 +4,7 @@
#include "config.h"
#include "mem_pool.h"
+#include "khash.h"
#include "fstring.h"
#ifdef __cplusplus
@@ -281,6 +282,27 @@ const gchar *rspamd_url_protocol_name (enum rspamd_url_protocol proto);
*/
enum rspamd_url_protocol rspamd_url_protocol_from_string (const gchar *str);
+/* Defines sets of urls indexed by url as is */
+KHASH_DECLARE (rspamd_url_hash, struct rspamd_url *, char);
+KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
+
+/* Convenience functions for url sets */
+/**
+ * Add a url to the set, or increase the count of the equal url already in it
+ * @param set
+ * @param u
+ * @return true if a new url has been added
+ */
+bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
+ struct rspamd_url *u);
+/**
+ * Checks if a url is in set
+ * @param set
+ * @param u
+ * @return true if the set contains a url equal to u
+ */
+bool rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u);
+
#ifdef __cplusplus
}
#endif
More information about the Commits
mailing list