commit a59ff17: [Regression] Fix urls output in the protocol
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Feb 4 16:42:03 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-02-04 16:35:21 +0000
URL: https://github.com/rspamd/rspamd/commit/a59ff17c8be4d6ebe6db7b6fc2cebf724c4fd865 (HEAD -> master)
[Regression] Fix urls output in the protocol
---
src/libserver/protocol.c | 67 +++++++++++++++++++++++++++++++++---------------
src/libserver/url.c | 48 +++++++++++++++++++++++++++++-----
src/libserver/url.h | 3 +++
3 files changed, 91 insertions(+), 27 deletions(-)
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index ba468ee5f..5bcfbc37a 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -435,14 +435,17 @@ rspamd_protocol_handle_headers (struct rspamd_task *task,
}
}
IF_HEADER (URLS_HEADER) {
+ msg_debug_protocol ("read urls header, value: %V", hv);
+
srch.begin = "extended";
srch.len = 8;
- msg_debug_protocol ("read urls header, value: %V", hv);
if (rspamd_ftok_casecmp (hv_tok, &srch) == 0) {
task->flags |= RSPAMD_TASK_FLAG_EXT_URLS;
msg_debug_protocol ("extended urls information");
}
+
+ /* TODO: add more formats there */
}
IF_HEADER (USER_AGENT_HEADER) {
msg_debug_protocol ("read user-agent header, value: %V", hv);
@@ -665,6 +668,7 @@ rspamd_protocol_handle_request (struct rspamd_task *task,
/* Structure for writing tree data */
struct tree_cb_data {
ucl_object_t *top;
+ GHashTable *seen;
struct rspamd_task *task;
};
@@ -715,17 +719,37 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
struct rspamd_url *url = value;
ucl_object_t *obj;
struct rspamd_task *task = cb->task;
- const gchar *user_field = "unknown", *encoded;
+ const gchar *user_field = "unknown", *encoded = NULL;
gboolean has_user = FALSE;
guint len = 0;
- gsize enclen;
-
- encoded = rspamd_url_encode (url, &enclen, task->task_pool);
+ gsize enclen = 0;
if (!(task->flags & RSPAMD_TASK_FLAG_EXT_URLS)) {
- obj = ucl_object_fromlstring (encoded, enclen);
+ if (url->hostlen > 0) {
+ if (g_hash_table_lookup (cb->seen, url)) {
+ return;
+ }
+
+ const gchar *end = NULL;
+
+ if (g_utf8_validate (url->host, url->hostlen, &end)) {
+ obj = ucl_object_fromlstring (url->host, url->hostlen);
+ }
+ else if (end - url->host > 0) {
+ obj = ucl_object_fromlstring (url->host, end - url->host);
+ }
+ else {
+ return;
+ }
+ }
+ else {
+ return;
+ }
+
+ g_hash_table_insert (cb->seen, url, url);
}
else {
+ encoded = rspamd_url_encode (url, &enclen, task->task_pool);
obj = rspamd_protocol_extended_url (task, url, encoded, enclen);
}
@@ -742,6 +766,10 @@ urls_protocol_cb (gpointer key, gpointer value, gpointer ud)
len = task->from_envelope->addr_len;
}
+ if (!encoded) {
+ encoded = rspamd_url_encode (url, &enclen, task->task_pool);
+ }
+
msg_notice_task_encrypted ("<%s> %s: %*s; ip: %s; URL: %*s",
task->message_id,
has_user ? "user" : "from",
@@ -760,9 +788,12 @@ rspamd_urls_tree_ucl (GHashTable *input, struct rspamd_task *task)
obj = ucl_object_typed_new (UCL_ARRAY);
cb.top = obj;
cb.task = task;
+ cb.seen = g_hash_table_new (rspamd_url_host_hash, rspamd_urls_host_cmp);
g_hash_table_foreach (input, urls_protocol_cb, &cb);
+ g_hash_table_unref (cb.seen);
+
return obj;
}
@@ -1168,18 +1199,16 @@ rspamd_protocol_write_ucl (struct rspamd_task *task,
}
if (flags & RSPAMD_PROTOCOL_URLS) {
- if (task->flags & RSPAMD_TASK_FLAG_EXT_URLS) {
- if (g_hash_table_size (task->urls) > 0) {
- ucl_object_insert_key (top,
- rspamd_urls_tree_ucl (task->urls, task),
- "urls", 0, false);
- }
+ if (g_hash_table_size (task->urls) > 0) {
+ ucl_object_insert_key (top,
+ rspamd_urls_tree_ucl (task->urls, task),
+ "urls", 0, false);
+ }
- if (g_hash_table_size (task->emails) > 0) {
- ucl_object_insert_key (top,
- rspamd_emails_tree_ucl (task->emails, task),
- "emails", 0, false);
- }
+ if (g_hash_table_size (task->emails) > 0) {
+ ucl_object_insert_key (top,
+ rspamd_emails_tree_ucl (task->emails, task),
+ "emails", 0, false);
}
}
@@ -1279,9 +1308,7 @@ rspamd_protocol_http_reply (struct rspamd_http_message *msg,
rspamd_http_message_add_header (msg, hn->begin, hv->begin);
}
- if (task->cfg->log_urls || (task->flags & RSPAMD_TASK_FLAG_EXT_URLS)) {
- flags |= RSPAMD_PROTOCOL_URLS;
- }
+ flags |= RSPAMD_PROTOCOL_URLS;
top = rspamd_protocol_write_ucl (task, flags);
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 4599f3ce1..4dcd11c9e 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -2971,15 +2971,26 @@ guint
rspamd_url_hash (gconstpointer u)
{
const struct rspamd_url *url = u;
- rspamd_cryptobox_fast_hash_state_t st;
-
- rspamd_cryptobox_fast_hash_init (&st, rspamd_hash_seed ());
if (url->urllen > 0) {
- rspamd_cryptobox_fast_hash_update (&st, url->string, url->urllen);
+ return rspamd_cryptobox_fast_hash (url->string, url->urllen,
+ rspamd_hash_seed ());
}
- return rspamd_cryptobox_fast_hash_final (&st);
+ return 0;
+}
+
+guint
+rspamd_url_host_hash (gconstpointer u)
+{
+ const struct rspamd_url *url = u;
+
+ if (url->hostlen > 0) {
+ return rspamd_cryptobox_fast_hash (url->host, url->hostlen,
+ rspamd_hash_seed ());
+ }
+
+ return 0;
}
guint
@@ -3045,6 +3056,22 @@ rspamd_urls_cmp (gconstpointer a, gconstpointer b)
return r == 0;
}
+gboolean
+rspamd_urls_host_cmp (gconstpointer a, gconstpointer b)
+{
+ const struct rspamd_url *u1 = a, *u2 = b;
+ int r = 0;
+
+ if (u1->hostlen != u2->hostlen) {
+ return FALSE;
+ }
+ else {
+ r = memcmp (u1->host, u2->host, u1->hostlen);
+ }
+
+ return r == 0;
+}
+
gsize
rspamd_url_decode (gchar *dst, const gchar *src, gsize size)
{
@@ -3255,8 +3282,15 @@ rspamd_url_encode (struct rspamd_url *url, gsize *pdlen,
dest = rspamd_mempool_alloc (pool, dlen + 1);
d = dest;
dend = d + dlen;
- d += rspamd_snprintf ((gchar *)d, dend - d,
- "%*s://", url->protocollen, rspamd_url_protocols[url->protocol].name);
+
+ if (url->protocollen > 0 &&
+ (url->protocol >= 0 && url->protocol < G_N_ELEMENTS (rspamd_url_protocols))) {
+ d += rspamd_snprintf ((gchar *) d, dend - d,
+ "%*s://", url->protocollen, rspamd_url_protocols[url->protocol].name);
+ }
+ else {
+ d += rspamd_snprintf ((gchar *) d, dend - d, "http://");
+ }
if (url->userlen > 0) {
ENCODE_URL_COMPONENT ((guchar *)url->user, url->userlen,
diff --git a/src/libserver/url.h b/src/libserver/url.h
index fa5c69f00..523fb2c1f 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -203,12 +203,15 @@ void rspamd_url_add_tag (struct rspamd_url *url, const gchar *tag,
guint rspamd_url_hash (gconstpointer u);
guint rspamd_email_hash (gconstpointer u);
+guint rspamd_url_host_hash (gconstpointer u);
+
/* Compare two emails for building emails hash */
gboolean rspamd_emails_cmp (gconstpointer a, gconstpointer b);
/* Compare two urls for building emails hash */
gboolean rspamd_urls_cmp (gconstpointer a, gconstpointer b);
+gboolean rspamd_urls_host_cmp (gconstpointer a, gconstpointer b);
/**
* Decode URL encoded string in-place and return new length of a string, src and dst are NULL terminated
More information about the Commits
mailing list