commit c902314: [Rework] Urls: process query urls in HTML urls correctly
Vsevolod Stakhov
vsevolod at highsecure.ru
Sun Mar 22 17:28:06 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-03-22 17:25:32 +0000
URL: https://github.com/rspamd/rspamd/commit/c90231490b9d64c8c6dad94ef562c79ff3326f5b (HEAD -> master)
[Rework] Urls: process query urls in HTML urls correctly
---
src/libserver/html.c | 79 ++++++++++++++++++++++++++--------------------------
src/libserver/url.c | 4 ---
2 files changed, 39 insertions(+), 44 deletions(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 71efe632a..edcb0f2b2 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1615,57 +1615,56 @@ rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag,
return NULL;
}
+struct rspamd_html_url_query_cbd { /* user data read by rspamd_html_url_query_callback */
+ rspamd_mempool_t *pool; /* memory pool; the callback copies it into its local `pool` */
+ khash_t (rspamd_url_hash) *url_set; /* set to which the callback adds each URL it accepts */
+ struct rspamd_url *url; /* URL whose query is being scanned; the callback reads its query for the debug message */
+};
+
+/*
+ * Callback invoked for each URL found inside the query of an HTML URL.
+ *
+ * `ud` points to a struct rspamd_html_url_query_cbd. A mailto: URL with an
+ * empty user part is rejected (FALSE is returned and nothing is stored).
+ * Any other URL gets RSPAMD_URL_FLAG_QUERY set, is passed to
+ * rspamd_url_set_add_or_increase together with cbd->url_set, and TRUE is
+ * returned.
+ * start_offset and end_offset are not used here.
+ */
+static gboolean
+rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset,
+ gsize end_offset, gpointer ud)
+{
+ struct rspamd_html_url_query_cbd *cbd = ud;
+ /* NOTE(review): `pool` is not referenced directly below; presumably the
+ * msg_debug_html macro expands to code that uses it - confirm before removing */
+ rspamd_mempool_t *pool = cbd->pool;
+
+ if (url->protocol == PROTOCOL_MAILTO && url->userlen == 0) {
+ return FALSE;
+ }
+
+ msg_debug_html ("found url %s in query of url"
+ " %*s", url->string,
+ cbd->url->querylen, rspamd_url_query_unsafe (cbd->url));
+
+ url->flags |= RSPAMD_URL_FLAG_QUERY;
+ rspamd_url_set_add_or_increase (cbd->url_set, url);
+
+ return TRUE;
+}
+
static void
rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
khash_t (rspamd_url_hash) *url_set)
{
- struct rspamd_url *query_url;
- gchar *url_str;
- gint rc;
- gboolean prefix_added;
-
if (url->flags & RSPAMD_URL_FLAG_UNNORMALISED) {
url->flags |= RSPAMD_URL_FLAG_OBSCURED;
}
if (url->querylen > 0) {
+ struct rspamd_html_url_query_cbd qcbd;
- if (rspamd_url_find (pool, rspamd_url_query_unsafe (url), url->querylen, &url_str,
- RSPAMD_URL_FIND_ALL,
- NULL, &prefix_added)) {
- query_url = rspamd_mempool_alloc0 (pool,
- sizeof (struct rspamd_url));
-
- rc = rspamd_url_parse (query_url,
- url_str,
- strlen (url_str),
- pool,
- RSPAMD_URL_PARSE_TEXT);
-
- if (rc == URI_ERRNO_OK &&
- query_url->hostlen > 0) {
- msg_debug_html ("found url %s in query of url"
- " %*s", url_str, url->querylen, rspamd_url_query_unsafe (url));
-
- if (prefix_added) {
- query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
- }
-
- if (query_url->flags
- & (RSPAMD_URL_FLAG_UNNORMALISED|RSPAMD_URL_FLAG_OBSCURED|
- RSPAMD_URL_FLAG_NUMERIC)) {
- /* Set obscured flag if query url is bad */
- url->flags |= RSPAMD_URL_FLAG_OBSCURED;
- }
+ qcbd.pool = pool;
+ qcbd.url_set = url_set;
- /* And vice-versa */
- if (url->flags & RSPAMD_URL_FLAG_OBSCURED) {
- query_url->flags |= RSPAMD_URL_FLAG_OBSCURED;
- }
-
- rspamd_url_set_add_or_increase (url_set, query_url);
- }
- }
+ rspamd_url_find_multiple(pool,
+ rspamd_url_query_unsafe (url), url->querylen,
+ RSPAMD_URL_FIND_ALL, NULL,
+ rspamd_html_url_query_callback, &qcbd);
}
}
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 30872c38d..0669d932f 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -3253,10 +3253,6 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
(struct rspamd_url_mimepart_cbdata *)ud;
struct rspamd_process_exception *ex;
struct rspamd_task *task;
- gchar *url_str = NULL;
- struct rspamd_url *query_url;
- gint rc;
- gboolean prefix_added;
task = cbd->task;
ex = rspamd_mempool_alloc0 (task->task_pool, sizeof (struct rspamd_process_exception));
More information about the Commits
mailing list