commit b7467f9: [Fix] Urls: Fix processing of html urls when it comes to the flags
Vsevolod Stakhov
vsevolod at highsecure.ru
Sat Mar 6 23:56:28 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-03-06 23:49:16 +0000
URL: https://github.com/rspamd/rspamd/commit/b7467f9d294faf54f25d2b2fc32255f613097416 (HEAD -> master)
[Fix] Urls: Fix processing of html urls when it comes to the flags
Issue: #3664
---
src/libserver/html.c | 11 +++++++---
src/libserver/url.c | 58 +++++++++++++++++++++++++++++++++++++---------------
src/libserver/url.h | 5 +++--
src/lua/lua_task.c | 2 +-
4 files changed, 54 insertions(+), 22 deletions(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 974b59129..aa1cdf6cc 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1764,7 +1764,7 @@ rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset,
url->flags |= RSPAMD_URL_FLAG_QUERY;
- if (rspamd_url_set_add_or_increase (cbd->url_set, url) && cbd->part_urls) {
+ if (rspamd_url_set_add_or_increase(cbd->url_set, url, false) && cbd->part_urls) {
g_ptr_array_add (cbd->part_urls, url);
}
@@ -1903,7 +1903,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
if (img->url) {
img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
- if (rspamd_url_set_add_or_increase (url_set, img->url) &&
+ if (rspamd_url_set_add_or_increase(url_set, img->url, false) &&
part_urls) {
g_ptr_array_add (part_urls, img->url);
}
@@ -3245,10 +3245,15 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
if (url != NULL) {
if (url_set != NULL) {
- if (rspamd_url_set_add_or_increase (url_set, url)) {
+ struct rspamd_url *maybe_existing =
+ rspamd_url_set_add_or_return (url_set, url);
+ if (maybe_existing == url) {
rspamd_process_html_url (pool, url, url_set,
part_urls);
}
+ else {
+ url = maybe_existing;
+ }
}
href_offset = dest->len;
diff --git a/src/libserver/url.c b/src/libserver/url.c
index a5de7ebdf..8183213b6 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -3377,7 +3377,7 @@ rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset,
url->flags |= RSPAMD_URL_FLAG_QUERY;
- if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url)) {
+ if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false)) {
if (cbd->part && cbd->part->mime_part->urls) {
g_ptr_array_add (cbd->part->mime_part->urls, url);
}
@@ -3433,8 +3433,8 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
- if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url) &&
- cbd->part->mime_part->urls) {
+ if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false) &&
+ cbd->part->mime_part->urls) {
g_ptr_array_add (cbd->part->mime_part->urls, url);
}
@@ -3592,7 +3592,7 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
}
}
- rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+ rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url, false);
/* We also search the query for additional url inside */
if (url->querylen > 0) {
@@ -3622,8 +3622,8 @@ rspamd_url_task_subject_callback (struct rspamd_url *url, gsize start_offset,
}
}
- rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls),
- query_url);
+ rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls),
+ query_url, false);
}
}
}
@@ -4044,21 +4044,44 @@ rspamd_url_protocol_from_string (const gchar *str)
bool
-rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
- struct rspamd_url *u)
+rspamd_url_set_add_or_increase(khash_t (rspamd_url_hash) *set,
+ struct rspamd_url *u,
+ bool enforce_replace)
{
khiter_t k;
gint r;
- k = kh_put (rspamd_url_hash, set, u, &r);
+ k = kh_get (rspamd_url_hash, set, u);
- if (r == 0) {
+ if (k != kh_end (set)) {
+ /* Existing url */
struct rspamd_url *ex = kh_key (set, k);
-
- ex->count ++;
+#define SUSPICIOUS_URL_FLAGS (RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_OBSCURED|RSPAMD_URL_FLAG_ZW_SPACES)
+ if (enforce_replace) {
+ kh_key (set, k) = u;
+ u->count++;
+ }
+ else {
+ if (u->flags & SUSPICIOUS_URL_FLAGS) {
+ if (!(ex->flags & SUSPICIOUS_URL_FLAGS)) {
+ /* Propagate new url to an old one */
+ kh_key (set, k) = u;
+ u->count++;
+ }
+ else {
+ ex->count++;
+ }
+ }
+ else {
+ ex->count++;
+ }
+ }
return false;
}
+ else {
+ k = kh_put (rspamd_url_hash, set, u, &r);
+ }
return true;
}
@@ -4071,12 +4094,15 @@ rspamd_url_set_add_or_return (khash_t (rspamd_url_hash) *set,
gint r;
if (set) {
- k = kh_put (rspamd_url_hash, set, u, &r);
+ k = kh_get (rspamd_url_hash, set, u);
- if (r == 0) {
- struct rspamd_url *ex = kh_key (set, k);
+ if (k != kh_end (set)) {
+ return kh_key (set, k);
+ }
+ else {
+ k = kh_put (rspamd_url_hash, set, u, &r);
- return ex;
+ return kh_key (set, k);
}
}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 567cdd137..59485ab9a 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -296,8 +296,9 @@ KHASH_DECLARE (rspamd_url_host_hash, struct rspamd_url *, char);
* @param u
* @return true if a new url has been added
*/
-bool rspamd_url_set_add_or_increase (khash_t (rspamd_url_hash) *set,
- struct rspamd_url *u);
+bool rspamd_url_set_add_or_increase(khash_t (rspamd_url_hash) *set,
+ struct rspamd_url *u,
+ bool enforce_replace);
/**
* Same as rspamd_url_set_add_or_increase but returns the existing url if found
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index d2bd17aba..579f04fb9 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -2507,7 +2507,7 @@ lua_task_inject_url (lua_State * L)
}
if (task && task->message && url && url->url) {
- if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url->url)) {
+ if (rspamd_url_set_add_or_increase(MESSAGE_FIELD (task, urls), url->url, false)) {
if (mpart && mpart->urls) {
/* Also add url to the mime part */
g_ptr_array_add (mpart->urls, url->url);
More information about the Commits mailing list