commit 6b7622a: [Rework] Urls: Improve query urls handling
Vsevolod Stakhov
vsevolod at highsecure.ru
Sun Mar 22 13:14:08 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-03-22 13:08:26 +0000
URL: https://github.com/rspamd/rspamd/commit/6b7622a2ff2110fe1c715278386b9fdad0bedcd0 (HEAD -> master)
[Rework] Urls: Improve query urls handling
---
src/libserver/url.c | 67 +++++++++++++++++++++++++++++------------------------
src/libserver/url.h | 1 +
2 files changed, 38 insertions(+), 30 deletions(-)
diff --git a/src/libserver/url.c b/src/libserver/url.c
index e0f05c3b0..30872c38d 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -3213,11 +3213,44 @@ struct rspamd_url_mimepart_cbdata {
gsize url_len;
};
+static gboolean
+rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset,
+ gsize end_offset, gpointer ud)
+{
+ struct rspamd_url_mimepart_cbdata *cbd =
+ (struct rspamd_url_mimepart_cbdata *)ud;
+ struct rspamd_task *task;
+ /* Callback handed to rspamd_url_find_multiple for URLs found in another URL's query; start_offset/end_offset are unused here */
+ task = cbd->task;
+
+ if (url->protocol == PROTOCOL_MAILTO) {
+ if (url->userlen == 0) { /* a mailto: URL with an empty user part is not added */
+ return FALSE; /* NOTE(review): effect of FALSE on rspamd_url_find_multiple is not visible here - confirm */
+ }
+ }
+ /* Also check max urls: when cfg->max_urls is set (> 0) and the task's URL set already holds more than that, log and return FALSE without adding */
+ if (cbd->task->cfg && cbd->task->cfg->max_urls > 0) {
+ if (kh_size (MESSAGE_FIELD (task, urls)) > cbd->task->cfg->max_urls) {
+ msg_err_task ("part has too many URLs, we cannot process more: "
+ "%d urls extracted ",
+ (guint)kh_size (MESSAGE_FIELD (task, urls)));
+
+ return FALSE;
+ }
+ }
+
+ url->flags |= RSPAMD_URL_FLAG_QUERY; /* tag the URL as extracted from a query string before storing it */
+ rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+
+ return TRUE;
+}
+
static gboolean
rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
gsize end_offset, gpointer ud)
{
- struct rspamd_url_mimepart_cbdata *cbd = ud;
+ struct rspamd_url_mimepart_cbdata *cbd =
+ (struct rspamd_url_mimepart_cbdata *)ud;
struct rspamd_process_exception *ex;
struct rspamd_task *task;
gchar *url_str = NULL;
@@ -3270,36 +3303,10 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
/* We also search the query for additional url inside */
if (url->querylen > 0) {
- if (rspamd_url_find (task->task_pool,
+ rspamd_url_find_multiple (task->task_pool,
rspamd_url_query_unsafe (url), url->querylen,
- &url_str, RSPAMD_URL_FIND_ALL, NULL, &prefix_added)) {
- query_url = rspamd_mempool_alloc0 (task->task_pool,
- sizeof (struct rspamd_url));
- rc = rspamd_url_parse (query_url,
- url_str,
- strlen (url_str),
- task->task_pool,
- RSPAMD_URL_PARSE_TEXT);
-
- if (rc == URI_ERRNO_OK &&
- query_url->hostlen > 0) {
- msg_debug_task ("found url %s in query of url"
- " %*s", url_str, url->querylen, rspamd_url_query_unsafe (url));
-
- if (prefix_added) {
- query_url->flags |= RSPAMD_URL_FLAG_SCHEMALESS;
- }
-
- if (query_url->protocol == PROTOCOL_MAILTO) {
- if (query_url->userlen == 0) {
- return TRUE;
- }
- }
-
- query_url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
- rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), query_url);
- }
- }
+ RSPAMD_URL_FIND_ALL, NULL,
+ rspamd_url_query_callback, cbd);
}
return TRUE;
diff --git a/src/libserver/url.h b/src/libserver/url.h
index bf8ba4b63..bb9c57399 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -35,6 +35,7 @@ enum rspamd_url_flags {
RSPAMD_URL_FLAG_ZW_SPACES = 1u << 17u,
RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
+ RSPAMD_URL_FLAG_QUERY = 1u << 20u,
};
struct rspamd_url_tag {
More information about the Commits
mailing list