commit 5dd1ccb: [Minor] Allow attaching of urls to the mime parts
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue May 5 14:56:07 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-05-05 14:59:33 +0100
URL: https://github.com/rspamd/rspamd/commit/5dd1ccbb33e53b9a3903970bbd623569869ad008
[Minor] Allow attaching of urls to the mime parts
---
src/libmime/message.c | 8 +++++++-
src/libmime/message.h | 1 +
src/libmime/mime_parser.c | 2 ++
src/libserver/html.c | 36 +++++++++++++++++++++++++++---------
src/libserver/html.h | 3 ++-
src/libserver/url.c | 14 ++++++++++++--
6 files changed, 51 insertions(+), 13 deletions(-)
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 4b00d2dd0..eec992552 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -758,7 +758,8 @@ rspamd_message_process_html_text_part (struct rspamd_task *task,
text_part->html,
text_part->utf_raw_content,
&text_part->exceptions,
- MESSAGE_FIELD (task, urls));
+ MESSAGE_FIELD (task, urls),
+ text_part->mime_part->urls);
if (text_part->utf_content->len == 0) {
text_part->flags |= RSPAMD_MIME_TEXT_PART_FLAG_EMPTY;
@@ -925,6 +926,7 @@ rspamd_message_from_data (struct rspamd_task *task, const guchar *start,
part->parsed_data.begin = start;
part->parsed_data.len = len;
part->part_number = MESSAGE_FIELD (task, parts)->len;
+ part->urls = g_ptr_array_new ();
part->raw_headers = rspamd_message_headers_new ();
part->headers_order = NULL;
@@ -1052,6 +1054,10 @@ rspamd_message_dtor (struct rspamd_message *msg)
LUA_REGISTRYINDEX,
p->specific.lua_specific.cbref);
}
+
+ if (p->urls) {
+ g_ptr_array_unref (p->urls);
+ }
}
PTR_ARRAY_FOREACH (msg->text_parts, i, tp) {
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 96ed9d5d4..a921d6f38 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -91,6 +91,7 @@ struct rspamd_mime_part {
struct rspamd_mime_header *headers_order;
struct rspamd_mime_headers_table *raw_headers;
+ GPtrArray *urls;
gchar *raw_headers_str;
gsize raw_headers_len;
diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index 590ee57d6..4fc37ad3d 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -683,6 +683,7 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
}
part->part_number = MESSAGE_FIELD (task, parts)->len;
+ part->urls = g_ptr_array_new ();
g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
msg_debug_mime ("parsed data part %T/%T of length %z (%z orig), %s cte",
&part->ct->type, &part->ct->subtype, part->parsed_data.len,
@@ -1017,6 +1018,7 @@ rspamd_mime_parse_multipart_part (struct rspamd_task *task,
}
part->part_number = MESSAGE_FIELD (task, parts)->len;
+ part->urls = g_ptr_array_new ();
g_ptr_array_add (MESSAGE_FIELD (task, parts), part);
st->nesting ++;
rspamd_mime_part_get_cte (task, part->raw_headers, part, FALSE);
diff --git a/src/libserver/html.c b/src/libserver/html.c
index f8c43bdd5..b916019d9 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1548,7 +1548,7 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
static struct rspamd_url *
rspamd_html_process_url_tag (rspamd_mempool_t *pool, struct html_tag *tag,
- struct html_content *hc)
+ struct html_content *hc)
{
struct html_tag_component *comp;
GList *cur;
@@ -1628,6 +1628,7 @@ struct rspamd_html_url_query_cbd {
rspamd_mempool_t *pool;
khash_t (rspamd_url_hash) *url_set;
struct rspamd_url *url;
+ GPtrArray *part_urls;
};
static gboolean
@@ -1651,14 +1652,18 @@ rspamd_html_url_query_callback (struct rspamd_url *url, gsize start_offset,
cbd->url->querylen, rspamd_url_query_unsafe (cbd->url));
url->flags |= RSPAMD_URL_FLAG_QUERY;
- rspamd_url_set_add_or_increase (cbd->url_set, url);
+
+ if (rspamd_url_set_add_or_increase (cbd->url_set, url) && cbd->part_urls) {
+ g_ptr_array_add (cbd->part_urls, url);
+ }
return TRUE;
}
static void
rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
- khash_t (rspamd_url_hash) *url_set)
+ khash_t (rspamd_url_hash) *url_set,
+ GPtrArray *part_urls)
{
if (url->flags & RSPAMD_URL_FLAG_UNNORMALISED) {
url->flags |= RSPAMD_URL_FLAG_OBSCURED;
@@ -1670,12 +1675,17 @@ rspamd_process_html_url (rspamd_mempool_t *pool, struct rspamd_url *url,
qcbd.pool = pool;
qcbd.url_set = url_set;
qcbd.url = url;
+ qcbd.part_urls = part_urls;
rspamd_url_find_multiple(pool,
rspamd_url_query_unsafe (url), url->querylen,
RSPAMD_URL_FIND_ALL, NULL,
rspamd_html_url_query_callback, &qcbd);
}
+
+ if (part_urls) {
+ g_ptr_array_add (part_urls, url);
+ }
}
static void
@@ -1732,7 +1742,8 @@ rspamd_html_process_data_image (rspamd_mempool_t *pool,
static void
rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
- struct html_content *hc, khash_t (rspamd_url_hash) *url_set)
+ struct html_content *hc, khash_t (rspamd_url_hash) *url_set,
+ GPtrArray *part_urls)
{
struct html_tag_component *comp;
struct html_image *img;
@@ -1778,7 +1789,11 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
if (img->url) {
img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
- rspamd_url_set_add_or_increase (url_set, img->url);
+
+ if (rspamd_url_set_add_or_increase (url_set, img->url) &&
+ part_urls) {
+ g_ptr_array_add (part_urls, img->url);
+ }
}
}
}
@@ -2603,7 +2618,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
struct html_content *hc,
GByteArray *in,
GList **exceptions,
- khash_t (rspamd_url_hash) *url_set)
+ khash_t (rspamd_url_hash) *url_set,
+ GPtrArray *part_urls)
{
const guchar *p, *c, *end, *savep = NULL;
guchar t;
@@ -3067,7 +3083,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
if (url_set != NULL) {
if (rspamd_url_set_add_or_increase (url_set, url)) {
- rspamd_process_html_url (pool, url, url_set);
+ rspamd_process_html_url (pool, url, url_set,
+ part_urls);
}
}
@@ -3129,7 +3146,8 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool,
}
if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
- rspamd_html_process_img_tag (pool, cur_tag, hc, url_set);
+ rspamd_html_process_img_tag (pool, cur_tag, hc, url_set,
+ part_urls);
}
else if (cur_tag->flags & FL_BLOCK) {
struct html_block *bl;
@@ -3194,5 +3212,5 @@ rspamd_html_process_part (rspamd_mempool_t *pool,
struct html_content *hc,
GByteArray *in)
{
- return rspamd_html_process_part_full (pool, hc, in, NULL, NULL);
+ return rspamd_html_process_part_full (pool, hc, in, NULL, NULL, NULL);
}
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 72eac8d79..b319964ce 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -143,7 +143,8 @@ GByteArray *rspamd_html_process_part (rspamd_mempool_t *pool,
GByteArray *rspamd_html_process_part_full (rspamd_mempool_t *pool,
struct html_content *hc,
GByteArray *in, GList **exceptions,
- khash_t (rspamd_url_hash) *url_set);
+ khash_t (rspamd_url_hash) *url_set,
+ GPtrArray *part_urls);
/*
* Returns true if a specified tag has been seen in a part
diff --git a/src/libserver/url.c b/src/libserver/url.c
index a47d732f7..c10073dcb 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -3296,7 +3296,13 @@ rspamd_url_query_callback (struct rspamd_url *url, gsize start_offset,
}
url->flags |= RSPAMD_URL_FLAG_QUERY;
- rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+
+
+ if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url)) {
+ if (cbd->part && cbd->part->mime_part->urls) {
+ g_ptr_array_add (cbd->part->mime_part->urls, url);
+ }
+ }
return TRUE;
}
@@ -3347,7 +3353,11 @@ rspamd_url_text_part_callback (struct rspamd_url *url, gsize start_offset,
}
url->flags |= RSPAMD_URL_FLAG_FROM_TEXT;
- rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url);
+
+ if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url) &&
+ cbd->part->mime_part->urls) {
+ g_ptr_array_add (cbd->part->mime_part->urls, url);
+ }
cbd->part->exceptions = g_list_prepend (
cbd->part->exceptions,
More information about the Commits mailing list