commit 9e7bf60: [Rework] Change the way to extract URLs when dealing with alternative parts
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Mar 19 16:14:06 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-03-19 16:06:42 +0000
URL: https://github.com/rspamd/rspamd/commit/9e7bf606f92f56911d6f31a9b8b1e7d030ca27a7 (HEAD -> master)
[Rework] Change the way to extract URLs when dealing with alternative parts
---
src/libmime/message.c | 45 +++++++++++++++++++++++++++++++++++++++++----
1 file changed, 41 insertions(+), 4 deletions(-)
diff --git a/src/libmime/message.c b/src/libmime/message.c
index c45550e6d..f167730d4 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -859,12 +859,49 @@ rspamd_message_process_text_part_maybe (struct rspamd_task *task,
rspamd_normalize_text_part (task, text_part);
if (!IS_PART_HTML (text_part)) {
- rspamd_url_text_extract (task->task_pool, task, text_part,
- RSPAMD_URL_FIND_ALL);
+ if (mime_part->parent_part) {
+ struct rspamd_mime_part *parent = mime_part->parent_part;
+
+ if (IS_PART_MULTIPART (parent) && parent->specific.mp->children->len == 2) {
+ /*
+ * Use strict extraction mode: we will extract missing urls from
+ * an html part if needed
+ */
+ rspamd_url_text_extract (task->task_pool, task, text_part,
+ RSPAMD_URL_FIND_STRICT);
+ }
+ else {
+ /*
+ * Fall back to full text extraction using TLD patterns
+ */
+ rspamd_url_text_extract (task->task_pool, task, text_part,
+ RSPAMD_URL_FIND_ALL);
+ }
+ }
+ else {
+ /*
+ * Fall back to full text extraction using TLD patterns
+ */
+ rspamd_url_text_extract (task->task_pool, task, text_part,
+ RSPAMD_URL_FIND_ALL);
+ }
}
else {
- rspamd_url_text_extract (task->task_pool, task, text_part,
- RSPAMD_URL_FIND_STRICT);
+ if (mime_part->parent_part) {
+ struct rspamd_mime_part *parent = mime_part->parent_part;
+
+ if (IS_PART_MULTIPART (parent) && parent->specific.mp->children->len == 2) {
+ /* Do not extract urls from HTML at all */
+ }
+ else {
+ rspamd_url_text_extract (task->task_pool, task, text_part,
+ RSPAMD_URL_FIND_STRICT);
+ }
+ }
+ else {
+ rspamd_url_text_extract (task->task_pool, task, text_part,
+ RSPAMD_URL_FIND_STRICT);
+ }
}
if (text_part->exceptions) {
More information about the Commits mailing list