commit 229cf1c: [Fix] Add special logic for text parts with no text extraction

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Aug 16 10:28:07 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-08-16 11:19:40 +0100
URL: https://github.com/rspamd/rspamd/commit/229cf1c94b1879d92028735d46da3a9021f97c29 (HEAD -> master)

[Fix] Add special logic for text parts with no text extraction

---
 lualib/lua_magic/types.lua |  4 ++++
 src/libmime/message.c      | 16 +++++++++++++++-
 src/libmime/message.h      |  7 ++++---
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index f082a53e5..09c93effc 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -287,6 +287,7 @@ local types = {
   xml = {
     ct = 'application/xml',
     type = 'text',
+    no_text = true,
   },
   txt = {
     type = 'text',
@@ -302,16 +303,19 @@ local types = {
     type = 'text',
     ct = 'text/csv',
     av_check = false,
+    no_text = true,
   },
   ics = {
     type = 'text',
     ct = 'text/calendar',
     av_check = false,
+    no_text = true,
   },
   vcf = {
     type = 'text',
     ct = 'text/vcard',
     av_check = false,
+    no_text = true,
   },
   eml = {
     type = 'message',
diff --git a/src/libmime/message.c b/src/libmime/message.c
index d788844a3..e6fc5be94 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -1446,6 +1446,19 @@ rspamd_message_process (struct rspamd_task *task)
 						part->detected_type = rspamd_mempool_strdup (task->task_pool,
 								lua_tostring (L, -1));
 					}
+
+					lua_pop (L, 1);
+
+					lua_pushstring (L, "no_text");
+					lua_gettable (L, -2);
+
+					if (lua_isboolean (L, -1)) {
+						if (!!lua_toboolean (L, -1)) {
+							part->flags |= RSPAMD_MIME_PART_NO_TEXT_EXTRACTION;
+						}
+					}
+
+					lua_pop (L, 1);
 				}
 			}
 
@@ -1479,7 +1492,8 @@ rspamd_message_process (struct rspamd_task *task)
 		rspamd_images_process_mime_part_maybe (task, part);
 
 		/* Still no content detected, try text heuristic */
-		if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
+		if (part->part_type == RSPAMD_MIME_PART_UNDEFINED &&
+				!(part->flags & RSPAMD_MIME_PART_NO_TEXT_EXTRACTION)) {
 			rspamd_message_process_text_part_maybe (task, part);
 		}
 	}
diff --git a/src/libmime/message.h b/src/libmime/message.h
index 4549c056d..25bf70f77 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -31,9 +31,10 @@ struct rspamd_image;
 struct rspamd_archive;
 
 enum rspamd_mime_part_flags {
-	RSPAMD_MIME_PART_ATTACHEMENT = (1 << 1),
-	RSPAMD_MIME_PART_BAD_CTE = (1 << 4),
-	RSPAMD_MIME_PART_MISSING_CTE = (1 << 5),
+	RSPAMD_MIME_PART_ATTACHEMENT = (1u << 1u),
+	RSPAMD_MIME_PART_BAD_CTE = (1u << 4u),
+	RSPAMD_MIME_PART_MISSING_CTE = (1u << 5u),
+	RSPAMD_MIME_PART_NO_TEXT_EXTRACTION = (1u << 6u),
 };
 
 enum rspamd_mime_part_type {


More information about the Commits mailing list