commit 3377d26: [Minor] Treat absent Content-Type more wisely

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Mar 12 21:21:05 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-03-12 21:13:04 +0000
URL: https://github.com/rspamd/rspamd/commit/3377d26289d64f828169c6dfbf4c71b3f5cdea09 (HEAD -> master)

[Minor] Treat absent Content-Type more wisely

---
 src/libmime/mime_parser.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/libmime/mime_parser.c b/src/libmime/mime_parser.c
index 0d6659867..a89b76b3a 100644
--- a/src/libmime/mime_parser.c
+++ b/src/libmime/mime_parser.c
@@ -25,6 +25,7 @@
 #include "contrib/uthash/utlist.h"
 #include <openssl/cms.h>
 #include <openssl/pkcs7.h>
+#include "contrib/fastutf8/fastutf8.h"
 
 struct rspamd_mime_parser_lib_ctx {
 	struct rspamd_multipattern *mp_boundary;
@@ -610,8 +611,20 @@ rspamd_mime_parse_normal_part (struct rspamd_task *task,
 				 * In theory, it is very unsafe to process it as a text part
 				 * as we unlikely get some sane result
 				 */
-				part->ct->flags &= ~RSPAMD_CONTENT_TYPE_TEXT;
-				part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+
+				/*
+				 * On the other hand, there is evidence that some
+				 * emails actually rely on that.
+				 * So we apply an expensive hack here:
+				 * if there are no 8bit characters -OR- the content is valid
+				 * UTF8, we can still assume Content-Type == text/plain
+				 */
+
+				if (rspamd_str_has_8bit (part->raw_data.begin, part->raw_data.len) &&
+					!rspamd_fast_utf8_validate (part->raw_data.begin, part->raw_data.len)) {
+					part->ct->flags &= ~RSPAMD_CONTENT_TYPE_TEXT;
+					part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
+				}
 			}
 		}
 


More information about the Commits mailing list