commit f03ded0: [Feature] Core: Ignore and mark invisible spaces

Vsevolod Stakhov <vsevolod at highsecure.ru>
Sat Jan 12 15:00:03 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-01-12 14:53:19 +0000
URL: https://github.com/rspamd/rspamd/commit/f03ded05654f6fd62028e3dcaea461fe0116b96c

[Feature] Core: Ignore and mark invisible spaces

---
 src/libmime/message.c | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/libmime/message.c b/src/libmime/message.c
index a15485339..4cb9e07b3 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -215,13 +215,15 @@ rspamd_mime_part_detect_language (struct rspamd_task *task,
 }
 
 static void
-rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
+rspamd_strip_newlines_parse (struct rspamd_task *task,
+		const gchar *begin, const gchar *pe,
 		struct rspamd_mime_text_part *part)
 {
 	const gchar *p = begin, *c = begin;
 	gchar last_c = '\0';
 	gboolean crlf_added = FALSE;
 	gboolean url_open_bracket = FALSE;
+	UChar32 uc;
 
 	enum {
 		normal_char,
@@ -230,6 +232,39 @@ rspamd_strip_newlines_parse (const gchar *begin, const gchar *pe,
 	} state = normal_char;
 
 	while (p < pe) {
+		if (IS_PART_UTF (part)) {
+			gint32 off = p - begin;
+			U8_NEXT (begin, off, pe - begin, uc);
+
+			if (uc != -1) {
+				while (p < pe) {
+					if (uc == 0x200b) {
+						/* Invisible space ! */
+						task->flags |= RSPAMD_TASK_FLAG_BAD_UNICODE;
+
+						if (p > c) {
+							g_byte_array_append (part->utf_stripped_content,
+									(const guint8 *) c, p - c);
+							c = begin + off;
+							p = c;
+						}
+
+						U8_NEXT (begin, off, pe - begin, uc);
+
+						if (uc != 0x200b) {
+							break;
+						}
+
+						p = begin + off;
+						c = p;
+					}
+					else {
+						break;
+					}
+				}
+			}
+		}
+
 		if (G_UNLIKELY (*p) == '\r') {
 			switch (state) {
 			case normal_char:
@@ -469,7 +504,7 @@ rspamd_normalize_text_part (struct rspamd_task *task,
 		p = (const gchar *)part->utf_content->data;
 		end = p + part->utf_content->len;
 
-		rspamd_strip_newlines_parse (p, end, part);
+		rspamd_strip_newlines_parse (task, p, end, part);
 
 		for (i = 0; i < part->newlines->len; i ++) {
 			ex = rspamd_mempool_alloc (task->task_pool, sizeof (*ex));


More information about the Commits mailing list