commit 6b9f798: [Minor] Fix stupid email clients entities 'guessing'

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Feb 18 18:21:06 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-02-18 18:13:59 +0000
URL: https://github.com/rspamd/rspamd/commit/6b9f798d6a0be70e5e7c42f49bfccef67c7f7e54 (HEAD -> master)

[Minor] Fix stupid email clients entities 'guessing'

---
 src/libserver/html.c | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index 668d1bdff..df1773f71 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -342,9 +342,10 @@ guint
 rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 {
 	goffset l, rep_len;
-	gchar *t = s, *h = s, *e = s, *end_ptr;
+	gchar *t = s, *h = s, *e = s, *end_ptr, old_c;
 	const gchar *end;
 	const gchar *entity;
+	gboolean seen_hash = FALSE, seen_digit_only = FALSE, seen_hex = FALSE;
 	gint state = 0, base;
 	UChar32 uc;
 	khiter_t k;
@@ -364,6 +365,9 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 		case 0:
 			if (*h == '&') {
 				state = 1;
+				seen_hash = FALSE;
+				seen_hex = FALSE;
+				seen_digit_only = FALSE;
 				e = h;
 				h++;
 				continue;
@@ -376,15 +380,17 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 			break;
 		case 1:
 			if (*h == ';' && h > e) {
+decode_entity:
 				/* Determine base */
 				/* First find in entities table */
+				old_c = *h;
 				*h = '\0';
 				entity = e + 1;
 				uc = 0;
 
 				if (*entity != '#') {
 					k = kh_get (entity_by_name, html_entity_by_name, entity);
-					*h = ';';
+					*h = old_c;
 
 					if (k != kh_end (html_entity_by_name)) {
 						if (kh_val (html_entity_by_name, k)) {
@@ -429,7 +435,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 
 					if (end_ptr != NULL && *end_ptr != '\0') {
 						/* Skip undecoded */
-						*h = ';';
+						*h = old_c;
 
 						if (end - t > h - e + 1) {
 							memmove (t, e, h - e + 1);
@@ -438,7 +444,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 					}
 					else {
 						/* Search for a replacement */
-						*h = ';';
+						*h = old_c;
 						k = kh_get (entity_by_number, html_entity_by_number, uc);
 
 						if (k != kh_end (html_entity_by_number)) {
@@ -480,6 +486,11 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 								t += h - e + 1;
 							}
 						}
+
+						if (end - t > 0 && old_c != ';') {
+							/* Fuck email clients, fuck them */
+							*t++ = old_c;
+						}
 					}
 				}
 
@@ -496,6 +507,27 @@ rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 
 				e = h;
 			}
+			else if (*h == '#') {
+				seen_hash = TRUE;
+
+				if (h + 1 < end && h[1] == 'x') {
+					seen_hex = TRUE;
+					/* Skip one more character */
+					h ++;
+				}
+			}
+			else if (g_ascii_isdigit (*h) || (seen_hex && g_ascii_isxdigit (*h))) {
+				seen_digit_only = TRUE;
+			}
+			else {
+				if (seen_digit_only && seen_hash && h > e) {
+					/* We have seen some digits, so we can try to decode, eh */
+					/* Fuck retarded email clients... */
+					goto decode_entity;
+				}
+
+				seen_digit_only = FALSE;
+			}
 
 			h++;
 


More information about the Commits mailing list