commit b82366d: [Fix] Another brain damage html standard adoptions

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Mar 3 22:15:25 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-03-02 16:52:39 +0000
URL: https://github.com/rspamd/rspamd/commit/b82366d9eff3791c986c5d04d107d0fb38a65c3c

[Fix] Another brain damage html standard adoptions

---
 src/libserver/html.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index 78c69406c..b7e78e57b 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1147,9 +1147,35 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 			state = parse_equal;
 		}
 		else if (!g_ascii_isspace (*in)) {
-			hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
-			tag->flags |= FL_BROKEN;
-			state = ignore_bad_tag;
+			/*
+			 * HTML defines that crap could still be restored and
+			 * calculated somehow... So we have to follow this stupid behaviour
+			 */
+			/*
+			 * TODO: estimate what insane things email clients do in each case
+			 */
+			if (*in == '>') {
+				/*
+				 * Attribute name followed by end of tag
+				 * Should be okay (empty attribute). The rest is handled outside
+				 * this automaton.
+				 */
+
+			}
+			else if (*in == '"' || *in == '\'') {
+				/* Attribute followed by quote... Missing '=' ? Dunno, need to test */
+				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+				tag->flags |= FL_BROKEN;
+				state = ignore_bad_tag;
+			}
+			else {
+				/*
+				 * Just start another attribute, ignoring empty attributes for
+				 * now. We don't use them in fact...
+				 */
+				state = parse_attr_name;
+				*savep = in;
+			}
 		}
 		break;
 


More information about the Commits mailing list