commit b82366d: [Fix] Another brain damage html standard adoptions
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Mar 3 22:15:25 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-03-02 16:52:39 +0000
URL: https://github.com/rspamd/rspamd/commit/b82366d9eff3791c986c5d04d107d0fb38a65c3c
[Fix] Another brain damage html standard adoptions
---
src/libserver/html.c | 32 +++++++++++++++++++++++++++++---
1 file changed, 29 insertions(+), 3 deletions(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 78c69406c..b7e78e57b 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1147,9 +1147,35 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
state = parse_equal;
}
else if (!g_ascii_isspace (*in)) {
- hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- tag->flags |= FL_BROKEN;
- state = ignore_bad_tag;
+ /*
+ * HTML defines that crap could still be restored and
+ * calculated somehow... So we have to follow this stupid behaviour
+ */
+ /*
+ * TODO: estimate what insane things email clients do in each case
+ */
+ if (*in == '>') {
+ /*
+ * Attribute name followed by end of tag
+ * Should be okay (empty attribute). The rest is handled outside
+ * this automata.
+ */
+
+ }
+ else if (*in == '"' || *in == '\'') {
+ /* Attribute followed by quote... Missing '=' ? Dunno, need to test */
+ hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
+ tag->flags |= FL_BROKEN;
+ state = ignore_bad_tag;
+ }
+ else {
+ /*
+ * Just start another attribute, ignoring empty attributes for
+ * now. We don't use them in fact...
+ */
+ state = parse_attr_name;
+ *savep = in;
+ }
}
break;
More information about the Commits
mailing list