commit 4c6234a: [Fix] Fix some complicated case with the closing tags parsing
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Sep 8 13:49:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-09-08 14:45:33 +0100
URL: https://github.com/rspamd/rspamd/commit/4c6234a1a07c3fd777551c6789ad0b44523da210 (HEAD -> master)
[Fix] Fix some complicated case with the closing tags parsing
---
src/libserver/html/html.cxx | 46 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 43 insertions(+), 3 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 97009749f..b9729a71e 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1752,9 +1752,49 @@ html_process_input(rspamd_mempool_t *pool,
break;
case tag_raw_text_less_than:
if (t == '/') {
- /* Shift back */
- p = c;
- state = tag_begin;
+ /* This is a special case: we have seen an opening brace and then ensure
+ * that there is a closing brace nearby
+ * (we look ahead at most 30 characters). We also need to ensure
+ * that we have no special characters, such as punctuation marks and
+ * so on.
+ * Basically, we validate the input to be sane.
+ * Since closing tags must not have attributes, these assumptions
+ * seem to be reasonable enough for our toy parser.
+ */
+ gint cur_lookahead = 1;
+ gint max_lookahead = MIN (end - p, 30);
+ bool valid_closing_tag = true;
+
+ if (p + 1 < end && !g_ascii_isalpha (p[1])) {
+ valid_closing_tag = false;
+ }
+ else {
+ while (cur_lookahead < max_lookahead) {
+ gchar tt = p[cur_lookahead];
+ if (tt == '>') {
+ break;
+ }
+ else if (tt < '\n' || tt == ',') {
+ valid_closing_tag = false;
+ break;
+ }
+ cur_lookahead ++;
+ }
+
+ if (cur_lookahead == max_lookahead) {
+ valid_closing_tag = false;
+ }
+ }
+
+ if (valid_closing_tag) {
+ /* Shift back */
+ p = c;
+ state = tag_begin;
+ }
+ else {
+ p ++;
+ state = tag_raw_text;
+ }
}
else {
p ++;
More information about the Commits
mailing list