commit 4c6234a: [Fix] Fix some complicated case with the closing tags parsing

Wed Sep 8 13:49:04 UTC 2021

Author: Vsevolod Stakhov
Date: 2021-09-08 14:45:33 +0100
URL: https://github.com/rspamd/rspamd/commit/4c6234a1a07c3fd777551c6789ad0b44523da210 (HEAD -> master)

[Fix] Fix some complicated case with the closing tags parsing

---
 src/libserver/html/html.cxx | 46 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 97009749f..b9729a71e 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1752,9 +1752,49 @@ html_process_input(rspamd_mempool_t *pool,
 			break;
 		case tag_raw_text_less_than:
 			if (t == '/') {
-				/* Shift back */
-				p = c;
-				state = tag_begin;
+				/* Special case: we have seen an opening brace followed by '/',
+				 * so we ensure that there is a closing brace nearby
+				 * (we look ahead at most 30 characters). We also need to ensure
+				 * that the name starts with a letter and that the lookahead has no
+				 * special characters (anything below '\n', or a comma).
+				 * Basically, we validate the input to be sane.
+				 * Since closing tags must not have attributes, these assumptions
+				 * seem to be reasonable enough for our toy parser.
+				 */
+				gint cur_lookahead = 1;
+				gint max_lookahead = MIN (end - p, 30);
+				bool valid_closing_tag = true;
+
+				if (p + 1 < end && !g_ascii_isalpha (p[1])) {
+					valid_closing_tag = false;
+				}
+				else {
+					while (cur_lookahead < max_lookahead) {
+						gchar tt = p[cur_lookahead];
+						if (tt == '>') {
+							break;
+						}
+						else if (tt < '\n' || tt == ',') {
+							valid_closing_tag = false;
+							break;
+						}
+						cur_lookahead ++;
+					}
+
+					if (cur_lookahead == max_lookahead) {
+						valid_closing_tag = false;
+					}
+				}
+
+				if (valid_closing_tag) {
+					/* Shift back */
+					p = c;
+					state = tag_begin;
+				}
+				else {
+					p ++;
+					state = tag_raw_text;
+				}
 			}
 			else {
 				p ++;