commit 4f25483: [Fix] Fix parsing of the unquoted attributes in HTML

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Oct 5 14:35:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-10-05 15:28:43 +0100
URL: https://github.com/rspamd/rspamd/commit/4f254839f829ec18b2ec144a6de6777b1f5688f7

[Fix] Fix parsing of the unquoted attributes in HTML

---
 src/libserver/html/html.cxx | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index f8b3e96ea..1e5d52241 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -250,6 +250,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 		ignore_bad_tag,
 		tag_end,
 		slash_after_value,
+		slash_in_unqouted_value,
 	} state;
 
 	state = static_cast<enum tag_parser_state>(parser_env.cur_state);
@@ -513,8 +514,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 
 	case parse_value:
 		if (*in == '/') {
-			state = slash_after_value;
-			store_component_value();
+			state = slash_in_unqouted_value;
 		}
 		else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') {
 			store_component_value();
@@ -570,6 +570,20 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 			state = parse_attr_name;
 		}
 		break;
+	case slash_in_unqouted_value:
+		if (*in == '>') {
+			/* That slash was in fact closing tag slash, wohoo */
+			tag->flags |= FL_CLOSED;
+			state = tag_end;
+			store_component_value();
+		}
+		else {
+			/* Welcome to the world of html, revert state and save missing / */
+			parser_env.buf.push_back('/');
+			store_value_character(false);
+			state = parse_value;
+		}
+		break;
 	case ignore_bad_tag:
 	case tag_end:
 		break;


More information about the Commits mailing list