commit 4f25483: [Fix] Fix parsing of the unquoted attributes in HTML
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Oct 5 14:35:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-10-05 15:28:43 +0100
URL: https://github.com/rspamd/rspamd/commit/4f254839f829ec18b2ec144a6de6777b1f5688f7
[Fix] Fix parsing of the unquoted attributes in HTML
---
src/libserver/html/html.cxx | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index f8b3e96ea..1e5d52241 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -250,6 +250,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
ignore_bad_tag,
tag_end,
slash_after_value,
+ slash_in_unqouted_value,
} state;
state = static_cast<enum tag_parser_state>(parser_env.cur_state);
@@ -513,8 +514,7 @@ html_parse_tag_content(rspamd_mempool_t *pool,
case parse_value:
if (*in == '/') {
- state = slash_after_value;
- store_component_value();
+ state = slash_in_unqouted_value;
}
else if (g_ascii_isspace (*in) || *in == '>' || *in == '"') {
store_component_value();
@@ -570,6 +570,20 @@ html_parse_tag_content(rspamd_mempool_t *pool,
state = parse_attr_name;
}
break;
+ case slash_in_unqouted_value:
+ if (*in == '>') {
+ /* That slash was in fact closing tag slash, wohoo */
+ tag->flags |= FL_CLOSED;
+ state = tag_end;
+ store_component_value();
+ }
+ else {
+ /* Welcome to the world of html, revert state and save missing / */
+ parser_env.buf.push_back('/');
+ store_value_character(false);
+ state = parse_value;
+ }
+ break;
case ignore_bad_tag:
case tag_end:
break;
More information about the Commits
mailing list