commit 235979e: [Minor] Further fixes for tag state machine

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jul 13 13:14:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-13 14:12:58 +0100
URL: https://github.com/rspamd/rspamd/commit/235979e848be5322b9f8d5a979a3c8fde46d9fa4 (HEAD -> master)

[Minor] Further fixes for tag state machine

---
 src/libserver/html/html.cxx       | 9 ++++++---
 src/libserver/html/html_tests.cxx | 4 +++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index bd323b43f..5d2479ab4 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -388,9 +388,8 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 				/* Should never be in attribute names but ignored */
 				tag->flags |= FL_BROKEN;
 			}
-			else {
-				store_value_character(true);
-			}
+
+			store_value_character(true);
 		}
 
 		break;
@@ -421,11 +420,13 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 				hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
 				tag->flags |= FL_BROKEN;
 				store_component_value();
+				store_value_character(true);
 				state = spaces_after_param;
 			}
 			else {
 				/* Empty attribute */
 				store_component_value();
+				store_value_character(true);
 				state = spaces_after_param;
 			}
 		}
@@ -539,6 +540,8 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 			else if (*in == '=') {
 				/* Attributes cannot start with '=' */
 				tag->flags |= FL_BROKEN;
+				store_value_character(true);
+				state = parse_attr_name;
 			}
 			else {
 				store_value_character(true);
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index 07618b273..a0c60b299 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -221,7 +221,8 @@ TEST_CASE("html urls extraction")
 {
 	using namespace std::string_literals;
 	const std::vector<std::pair<std::string, std::vector<std::string>>> cases{
-			{"<a href=\"https://example.com\">test</a>", {"https://example.com"}}
+			{"<a href=\"https://example.com\">test</a>", {"https://example.com"}},
+			{"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}},
 	};
 
 	rspamd_url_init(NULL);
@@ -244,6 +245,7 @@ TEST_CASE("html urls extraction")
 			g_byte_array_free(tmp, TRUE);
 			g_ptr_array_free(purls, TRUE);
 		}
+		++i;
 	}
 
 	rspamd_mempool_delete(pool);


More information about the Commits mailing list