commit 8d6010f: [Minor] Html: Fix one more corner case

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Jul 14 16:35:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-14 17:33:48 +0100
URL: https://github.com/rspamd/rspamd/commit/8d6010f86f77c07645319ddca16bd3000f0dcca6 (HEAD -> master)

[Minor] Html: Fix one more corner case

---
 src/libserver/html/html.cxx       | 9 ++++++++-
 src/libserver/html/html_tests.cxx | 4 +++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 51f8589e2..332229b50 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1714,7 +1714,7 @@ html_process_input(rspamd_mempool_t *pool,
 				}
 				else if (html_document_state == html_document_state::head) {
 					if (!(cur_tag->flags & (CM_EMPTY | CM_HEAD))) {
-						if (parent_tag && parent_tag->id == Tag_HEAD) {
+						if (parent_tag && (parent_tag->id == Tag_HEAD || !(parent_tag->flags & CM_HEAD))) {
 							/*
 							 * As by standard, we have to close the HEAD tag
 							 * and switch to the body state
@@ -1728,6 +1728,13 @@ html_process_input(rspamd_mempool_t *pool,
 						else if (cur_tag->id == Tag_BODY) {
 							html_document_state = html_document_state::body;
 						}
+						else {
+							/*
+							 * Propagate CM_HEAD to nested tags: in something like
+							 * <title><p><a>ololo</a></p></title> they should stay unprocessed
+							 */
+							cur_tag->flags |= CM_HEAD;
+						}
 					}
 				}
 
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index 4e87d7e2d..73f2ad81b 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -223,6 +223,8 @@ TEST_CASE("html urls extraction")
 {
 	using namespace std::string_literals;
 	const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{
+			{"<style></style><a href=\"https://www.example.com\">yolo</a>",
+					{"https://www.example.com"}, "yolo"},
 			{"<a href=\"https://example.com\">test</a>", {"https://example.com"}, "test"},
 			{"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}, "hello"},
 			{"<html>\n"
@@ -230,7 +232,7 @@ TEST_CASE("html urls extraction")
 			 "<body>\n"
 			 "<a href=\"https://www.example.com\">hello</a>\n"
 			 "</body>\n"
-			 "</html>", {"https://www.example.com"}, "hello"}
+			 "</html>", {"https://www.example.com"}, "hello"},
 	};
 
 	rspamd_url_init(NULL);


More information about the Commits mailing list