commit 87ef0c4: [Minor] Ignore bogus head tags inside body

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jul 13 15:56:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-13 16:52:09 +0100
URL: https://github.com/rspamd/rspamd/commit/87ef0c44cef19ce6498fe5e595097fd09aeaf396 (HEAD -> master)

[Minor] Ignore bogus head tags inside body

---
 src/libserver/html/html.cxx       | 3 ++-
 src/libserver/html/html_tests.cxx | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index cf12b0a01..51f8589e2 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1085,7 +1085,7 @@ html_append_tag_content(rspamd_mempool_t *pool,
 
 		return ret;
 	}
-	else if (tag->id == Tag_HEAD) {
+	else if (tag->id == Tag_HEAD && (tag->flags & FL_IGNORE)) {
 		auto ret = tag->closing.end;
 		calculate_final_tag_offsets();
 
@@ -1706,6 +1706,7 @@ html_process_input(rspamd_mempool_t *pool,
 				if (html_document_state == html_document_state::doctype) {
 					if (cur_tag->id == Tag_HEAD || (cur_tag->flags & CM_HEAD)) {
 						html_document_state = html_document_state::head;
+						cur_tag->flags |= FL_IGNORE;
 					}
 					else if (cur_tag->id != Tag_HTML) {
 						html_document_state = html_document_state::body;
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index ac06a353b..1181e79ac 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -69,6 +69,7 @@ TEST_CASE("html text extraction")
 {
 	using namespace std::string_literals;
 	const std::vector<std::pair<std::string, std::string>> cases{
+			{"<html><body><html><head>displayed</body></html></body></html>", "displayed"},
 			{"test", "test"},
 			{"test\0"s, "test\uFFFD"s},
 			{"test\0test"s, "test\uFFFDtest"s},
@@ -184,6 +185,7 @@ TEST_CASE("html text extraction")
 			/* Head tag with some stuff */
 			{"<html><head><p>oh my god</head><body></body></html>", "oh my god\n"},
 			{"<html><head><title>oh my god</head><body></body></html>", ""},
+
 	};
 
 	rspamd_url_init(NULL);


More information about the Commits mailing list