commit 87ef0c4: [Minor] Ignore bogus head tags inside body
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Jul 13 15:56:06 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-07-13 16:52:09 +0100
URL: https://github.com/rspamd/rspamd/commit/87ef0c44cef19ce6498fe5e595097fd09aeaf396 (HEAD -> master)
[Minor] Ignore bogus head tags inside body
---
src/libserver/html/html.cxx | 3 ++-
src/libserver/html/html_tests.cxx | 2 ++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index cf12b0a01..51f8589e2 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1085,7 +1085,7 @@ html_append_tag_content(rspamd_mempool_t *pool,
return ret;
}
- else if (tag->id == Tag_HEAD) {
+ else if (tag->id == Tag_HEAD && (tag->flags & FL_IGNORE)) {
auto ret = tag->closing.end;
calculate_final_tag_offsets();
@@ -1706,6 +1706,7 @@ html_process_input(rspamd_mempool_t *pool,
if (html_document_state == html_document_state::doctype) {
if (cur_tag->id == Tag_HEAD || (cur_tag->flags & CM_HEAD)) {
html_document_state = html_document_state::head;
+ cur_tag->flags |= FL_IGNORE;
}
else if (cur_tag->id != Tag_HTML) {
html_document_state = html_document_state::body;
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index ac06a353b..1181e79ac 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -69,6 +69,7 @@ TEST_CASE("html text extraction")
{
using namespace std::string_literals;
const std::vector<std::pair<std::string, std::string>> cases{
+ {"<html><body><html><head>displayed</body></html></body></html>", "displayed"},
{"test", "test"},
{"test\0"s, "test\uFFFD"s},
{"test\0test"s, "test\uFFFDtest"s},
@@ -184,6 +185,7 @@ TEST_CASE("html text extraction")
/* Head tag with some stuff */
{"<html><head><p>oh my god</head><body></body></html>", "oh my god\n"},
{"<html><head><title>oh my god</head><body></body></html>", ""},
+
};
rspamd_url_init(NULL);
More information about the Commits mailing list