commit 8d6010f: [Minor] Html: Fix one more corner case
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Jul 14 16:35:06 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-07-14 17:33:48 +0100
URL: https://github.com/rspamd/rspamd/commit/8d6010f86f77c07645319ddca16bd3000f0dcca6 (HEAD -> master)
[Minor] Html: Fix one more corner case
---
src/libserver/html/html.cxx | 9 ++++++++-
src/libserver/html/html_tests.cxx | 4 +++-
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 51f8589e2..332229b50 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1714,7 +1714,7 @@ html_process_input(rspamd_mempool_t *pool,
}
else if (html_document_state == html_document_state::head) {
if (!(cur_tag->flags & (CM_EMPTY | CM_HEAD))) {
- if (parent_tag && parent_tag->id == Tag_HEAD) {
+ if (parent_tag && (parent_tag->id == Tag_HEAD || !(parent_tag->flags & CM_HEAD))) {
/*
* As by standard, we have to close the HEAD tag
* and switch to the body state
@@ -1728,6 +1728,13 @@ html_process_input(rspamd_mempool_t *pool,
else if (cur_tag->id == Tag_BODY) {
html_document_state = html_document_state::body;
}
+ else {
+ /*
+ * For propagation in something like
+ * <title><p><a>ololo</a></p></title> - should be unprocessed
+ */
+ cur_tag->flags |= CM_HEAD;
+ }
}
}
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index 4e87d7e2d..73f2ad81b 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -223,6 +223,8 @@ TEST_CASE("html urls extraction")
{
using namespace std::string_literals;
const std::vector<std::tuple<std::string, std::vector<std::string>, std::optional<std::string>>> cases{
+ {"<style></style><a href=\"https://www.example.com\">yolo</a>",
+ {"https://www.example.com"}, "yolo"},
{"<a href=\"https://example.com\">test</a>", {"https://example.com"}, "test"},
{"<a <poo href=\"http://example.com\">hello</a>", {"http://example.com"}, "hello"},
{"<html>\n"
@@ -230,7 +232,7 @@ TEST_CASE("html urls extraction")
"<body>\n"
"<a href=\"https://www.example.com\">hello</a>\n"
"</body>\n"
- "</html>", {"https://www.example.com"}, "hello"}
+ "</html>", {"https://www.example.com"}, "hello"},
};
rspamd_url_init(NULL);
More information about the Commits mailing list