commit 38ad1b8: [Minor] Html: Fix another corner case
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Jul 5 16:42:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-07-05 16:04:38 +0100
URL: https://github.com/rspamd/rspamd/commit/38ad1b8e2a9dc1bb24c2f8abf68ddcd7fb92bf12
[Minor] Html: Fix another corner case
---
src/libserver/html/html.cxx | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index e1550f411..395648c0c 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -179,8 +179,8 @@ html_check_balance(struct html_content *hc,
if (hc->all_tags.empty()) {
auto &&vtag = std::make_unique<html_tag>();
- vtag->id = tag->id;
- vtag->flags = FL_VIRTUAL|FL_CLOSED;
+ vtag->id = Tag_HTML;
+ vtag->flags = FL_VIRTUAL;
vtag->tag_start = 0;
vtag->content_offset = 0;
calculate_content_length(vtag.get());
@@ -191,9 +191,12 @@ html_check_balance(struct html_content *hc,
else {
vtag->parent = hc->root_tag;
}
+
hc->all_tags.emplace_back(std::move(vtag));
+ tag->parent = vtag.get();
- return vtag.get();
+ /* Recursively call with a virtual <html> tag inserted */
+ return html_check_balance(hc, tag, tag_start_offset, tag_end_offset);
}
}
@@ -1931,6 +1934,12 @@ TEST_CASE("html text extraction")
{
const std::vector<std::pair<std::string, std::string>> cases{
+ {"</head>\n"
+ "<body>\n"
+ "<p> Hello. I have some bad news.\n"
+ "<br /> <br /> <br /> <strong> <br /> <br /> <br /> <br /> <br /> <br /> <br /> <br /> </strong><span> <br /> </span></p>\n"
+ "</body>\n"
+ "</html>", " Hello. I have some bad news.\n\n\n\n\n\n\n\n"},
{" <body>\n"
" <!-- escape content -->\n"
" a&nbsp;b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;\n"
More information about the Commits
mailing list