commit 38ad1b8: [Minor] Html: Fix another corner case

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Jul 5 16:42:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-05 16:04:38 +0100
URL: https://github.com/rspamd/rspamd/commit/38ad1b8e2a9dc1bb24c2f8abf68ddcd7fb92bf12

[Minor] Html: Fix another corner case

---
 src/libserver/html/html.cxx | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index e1550f411..395648c0c 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -179,8 +179,8 @@ html_check_balance(struct html_content *hc,
 
 		if (hc->all_tags.empty()) {
 			auto &&vtag = std::make_unique<html_tag>();
-			vtag->id = tag->id;
-			vtag->flags = FL_VIRTUAL|FL_CLOSED;
+			vtag->id = Tag_HTML;
+			vtag->flags = FL_VIRTUAL;
 			vtag->tag_start = 0;
 			vtag->content_offset = 0;
 			calculate_content_length(vtag.get());
@@ -191,9 +191,12 @@ html_check_balance(struct html_content *hc,
 			else {
 				vtag->parent = hc->root_tag;
 			}
+
 			hc->all_tags.emplace_back(std::move(vtag));
+			tag->parent = vtag.get();
 
-			return vtag.get();
+			/* Recursively call with a virtual <html> tag inserted */
+			return html_check_balance(hc, tag, tag_start_offset, tag_end_offset);
 		}
 	}
 
@@ -1931,6 +1934,12 @@ TEST_CASE("html text extraction")
 {
 
 	const std::vector<std::pair<std::string, std::string>> cases{
+			{"</head>\n"
+			 "<body>\n"
+			 "<p> Hello. I have some bad news.\n"
+			 "<br /> <br /> <br /> <strong> <br /> <br /> <br /> <br /> <br /> <br /> <br /> <br /> </strong><span> <br /> </span></p>\n"
+			 "</body>\n"
+			 "</html>", " Hello. I have some bad news.\n\n\n\n\n\n\n\n"},
 			{"  <body>\n"
 			 "    <!-- escape content -->\n"
 			 "    a b a &gt; b a &lt; b a &amp; b &apos;a &quot;a&quot;\n"


More information about the Commits mailing list