commit 3f17e65: [Minor] Fix sgml tags processing

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Jun 11 15:35:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-11 16:33:29 +0100
URL: https://github.com/rspamd/rspamd/commit/3f17e650d2659c78e12edc27caad61c2833d973e (HEAD -> master)

[Minor] Fix sgml tags processing

---
 src/libserver/html/html.cxx | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 23dabc4d5..1cbc1f105 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1421,7 +1421,7 @@ html_process_input(rspamd_mempool_t *pool,
 			if (t == '>') {
 				state = tag_end;
 				/* We don't know a lot about sgml tags, ignore them */
-				cur_tag = hc->root_tag;
+				cur_tag = nullptr;
 				continue;
 			}
 			p ++;
@@ -1698,11 +1698,14 @@ html_debug_structure(const html_content &hc) -> std::string
 TEST_CASE("html parsing") {
 
 	const std::vector<std::pair<std::string, std::string>> cases{
-		{"<html><div><div></div></div></html>", "+html;++div;+++div;"},
-		{"<html><div><div></div></html>", "+html;++div;+++div;"},
-		{"<html><div><div></div></html></div>", "+html;++div;+++div;"},
-		{"<p><p><a></p></a></a>", "+p;++p;+++a;"},
-		{"<div><a href=\"http://example.com\"></div></a>", "+div;++a;"},
+			{"<html><!DOCTYPE html><body>", "+html;++body;"},
+			{"<html><div><div></div></div></html>", "+html;++div;+++div;"},
+			{"<html><div><div></div></html>", "+html;++div;+++div;"},
+			{"<html><div><div></div></html></div>", "+html;++div;+++div;"},
+			{"<p><p><a></p></a></a>", "+p;++p;+++a;"},
+			{"<div><a href=\"http://example.com\"></div></a>", "+div;++a;"},
+			{"<html><!DOCTYPE html><body><head><body></body></html></body></html>",
+					"+html;++body;+++head;++++body;"}
 	};
 
 	rspamd_url_init(NULL);


More information about the Commits mailing list