commit 3f17e65: [Minor] Fix sgml tags processing
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Jun 11 15:35:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-06-11 16:33:29 +0100
URL: https://github.com/rspamd/rspamd/commit/3f17e650d2659c78e12edc27caad61c2833d973e (HEAD -> master)
[Minor] Fix sgml tags processing
---
src/libserver/html/html.cxx | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 23dabc4d5..1cbc1f105 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1421,7 +1421,7 @@ html_process_input(rspamd_mempool_t *pool,
if (t == '>') {
state = tag_end;
/* We don't know a lot about sgml tags, ignore them */
- cur_tag = hc->root_tag;
+ cur_tag = nullptr;
continue;
}
p ++;
@@ -1698,11 +1698,14 @@ html_debug_structure(const html_content &hc) -> std::string
TEST_CASE("html parsing") {
const std::vector<std::pair<std::string, std::string>> cases{
- {"<html><div><div></div></div></html>", "+html;++div;+++div;"},
- {"<html><div><div></div></html>", "+html;++div;+++div;"},
- {"<html><div><div></div></html></div>", "+html;++div;+++div;"},
- {"<p><p><a></p></a></a>", "+p;++p;+++a;"},
- {"<div><a href=\"http://example.com\"></div></a>", "+div;++a;"},
+ {"<html><!DOCTYPE html><body>", "+html;++body;"},
+ {"<html><div><div></div></div></html>", "+html;++div;+++div;"},
+ {"<html><div><div></div></html>", "+html;++div;+++div;"},
+ {"<html><div><div></div></html></div>", "+html;++div;+++div;"},
+ {"<p><p><a></p></a></a>", "+p;++p;+++a;"},
+ {"<div><a href=\"http://example.com\"></div></a>", "+div;++a;"},
+ {"<html><!DOCTYPE html><body><head><body></body></html></body></html>",
+ "+html;++body;+++head;++++body;"}
};
rspamd_url_init(NULL);
More information about the Commits mailing list