commit c93a787: [Project] Fix xml/sgml tags processing

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jun 24 16:42:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-24 17:30:20 +0100
URL: https://github.com/rspamd/rspamd/commit/c93a78780f425a36cb3d9d2a7b0a8eca8707831e

[Project] Fix xml/sgml tags processing

---
 src/libserver/html/html.cxx     | 17 ++++++++++++++---
 src/libserver/html/html.h       | 13 -------------
 src/libserver/html/html_tag.hxx | 14 ++++++++++++++
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index e427c73bf..45094e7f8 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -851,8 +851,6 @@ html_process_img_tag(rspamd_mempool_t *pool,
 
 	img = rspamd_mempool_alloc0_type (pool, struct html_image);
 	img->tag = tag;
-	tag->flags |= FL_IMAGE;
-
 
 	for (const auto &param : tag->parameters) {
 
@@ -1096,7 +1094,12 @@ html_append_tag_content(rspamd_mempool_t *pool,
 	}
 
 	if (!tag->block) {
-		is_visible = true;
+		if ((tag->flags & (FL_COMMENT|FL_XML))) {
+			is_visible = false;
+		}
+		else {
+			is_visible = true;
+		}
 	}
 	else if (!tag->block->is_visible()) {
 		is_visible = false;
@@ -1328,10 +1331,17 @@ html_process_input(rspamd_mempool_t *pool,
 				break;
 			case '!':
 				state = sgml_tag;
+				hc->all_tags.emplace_back(std::make_unique<html_tag>());
+				cur_tag = hc->all_tags.back().get();
+				cur_tag->tag_start = c - start;
 				p ++;
 				break;
 			case '?':
 				state = xml_tag;
+				hc->all_tags.emplace_back(std::make_unique<html_tag>());
+				cur_tag = hc->all_tags.back().get();
+				cur_tag->tag_start = c - start;
+				cur_tag->flags |= FL_XML;
 				hc->flags |= RSPAMD_HTML_FLAG_XML;
 				p ++;
 				break;
@@ -1365,6 +1375,7 @@ html_process_input(rspamd_mempool_t *pool,
 				p ++;
 				break;
 			case '-':
+				cur_tag->flags |= FL_COMMENT;
 				state = comment_tag;
 				p ++;
 				break;
diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h
index 291e0cfda..b6307f88f 100644
--- a/src/libserver/html/html.h
+++ b/src/libserver/html/html.h
@@ -58,19 +58,6 @@ struct html_image {
 };
 
 
-/* Public tags flags */
-/* XML tag */
-#define FL_XML          (1 << 23)
-/* Closing tag */
-#define FL_CLOSING      (1 << 24)
-/* Fully closed tag (e.g. <a attrs />) */
-#define FL_CLOSED       (1 << 25)
-#define FL_BROKEN       (1 << 26)
-#define FL_IGNORE       (1 << 27)
-#define FL_BLOCK        (1 << 28)
-#define FL_HREF         (1 << 29)
-#define FL_IMAGE        (1 << 30)
-
 /* Forwarded declaration */
 struct rspamd_task;
 
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index cad5368cf..9091b9060 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -40,6 +40,20 @@ enum class html_component_type : std::uint8_t {
 	RSPAMD_HTML_COMPONENT_ALT,
 	RSPAMD_HTML_COMPONENT_ID,
 };
+
+/* Public tags flags: each FL_* value must occupy its own distinct bit */
+/* XML tag */
+#define FL_XML          (1 << 23)
+/* Closing tag */
+#define FL_CLOSING      (1 << 24)
+/* Fully closed tag (e.g. <a attrs />) */
+#define FL_CLOSED       (1 << 25)
+#define FL_BROKEN       (1 << 26)
+#define FL_IGNORE       (1 << 27)
+#define FL_BLOCK        (1 << 28)
+#define FL_HREF         (1 << 29)
+#define FL_COMMENT      (1 << 30) /* Comment tag; bit 30, must not alias FL_HREF */
+
 /**
  * Returns component type from a string
  * @param st


More information about the Commits mailing list