commit c93a787: [Project] Fix xml/sgml tags processing
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Jun 24 16:42:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-06-24 17:30:20 +0100
URL: https://github.com/rspamd/rspamd/commit/c93a78780f425a36cb3d9d2a7b0a8eca8707831e
[Project] Fix xml/sgml tags processing
---
src/libserver/html/html.cxx | 17 ++++++++++++++---
src/libserver/html/html.h | 13 -------------
src/libserver/html/html_tag.hxx | 14 ++++++++++++++
3 files changed, 28 insertions(+), 16 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index e427c73bf..45094e7f8 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -851,8 +851,6 @@ html_process_img_tag(rspamd_mempool_t *pool,
img = rspamd_mempool_alloc0_type (pool, struct html_image);
img->tag = tag;
- tag->flags |= FL_IMAGE;
-
for (const auto &param : tag->parameters) {
@@ -1096,7 +1094,12 @@ html_append_tag_content(rspamd_mempool_t *pool,
}
if (!tag->block) {
- is_visible = true;
+ if ((tag->flags & (FL_COMMENT|FL_XML))) {
+ is_visible = false;
+ }
+ else {
+ is_visible = true;
+ }
}
else if (!tag->block->is_visible()) {
is_visible = false;
@@ -1328,10 +1331,17 @@ html_process_input(rspamd_mempool_t *pool,
break;
case '!':
state = sgml_tag;
+ hc->all_tags.emplace_back(std::make_unique<html_tag>());
+ cur_tag = hc->all_tags.back().get();
+ cur_tag->tag_start = c - start;
p ++;
break;
case '?':
state = xml_tag;
+ hc->all_tags.emplace_back(std::make_unique<html_tag>());
+ cur_tag = hc->all_tags.back().get();
+ cur_tag->tag_start = c - start;
+ cur_tag->flags |= FL_XML;
hc->flags |= RSPAMD_HTML_FLAG_XML;
p ++;
break;
@@ -1365,6 +1375,7 @@ html_process_input(rspamd_mempool_t *pool,
p ++;
break;
case '-':
+ cur_tag->flags |= FL_COMMENT;
state = comment_tag;
p ++;
break;
diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h
index 291e0cfda..b6307f88f 100644
--- a/src/libserver/html/html.h
+++ b/src/libserver/html/html.h
@@ -58,19 +58,6 @@ struct html_image {
};
-/* Public tags flags */
-/* XML tag */
-#define FL_XML (1 << 23)
-/* Closing tag */
-#define FL_CLOSING (1 << 24)
-/* Fully closed tag (e.g. <a attrs />) */
-#define FL_CLOSED (1 << 25)
-#define FL_BROKEN (1 << 26)
-#define FL_IGNORE (1 << 27)
-#define FL_BLOCK (1 << 28)
-#define FL_HREF (1 << 29)
-#define FL_IMAGE (1 << 30)
-
/* Forwarded declaration */
struct rspamd_task;
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index cad5368cf..9091b9060 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -40,6 +40,20 @@ enum class html_component_type : std::uint8_t {
RSPAMD_HTML_COMPONENT_ALT,
RSPAMD_HTML_COMPONENT_ID,
};
+
+/* Public tags flags (each flag must occupy its own bit of html_tag::flags) */
+/* XML tag */
+#define FL_XML (1 << 23)
+/* Closing tag */
+#define FL_CLOSING (1 << 24)
+/* Fully closed tag (e.g. <a attrs />) */
+#define FL_CLOSED (1 << 25)
+#define FL_BROKEN (1 << 26)
+#define FL_IGNORE (1 << 27)
+#define FL_BLOCK (1 << 28)
+#define FL_HREF (1 << 29)
+#define FL_COMMENT (1 << 30) /* SGML comment tag; must not share bit 29 with FL_HREF */
+
/**
* Returns component type from a string
* @param st
More information about the Commits
mailing list