commit acc4b64: [Project] Add process exceptions for invisible text

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Jun 16 14:28:07 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-16 15:22:48 +0100
URL: https://github.com/rspamd/rspamd/commit/acc4b6480944600f47dcc0458214afe5b569ab33 (HEAD -> master)

[Project] Add process exceptions for invisible text

---
 src/libserver/html/html.cxx           | 56 ++++++++++++++++++++++++++++++++++-
 src/libstat/tokenizers/tokenizers.c   |  8 +++++
 src/libutil/util.h                    | 16 ++++++++++
 src/rspamd.h                          | 15 ----------
 test/functional/messages/zerofont.eml |  1 +
 5 files changed, 80 insertions(+), 16 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 5c5157740..b9c1a41cc 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1658,7 +1658,7 @@ html_process_input(rspamd_mempool_t *pool,
 	}, html_content::traverse_type::POST_ORDER);
 
 	/* Propagate styles */
-	hc->traverse_block_tags([&hc](const html_tag *tag) -> bool {
+	hc->traverse_block_tags([&hc, &exceptions,&pool](const html_tag *tag) -> bool {
 		if (hc->css_style) {
 			auto *css_block = hc->css_style->check_tag_block(tag);
 
@@ -1674,6 +1674,60 @@ html_process_input(rspamd_mempool_t *pool,
 		if (tag->block) {
 			tag->block->compute_visibility();
 
+			if (exceptions) {
+				if (!tag->block->is_visible()) {
+					if (tag->parent == nullptr || (tag->parent->block && tag->parent->block->is_visible())) {
+						/* Add exception for an invisible element */
+						auto * ex = rspamd_mempool_alloc_type (pool,struct rspamd_process_exception);
+						ex->pos = tag->content_offset;
+						ex->len = tag->content_length;
+						ex->type = RSPAMD_EXCEPTION_INVISIBLE;
+						ex->ptr = (void *)tag;
+
+						*exceptions = g_list_prepend(*exceptions, ex);
+					}
+				}
+				else if (*exceptions && tag->parent) {
+					/* Current block is visible, check if parent is invisible */
+					auto *ex = (struct rspamd_process_exception*)g_list_first(*exceptions)->data;
+
+					/*
+					 * TODO: we need to handle the following cases:
+					 * <inv><vis><inv> -< insert one more exception
+					 * <vis><inv> -< increase content_offset decrease length
+					 * <inv><vis> -< decrease length
+					 */
+					if (ex && ex->type == RSPAMD_EXCEPTION_INVISIBLE &&
+						ex->ptr == (void *)tag->parent) {
+						auto *parent = tag->parent;
+
+						if (tag->content_offset + tag->content_length ==
+							parent->content_offset + parent->content_length) {
+							/* <inv><vis> */
+							ex->len -= tag->content_length;
+						}
+						else if (tag->content_offset == parent->content_offset) {
+							/* <vis><inv> */
+							ex->len -= tag->content_length;
+							ex->pos += tag->content_length;
+						}
+						else if (tag->content_offset > ex->pos) {
+							auto *nex = rspamd_mempool_alloc_type (pool,
+									struct rspamd_process_exception);
+							auto start_len = tag->content_offset - ex->pos;
+							auto end_len = ex->len - tag->content_length - tag->content_length;
+							nex->pos = tag->content_offset + tag->content_length;
+							nex->len = end_len;
+							nex->type = RSPAMD_EXCEPTION_INVISIBLE;
+							nex->ptr = (void *)parent; /* ! */
+							ex->len = start_len;
+							*exceptions = g_list_prepend(*exceptions, ex);
+						}
+
+					}
+				}
+			}
+
 			for (const auto *cld_tag : tag->children) {
 				if (cld_tag->block) {
 					cld_tag->block->propagate_block(*tag->block);
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index f3b05240c..8d6d93add 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -275,6 +275,14 @@ rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res)
 		g_array_append_val (res, token);
 		token.flags = 0;
 	}
+	else if (ex->type == RSPAMD_EXCEPTION_INVISIBLE) {
+		token.original.begin = "!!INV!!";
+		token.original.len = sizeof ("!!INV!!") - 1;
+		token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
+
+		g_array_append_val (res, token);
+		token.flags = 0;
+	}
 }
 
 
diff --git a/src/libutil/util.h b/src/libutil/util.h
index e947b0a54..9ee8a09ae 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -21,6 +21,22 @@ extern "C" {
 
 struct rspamd_config;
 
+enum rspamd_exception_type { /* kind tag stored in struct rspamd_process_exception::type */
+	RSPAMD_EXCEPTION_NEWLINE = 0,
+	RSPAMD_EXCEPTION_URL,
+	RSPAMD_EXCEPTION_GENERIC,
+	RSPAMD_EXCEPTION_INVISIBLE, /* content of an invisible HTML element; the tokenizer emits "!!INV!!" for it */
+};
+/**
+ * Describes a span of text that is treated as an exception during text processing
+ */
+struct rspamd_process_exception {
+	goffset pos; /* start of the span; the HTML parser stores tag->content_offset here */
+	guint len; /* length of the span; the HTML parser stores tag->content_length here */
+	gpointer ptr; /* untyped payload; for RSPAMD_EXCEPTION_INVISIBLE the HTML parser stores the html_tag pointer */
+	enum rspamd_exception_type type; /* which kind of exception this span is */
+};
+
 /**
  * Create generic socket
  * @param af address family
diff --git a/src/rspamd.h b/src/rspamd.h
index bc1ed8a86..eb5ce541e 100644
--- a/src/rspamd.h
+++ b/src/rspamd.h
@@ -316,21 +316,6 @@ struct rspamd_main {
 	struct rspamd_http_context *http_ctx;
 };
 
-enum rspamd_exception_type {
-	RSPAMD_EXCEPTION_NEWLINE = 0,
-	RSPAMD_EXCEPTION_URL,
-	RSPAMD_EXCEPTION_GENERIC,
-};
-/**
- * Structure to point exception in text from processing
- */
-struct rspamd_process_exception {
-	goffset pos;
-	guint len;
-	gpointer ptr;
-	enum rspamd_exception_type type;
-};
-
 /**
  * Control session object
  */
diff --git a/test/functional/messages/zerofont.eml b/test/functional/messages/zerofont.eml
index 79fa5ede4..c5242d95d 100644
--- a/test/functional/messages/zerofont.eml
+++ b/test/functional/messages/zerofont.eml
@@ -13,5 +13,6 @@ Content-Type: text/html
   </head>
   <body class="activity-stream">
     <div>fi<span style="FONT-SIZE: 0px">le </span>sh<span style="FONT-SIZE: 0px">aring </span></div>
+  <a href="https://example.com">test url</a>
   </body>
 </html>


More information about the Commits mailing list