commit acc4b64: [Project] Add process exceptions for invisible text
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Jun 16 14:28:07 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-06-16 15:22:48 +0100
URL: https://github.com/rspamd/rspamd/commit/acc4b6480944600f47dcc0458214afe5b569ab33 (HEAD -> master)
[Project] Add process exceptions for invisible text
---
src/libserver/html/html.cxx | 56 ++++++++++++++++++++++++++++++++++-
src/libstat/tokenizers/tokenizers.c | 8 +++++
src/libutil/util.h | 16 ++++++++++
src/rspamd.h | 15 ----------
test/functional/messages/zerofont.eml | 1 +
5 files changed, 80 insertions(+), 16 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 5c5157740..b9c1a41cc 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1658,7 +1658,7 @@ html_process_input(rspamd_mempool_t *pool,
}, html_content::traverse_type::POST_ORDER);
/* Propagate styles */
- hc->traverse_block_tags([&hc](const html_tag *tag) -> bool {
+ hc->traverse_block_tags([&hc, &exceptions,&pool](const html_tag *tag) -> bool {
if (hc->css_style) {
auto *css_block = hc->css_style->check_tag_block(tag);
@@ -1674,6 +1674,60 @@ html_process_input(rspamd_mempool_t *pool,
if (tag->block) {
tag->block->compute_visibility();
+ if (exceptions) {
+ if (!tag->block->is_visible()) {
+ if (tag->parent == nullptr || (tag->parent->block && tag->parent->block->is_visible())) {
+ /* Add exception for an invisible element */
+ auto * ex = rspamd_mempool_alloc_type (pool,struct rspamd_process_exception);
+ ex->pos = tag->content_offset;
+ ex->len = tag->content_length;
+ ex->type = RSPAMD_EXCEPTION_INVISIBLE;
+ ex->ptr = (void *)tag;
+
+ *exceptions = g_list_prepend(*exceptions, ex);
+ }
+ }
+ else if (*exceptions && tag->parent) {
+ /* Current block is visible, check if parent is invisible */
+ auto *ex = (struct rspamd_process_exception*)g_list_first(*exceptions)->data;
+
+ /*
+ * TODO: we need to handle the following cases:
+ * <inv><vis><inv> -< insert one more exception
+ * <vis><inv> -< increase content_offset decrease length
+ * <inv><vis> -< decrease length
+ */
+ if (ex && ex->type == RSPAMD_EXCEPTION_INVISIBLE &&
+ ex->ptr == (void *)tag->parent) {
+ auto *parent = tag->parent;
+
+ if (tag->content_offset + tag->content_length ==
+ parent->content_offset + parent->content_length) {
+ /* <inv><vis> */
+ ex->len -= tag->content_length;
+ }
+ else if (tag->content_offset == parent->content_offset) {
+ /* <vis><inv> */
+ ex->len -= tag->content_length;
+ ex->pos += tag->content_length;
+ }
+ else if (tag->content_offset > ex->pos) {
+ auto *nex = rspamd_mempool_alloc_type (pool,
+ struct rspamd_process_exception);
+ auto start_len = tag->content_offset - ex->pos;
+ auto end_len = ex->len - tag->content_length - tag->content_length;
+ nex->pos = tag->content_offset + tag->content_length;
+ nex->len = end_len;
+ nex->type = RSPAMD_EXCEPTION_INVISIBLE;
+ nex->ptr = (void *)parent; /* ! */
+ ex->len = start_len;
+ *exceptions = g_list_prepend(*exceptions, ex);
+ }
+
+ }
+ }
+ }
+
for (const auto *cld_tag : tag->children) {
if (cld_tag->block) {
cld_tag->block->propagate_block(*tag->block);
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index f3b05240c..8d6d93add 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -275,6 +275,14 @@ rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res)
g_array_append_val (res, token);
token.flags = 0;
}
+ else if (ex->type == RSPAMD_EXCEPTION_INVISIBLE) {
+ token.original.begin = "!!INV!!";
+ token.original.len = sizeof ("!!INV!!") - 1;
+ token.flags = RSPAMD_STAT_TOKEN_FLAG_EXCEPTION;
+
+ g_array_append_val (res, token);
+ token.flags = 0;
+ }
}
diff --git a/src/libutil/util.h b/src/libutil/util.h
index e947b0a54..9ee8a09ae 100644
--- a/src/libutil/util.h
+++ b/src/libutil/util.h
@@ -21,6 +21,22 @@ extern "C" {
struct rspamd_config;
+enum rspamd_exception_type { /* kind tag stored in rspamd_process_exception.type */
+ RSPAMD_EXCEPTION_NEWLINE = 0,
+ RSPAMD_EXCEPTION_URL,
+ RSPAMD_EXCEPTION_GENERIC,
+ RSPAMD_EXCEPTION_INVISIBLE, /* invisible HTML element content; the tokenizer emits "!!INV!!" for it */
+};
+/**
+ * Describes a span of text that is treated as an exception during text processing
+ */
+struct rspamd_process_exception {
+ goffset pos; /* start of the span; the HTML parser stores a tag's content_offset here */
+ guint len; /* length of the span; the HTML parser stores a tag's content_length here */
+ gpointer ptr; /* opaque payload; the HTML parser stores the originating tag pointer */
+ enum rspamd_exception_type type; /* which kind of exception this span is */
+};
+
/**
* Create generic socket
* @param af address family
diff --git a/src/rspamd.h b/src/rspamd.h
index bc1ed8a86..eb5ce541e 100644
--- a/src/rspamd.h
+++ b/src/rspamd.h
@@ -316,21 +316,6 @@ struct rspamd_main {
struct rspamd_http_context *http_ctx;
};
-enum rspamd_exception_type {
- RSPAMD_EXCEPTION_NEWLINE = 0,
- RSPAMD_EXCEPTION_URL,
- RSPAMD_EXCEPTION_GENERIC,
-};
-/**
- * Structure to point exception in text from processing
- */
-struct rspamd_process_exception {
- goffset pos;
- guint len;
- gpointer ptr;
- enum rspamd_exception_type type;
-};
-
/**
* Control session object
*/
diff --git a/test/functional/messages/zerofont.eml b/test/functional/messages/zerofont.eml
index 79fa5ede4..c5242d95d 100644
--- a/test/functional/messages/zerofont.eml
+++ b/test/functional/messages/zerofont.eml
@@ -13,5 +13,6 @@ Content-Type: text/html
</head>
<body class="activity-stream">
<div>fi<span style="FONT-SIZE: 0px">le </span>sh<span style="FONT-SIZE: 0px">aring </span></div>
+ <a href="https://example.com">test url</a>
</body>
</html>
More information about the Commits mailing list