commit 27ad207: [Minor] Further fixes for MIXED_CHARSET rule

Vsevolod Stakhov vsevolod at highsecure.ru
Sat Sep 5 10:35:06 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-09-05 11:32:49 +0100
URL: https://github.com/rspamd/rspamd/commit/27ad207da620685499aa04c8e9c7d4e4eafa11dc (HEAD -> master)

[Minor] Further fixes for MIXED_CHARSET rule

---
 src/plugins/chartable.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/src/plugins/chartable.c b/src/plugins/chartable.c
index e1348b561..ce37ebd58 100644
--- a/src/plugins/chartable.c
+++ b/src/plugins/chartable.c
@@ -585,7 +585,7 @@ rspamd_chartable_process_word_ascii (struct rspamd_task *task,
 	return badness;
 }
 
-static void
+static gboolean
 rspamd_chartable_process_part (struct rspamd_task *task,
 							   struct rspamd_mime_text_part *part,
 							   struct chartable_ctx *chartable_module_ctx,
@@ -597,7 +597,7 @@ rspamd_chartable_process_part (struct rspamd_task *task,
 
 	if (part == NULL || part->utf_words == NULL ||
 			part->utf_words->len == 0) {
-		return;
+		return FALSE;
 	}
 
 	for (i = 0; i < part->utf_words->len; i++) {
@@ -625,15 +625,17 @@ rspamd_chartable_process_part (struct rspamd_task *task,
 
 	cur_score /= (gdouble)part->nwords;
 
-	if (cur_score > 2.0) {
-		cur_score = 2.0;
+	if (cur_score > 1.0) {
+		cur_score = 1.0;
 	}
 
 	if (cur_score > chartable_module_ctx->threshold) {
 		rspamd_task_insert_result (task, chartable_module_ctx->symbol,
 				cur_score, NULL);
-
+		return TRUE;
 	}
+
+	return FALSE;
 }
 
 static void
@@ -645,8 +647,9 @@ chartable_symbol_callback (struct rspamd_task *task,
 	struct rspamd_mime_text_part *part;
 	struct chartable_ctx *chartable_module_ctx = chartable_get_context (task->cfg);
 	const gchar *language = NULL;
-	gboolean ignore_diacritics = FALSE;
+	gboolean ignore_diacritics = FALSE, seen_violated_part = FALSE;
 
+	/* Check if we have parts with diacritic symbols language */
 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, part) {
 		if (part->languages && part->languages->len > 0) {
 			struct rspamd_lang_detector_res *lang =
@@ -660,8 +663,15 @@ chartable_symbol_callback (struct rspamd_task *task,
 			}
 		}
 
-		rspamd_chartable_process_part (task, part, chartable_module_ctx,
-				ignore_diacritics);
+		if (rspamd_chartable_process_part (task, part, chartable_module_ctx,
+				ignore_diacritics)) {
+			seen_violated_part = TRUE;
+		}
+	}
+
+	if (MESSAGE_FIELD (task, text_parts)->len == 0) {
+		/* No text parts, assume that we should ignore diacritics checks for metatokens */
+		ignore_diacritics = TRUE;
 	}
 
 	if (task->meta_words != NULL) {
@@ -677,11 +687,18 @@ chartable_symbol_callback (struct rspamd_task *task,
 
 		cur_score /= (gdouble)arlen;
 
-		if (cur_score > 2.0) {
-			cur_score = 2.0;
+		if (cur_score > 1.0) {
+			cur_score = 1.0;
 		}
 
 		if (cur_score > chartable_module_ctx->threshold) {
+			if (!seen_violated_part) {
+				/* Further penalise */
+				if (cur_score > 0.25) {
+					cur_score = 0.25;
+				}
+			}
+
 			rspamd_task_insert_result (task, chartable_module_ctx->symbol,
 					cur_score, "subject");
 


More information about the Commits mailing list