commit 9c8d32c: [Feature] Allow options matching in composites

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Aug 15 14:56:07 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-08-15 15:49:23 +0100
URL: https://github.com/rspamd/rspamd/commit/9c8d32c7a237d889153d0bc3a444d568195eaabf

[Feature] Allow options matching in composites

---
 src/libserver/composites.c | 164 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 146 insertions(+), 18 deletions(-)

diff --git a/src/libserver/composites.c b/src/libserver/composites.c
index 92ccd4f15..9262e4101 100644
--- a/src/libserver/composites.c
+++ b/src/libserver/composites.c
@@ -51,6 +51,24 @@ struct composites_data {
 	guint8 *checked;
 };
 
+struct rspamd_composite_option_match {
+	enum {
+		RSPAMD_COMPOSITE_OPTION_PLAIN, /* compare data.match with the option verbatim */
+		RSPAMD_COMPOSITE_OPTION_RE /* match the option against data.re */
+	} type; /* selects the active member of `data` */
+
+	union {
+		rspamd_regexp_t *re; /* compiled regexp, set for RSPAMD_COMPOSITE_OPTION_RE */
+		gchar *match; /* literal option text, set for RSPAMD_COMPOSITE_OPTION_PLAIN */
+	} data;
+	struct rspamd_composite_option_match *prev, *next; /* list links (DL_APPEND/DL_FOREACH) */
+};
+
+struct rspamd_composite_atom {
+	gchar *symbol; /* atom text up to '[' when options are given, else the whole atom */
+	struct rspamd_composite_option_match *opts; /* required options; NULL when none were parsed */
+};
+
 enum rspamd_composite_action {
 	RSPAMD_COMPOSITE_UNTOUCH = 0,
 	RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1 << 0),
@@ -92,11 +110,12 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 {
 	gsize clen;
 	rspamd_expression_atom_t *res;
+	struct rspamd_composite_atom *atom;
 
 	/*
 	 * Composites are just sequences of symbols
 	 */
-	clen = strcspn (line, ", \t()><!|&\n");
+	clen = strcspn (line, "; \t()><!|&\n");
 	if (clen == 0) {
 		/* Invalid composite atom */
 		g_set_error (err, rspamd_composites_quark (), 100, "Invalid composite: %s",
@@ -107,15 +126,75 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 	res = rspamd_mempool_alloc0 (pool, sizeof (*res));
 	res->len = clen;
 	res->str = line;
-	res->data = rspamd_mempool_alloc (pool, clen + 1);
-	rspamd_strlcpy (res->data, line, clen + 1);
+
+	atom = rspamd_mempool_alloc0 (pool, sizeof (*atom));
+
+	/* Now check for options combinations */
+	const gchar *obrace, *ebrace;
+
+	if ((obrace = memchr (line, '[', clen)) != NULL && obrace > line) {
+		atom->symbol = rspamd_mempool_alloc (pool, obrace - line + 1);
+		rspamd_strlcpy (atom->symbol, line, obrace - line + 1);
+		ebrace = memchr (line, ']', clen);
+
+		if (ebrace != NULL && ebrace > obrace) {
+			/* We can make a list of options */
+			gchar **opts = rspamd_string_len_split (obrace + 1,
+					ebrace - obrace - 1, ",", -1, pool);
+
+			for (guint i = 0; opts[i] != NULL; i ++) {
+				struct rspamd_composite_option_match *opt_match;
+
+				opt_match = rspamd_mempool_alloc (pool, sizeof (*opt_match));
+
+				if (opts[i][0] == '/' && strchr (opts[i] + 1, '/') != NULL) {
+					/* Regexp */
+					rspamd_regexp_t *re;
+					GError *re_err = NULL;
+
+					re = rspamd_regexp_new (opts[i], NULL, &re_err);
+
+					if (re == NULL) {
+						msg_err_pool ("cannot create regexp from string %s: %s",
+								opts[i], re_err->message);
+						/* Bad regexp: this option is dropped, the atom's other options still apply */
+						g_error_free (re_err);
+					}
+					else {
+						rspamd_mempool_add_destructor (pool,
+								(rspamd_mempool_destruct_t)rspamd_regexp_unref,
+								re);
+						opt_match->data.re = re;
+						opt_match->type = RSPAMD_COMPOSITE_OPTION_RE;
+
+						DL_APPEND (atom->opts, opt_match);
+					}
+				}
+				else {
+					/* Plain match */
+					opt_match->data.match = opts[i];
+					opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN;
+
+					DL_APPEND (atom->opts, opt_match);
+				}
+			}
+		}
+	}
+	else {
+		atom->symbol = rspamd_mempool_alloc (pool, clen + 1);
+		rspamd_strlcpy (atom->symbol, line, clen + 1);
+	}
+
+	res->data = atom;
 
 	return res;
 }
 
 static gdouble
 rspamd_composite_process_single_symbol (struct composites_data *cd,
-		const gchar *sym, struct rspamd_symbol_result **pms)
+										const gchar *sym,
+										struct rspamd_symbol_result **pms,
+										struct rspamd_composite_atom *atom)
 {
 	struct rspamd_symbol_result *ms = NULL;
 	gdouble rc = 0;
@@ -162,11 +241,54 @@ rspamd_composite_process_single_symbol (struct composites_data *cd,
 	if (ms) {
 		msg_debug_composites ("found symbol %s in composite %s, weight: %.3f",
 				sym, cd->composite->sym, ms->score);
-		if (ms->score == 0) {
-			rc = 0.001; /* Distinguish from 0 */
+
+		/* Now check options */
+		struct rspamd_composite_option_match *cur_opt;
+
+		DL_FOREACH (atom->opts, cur_opt) {
+			struct rspamd_symbol_option *opt;
+			bool found = false;
+
+			DL_FOREACH (ms->opts_head, opt) {
+				if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
+					if (strcmp (opt->option, cur_opt->data.match) == 0) {
+						found = true;
+
+						break;
+					}
+				}
+				else {
+					if (rspamd_regexp_match (cur_opt->data.re,
+							opt->option, 0, FALSE)) {
+						found = true;
+
+						break;
+					}
+				}
+			}
+
+
+			if (!found) {
+				/* Every listed option is mandatory: a single miss rejects the symbol */
+				msg_debug_composites ("symbol %s in composite %s misses required option %s",
+						sym,
+						cd->composite->sym,
+						cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN ?
+						  cur_opt->data.match :
+						  rspamd_regexp_get_pattern (cur_opt->data.re));
+				ms = NULL;
+
+				break;
+			}
 		}
-		else {
-			rc = ms->score;
+
+		if (ms) {
+			if (ms->score == 0) {
+				rc = 0.001; /* Distinguish from 0 */
+			}
+			else {
+				rc = ms->score;
+			}
 		}
 	}
 
@@ -257,7 +379,8 @@ rspamd_composite_expr_process (void *ud,
 		rspamd_expression_atom_t *atom)
 {
 	struct composites_data *cd = (struct composites_data *)ud;
-	const gchar *beg = atom->data, *sym = NULL;
+	const gchar *sym = NULL;
+	struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *)atom->data;
 
 	struct rspamd_symbol_result *ms = NULL;
 	struct rspamd_symbols_group *gr;
@@ -288,7 +411,7 @@ rspamd_composite_expr_process (void *ud,
 		return rc;
 	}
 
-	sym = beg;
+	sym = comp_atom->symbol;
 
 	while (*sym != '\0' && !g_ascii_isalnum (*sym)) {
 		sym ++;
@@ -302,13 +425,14 @@ rspamd_composite_expr_process (void *ud,
 
 			while (g_hash_table_iter_next (&it, &k, &v)) {
 				sdef = v;
-				rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms);
+				rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms,
+						comp_atom);
 
 				if (rc) {
 					rspamd_composite_process_symbol_removal (atom,
 							cd,
 							ms,
-							beg);
+							comp_atom->symbol);
 
 					if (fabs (rc) > max) {
 						max = fabs (rc);
@@ -332,13 +456,14 @@ rspamd_composite_expr_process (void *ud,
 				if (sdef->score > 0) {
 					rc = rspamd_composite_process_single_symbol (cd,
 							sdef->name,
-							&ms);
+							&ms,
+							comp_atom);
 
 					if (rc) {
 						rspamd_composite_process_symbol_removal (atom,
 								cd,
 								ms,
-								beg);
+								comp_atom->symbol);
 
 						if (fabs (rc) > max) {
 							max = fabs (rc);
@@ -361,13 +486,16 @@ rspamd_composite_expr_process (void *ud,
 				sdef = v;
 
 				if (sdef->score < 0) {
-					rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms);
+					rc = rspamd_composite_process_single_symbol (cd,
+							sdef->name,
+							&ms,
+							comp_atom);
 
 					if (rc) {
 						rspamd_composite_process_symbol_removal (atom,
 								cd,
 								ms,
-								beg);
+								comp_atom->symbol);
 
 						if (fabs (rc) > max) {
 							max = fabs (rc);
@@ -380,13 +508,13 @@ rspamd_composite_expr_process (void *ud,
 		}
 	}
 	else {
-		rc = rspamd_composite_process_single_symbol (cd, sym, &ms);
+		rc = rspamd_composite_process_single_symbol (cd, sym, &ms, comp_atom);
 
 		if (rc) {
 			rspamd_composite_process_symbol_removal (atom,
 					cd,
 					ms,
-					beg);
+					comp_atom->symbol);
 		}
 	}
 


More information about the Commits mailing list