commit f87014f: [Rework] Composites: Rewrite the composites logic

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jul 15 19:07:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-15 17:10:03 +0100
URL: https://github.com/rspamd/rspamd/commit/f87014f894470620f39b93f390413be9555c1a0b

[Rework] Composites: Rewrite the composites logic

---
 src/libmime/scan_result.c               |   2 +-
 src/libmime/scan_result.h               |   2 +-
 src/libserver/composites/composites.cxx | 696 ++++++++++++++------------------
 3 files changed, 314 insertions(+), 386 deletions(-)

diff --git a/src/libmime/scan_result.c b/src/libmime/scan_result.c
index e7fab8c6d..16ec9b0c5 100644
--- a/src/libmime/scan_result.c
+++ b/src/libmime/scan_result.c
@@ -921,7 +921,7 @@ rspamd_task_find_symbol_result (struct rspamd_task *task, const char *sym,
 		result = task->result;
 	}
 
-	k = kh_get (rspamd_symbols_hash, result->symbols, sym);
+	k = kh_get(rspamd_symbols_hash, result->symbols, sym);
 
 	if (k != kh_end (result->symbols)) {
 		res = kh_value (result->symbols, k);
diff --git a/src/libmime/scan_result.h b/src/libmime/scan_result.h
index b7a548be0..c8bacf3e8 100644
--- a/src/libmime/scan_result.h
+++ b/src/libmime/scan_result.h
@@ -42,7 +42,7 @@ struct rspamd_symbol_result {
 	struct rspamd_symbol *sym;                     /**< symbol configuration					*/
 	gssize opts_len;                               /**< total size of all options (negative if truncated option is added) */
 	guint nshots;
-	enum rspamd_symbol_result_flags flags;
+	int flags;
 	struct rspamd_symbol_result *next;
 };
 
diff --git a/src/libserver/composites/composites.cxx b/src/libserver/composites/composites.cxx
index 0d9534681..56ca554b3 100644
--- a/src/libserver/composites/composites.cxx
+++ b/src/libserver/composites/composites.cxx
@@ -1,5 +1,5 @@
 /*-
- * Copyright 2016 Vsevolod Stakhov
+ * Copyright 2021 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -54,6 +54,7 @@ static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line,
 static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom);
 static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom);
 static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom);
+static void composites_foreach_callback(gpointer key, gpointer value, void *data);
 }
 
 const struct rspamd_atom_subr composite_expr_subr = {
@@ -65,6 +66,8 @@ const struct rspamd_atom_subr composite_expr_subr = {
 
 namespace rspamd::composites {
 
+static constexpr const double epsilon = 0.00001;
+
 enum class rspamd_composite_policy {
 	RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0,
 	RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL,
@@ -84,23 +87,72 @@ struct rspamd_composite {
 	rspamd_composite_policy policy;
 };
 
+struct symbol_remove_data {
+	const char *sym;
+	struct rspamd_composite *comp;
+	GNode *parent;
+	std::uint8_t action;
+};
+
 struct composites_data {
 	struct rspamd_task *task;
 	struct rspamd_composite *composite;
 	struct rspamd_scan_result *metric_res;
-	GHashTable *symbols_to_remove;
-	guint8 *checked;
-	struct composites_data *next;
+	robin_hood::unordered_flat_map<std::string_view,
+			std::vector<symbol_remove_data>> symbols_to_remove;
+	std::vector<bool> checked;
+
+	explicit composites_data(struct rspamd_task *task, struct rspamd_scan_result *mres) :
+			task(task), composite(nullptr), metric_res(mres) {
+		checked.resize(g_hash_table_size(task->cfg->composite_symbols) * 2);
+	}
 };
 
 struct rspamd_composite_option_match {
-	std::variant<rspamd_regexp_t *, std::string> match;
+	std::variant<rspamd_regexp_t *, std::string_view> match;
 
-	~rspamd_composite_option_match() {
+	explicit rspamd_composite_option_match(const char *start, std::size_t len)
+	{
+		match = std::string_view{start, len};
+	}
+
+	explicit rspamd_composite_option_match(rspamd_regexp_t *re)
+	{
+		match = re;
+	}
+
+	~rspamd_composite_option_match()
+	{
 		if (std::holds_alternative<rspamd_regexp_t *>(match)) {
 			rspamd_regexp_unref(std::get<rspamd_regexp_t *>(match));
 		}
 	}
+
+	auto math_opt(const std::string_view &data) const -> bool
+	{
+		return std::visit([&](auto arg) -> bool {
+			if constexpr (std::is_same_v<decltype(arg), std::string_view>) {
+				return data == arg;
+			}
+			else {
+				return rspamd_regexp_search(arg,
+						data.data(), data.size(),
+						nullptr, nullptr, false, nullptr);
+			}
+		}, match);
+	}
+
+	auto get_pat() const -> std::string_view
+	{
+		return std::visit([&](auto arg) -> std::string_view {
+			if constexpr (std::is_same_v<decltype(arg), std::string_view>) {
+				return std::string_view(arg);
+			}
+			else {
+				return std::string_view(rspamd_regexp_get_pattern(arg));
+			}
+		}, match);
+	}
 };
 
 enum class rspamd_composite_atom_type {
@@ -108,9 +160,10 @@ enum class rspamd_composite_atom_type {
 	ATOM_COMPOSITE,
 	ATOM_PLAIN
 };
+
 struct rspamd_composite_atom {
 	std::string symbol;
-	rspamd_composite_atom_type comp_type;
+	rspamd_composite_atom_type comp_type = rspamd_composite_atom_type::ATOM_UNKNOWN;
 	struct rspamd_composite *ncomp; /* underlying composite */
 	std::vector<rspamd_composite_option_match> opts;
 };
@@ -122,18 +175,18 @@ enum rspamd_composite_action : std::uint8_t {
 	RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2)
 };
 
-struct symbol_remove_data {
-	const char *sym;
-	struct rspamd_composite *comp;
-	GNode *parent;
-	std::uint8_t action;
-	struct symbol_remove_data *prev, *next;
-};
-
 static GQuark
-rspamd_composites_quark (void)
+rspamd_composites_quark(void)
 {
-	return g_quark_from_static_string ("composites");
+	return g_quark_from_static_string("composites");
+}
+
+static auto
+rspamd_composite_atom_dtor(void *ptr)
+{
+	auto *atom = reinterpret_cast<rspamd_expression_atom_t *>(ptr);
+
+	delete atom;
 }
 
 static rspamd_expression_atom_t *
@@ -142,8 +195,6 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
 							gpointer ud, GError **err)
 {
 	gsize clen = 0;
-	rspamd_expression_atom_t *res;
-	struct rspamd_composite_atom *atom;
 	const gchar *p, *end;
 	enum composite_expr_state {
 		comp_state_read_symbol = 0,
@@ -249,18 +300,15 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
 	p = line;
 	state = comp_state_read_symbol;
 
-	atom = rspamd_mempool_alloc0 (pool, sizeof(*atom));
-	atom->comp_type = ATOM_UNKNOWN;
-	res = rspamd_mempool_alloc0 (pool, sizeof(*res));
+	auto *atom = new rspamd_composite_atom;
+	auto *res = rspamd_mempool_alloc0_type(pool, rspamd_expression_atom_t);
 	res->len = clen;
 	res->str = line;
 
 	/* Full state machine to fill a composite atom */
-	const gchar *opt_start = NULL;
+	const gchar *opt_start = nullptr;
 
 	while (p < end) {
-		struct rspamd_composite_option_match *opt_match;
-
 		if (state == comp_state_read_end) {
 			break;
 		}
@@ -277,9 +325,7 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
 				state = comp_state_read_end;
 			}
 
-			atom->symbol = rspamd_mempool_alloc (pool, clen + 1);
-			rspamd_strlcpy(atom->symbol, line, clen + 1);
-
+			atom->symbol = std::string{line, clen};
 			break;
 		case comp_state_read_obrace:
 			p++;
@@ -308,18 +354,12 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
 			break;
 		case comp_state_read_option:
 			if (*p == ',' || *p == ']') {
-				opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
-				/* Plain match */
-				gchar *opt_buf;
+				/* Plain match, copy option to ensure string_view validity */
 				gint opt_len = p - opt_start;
-
-				opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
+				auto *opt_buf = rspamd_mempool_alloc_buffer(pool, opt_len + 1);
 				rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
-
-				opt_match->data.match = opt_buf;
-				opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN;
-
-				DL_APPEND (atom->opts, opt_match);
+				opt_buf = g_strstrip(opt_buf);
+				atom->opts.emplace_back(opt_buf, strlen(opt_buf));
 
 				if (*p == ',') {
 					p++;
@@ -335,33 +375,20 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
 			break;
 		case comp_state_read_regexp_end:
 			if (*p == ',' || *p == ']') {
-				opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
-				/* Plain match */
-				gchar *opt_buf;
-				gint opt_len = p - opt_start;
-
-				opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
-				rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
-
+				auto opt_len = p - opt_start;
 				rspamd_regexp_t *re;
-				GError *re_err = NULL;
+				GError *re_err = nullptr;
 
-				re = rspamd_regexp_new(opt_buf, NULL, &re_err);
+				re = rspamd_regexp_new_len(opt_start, opt_len, nullptr, &re_err);
 
-				if (re == NULL) {
-					msg_err_pool ("cannot create regexp from string %s: %e",
-							opt_buf, re_err);
+				if (re == nullptr) {
+					msg_err_pool ("cannot create regexp from string %*s: %e",
+							opt_len, opt_start, re_err);
 
 					g_error_free(re_err);
 				}
 				else {
-					rspamd_mempool_add_destructor (pool,
-							(rspamd_mempool_destruct_t) rspamd_regexp_unref,
-							re);
-					opt_match->data.re = re;
-					opt_match->type = RSPAMD_COMPOSITE_OPTION_RE;
-
-					DL_APPEND (atom->opts, opt_match);
+					atom->opts.emplace_back(re);
 				}
 
 				if (*p == ',') {
@@ -409,66 +436,137 @@ rspamd_composite_expr_parse(const gchar *line, gsize len,
 	return res;
 }
 
-}
+static auto
+process_symbol_removal(rspamd_expression_atom_t *atom,
+					   struct composites_data *cd,
+					   struct rspamd_symbol_result *ms,
+					   const std::string &beg) -> void
+{
+	struct rspamd_task *task = cd->task;
 
-static void composites_foreach_callback (gpointer key, gpointer value, void *data);
+	if (ms == nullptr) {
+		return;
+	}
 
+	/*
+	 * At this point we know that we need to do something about this symbol,
+	 * however, we don't know whether we need to delete it unfortunately,
+	 * that depends on the later decisions when the complete expression is
+	 * evaluated.
+	 */
+	auto rd_it = cd->symbols_to_remove.find(ms->name);
+
+	auto fill_removal_structure = [&](symbol_remove_data &nrd) {
+		nrd.sym = ms->name;
+
+		/* By default remove symbols */
+		switch (cd->composite->policy) {
+		case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
+		default:
+			nrd.action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL | RSPAMD_COMPOSITE_REMOVE_WEIGHT);
+			break;
+		case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
+			nrd.action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+			break;
+		case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
+			nrd.action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
+			break;
+		case rspamd_composite_policy::RSPAMD_COMPOSITE_POLICY_LEAVE:
+			nrd.action = 0;
+			break;
+		}
+
+		for (auto t : beg) {
+			if (t == '~') {
+				nrd.action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
+			}
+			else if (t == '-') {
+				nrd.action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT |
+								RSPAMD_COMPOSITE_REMOVE_SYMBOL);
+			}
+			else if (t == '^') {
+				nrd.action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
+			}
+			else {
+				break;
+			}
+		}
+
+		nrd.comp = cd->composite;
+		nrd.parent = atom->parent;
+	};
+
+	if (rd_it != cd->symbols_to_remove.end()) {
+		fill_removal_structure(rd_it->second.emplace_back());
+		msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
+				cd->metric_res->name,
+				ms->name, rd_it->second.back().action,
+				cd->composite->sym.c_str());
+	}
+	else {
+		std::vector<symbol_remove_data> nrd;
+		fill_removal_structure(nrd.emplace_back());
+		msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
+				cd->metric_res->name,
+				ms->name, nrd.front().action,
+				cd->composite->sym.c_str());
+		cd->symbols_to_remove[ms->name] = std::move(nrd);
+	}
+}
 
-static gdouble
-rspamd_composite_process_single_symbol (struct composites_data *cd,
-										const gchar *sym,
-										struct rspamd_symbol_result **pms,
-										struct rspamd_composite_atom *atom)
+static auto
+process_single_symbol(struct composites_data *cd,
+					  const gchar *sym,
+					  struct rspamd_symbol_result **pms,
+					  struct rspamd_composite_atom *atom) -> double
 {
-	struct rspamd_symbol_result *ms = NULL;
+	struct rspamd_symbol_result *ms = nullptr;
 	gdouble rc = 0;
 	struct rspamd_task *task = cd->task;
 
-	if ((ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res)) == NULL) {
+	if ((ms = rspamd_task_find_symbol_result(cd->task, sym, cd->metric_res)) == nullptr) {
 		msg_debug_composites ("not found symbol %s in composite %s", sym,
-				cd->composite->sym);
+				cd->composite->sym.c_str());
 
-		if (atom->comp_type == ATOM_UNKNOWN) {
+		if (G_UNLIKELY(atom->comp_type == rspamd_composite_atom_type::ATOM_UNKNOWN)) {
 			struct rspamd_composite *ncomp;
 
 			if ((ncomp =
-						 g_hash_table_lookup (cd->task->cfg->composite_symbols,
+						 g_hash_table_lookup(cd->task->cfg->composite_symbols,
 								 sym)) != NULL) {
-				atom->comp_type = ATOM_COMPOSITE;
+				atom->comp_type = rspamd_composite_atom_type::ATOM_COMPOSITE;
 				atom->ncomp = ncomp;
 			}
 			else {
-				atom->comp_type = ATOM_PLAIN;
+				atom->comp_type = rspamd_composite_atom_type::ATOM_PLAIN;
 			}
 		}
 
-		if (atom->comp_type == ATOM_COMPOSITE) {
+		if (atom->comp_type == rspamd_composite_atom_type::ATOM_COMPOSITE) {
 			msg_debug_composites ("symbol %s for composite %s is another composite",
-					sym, cd->composite->sym);
-
-			if (isclr (cd->checked, atom->ncomp->id * 2)) {
-				struct rspamd_composite *saved;
+					sym, cd->composite->sym.c_str());
 
-				msg_debug_composites ("composite dependency %s for %s is not checked",
-						sym, cd->composite->sym);
+			if (!cd->checked[atom->ncomp->id * 2]) {
+				msg_debug_composites("composite dependency %s for %s is not checked",
+						sym, cd->composite->sym.c_str());
 				/* Set checked for this symbol to avoid cyclic references */
-				setbit (cd->checked, cd->composite->id * 2);
-				saved = cd->composite; /* Save the current composite */
-				composites_foreach_callback ((gpointer)atom->ncomp->sym, atom->ncomp, cd);
-
+				cd->checked[cd->composite->id * 2] = true;
+				auto *saved = cd->composite; /* Save the current composite */
+				composites_foreach_callback((gpointer)atom->ncomp->sym.c_str(),
+						(gpointer)atom->ncomp, (gpointer)cd);
 				/* Restore state */
 				cd->composite = saved;
-				clrbit (cd->checked, cd->composite->id * 2);
+				cd->checked[cd->composite->id * 2] = false;
 
-				ms = rspamd_task_find_symbol_result (cd->task, sym,
+				ms = rspamd_task_find_symbol_result(cd->task, sym,
 						cd->metric_res);
 			}
 			else {
 				/*
 				 * XXX: in case of cyclic references this would return 0
 				 */
-				if (isset (cd->checked, atom->ncomp->id * 2 + 1)) {
-					ms = rspamd_task_find_symbol_result (cd->task, sym,
+				if (cd->checked[atom->ncomp->id * 2 + 1]) {
+					ms = rspamd_task_find_symbol_result(cd->task, sym,
 							cd->metric_res);
 				}
 			}
@@ -476,54 +574,28 @@ rspamd_composite_process_single_symbol (struct composites_data *cd,
 	}
 
 	if (ms) {
-		msg_debug_composites ("found symbol %s in composite %s, weight: %.3f",
-				sym, cd->composite->sym, ms->score);
+		msg_debug_composites("found symbol %s in composite %s, weight: %.3f",
+				sym, cd->composite->sym.c_str(), ms->score);
 
 		/* Now check options */
-		struct rspamd_composite_option_match *cur_opt;
-
-		DL_FOREACH (atom->opts, cur_opt) {
+		for (const auto &cur_opt : atom->opts) {
 			struct rspamd_symbol_option *opt;
-			bool found = false;
+			auto found = false;
 
 			DL_FOREACH (ms->opts_head, opt) {
-				if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
-					gsize mlen = strlen (cur_opt->data.match);
-
-					if (opt->optlen == mlen &&
-						memcmp (opt->option, cur_opt->data.match, mlen) == 0) {
-
-						found = true;
-
-						break;
-					}
-				}
-				else {
-					if (rspamd_regexp_search (cur_opt->data.re,
-							opt->option, opt->optlen, NULL, NULL, FALSE, NULL)) {
-						found = true;
-
-						break;
-					}
+				if (cur_opt.math_opt({opt->option, opt->optlen})) {
+					found = true;
+					break;
 				}
 			}
 
-
 			if (!found) {
-				if (cur_opt->type == RSPAMD_COMPOSITE_OPTION_PLAIN) {
-					msg_debug_composites ("symbol %s in composite %s misses required option %s",
-							sym,
-							cd->composite->sym,
-							cur_opt->data.match);
-				}
-				else {
-					msg_debug_composites ("symbol %s in composite %s failed to match regexp %s",
-							sym,
-							cd->composite->sym,
-							rspamd_regexp_get_pattern (cur_opt->data.re));
-				}
-
-				ms = NULL;
+				auto pat = cur_opt.get_pat();
+				msg_debug_composites ("symbol %s in composite %s misses required option %*s",
+						sym,
+						cd->composite->sym.c_str(),
+						(int) pat.size(), pat.data());
+				ms = nullptr;
 
 				break;
 			}
@@ -543,109 +615,21 @@ rspamd_composite_process_single_symbol (struct composites_data *cd,
 	return rc;
 }
 
-static void
-rspamd_composite_process_symbol_removal (rspamd_expression_atom_t *atom,
-										 struct composites_data *cd,
-										 struct rspamd_symbol_result *ms,
-										 const gchar *beg)
-{
-	gchar t;
-	struct symbol_remove_data *rd, *nrd;
-	struct rspamd_task *task = cd->task;
-
-	if (ms == NULL) {
-		return;
-	}
-
-	/*
-	 * At this point we know that we need to do something about this symbol,
-	 * however, we don't know whether we need to delete it unfortunately,
-	 * that depends on the later decisions when the complete expression is
-	 * evaluated.
-	 */
-	rd = g_hash_table_lookup (cd->symbols_to_remove, ms->name);
-
-	nrd = rspamd_mempool_alloc (cd->task->task_pool, sizeof (*nrd));
-	nrd->sym = ms->name;
-
-	/* By default remove symbols */
-	switch (cd->composite->policy) {
-	case RSPAMD_COMPOSITE_POLICY_REMOVE_ALL:
-	default:
-		nrd->action = (RSPAMD_COMPOSITE_REMOVE_SYMBOL|RSPAMD_COMPOSITE_REMOVE_WEIGHT);
-		break;
-	case RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL:
-		nrd->action = RSPAMD_COMPOSITE_REMOVE_SYMBOL;
-		break;
-	case RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT:
-		nrd->action = RSPAMD_COMPOSITE_REMOVE_WEIGHT;
-		break;
-	case RSPAMD_COMPOSITE_POLICY_LEAVE:
-		nrd->action = 0;
-		break;
-	}
-
-	for (;;) {
-		t = *beg;
-
-		if (t == '~') {
-			nrd->action &= ~RSPAMD_COMPOSITE_REMOVE_SYMBOL;
-		}
-		else if (t == '-') {
-			nrd->action &= ~(RSPAMD_COMPOSITE_REMOVE_WEIGHT|
-							 RSPAMD_COMPOSITE_REMOVE_SYMBOL);
-		}
-		else if (t == '^') {
-			nrd->action |= RSPAMD_COMPOSITE_REMOVE_FORCED;
-		}
-		else {
-			break;
-		}
-
-		beg ++;
-	}
-
-	nrd->comp = cd->composite;
-	nrd->parent = atom->parent;
-
-	if (rd == NULL) {
-		DL_APPEND (rd, nrd);
-		g_hash_table_insert (cd->symbols_to_remove, (gpointer)ms->name, rd);
-		msg_debug_composites ("%s: added symbol %s to removal: %d policy, from composite %s",
-				cd->metric_res->name,
-				ms->name, nrd->action,
-				cd->composite->sym);
-	}
-	else {
-		DL_APPEND (rd, nrd);
-		msg_debug_composites ("%s: append symbol %s to removal: %d policy, from composite %s",
-				cd->metric_res->name,
-				ms->name, nrd->action,
-				cd->composite->sym);
-	}
-}
-
-static gdouble
-rspamd_composite_expr_process (void *ud,
-		rspamd_expression_atom_t *atom)
+static auto
+rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom) -> double
 {
-	static const double epsilon = 0.00001;
-	struct composites_data *cd = (struct composites_data *)ud;
-	const gchar *sym = NULL;
-	struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *)atom->data;
+	struct composites_data *cd = (struct composites_data *) ud;
+	struct rspamd_composite_atom *comp_atom = (struct rspamd_composite_atom *) atom->data;
 
 	struct rspamd_symbol_result *ms = NULL;
-	struct rspamd_symbols_group *gr;
-	struct rspamd_symbol *sdef;
 	struct rspamd_task *task = cd->task;
-	GHashTableIter it;
-	gpointer k, v;
-	gdouble rc = 0, max = 0;
+	gdouble rc = 0;
 
-	if (isset (cd->checked, cd->composite->id * 2)) {
+	if (cd->checked[cd->composite->id * 2]) {
 		/* We have already checked this composite, so just return its value */
-		if (isset (cd->checked, cd->composite->id * 2 + 1)) {
-			ms = rspamd_task_find_symbol_result (cd->task, sym, cd->metric_res);
+		if (cd->checked[cd->composite->id * 2 + 1]) {
+			ms = rspamd_task_find_symbol_result(cd->task, comp_atom->symbol.c_str(),
+					cd->metric_res);
 		}
 
 		if (ms) {
@@ -654,120 +638,72 @@ rspamd_composite_expr_process (void *ud,
 			}
 			else {
 				/* Treat negative and positive scores equally... */
-				rc = fabs (ms->score);
+				rc = fabs(ms->score);
 			}
 		}
 
-		msg_debug_composites ("composite %s is already checked, result: %.2f",
-				cd->composite->sym, rc);
+		msg_debug_composites("composite %s is already checked, result: %.2f",
+				cd->composite->sym.c_str(), rc);
 
 		return rc;
 	}
 
-	sym = comp_atom->symbol;
-	guint slen = strlen (sym);
+	/* Note: sym is zero terminated as it is a view on std::string */
+	auto sym = std::string_view{comp_atom->symbol};
+	auto group_process_functor = [&](auto cond, int sub_start) -> double {
+		auto max = 0.;
+		GHashTableIter it;
+		gpointer k, v;
+		struct rspamd_symbols_group *gr;
 
-	while (*sym != '\0' && !g_ascii_isalnum (*sym)) {
-		sym ++;
-		slen --;
-	}
+		gr = (struct rspamd_symbols_group *) g_hash_table_lookup(cd->task->cfg->groups,
+				sym.substr(sub_start).data());
 
-	if (slen > 2) {
-		if (G_UNLIKELY (memcmp (sym, "g:", 2) == 0)) {
-			gr = g_hash_table_lookup (cd->task->cfg->groups, sym + 2);
+		if (gr != nullptr) {
+			g_hash_table_iter_init(&it, gr->symbols);
 
-			if (gr != NULL) {
-				g_hash_table_iter_init (&it, gr->symbols);
+			while (g_hash_table_iter_next(&it, &k, &v)) {
+				auto *sdef = (rspamd_symbol *) v;
 
-				while (g_hash_table_iter_next (&it, &k, &v)) {
-					sdef = v;
-					rc = rspamd_composite_process_single_symbol (cd, sdef->name, &ms,
+				if (cond(sdef->score)) {
+					rc = process_single_symbol(cd,
+							sdef->name,
+							&ms,
 							comp_atom);
 
-					if (rc) {
*** OUTPUT TRUNCATED, 380 LINES SKIPPED ***


More information about the Commits mailing list