commit b2c36fe: [Rework] Composites: Start rework of the composites framework

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jul 15 19:07:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-15 10:54:08 +0100
URL: https://github.com/rspamd/rspamd/commit/b2c36feea701c6685d83b8c4e6282fe7e307609d

[Rework] Composites: Start rework of the composites framework

---
 src/libserver/CMakeLists.txt                       |   2 +-
 src/libserver/cfg_rcl.c                            |   2 +-
 .../{composites.c => composites/composites.cxx}    | 172 ++++++++++++---------
 src/libserver/{ => composites}/composites.h        |  19 ---
 src/libserver/task.c                               |   2 +-
 src/lua/lua_cfg_file.c                             |   2 +-
 src/lua/lua_config.c                               |   2 +-
 7 files changed, 106 insertions(+), 95 deletions(-)

diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt
index 189e9fe03..75fad36ac 100644
--- a/src/libserver/CMakeLists.txt
+++ b/src/libserver/CMakeLists.txt
@@ -3,7 +3,7 @@ ADD_SUBDIRECTORY(css)
 SET(LIBRSPAMDSERVERSRC
 				${CMAKE_CURRENT_SOURCE_DIR}/cfg_utils.c
 				${CMAKE_CURRENT_SOURCE_DIR}/cfg_rcl.c
-				${CMAKE_CURRENT_SOURCE_DIR}/composites.c
+				${CMAKE_CURRENT_SOURCE_DIR}/composites/composites.cxx
 				${CMAKE_CURRENT_SOURCE_DIR}/dkim.c
 				${CMAKE_CURRENT_SOURCE_DIR}/dns.c
 				${CMAKE_CURRENT_SOURCE_DIR}/dynamic_cfg.c
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index 73b9a3b1d..68b94abfe 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -20,7 +20,7 @@
 #include "cfg_file.h"
 #include "lua/lua_common.h"
 #include "expression.h"
-#include "composites.h"
+#include "src/libserver/composites/composites.h"
 #include "libserver/worker_util.h"
 #include "unix-std.h"
 #include "cryptobox.h"
diff --git a/src/libserver/composites.c b/src/libserver/composites/composites.cxx
similarity index 88%
rename from src/libserver/composites.c
rename to src/libserver/composites/composites.cxx
index 6f3e8a7b0..0d9534681 100644
--- a/src/libserver/composites.c
+++ b/src/libserver/composites/composites.cxx
@@ -21,7 +21,10 @@
 #include "scan_result.h"
 #include "composites.h"
 
-#include <math.h>
+#include <cmath>
+#include <vector>
+#include <variant>
+#include "contrib/robin-hood/robin_hood.h"
 
 #define msg_err_composites(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
         "composites", task->task_pool->tag.uid, \
@@ -43,6 +46,44 @@
 
 INIT_LOG_MODULE(composites)
 
+
+namespace rspamd::composites {
+static rspamd_expression_atom_t *rspamd_composite_expr_parse(const gchar *line, gsize len,
+															 rspamd_mempool_t *pool,
+															 gpointer ud, GError **err);
+static gdouble rspamd_composite_expr_process(void *ud, rspamd_expression_atom_t *atom);
+static gint rspamd_composite_expr_priority(rspamd_expression_atom_t *atom);
+static void rspamd_composite_expr_destroy(rspamd_expression_atom_t *atom);
+}
+
+const struct rspamd_atom_subr composite_expr_subr = {
+		.parse = rspamd::composites::rspamd_composite_expr_parse,
+		.process = rspamd::composites::rspamd_composite_expr_process,
+		.priority = rspamd::composites::rspamd_composite_expr_priority,
+		.destroy = rspamd::composites::rspamd_composite_expr_destroy
+};
+
+namespace rspamd::composites {
+
+enum class rspamd_composite_policy {
+	RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0,
+	RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL,
+	RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT,
+	RSPAMD_COMPOSITE_POLICY_LEAVE,
+	RSPAMD_COMPOSITE_POLICY_UNKNOWN
+};
+
+/**
+ * Static composites structure
+ */
+struct rspamd_composite {
+	std::string str_expr;
+	std::string sym;
+	struct rspamd_expression *expr;
+	gint id;
+	rspamd_composite_policy policy;
+};
+
 struct composites_data {
 	struct rspamd_task *task;
 	struct rspamd_composite *composite;
@@ -53,59 +94,42 @@ struct composites_data {
 };
 
 struct rspamd_composite_option_match {
-	enum {
-		RSPAMD_COMPOSITE_OPTION_PLAIN,
-		RSPAMD_COMPOSITE_OPTION_RE
-	} type;
-
-	union {
-		rspamd_regexp_t *re;
-		gchar *match;
-	} data;
-	struct rspamd_composite_option_match *prev, *next;
+	std::variant<rspamd_regexp_t *, std::string> match;
+
+	~rspamd_composite_option_match() {
+		if (std::holds_alternative<rspamd_regexp_t *>(match)) {
+			rspamd_regexp_unref(std::get<rspamd_regexp_t *>(match));
+		}
+	}
 };
 
+enum class rspamd_composite_atom_type {
+	ATOM_UNKNOWN,
+	ATOM_COMPOSITE,
+	ATOM_PLAIN
+};
 struct rspamd_composite_atom {
-	gchar *symbol;
-	enum {
-		ATOM_UNKNOWN,
-		ATOM_COMPOSITE,
-		ATOM_PLAIN
-	} comp_type;
-
+	std::string symbol;
+	rspamd_composite_atom_type comp_type;
 	struct rspamd_composite *ncomp; /* underlying composite */
-	struct rspamd_composite_option_match *opts;
+	std::vector<rspamd_composite_option_match> opts;
 };
 
-enum rspamd_composite_action {
+enum rspamd_composite_action : std::uint8_t {
 	RSPAMD_COMPOSITE_UNTOUCH = 0,
-	RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1 << 0),
-	RSPAMD_COMPOSITE_REMOVE_WEIGHT = (1 << 1),
-	RSPAMD_COMPOSITE_REMOVE_FORCED = (1 << 2)
+	RSPAMD_COMPOSITE_REMOVE_SYMBOL = (1u << 0),
+	RSPAMD_COMPOSITE_REMOVE_WEIGHT = (1u << 1),
+	RSPAMD_COMPOSITE_REMOVE_FORCED = (1u << 2)
 };
 
 struct symbol_remove_data {
-	const gchar *sym;
+	const char *sym;
 	struct rspamd_composite *comp;
 	GNode *parent;
-	guint action;
+	std::uint8_t action;
 	struct symbol_remove_data *prev, *next;
 };
 
-static rspamd_expression_atom_t * rspamd_composite_expr_parse (const gchar *line, gsize len,
-		rspamd_mempool_t *pool, gpointer ud, GError **err);
-static gdouble rspamd_composite_expr_process (void *ud, rspamd_expression_atom_t *atom);
-static gint rspamd_composite_expr_priority (rspamd_expression_atom_t *atom);
-static void rspamd_composite_expr_destroy (rspamd_expression_atom_t *atom);
-static void composites_foreach_callback (gpointer key, gpointer value, void *data);
-
-const struct rspamd_atom_subr composite_expr_subr = {
-	.parse = rspamd_composite_expr_parse,
-	.process = rspamd_composite_expr_process,
-	.priority = rspamd_composite_expr_priority,
-	.destroy = rspamd_composite_expr_destroy
-};
-
 static GQuark
 rspamd_composites_quark (void)
 {
@@ -113,8 +137,9 @@ rspamd_composites_quark (void)
 }
 
 static rspamd_expression_atom_t *
-rspamd_composite_expr_parse (const gchar *line, gsize len,
-		rspamd_mempool_t *pool, gpointer ud, GError **err)
+rspamd_composite_expr_parse(const gchar *line, gsize len,
+							rspamd_mempool_t *pool,
+							gpointer ud, GError **err)
 {
 	gsize clen = 0;
 	rspamd_expression_atom_t *res;
@@ -142,7 +167,7 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 
 		switch (state) {
 		case comp_state_read_symbol:
-			clen = rspamd_memcspn (p, "[; \t()><!|&\n", len);
+			clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
 			p += clen;
 
 			if (*p == '[') {
@@ -153,10 +178,10 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 			}
 			break;
 		case comp_state_read_obrace:
-			p ++;
+			p++;
 
 			if (*p == '/') {
-				p ++;
+				p++;
 				state = comp_state_read_regexp;
 			}
 			else {
@@ -166,25 +191,25 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 		case comp_state_read_regexp:
 			if (*p == '\\' && p + 1 < end) {
 				/* Escaping */
-				p ++;
+				p++;
 			}
 			else if (*p == '/') {
 				/* End of regexp, possible flags */
 				state = comp_state_read_regexp_end;
 			}
-			p ++;
+			p++;
 			break;
 		case comp_state_read_option:
 		case comp_state_read_regexp_end:
 			if (*p == ',') {
-				p ++;
+				p++;
 				state = comp_state_read_comma;
 			}
 			else if (*p == ']') {
 				state = comp_state_read_ebrace;
 			}
 			else {
-				p ++;
+				p++;
 			}
 			break;
 		case comp_state_read_comma:
@@ -201,11 +226,11 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 			}
 			else {
 				/* Skip spaces after comma */
-				p ++;
+				p++;
 			}
 			break;
 		case comp_state_read_ebrace:
-			p ++;
+			p++;
 			state = comp_state_read_end;
 			break;
 		case comp_state_read_end:
@@ -214,8 +239,8 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 	}
 
 	if (state != comp_state_read_end) {
-		g_set_error (err, rspamd_composites_quark (), 100, "invalid composite: %s;"
-														   "parser stopped in state %d",
+		g_set_error(err, rspamd_composites_quark(), 100, "invalid composite: %s;"
+														 "parser stopped in state %d",
 				line, state);
 		return NULL;
 	}
@@ -224,9 +249,9 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 	p = line;
 	state = comp_state_read_symbol;
 
-	atom = rspamd_mempool_alloc0 (pool, sizeof (*atom));
+	atom = rspamd_mempool_alloc0 (pool, sizeof(*atom));
 	atom->comp_type = ATOM_UNKNOWN;
-	res = rspamd_mempool_alloc0 (pool, sizeof (*res));
+	res = rspamd_mempool_alloc0 (pool, sizeof(*res));
 	res->len = clen;
 	res->str = line;
 
@@ -242,7 +267,7 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 
 		switch (state) {
 		case comp_state_read_symbol:
-			clen = rspamd_memcspn (p, "[; \t()><!|&\n", len);
+			clen = rspamd_memcspn(p, "[; \t()><!|&\n", len);
 			p += clen;
 
 			if (*p == '[') {
@@ -253,15 +278,15 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 			}
 
 			atom->symbol = rspamd_mempool_alloc (pool, clen + 1);
-			rspamd_strlcpy (atom->symbol, line, clen + 1);
+			rspamd_strlcpy(atom->symbol, line, clen + 1);
 
 			break;
 		case comp_state_read_obrace:
-			p ++;
+			p++;
 
 			if (*p == '/') {
 				opt_start = p;
-				p ++; /* Starting slash */
+				p++; /* Starting slash */
 				state = comp_state_read_regexp;
 			}
 			else {
@@ -273,23 +298,23 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 		case comp_state_read_regexp:
 			if (*p == '\\' && p + 1 < end) {
 				/* Escaping */
-				p ++;
+				p++;
 			}
 			else if (*p == '/') {
 				/* End of regexp, possible flags */
 				state = comp_state_read_regexp_end;
 			}
-			p ++;
+			p++;
 			break;
 		case comp_state_read_option:
 			if (*p == ',' || *p == ']') {
-				opt_match = rspamd_mempool_alloc (pool, sizeof (*opt_match));
+				opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
 				/* Plain match */
 				gchar *opt_buf;
 				gint opt_len = p - opt_start;
 
 				opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
-				rspamd_strlcpy (opt_buf, opt_start, opt_len + 1);
+				rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
 
 				opt_match->data.match = opt_buf;
 				opt_match->type = RSPAMD_COMPOSITE_OPTION_PLAIN;
@@ -305,33 +330,33 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 				}
 			}
 			else {
-				p ++;
+				p++;
 			}
 			break;
 		case comp_state_read_regexp_end:
 			if (*p == ',' || *p == ']') {
-				opt_match = rspamd_mempool_alloc (pool, sizeof (*opt_match));
+				opt_match = rspamd_mempool_alloc (pool, sizeof(*opt_match));
 				/* Plain match */
 				gchar *opt_buf;
 				gint opt_len = p - opt_start;
 
 				opt_buf = rspamd_mempool_alloc (pool, opt_len + 1);
-				rspamd_strlcpy (opt_buf, opt_start, opt_len + 1);
+				rspamd_strlcpy(opt_buf, opt_start, opt_len + 1);
 
 				rspamd_regexp_t *re;
 				GError *re_err = NULL;
 
-				re = rspamd_regexp_new (opt_buf, NULL, &re_err);
+				re = rspamd_regexp_new(opt_buf, NULL, &re_err);
 
 				if (re == NULL) {
 					msg_err_pool ("cannot create regexp from string %s: %e",
 							opt_buf, re_err);
 
-					g_error_free (re_err);
+					g_error_free(re_err);
 				}
 				else {
 					rspamd_mempool_add_destructor (pool,
-							(rspamd_mempool_destruct_t)rspamd_regexp_unref,
+							(rspamd_mempool_destruct_t) rspamd_regexp_unref,
 							re);
 					opt_match->data.re = re;
 					opt_match->type = RSPAMD_COMPOSITE_OPTION_RE;
@@ -348,7 +373,7 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 				}
 			}
 			else {
-				p ++;
+				p++;
 			}
 			break;
 		case comp_state_read_comma:
@@ -367,11 +392,11 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 			}
 			else {
 				/* Skip spaces after comma */
-				p ++;
+				p++;
 			}
 			break;
 		case comp_state_read_ebrace:
-			p ++;
+			p++;
 			state = comp_state_read_end;
 			break;
 		case comp_state_read_end:
@@ -384,6 +409,11 @@ rspamd_composite_expr_parse (const gchar *line, gsize len,
 	return res;
 }
 
+}
+
+static void composites_foreach_callback (gpointer key, gpointer value, void *data);
+
+
 static gdouble
 rspamd_composite_process_single_symbol (struct composites_data *cd,
 										const gchar *sym,
diff --git a/src/libserver/composites.h b/src/libserver/composites/composites.h
similarity index 75%
rename from src/libserver/composites.h
rename to src/libserver/composites/composites.h
index bb7eb8994..d39863b88 100644
--- a/src/libserver/composites.h
+++ b/src/libserver/composites/composites.h
@@ -29,25 +29,6 @@ struct rspamd_task;
  */
 extern const struct rspamd_atom_subr composite_expr_subr;
 
-enum rspamd_composite_policy {
-	RSPAMD_COMPOSITE_POLICY_REMOVE_ALL = 0,
-	RSPAMD_COMPOSITE_POLICY_REMOVE_SYMBOL,
-	RSPAMD_COMPOSITE_POLICY_REMOVE_WEIGHT,
-	RSPAMD_COMPOSITE_POLICY_LEAVE,
-	RSPAMD_COMPOSITE_POLICY_UNKNOWN
-};
-
-/**
- * Composite structure
- */
-struct rspamd_composite {
-	const gchar *str_expr;
-	const gchar *sym;
-	struct rspamd_expression *expr;
-	gint id;
-	enum rspamd_composite_policy policy;
-};
-
 /**
  * Process all results and form composite metrics from existent metrics as it is defined in config
  * @param task worker's task that present message from user
diff --git a/src/libserver/task.c b/src/libserver/task.c
index aae374c21..c9f3fb627 100644
--- a/src/libserver/task.c
+++ b/src/libserver/task.c
@@ -21,7 +21,7 @@
 #include "message.h"
 #include "lua/lua_common.h"
 #include "email_addr.h"
-#include "composites.h"
+#include "src/libserver/composites/composites.h"
 #include "stat_api.h"
 #include "unix-std.h"
 #include "utlist.h"
diff --git a/src/lua/lua_cfg_file.c b/src/lua/lua_cfg_file.c
index 68acdd368..af8964b32 100644
--- a/src/lua/lua_cfg_file.c
+++ b/src/lua/lua_cfg_file.c
@@ -15,7 +15,7 @@
  */
 #include "lua_common.h"
 #include "expression.h"
-#include "composites.h"
+#include "src/libserver/composites/composites.h"
 
 #ifdef HAVE_SYS_UTSNAME_H
 #endif
diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c
index 2631f1cec..c2f5efb42 100644
--- a/src/lua/lua_config.c
+++ b/src/lua/lua_config.c
@@ -16,7 +16,7 @@
 #include "lua_common.h"
 #include "libmime/message.h"
 #include "libutil/expression.h"
-#include "libserver/composites.h"
+#include "src/libserver/composites/composites.h"
 #include "libserver/cfg_file_private.h"
 #include "libmime/lang_detection.h"
 #include "lua/lua_map.h"


More information about the Commits mailing list