commit 309ae8e: [Project] Start rewrite symcache in c++

Vsevolod Stakhov vsevolod at rspamd.com
Sat Apr 30 19:21:04 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-03-31 21:55:51 +0100
URL: https://github.com/rspamd/rspamd/commit/309ae8e66a97d65804bce0e75efb2769ceb7a4ee

[Project] Start rewrite symcache in c++

---
 src/libserver/CMakeLists.txt                       |   2 +-
 .../{rspamd_symcache.c => rspamd_symcache.cxx}     | 215 ++++++++++++---------
 src/libserver/rspamd_symcache.h                    |   3 +
 3 files changed, 132 insertions(+), 88 deletions(-)

diff --git a/src/libserver/CMakeLists.txt b/src/libserver/CMakeLists.txt
index a4fdbbfcb..7371e8ade 100644
--- a/src/libserver/CMakeLists.txt
+++ b/src/libserver/CMakeLists.txt
@@ -20,7 +20,7 @@ SET(LIBRSPAMDSERVERSRC
 				${CMAKE_CURRENT_SOURCE_DIR}/roll_history.c
 				${CMAKE_CURRENT_SOURCE_DIR}/spf.c
 				${CMAKE_CURRENT_SOURCE_DIR}/ssl_util.c
-				${CMAKE_CURRENT_SOURCE_DIR}/rspamd_symcache.c
+        ${CMAKE_CURRENT_SOURCE_DIR}/rspamd_symcache.cxx
 				${CMAKE_CURRENT_SOURCE_DIR}/task.c
 				${CMAKE_CURRENT_SOURCE_DIR}/url.c
 				${CMAKE_CURRENT_SOURCE_DIR}/worker_util.c
diff --git a/src/libserver/rspamd_symcache.c b/src/libserver/rspamd_symcache.cxx
similarity index 97%
rename from src/libserver/rspamd_symcache.c
rename to src/libserver/rspamd_symcache.cxx
index d2989d213..a1aa8c504 100644
--- a/src/libserver/rspamd_symcache.c
+++ b/src/libserver/rspamd_symcache.cxx
@@ -23,13 +23,17 @@
 #include "unix-std.h"
 #include "contrib/t1ha/t1ha.h"
 #include "libserver/worker_util.h"
-#include "khash.h"
-#include "utlist.h"
-#include <math.h>
 
-#if defined(__STDC_VERSION__) &&  __STDC_VERSION__ >= 201112L
-# include <stdalign.h>
-#endif
+#include <cmath>
+#include <cstdint>
+#include <vector>
+#include <string>
+#include <string_view>
+#include <memory>
+#include <variant>
+#include "libutil/cxx/local_shared_ptr.hxx"
+
+#include "contrib/robin-hood/robin_hood.h"
 
 #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
         cache->static_pool->tag.tagname, cache->cfg->checksum, \
@@ -67,26 +71,35 @@ INIT_LOG_MODULE(symcache)
 	(dyn_item)->finished = 1
 #define CLR_FINISH_BIT(checkpoint, dyn_item) \
 	(dyn_item)->finished = 0
-static const guchar rspamd_symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0 };
+
+namespace rspamd::symcache {
+
+static const std::uint8_t rspamd_symcache_magic[8] = {'r', 's', 'c', 2, 0, 0, 0, 0};
 
 struct rspamd_symcache_header {
-	guchar magic[8];
-	guint nitems;
-	guchar checksum[64];
-	guchar unused[128];
+	std::uint8_t magic[8];
+	unsigned int nitems;
+	std::uint8_t checksum[64];
+	std::uint8_t unused[128];
 };
 
-struct symcache_order {
-	GPtrArray *d;
-	guint id;
-	ref_entry_t ref;
+struct cache_item;
+using cache_item_ptr = rspamd::local_shared_ptr<cache_item>;
+using cache_item_weak_ptr = rspamd::local_weak_ptr<cache_item>;
+
+struct order_generation {
+	std::vector<cache_item_weak_ptr> d;
+	unsigned int generation_id;
 };
 
+using order_generation_ptr = rspamd::local_shared_ptr<order_generation>;
+
 /*
  * This structure is optimised to store ids list:
  * - If the first element is -1 then use dynamic part, else use static part
+ * There is no std::variant to save space
  */
-struct rspamd_symcache_id_list {
+struct id_list {
 	union {
 		guint32 st[4];
 		struct {
@@ -98,97 +111,135 @@ struct rspamd_symcache_id_list {
 	};
 };
 
-struct rspamd_symcache_condition {
+struct item_condition {
+private:
 	gint cb;
-	struct rspamd_symcache_condition *prev, *next;
+	lua_State *L;
+public:
+	item_condition() {
+		// TODO
+	}
+	virtual ~item_condition() {
+		// TODO
+	}
 };
 
-struct rspamd_symcache_item {
+class normal_item {
+private:
+	symbol_func_t func;
+	void *user_data;
+	std::vector<item_condition> conditions;
+public:
+	explicit normal_item() {
+		// TODO
+	}
+	auto add_condition() -> void {
+		// TODO
+	}
+	auto call() -> void {
+		// TODO
+	}
+};
+
+class virtual_item {
+private:
+	int parent_id;
+	cache_item_ptr parent;
+public:
+	explicit virtual_item() {
+		// TODO
+	}
+};
+
+struct cache_item {
 	/* This block is likely shared */
 	struct rspamd_symcache_item_stat *st;
-
-	guint64 last_count;
 	struct rspamd_counter_data *cd;
-	gchar *symbol;
-	const gchar *type_descr;
-	gint type;
+
+	std::uint64_t last_count;
+	std::string symbol;
+	std::string_view type_descr;
+	int type;
 
 	/* Callback data */
-	union {
-		struct {
-			symbol_func_t func;
-			gpointer user_data;
-			struct rspamd_symcache_condition *conditions;
-		} normal;
-		struct {
-			gint parent;
-			struct rspamd_symcache_item *parent_item;
-		} virtual;
-	} specific;
+	std::variant<normal_item, virtual_item> specific;
 
 	/* Condition of execution */
-	gboolean enabled;
-	/* Used for async stuff checks */
-	gboolean is_filter;
-	gboolean is_virtual;
+	bool enabled;
 
 	/* Priority */
-	gint priority;
+	int priority;
 	/* Topological order */
-	guint order;
-	gint id;
-	gint frequency_peaks;
+	unsigned int order;
+	/* Unique id - counter */
+	int id;
+
+	int frequency_peaks;
 	/* Settings ids */
-	struct rspamd_symcache_id_list allowed_ids;
+	id_list allowed_ids;
 	/* Allows execution but not symbols insertion */
-	struct rspamd_symcache_id_list exec_only_ids;
-	struct rspamd_symcache_id_list forbidden_ids;
+	id_list exec_only_ids;
+	id_list forbidden_ids;
 
 	/* Dependencies */
-	GPtrArray *deps;
-	GPtrArray *rdeps;
+	std::vector<cache_item_ptr> deps;
+	/* Reverse dependencies */
+	std::vector<cache_item_ptr> rdeps;
+};
 
-	/* Container */
-	GPtrArray *container;
+struct delayed_cache_dependency {
+	std::string from;
+	std::string to;
+};
+
+struct delayed_cache_condition {
+	std::string sym;
+	int cbref;
+	lua_State *L;
 };
 
 struct rspamd_symcache {
-	/* Hash table for fast access */
-	GHashTable *items_by_symbol;
-	GPtrArray *items_by_id;
-	struct symcache_order *items_by_order;
-	GPtrArray *connfilters;
-	GPtrArray *prefilters;
-	GPtrArray *filters;
-	GPtrArray *postfilters;
-	GPtrArray *composites;
-	GPtrArray *idempotent;
-	GPtrArray *virtual;
-	GList *delayed_deps;
-	GList *delayed_conditions;
+	/* Map indexed by symbol name: all symbols must have unique names, so this map holds ownership */
+	robin_hood::unordered_flat_map<std::string_view, cache_item_ptr> items_by_symbol;
+	std::vector<cache_item_weak_ptr> items_by_id;
+
+	/* Items sorted into some order */
+	order_generation_ptr items_by_order;
+	unsigned int cur_order_gen;
+
+	std::vector<cache_item_weak_ptr> connfilters;
+	std::vector<cache_item_weak_ptr> prefilters;
+	std::vector<cache_item_weak_ptr> filters;
+	std::vector<cache_item_weak_ptr> postfilters;
+	std::vector<cache_item_weak_ptr> composites;
+	std::vector<cache_item_weak_ptr> idempotent;
+	std::vector<cache_item_weak_ptr> virtual_symbols;
+
+	std::vector<delayed_cache_dependency> delayed_deps;
+	std::vector<delayed_cache_condition> delayed_conditions;
+
 	rspamd_mempool_t *static_pool;
-	guint64 cksum;
-	gdouble total_weight;
-	guint used_items;
-	guint stats_symbols_count;
-	guint64 total_hits;
-	guint id;
+	std::uint64_t cksum;
+	double total_weight;
+	std::size_t used_items;
+	std::size_t stats_symbols_count;
+	std::uint64_t total_hits;
+
 	struct rspamd_config *cfg;
-	gdouble reload_time;
-	gdouble last_profile;
-	gint peak_cb;
+	double reload_time;
+	double last_profile;
+	int peak_cb;
 };
 
-struct rspamd_symcache_dynamic_item {
+struct cache_dynamic_item {
 	guint16 start_msec; /* Relative to task time */
-	unsigned started:1;
-	unsigned finished:1;
+	unsigned started: 1;
+	unsigned finished: 1;
 	/* unsigned pad:14; */
 	guint32 async_events;
 };
 
 
-
 struct cache_dependency {
 	struct rspamd_symcache_item *item; /* Real dependency */
 	gchar *sym; /* Symbolic dep name */
@@ -196,17 +247,6 @@ struct cache_dependency {
 	gint vid; /* Virtual from */
 };
 
-struct delayed_cache_dependency {
-	gchar *from;
-	gchar *to;
-};
-
-struct delayed_cache_condition {
-	gchar *sym;
-	gint cbref;
-	lua_State *L;
-};
-
 struct cache_savepoint {
 	guint version;
 	guint items_inflight;
@@ -229,6 +269,7 @@ struct rspamd_cache_refresh_cbdata {
 	struct rspamd_worker *w;
 	struct ev_loop *event_loop;
 };
+} // namespace rspamd
 
 /* At least once per minute */
 #define PROFILE_MAX_TIME (60.0)
diff --git a/src/libserver/rspamd_symcache.h b/src/libserver/rspamd_symcache.h
index 1d670db04..303544d7b 100644
--- a/src/libserver/rspamd_symcache.h
+++ b/src/libserver/rspamd_symcache.h
@@ -69,6 +69,9 @@ struct rspamd_abstract_callback_data {
 	char data[];
 };
 
+/**
+ * Shared memory block specific for each symbol
+ */
 struct rspamd_symcache_item_stat {
 	struct rspamd_counter_data time_counter;
 	gdouble avg_time;


More information about the Commits mailing list