commit 1ce6e9b: [Project] Reimplement dependencies processing

Vsevolod Stakhov vsevolod at rspamd.com
Sat Apr 30 19:21:19 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-04-10 09:05:56 +0100
URL: https://github.com/rspamd/rspamd/commit/1ce6e9b3c7d30e9fef3840ae70eb20859f46d7ff

[Project] Reimplement dependencies processing

---
 src/libserver/symcache/symcache_impl.cxx     | 114 +++++++++++++--------------
 src/libserver/symcache/symcache_internal.hxx | 101 ++----------------------
 2 files changed, 62 insertions(+), 153 deletions(-)

diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index a7afae6d9..11fd7b0e6 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -31,7 +31,6 @@ auto symcache::init() -> bool
 		res = load_items();
 	}
 
-
 	/* Deal with the delayed dependencies */
 	for (const auto &delayed_dep : *delayed_deps) {
 		auto virt_item = get_item_by_name(delayed_dep.from, false);
@@ -78,45 +77,49 @@ auto symcache::init() -> bool
 	}
 	delayed_conditions.reset();
 
-	PTR_ARRAY_FOREACH (cache->items_by_id, i, it) {
+	for (auto &it : items_by_id) {
+		it->process_deps(*this);
+	}
 
-		PTR_ARRAY_FOREACH (it->deps, j, dep) {
-			rspamd_symcache_process_dep(cache, it, dep);
-		}
+	for (auto &it : virtual_symbols) {
+		it->process_deps(*this);
+	}
 
-		if (it->deps) {
-			/* Reversed loop to make removal safe */
-			for (j = it->deps->len - 1; j >= 0; j--) {
-				dep = g_ptr_array_index (it->deps, j);
+	/* Sorting stuff */
+	auto postfilters_cmp = [](const auto &it1, const auto &it2) -> int {
+		if (it1->priority > it2-> priority) {
+			return 1;
+		}
+		else if (it1->priority == it2->priority) {
+			return 0;
+		}
 
-				if (dep->item == NULL) {
-					/* Remove useless dep */
-					g_ptr_array_remove_index(it->deps, j);
-				}
-			}
+		return -1;
+	};
+	auto prefilters_cmp = [](const auto &it1, const auto &it2) -> int {
+		if (it1->priority > it2-> priority) {
+			return -1;
+		}
+		else if (it1->priority == it2->priority) {
+			return 0;
 		}
-	}
 
-	/* Special case for virtual symbols */
-	PTR_ARRAY_FOREACH (cache->virtual, i, it) {
+		return 1;
+	};
 
-		PTR_ARRAY_FOREACH (it->deps, j, dep) {
-			rspamd_symcache_process_dep(cache, it, dep);
-		}
-	}
 
-	g_ptr_array_sort_with_data(cache->connfilters, prefilters_cmp, cache);
-	g_ptr_array_sort_with_data(cache->prefilters, prefilters_cmp, cache);
-	g_ptr_array_sort_with_data(cache->postfilters, postfilters_cmp, cache);
-	g_ptr_array_sort_with_data(cache->idempotent, postfilters_cmp, cache);
+	std::stable_sort(std::begin(connfilters), std::end(connfilters), prefilters_cmp);
+	std::stable_sort(std::begin(prefilters), std::end(prefilters), prefilters_cmp);
+	std::stable_sort(std::begin(postfilters), std::end(postfilters), postfilters_cmp);
+	std::stable_sort(std::begin(idempotent), std::end(idempotent), postfilters_cmp);
 
 	rspamd_symcache_resort(cache);
 
 	/* Connect metric symbols with symcache symbols */
-	if (cache->cfg->symbols) {
-		g_hash_table_foreach(cache->cfg->symbols,
+	if (cfg->symbols) {
+		g_hash_table_foreach(cfg->symbols,
 				rspamd_symcache_metric_connect_cb,
-				cache);
+				this);
 	}
 
 	return res;
@@ -134,7 +137,7 @@ auto symcache::load_items() -> bool
 
 
 	if (cached_map->get_size() < (gint) sizeof(symcache_header)) {
-		msg_info_cache("cannot use file %s, truncated: %z", cfg->cache_filename, ,
+		msg_info_cache("cannot use file %s, truncated: %z", cfg->cache_filename,
 				errno, strerror(errno));
 		return false;
 	}
@@ -373,12 +376,10 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f
 	const auto &source = items_by_id[id_from];
 	g_assert (source.get() != nullptr);
 
-	source->deps.emplace_back(cache_dependency{
-			.item = cache_item_ptr{nullptr},
-			.sym = std::string(to),
-			.id = id_from,
-			.vid = -1,
-	});
+	source->deps.emplace_back(cache_item_ptr{nullptr},
+			std::string(to),
+			id_from,
+			-1);
 
 
 	if (virtual_id_from >= 0) {
@@ -386,12 +387,10 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f
 		/* We need that for settings id propagation */
 		const auto &vsource = virtual_symbols[virtual_id_from];
 		g_assert (vsource.get() != nullptr);
-		vsource->deps.emplace_back(cache_dependency{
-				.item = cache_item_ptr{nullptr},
-				.sym = std::string(to),
-				.id = -1,
-				.vid = virtual_id_from,
-		});
+		vsource->deps.emplace_back(cache_item_ptr{nullptr},
+				std::string(to),
+				-1,
+				virtual_id_from);
 	}
 }
 
@@ -423,38 +422,36 @@ auto cache_item::process_deps(const symcache &cache) -> void
 
 			if (!vdit) {
 				if (dit) {
-					msg_err_cache ("cannot add dependency from %s on %s: no dependency symbol registered",
+					msg_err_cache("cannot add dependency from %s on %s: no dependency symbol registered",
 							dep.sym.c_str(), dit->symbol.c_str());
 				}
 			}
 			else {
-				msg_debug_cache ("process virtual dependency %s(%d) on %s(%d)", symbol.c_str(),
+				msg_debug_cache("process virtual dependency %s(%d) on %s(%d)", symbol.c_str(),
 						dep.vid, vdit->symbol.c_str(), vdit->id);
 
 				std::size_t nids = 0;
 
-				msg_debug_cache ("check id propagation for dependency %s from %s",
+				/* Propagate ids */
+				msg_debug_cache("check id propagation for dependency %s from %s",
 						symbol.c_str(), dit->symbol.c_str());
 
 				const auto *ids = dit->allowed_ids.get_ids(nids);
 
-				/* TODO: merge? */
 				if (nids > 0) {
-					msg_info_cache ("propagate allowed ids from %s to %s",
+					msg_debug_cache("propagate allowed ids from %s to %s",
 							dit->symbol.c_str(), symbol.c_str());
 
-					rspamd_symcache_set_allowed_settings_ids (cache, it->symbol, ids,
-							nids);
+					allowed_ids.set_ids(ids, nids, cache.get_pool());
 				}
 
-				ids = rspamd_symcache_get_forbidden_settings_ids (cache, dit->symbol, &nids);
+				ids = dit->forbidden_ids.get_ids(nids);
 
 				if (nids > 0) {
-					msg_info_cache ("propagate forbidden ids from %s to %s",
-							dit->symbol, it->symbol);
+					msg_debug_cache("propagate forbidden ids from %s to %s",
+							dit->symbol.c_str(), symbol.c_str());
 
-					rspamd_symcache_set_forbidden_settings_ids (cache, it->symbol, ids,
-							nids);
+					forbidden_ids.set_ids(ids, nids, cache.get_pool());
 				}
 			}
 		}
@@ -524,14 +521,13 @@ auto cache_item::process_deps(const symcache &cache) -> void
 			msg_err_cache ("cannot find dependency on symbol %s for symbol %s",
 					dep.sym.c_str(), symbol.c_str());
 
-			return;
-		}
-
-		if (vdit) {
-			/* Use virtual symbol to propagate deps */
-			rspamd_symcache_propagate_dep (cache, it, vdit);
+			continue;
 		}
 	}
+
+	// Remove empty deps
+	deps.erase(std::remove_if(std::begin(deps), std::end(deps),
+			[](const auto &dep){ return !dep.item; }), std::end(deps));
 }
 
 auto virtual_item::get_parent(const symcache &cache) const -> const cache_item *
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx
index cb7d206b0..a2b852c19 100644
--- a/src/libserver/symcache/symcache_internal.hxx
+++ b/src/libserver/symcache/symcache_internal.hxx
@@ -36,6 +36,8 @@
 #include "cfg_file.h"
 #include "lua/lua_common.h"
 
+#include "symcache_id_list.hxx"
+
 #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
         "symcache", log_tag(), \
         RSPAMD_LOG_FUNC, \
@@ -82,99 +84,6 @@ struct order_generation {
 
 using order_generation_ptr = std::shared_ptr<order_generation>;
 
-/*
- * This structure is optimised to store ids list:
- * - If the first element is -1 then use dynamic part, else use static part
- * There is no std::variant to save space
- */
-struct id_list {
-	union {
-		std::uint32_t st[4];
-		struct {
-			std::uint32_t e; /* First element */
-			std::uint16_t len;
-			std::uint16_t allocated;
-			std::uint32_t *n;
-		} dyn;
-	} data;
-
-	id_list() {
-		std::memset((void *)&data, 0, sizeof(data));
-	}
-	/**
-	 * Returns ids from a compressed list, accepting a mutable reference for number of elements
-	 * @param nids output of the number of elements
-	 * @return
-	 */
-	auto get_ids(std::size_t &nids) const -> const std::uint32_t * {
-		if (data.dyn.e == -1) {
-			/* Dynamic list */
-			nids = data.dyn.len;
-
-			return data.dyn.n;
-		}
-		else {
-			auto cnt = 0;
-
-			while (data.st[cnt] != 0 && cnt < G_N_ELEMENTS(data.st)) {
-				cnt ++;
-			}
-
-			nids = cnt;
-
-			return data.st;
-		}
-	}
-
-	auto add_id(std::uint32_t id, rspamd_mempool_t *pool) -> void {
-		if (data.st[0] == -1) {
-			/* Dynamic array */
-			if (data.dyn.len < data.dyn.allocated) {
-				/* Trivial, append + sort */
-				data.dyn.n[data.dyn.len++] = id;
-			}
-			else {
-				/* Reallocate */
-				g_assert (data.dyn.allocated <= G_MAXINT16);
-				data.dyn.allocated *= 2;
-
-				auto *new_array = rspamd_mempool_alloc_array_type(pool,
-						data.dyn.allocated, std::uint32_t);
-				memcpy(new_array, data.dyn.n, data.dyn.len * sizeof(std::uint32_t));
-				data.dyn.n = new_array;
-				data.dyn.n[data.dyn.len++] = id;
-			}
-
-			std::sort(data.dyn.n, data.dyn.n + data.dyn.len);
-		}
-		else {
-			/* Static part */
-			auto cnt = 0u;
-			while (data.st[cnt] != 0 && cnt < G_N_ELEMENTS (data.st)) {
-				cnt ++;
-			}
-
-			if (cnt < G_N_ELEMENTS (data.st)) {
-				data.st[cnt] = id;
-			}
-			else {
-				/* Switch to dynamic */
-				data.dyn.allocated = G_N_ELEMENTS (data.st) * 2;
-				auto *new_array = rspamd_mempool_alloc_array_type(pool,
-						data.dyn.allocated, std::uint32_t);
-				memcpy (new_array, data.st, sizeof(data.st));
-				data.dyn.n = new_array;
-				data.dyn.e = -1; /* Marker */
-				data.dyn.len = G_N_ELEMENTS (data.st);
-
-				/* Recursively jump to dynamic branch that will handle insertion + sorting */
-				add_id(id, pool); // tail call
-			}
-		}
-	}
-
-
-};
 
 class symcache;
 
@@ -264,7 +173,7 @@ struct cache_item : std::enable_shared_from_this<cache_item> {
 	/* Dependencies */
 	std::vector<cache_dependency> deps;
 	/* Reverse dependencies */
-	std::vector<cache_item_ptr> rdeps;
+	std::vector<cache_dependency> rdeps;
 
 public:
 	[[nodiscard]] static auto create() -> cache_item_ptr {
@@ -426,6 +335,10 @@ public:
 	auto log_tag() const -> const char* {
 		return cfg->checksum;
 	}
+
+	auto get_pool() const {
+		return static_pool;
+	}
 };
 
 /*


More information about the Commits mailing list