commit 8214b27: [Rework] Re-implement cache sorting

Vsevolod Stakhov vsevolod at rspamd.com
Sat Apr 30 19:21:22 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-04-10 11:09:51 +0100
URL: https://github.com/rspamd/rspamd/commit/8214b27e0b9178549116c8c5449271f1fe099a88

[Rework] Re-implement cache sorting

---
 src/libserver/cfg_file.h                     |   2 +-
 src/libserver/symcache/symcache_impl.cxx     | 150 ++++++++++++++++++++++++++-
 src/libserver/symcache/symcache_internal.hxx |  10 +-
 3 files changed, 156 insertions(+), 6 deletions(-)

diff --git a/src/libserver/cfg_file.h b/src/libserver/cfg_file.h
index 7532639a7..18524af8d 100644
--- a/src/libserver/cfg_file.h
+++ b/src/libserver/cfg_file.h
@@ -146,7 +146,7 @@ struct rspamd_symbol {
 	struct rspamd_symbols_group *gr; /* Main group */
 	GPtrArray *groups; /* Other groups */
 	guint flags;
-	struct rspamd_symcache_item *cache_item;
+	void *cache_item;
 	gint nshots;
 };
 
diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index 11fd7b0e6..aadd53b8f 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -18,6 +18,8 @@
 #include "unix-std.h"
 #include "libutil/cxx/locked_file.hxx"
 
+#include <cmath>
+
 namespace rspamd::symcache {
 
 INIT_LOG_MODULE_PUBLIC(symcache)
@@ -113,13 +115,13 @@ auto symcache::init() -> bool
 	std::stable_sort(std::begin(postfilters), std::end(postfilters), postfilters_cmp);
 	std::stable_sort(std::begin(idempotent), std::end(idempotent), postfilters_cmp);
 
-	rspamd_symcache_resort(cache);
+	resort();
 
 	/* Connect metric symbols with symcache symbols */
 	if (cfg->symbols) {
 		g_hash_table_foreach(cfg->symbols,
-				rspamd_symcache_metric_connect_cb,
-				this);
+				symcache::metric_connect_cb,
+				(void *)this);
 	}
 
 	return res;
@@ -316,6 +318,20 @@ bool symcache::save_items() const
 	return ret;
 }
 
+auto symcache::metric_connect_cb(void *k, void *v, void *ud) -> void
+{
+	auto *cache = (symcache *)ud;
+	const auto *sym = (const char *)k;
+	auto *s = (struct rspamd_symbol *)v;
+	auto weight = *s->weight_ptr;
+	auto *item = cache->get_item_by_name_mut(sym, false);
+
+	if (item) {
+		item->st->weight = weight;
+		s->cache_item = (void *)item;
+	}
+}
+
 
 auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item *
 {
@@ -394,6 +410,134 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f
 	}
 }
 
+auto symcache::resort() -> void
+{
+	auto ord = std::make_shared<order_generation>(filters.size(), cur_order_gen);
+
+	for (auto &it : filters) {
+		total_hits += it->st->total_hits;
+		it->order = 0;
+		ord->d.emplace_back(it);
+	}
+
+	enum class tsort_mask {
+		PERM,
+		TEMP
+	};
+
+	constexpr auto tsort_unmask = [](cache_item *it) -> auto {
+		return (it->order & ~((1u << 31) | (1u << 30)));
+	};
+
+	/* Recursive topological sort helper */
+	const auto tsort_visit = [&](cache_item *it, unsigned cur_order, auto &&rec) {
+		constexpr auto tsort_mark = [](cache_item *it, tsort_mask how) {
+			switch (how) {
+			case tsort_mask::PERM:
+				it->order |= (1u << 31);
+				break;
+			case tsort_mask::TEMP:
+				it->order |= (1u << 30);
+				break;
+			}
+		};
+		constexpr auto tsort_is_marked = [](cache_item *it, tsort_mask how) {
+			switch (how) {
+			case tsort_mask::PERM:
+				return (it->order & (1u << 31));
+			case tsort_mask::TEMP:
+				return (it->order & (1u << 30));
+			}
+		};
+
+		if (tsort_is_marked(it, tsort_mask::PERM)) {
+			if (cur_order > tsort_unmask(it)) {
+				/* Need to recalculate the whole chain */
+				it->order = cur_order; /* That also removes all masking */
+			}
+			else {
+				/* We are fine, stop DFS */
+				return;
+			}
+		}
+		else if (tsort_is_marked(it, tsort_mask::TEMP)) {
+			msg_err_cache("cyclic dependencies found when checking '%s'!",
+					it->symbol.c_str());
+			return;
+		}
+
+		tsort_mark(it, tsort_mask::TEMP);
+		msg_debug_cache("visiting node: %s (%d)", it->symbol.c_str(), cur_order);
+
+		for (const auto &dep : it->deps) {
+			msg_debug_cache ("visiting dep: %s (%d)", dep.item->symbol.c_str(), cur_order + 1);
+			rec(dep.item.get(), cur_order + 1, rec);
+		}
+
+		it->order = cur_order;
+		tsort_mark(it, tsort_mask::PERM);
+	};
+	/*
+	 * Topological sort
+	 */
+	total_hits = 0;
+
+	for (const auto &it : filters) {
+		if (it->order == 0) {
+			tsort_visit(it.get(), 0, tsort_visit);
+		}
+	}
+
+
+	/* Main sorting comparator */
+	constexpr auto score_functor = [](auto w, auto f, auto t) -> auto {
+		auto time_alpha = 1.0, weight_alpha = 0.1, freq_alpha = 0.01;
+
+		return ((w > 0.0 ? w : weight_alpha) * (f > 0.0 ? f : freq_alpha) /
+				(t > time_alpha ? t : time_alpha));
+	};
+
+	auto cache_order_cmp = [&](const auto &it1, const auto &it2) -> auto {
+		auto o1 = tsort_unmask(it1.get()), o2 = tsort_unmask(it2.get());
+		double w1 = 0., w2 = 0.;
+
+		if (o1 == o2) {
+			/* No topological order */
+			if (it1->priority == it2->priority) {
+				auto avg_freq = ((double) total_hits / used_items);
+				auto avg_weight = (total_weight / used_items);
+				auto f1 = (double) it1->st->total_hits / avg_freq;
+				auto f2 = (double) it2->st->total_hits / avg_freq;
+				auto weight1 = std::fabs(it1->st->weight) / avg_weight;
+				auto weight2 = std::fabs(it2->st->weight) / avg_weight;
+				auto t1 = it1->st->avg_time;
+				auto t2 = it2->st->avg_time;
+				w1 = score_functor(weight1, f1, t1);
+				w2 = score_functor(weight2, f2, t2);
+			} else {
+				/* Strict sorting */
+				w1 = std::abs(it1->priority);
+				w2 = std::abs(it2->priority);
+			}
+		}
+		else {
+			w1 = o1;
+			w2 = o2;
+		}
+
+		if (w2 > w1) {
+			return 1;
+		}
+		else if (w2 < w1) {
+			return -1;
+		}
+
+		return 0;
+	};
+
+	std::stable_sort(std::begin(ord->d), std::end(ord->d), cache_order_cmp);
+	std::swap(ord, items_by_order);
+}
 
 
 auto cache_item::get_parent(const symcache &cache) const -> const cache_item *
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx
index a2b852c19..7dd664e5c 100644
--- a/src/libserver/symcache/symcache_internal.hxx
+++ b/src/libserver/symcache/symcache_internal.hxx
@@ -78,13 +78,16 @@ using cache_item_ptr = std::shared_ptr<cache_item>;
 using cache_item_weak_ptr = std::weak_ptr<cache_item>;
 
 struct order_generation {
-	std::vector<cache_item_weak_ptr> d;
+	std::vector<cache_item_ptr> d;
 	unsigned int generation_id;
+
+	explicit order_generation(std::size_t nelts, unsigned id) : generation_id(id) {
+		d.reserve(nelts);
+	}
 };
 
 using order_generation_ptr = std::shared_ptr<order_generation>;
 
-
 class symcache;
 
 struct item_condition {
@@ -269,6 +272,9 @@ private:
 	/* Internal methods */
 	auto load_items() -> bool;
 	auto save_items() const -> bool;
+	auto resort() -> void;
+	/* Helper for g_hash_table_foreach */
+	static auto metric_connect_cb(void *k, void *v, void *ud) -> void;
 
 public:
 	explicit symcache(struct rspamd_config *cfg) : cfg(cfg) {


More information about the Commits mailing list