commit ce9f332: [Feature] Implement symbols augmentations

Vsevolod Stakhov vsevolod at rspamd.com
Sat May 14 12:49:07 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-05-14 13:05:14 +0100
URL: https://github.com/rspamd/rspamd/commit/ce9f332830db017758e523b195f4f3d671c59630

[Feature] Implement symbols augmentations

---
 src/libserver/symcache/symcache_impl.cxx     | 38 ++++++++++++++++++----------
 src/libserver/symcache/symcache_internal.hxx |  8 ++++++
 src/libserver/symcache/symcache_item.cxx     | 37 +++++++++++++++++++++++++++
 src/libserver/symcache/symcache_item.hxx     | 17 +++++++++++++
 4 files changed, 87 insertions(+), 13 deletions(-)

diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index f76188c9f..e557f6212 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -425,6 +425,7 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f
 
 auto symcache::resort() -> void
 {
+	auto log_func = RSPAMD_LOG_FUNC;
 	auto ord = std::make_shared<order_generation>(filters.size() +
 			prefilters.size() +
 			composites.size() +
@@ -436,6 +437,7 @@ auto symcache::resort() -> void
 	for (auto &it: filters) {
 		if (it) {
 			total_hits += it->st->total_hits;
+			/* Unmask topological order */
 			it->order = 0;
 			ord->d.emplace_back(it);
 		}
@@ -484,16 +486,16 @@ auto symcache::resort() -> void
 			}
 		}
 		else if (tsort_is_marked(it, tsort_mask::TEMP)) {
-			msg_err_cache("cyclic dependencies found when checking '%s'!",
+			msg_err_cache_lambda("cyclic dependencies found when checking '%s'!",
 					it->symbol.c_str());
 			return;
 		}
 
 		tsort_mark(it, tsort_mask::TEMP);
-		msg_debug_cache("visiting node: %s (%d)", it->symbol.c_str(), cur_order);
+		msg_debug_cache_lambda("visiting node: %s (%d)", it->symbol.c_str(), cur_order);
 
 		for (const auto &dep: it->deps) {
-			msg_debug_cache ("visiting dep: %s (%d)", dep.item->symbol.c_str(), cur_order + 1);
+			msg_debug_cache_lambda("visiting dep: %s (%d)", dep.item->symbol.c_str(), cur_order + 1);
 			rec(dep.item.get(), cur_order + 1, rec);
 		}
 
@@ -528,16 +530,26 @@ auto symcache::resort() -> void
 		if (o1 == o2) {
 			/* No topological order */
 			if (it1->priority == it2->priority) {
-				auto avg_freq = ((double) total_hits / used_items);
-				auto avg_weight = (total_weight / used_items);
-				auto f1 = (double) it1->st->total_hits / avg_freq;
-				auto f2 = (double) it2->st->total_hits / avg_freq;
-				auto weight1 = std::fabs(it1->st->weight) / avg_weight;
-				auto weight2 = std::fabs(it2->st->weight) / avg_weight;
-				auto t1 = it1->st->avg_time;
-				auto t2 = it2->st->avg_time;
-				w1 = score_functor(weight1, f1, t1);
-				w2 = score_functor(weight2, f2, t2);
+
+				auto augmentations1 = it1->get_augmentation_weight();
+				auto augmentations2 = it2->get_augmentation_weight();
+
+				if (augmentations1 == augmentations2) {
+					auto avg_freq = ((double) total_hits / used_items);
+					auto avg_weight = (total_weight / used_items);
+					auto f1 = (double) it1->st->total_hits / avg_freq;
+					auto f2 = (double) it2->st->total_hits / avg_freq;
+					auto weight1 = std::fabs(it1->st->weight) / avg_weight;
+					auto weight2 = std::fabs(it2->st->weight) / avg_weight;
+					auto t1 = it1->st->avg_time;
+					auto t2 = it2->st->avg_time;
+					w1 = score_functor(weight1, f1, t1);
+					w2 = score_functor(weight2, f2, t2);
+				}
+				else {
+					w1 = augmentations1;
+					w2 = augmentations2;
+				}
 			}
 			else {
 				/* Strict sorting */
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx
index 84ae8de7f..6a96eb547 100644
--- a/src/libserver/symcache/symcache_internal.hxx
+++ b/src/libserver/symcache/symcache_internal.hxx
@@ -44,6 +44,10 @@
         "symcache", log_tag(), \
         RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
+#define msg_err_cache_lambda(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
+        "symcache", log_tag(), \
+        log_func, /* variable supplied by the expansion site; sibling macros pass RSPAMD_LOG_FUNC here */ \
+        __VA_ARGS__)
 #define msg_err_cache_task(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
         "symcache", task->task_pool->tag.uid, \
         RSPAMD_LOG_FUNC, \
@@ -60,6 +64,10 @@
         ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \
         RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
+#define msg_debug_cache_lambda(...)  rspamd_conditional_debug_fast (NULL, NULL, \
+        ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \
+        log_func, /* variable supplied by the expansion site; sibling macros pass RSPAMD_LOG_FUNC here */ \
+        __VA_ARGS__)
 #define msg_debug_cache_task(...)  rspamd_conditional_debug_fast (NULL, NULL, \
         ::rspamd::symcache::rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
         RSPAMD_LOG_FUNC, \
diff --git a/src/libserver/symcache/symcache_item.cxx b/src/libserver/symcache/symcache_item.cxx
index 70c1921bb..091e6cbf9 100644
--- a/src/libserver/symcache/symcache_item.cxx
+++ b/src/libserver/symcache/symcache_item.cxx
@@ -19,9 +19,22 @@
 #include "symcache_item.hxx"
 #include "fmt/core.h"
 #include "libserver/task.h"
+#include "libutil/cxx/util.hxx"
+#include <numeric>
+#include <functional>
 
 namespace rspamd::symcache {
 
+/* Internal augmentations known to Rspamd, each mapped to the integer weight that cache_item::get_augmentation_weight() sums up */
+static const auto known_augmentations =
+		robin_hood::unordered_flat_map<std::string, int, rspamd::smart_str_hash, rspamd::smart_str_equal>{
+				{"passthrough", 10},
+				{"single_network", 1},
+				{"no_network", 0},
+				{"many_network", 1},
+				{"important", 5},
+		};
+
 auto cache_item::get_parent(const symcache &cache) const -> const cache_item *
 {
 	if (is_virtual()) {
@@ -347,6 +360,30 @@ auto cache_item::is_allowed(struct rspamd_task *task, bool exec_only) const -> b
 	return true;
 }
 
+/* Attach `augmentation` to this item; returns true only for a known name that was not attached before */
+auto cache_item::add_augmentation(const symcache &cache, std::string_view augmentation) -> bool
+{
+	auto log_tag = [&]() { return cache.log_tag(); }; /* the symcache msg_* macros call log_tag() */
+	if (auto found = augmentations.find(augmentation); found != augmentations.end()) {
+		/* Log the stored std::string: string_view::data() is not guaranteed to be NUL-terminated for %s */
+		msg_warn_cache("duplicate augmentation: %s", found->c_str());
+		return false; /* the declared contract requires the augmentation to be unique */
+	}
+	augmentations.insert(std::string(augmentation)); /* unknown names are stored as well */
+	return known_augmentations.contains(augmentation);
+}
+
+/* Adds up, for every augmentation attached to this item, the weight looked up in known_augmentations */
+auto cache_item::get_augmentation_weight() const -> int
+{
+	int no_weight = 0; /* referent for the fallback reference passed to value_or */
+	auto total = 0;
+	for (const std::string &name: augmentations) {
+		total = total + rspamd::find_map(known_augmentations, name).value_or(std::cref<int>(no_weight));
+	}
+	return total;
+}
+
 auto virtual_item::get_parent(const symcache &cache) const -> const cache_item *
 {
 	if (parent) {
diff --git a/src/libserver/symcache/symcache_item.hxx b/src/libserver/symcache/symcache_item.hxx
index 40e2d67c1..70203770a 100644
--- a/src/libserver/symcache/symcache_item.hxx
+++ b/src/libserver/symcache/symcache_item.hxx
@@ -32,6 +32,7 @@
 #include "contrib/expected/expected.hpp"
 #include "contrib/libev/ev.h"
 #include "symcache_runtime.hxx"
+#include "libutil/cxx/hash_util.hxx"
 
 namespace rspamd::symcache {
 
@@ -193,6 +194,9 @@ struct cache_item : std::enable_shared_from_this<cache_item> {
 	id_list exec_only_ids{};
 	id_list forbidden_ids{};
 
+	/* Set of augmentations */
+	robin_hood::unordered_flat_set<std::string, rspamd::smart_str_hash, rspamd::smart_str_equal> augmentations;
+
 	/* Dependencies */
 	std::vector<cache_dependency> deps;
 	/* Reverse dependencies */
@@ -378,6 +382,19 @@ public:
 		}
 	}
 
+	/**
+	 * Add an augmentation to the item, returns `true` if augmentation is known and unique, false otherwise
+	 * @param cache symcache whose log tag is used when a duplicate augmentation is reported
+	 * @param augmentation name of the augmentation to attach to this item
+	 */
+	auto add_augmentation(const symcache &cache, std::string_view augmentation) -> bool;
+
+	/**
+	 * Return the summed weight of all known augmentations attached to this item
+	 * @return accumulated integer weight; 0 when the item has no augmentations
+	 */
+	auto get_augmentation_weight() const -> int;
+
 private:
 	/**
 	 * Constructor for a normal symbols with callback


More information about the Commits mailing list