commit ce9f332: [Feature] Implement symbols augmentations
Vsevolod Stakhov
vsevolod at rspamd.com
Sat May 14 12:49:07 UTC 2022
Author: Vsevolod Stakhov
Date: 2022-05-14 13:05:14 +0100
URL: https://github.com/rspamd/rspamd/commit/ce9f332830db017758e523b195f4f3d671c59630
[Feature] Implement symbols augmentations
---
src/libserver/symcache/symcache_impl.cxx | 38 ++++++++++++++++++----------
src/libserver/symcache/symcache_internal.hxx | 8 ++++++
src/libserver/symcache/symcache_item.cxx | 37 +++++++++++++++++++++++++++
src/libserver/symcache/symcache_item.hxx | 17 +++++++++++++
4 files changed, 87 insertions(+), 13 deletions(-)
diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index f76188c9f..e557f6212 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -425,6 +425,7 @@ auto symcache::add_dependency(int id_from, std::string_view to, int virtual_id_f
auto symcache::resort() -> void
{
+ auto log_func = RSPAMD_LOG_FUNC;
auto ord = std::make_shared<order_generation>(filters.size() +
prefilters.size() +
composites.size() +
@@ -436,6 +437,7 @@ auto symcache::resort() -> void
for (auto &it: filters) {
if (it) {
total_hits += it->st->total_hits;
+ /* Unmask topological order */
it->order = 0;
ord->d.emplace_back(it);
}
@@ -484,16 +486,16 @@ auto symcache::resort() -> void
}
}
else if (tsort_is_marked(it, tsort_mask::TEMP)) {
- msg_err_cache("cyclic dependencies found when checking '%s'!",
+ msg_err_cache_lambda("cyclic dependencies found when checking '%s'!",
it->symbol.c_str());
return;
}
tsort_mark(it, tsort_mask::TEMP);
- msg_debug_cache("visiting node: %s (%d)", it->symbol.c_str(), cur_order);
+ msg_debug_cache_lambda("visiting node: %s (%d)", it->symbol.c_str(), cur_order);
for (const auto &dep: it->deps) {
- msg_debug_cache ("visiting dep: %s (%d)", dep.item->symbol.c_str(), cur_order + 1);
+ msg_debug_cache_lambda("visiting dep: %s (%d)", dep.item->symbol.c_str(), cur_order + 1);
rec(dep.item.get(), cur_order + 1, rec);
}
@@ -528,16 +530,26 @@ auto symcache::resort() -> void
if (o1 == o2) {
/* No topological order */
if (it1->priority == it2->priority) {
- auto avg_freq = ((double) total_hits / used_items);
- auto avg_weight = (total_weight / used_items);
- auto f1 = (double) it1->st->total_hits / avg_freq;
- auto f2 = (double) it2->st->total_hits / avg_freq;
- auto weight1 = std::fabs(it1->st->weight) / avg_weight;
- auto weight2 = std::fabs(it2->st->weight) / avg_weight;
- auto t1 = it1->st->avg_time;
- auto t2 = it2->st->avg_time;
- w1 = score_functor(weight1, f1, t1);
- w2 = score_functor(weight2, f2, t2);
+
+ auto augmentations1 = it1->get_augmentation_weight();
+ auto augmentations2 = it2->get_augmentation_weight();
+
+ if (augmentations1 == augmentations2) {
+ auto avg_freq = ((double) total_hits / used_items);
+ auto avg_weight = (total_weight / used_items);
+ auto f1 = (double) it1->st->total_hits / avg_freq;
+ auto f2 = (double) it2->st->total_hits / avg_freq;
+ auto weight1 = std::fabs(it1->st->weight) / avg_weight;
+ auto weight2 = std::fabs(it2->st->weight) / avg_weight;
+ auto t1 = it1->st->avg_time;
+ auto t2 = it2->st->avg_time;
+ w1 = score_functor(weight1, f1, t1);
+ w2 = score_functor(weight2, f2, t2);
+ }
+ else {
+ w1 = augmentations1;
+ w2 = augmentations2;
+ }
}
else {
/* Strict sorting */
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx
index 84ae8de7f..6a96eb547 100644
--- a/src/libserver/symcache/symcache_internal.hxx
+++ b/src/libserver/symcache/symcache_internal.hxx
@@ -44,6 +44,10 @@
"symcache", log_tag(), \
RSPAMD_LOG_FUNC, \
__VA_ARGS__)
+/*
+ * Same as msg_err_cache, but the function name reported to the logger is taken
+ * from a `log_func` variable that must be visible at the expansion site instead
+ * of RSPAMD_LOG_FUNC. Presumably meant for lambda bodies, where RSPAMD_LOG_FUNC
+ * would name the lambda rather than the enclosing function - confirm.
+ */
+#define msg_err_cache_lambda(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
+ "symcache", log_tag(), \
+ log_func, \
+ __VA_ARGS__)
#define msg_err_cache_task(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
"symcache", task->task_pool->tag.uid, \
RSPAMD_LOG_FUNC, \
@@ -60,6 +64,10 @@
::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \
RSPAMD_LOG_FUNC, \
__VA_ARGS__)
+/*
+ * Same as msg_debug_cache, but the function name reported to the logger is taken
+ * from a `log_func` variable that must be visible at the expansion site instead
+ * of RSPAMD_LOG_FUNC. Presumably meant for lambda bodies, where RSPAMD_LOG_FUNC
+ * would name the lambda rather than the enclosing function - confirm.
+ */
+#define msg_debug_cache_lambda(...) rspamd_conditional_debug_fast (NULL, NULL, \
+ ::rspamd::symcache::rspamd_symcache_log_id, "symcache", log_tag(), \
+ log_func, \
+ __VA_ARGS__)
#define msg_debug_cache_task(...) rspamd_conditional_debug_fast (NULL, NULL, \
::rspamd::symcache::rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
RSPAMD_LOG_FUNC, \
diff --git a/src/libserver/symcache/symcache_item.cxx b/src/libserver/symcache/symcache_item.cxx
index 70c1921bb..091e6cbf9 100644
--- a/src/libserver/symcache/symcache_item.cxx
+++ b/src/libserver/symcache/symcache_item.cxx
@@ -19,9 +19,22 @@
#include "symcache_item.hxx"
#include "fmt/core.h"
#include "libserver/task.h"
+#include "libutil/cxx/util.hxx"
+#include <numeric>
+#include <functional>
namespace rspamd::symcache {
+/*
+ * A list of internal augmentations that are known to Rspamd with their weight.
+ *
+ * Maps an augmentation name to an integer weight. The table is consulted by
+ * cache_item::add_augmentation to tell whether a name is known, and by
+ * cache_item::get_augmentation_weight, which sums the weights of the item's
+ * augmentations (names missing from this table add nothing to the sum).
+ */
+static const auto known_augmentations =
+ robin_hood::unordered_flat_map<std::string, int, rspamd::smart_str_hash, rspamd::smart_str_equal>{
+ {"passthrough", 10},
+ {"single_network", 1},
+ {"no_network", 0},
+ {"many_network", 1},
+ {"important", 5},
+ };
+
auto cache_item::get_parent(const symcache &cache) const -> const cache_item *
{
if (is_virtual()) {
@@ -347,6 +360,30 @@ auto cache_item::is_allowed(struct rspamd_task *task, bool exec_only) const -> b
return true;
}
+/**
+ * Attach an augmentation to this item.
+ *
+ * A repeated augmentation is reported with a warning and rejected, so that the
+ * result follows the contract stated on the declaration: `true` only for an
+ * augmentation that is both known and unique.
+ * @param cache cache whose log tag is used for the warning message
+ * @param augmentation augmentation name; it does not need to be NUL-terminated
+ * @return `true` if the augmentation was not present before and is listed in
+ *         `known_augmentations`, `false` otherwise
+ */
+auto
+cache_item::add_augmentation(const symcache &cache, std::string_view augmentation) -> bool {
+	/* The msg_*_cache macros call log_tag() from the enclosing scope */
+	auto log_tag = [&]() { return cache.log_tag(); };
+
+	if (augmentations.contains(augmentation)) {
+		/*
+		 * string_view::data() is not guaranteed to be NUL-terminated, so an
+		 * owned copy is made before handing the name to a `%s` conversion.
+		 */
+		const std::string printable{augmentation};
+		msg_warn_cache("duplicate augmentation: %s", printable.c_str());
+
+		return false;
+	}
+
+	augmentations.insert(std::string(augmentation));
+
+	return known_augmentations.contains(augmentation);
+}
+
+/**
+ * Compute the total weight of the augmentations attached to this item.
+ *
+ * Each augmentation name is looked up in `known_augmentations`; names that are
+ * not listed there contribute nothing to the result.
+ * @return sum of the weights of all known augmentations of this item
+ */
+auto
+cache_item::get_augmentation_weight() const -> int
+{
+	int total = 0;
+
+	for (const std::string &name: augmentations) {
+		const auto maybe_weight = rspamd::find_map(known_augmentations, name);
+
+		if (maybe_weight) {
+			total += maybe_weight->get();
+		}
+	}
+
+	return total;
+}
+
+
auto virtual_item::get_parent(const symcache &cache) const -> const cache_item *
{
if (parent) {
diff --git a/src/libserver/symcache/symcache_item.hxx b/src/libserver/symcache/symcache_item.hxx
index 40e2d67c1..70203770a 100644
--- a/src/libserver/symcache/symcache_item.hxx
+++ b/src/libserver/symcache/symcache_item.hxx
@@ -32,6 +32,7 @@
#include "contrib/expected/expected.hpp"
#include "contrib/libev/ev.h"
#include "symcache_runtime.hxx"
+#include "libutil/cxx/hash_util.hxx"
namespace rspamd::symcache {
@@ -193,6 +194,9 @@ struct cache_item : std::enable_shared_from_this<cache_item> {
id_list exec_only_ids{};
id_list forbidden_ids{};
+ /* Set of augmentations */
+ robin_hood::unordered_flat_set<std::string, rspamd::smart_str_hash, rspamd::smart_str_equal> augmentations;
+
/* Dependencies */
std::vector<cache_dependency> deps;
/* Reverse dependencies */
@@ -378,6 +382,19 @@ public:
}
}
+ /**
+ * Add an augmentation to the item, returns `true` if augmentation is known and unique, false otherwise
+ * @param cache symbol cache whose log tag is used when a duplicate augmentation is reported
+ * @param augmentation name of the augmentation to attach to this item
+ * @return `true` if augmentation is known and unique, `false` otherwise
+ */
+ auto add_augmentation(const symcache &cache, std::string_view augmentation) -> bool;
+
+ /**
+ * Return sum weight of all known augmentations
+ * @return sum of the weights of this item's augmentations that are listed in the
+ * table of known augmentations; augmentations missing from that table add nothing
+ */
+ auto get_augmentation_weight() const -> int;
+
private:
/**
* Constructor for a normal symbols with callback
More information about the Commits
mailing list