commit 562b5dd: [Rework] Reimplement saving/loading the cache items
Vsevolod Stakhov
vsevolod at rspamd.com
Sat Apr 30 19:21:14 UTC 2022
Author: Vsevolod Stakhov
Date: 2022-04-03 12:36:55 +0100
URL: https://github.com/rspamd/rspamd/commit/562b5dd0fd72065176ade3616e2bafe3618879a8
[Rework] Reimplement saving/loading the cache items
---
src/libserver/symcache/symcache_impl.cxx | 108 ++++++++++++++++++++++-----
src/libserver/symcache/symcache_internal.hxx | 19 ++---
2 files changed, 99 insertions(+), 28 deletions(-)
diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index aaf8b0cdd..7836c80b9 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -142,7 +142,7 @@ auto symcache::load_items() -> bool
return false;
}
- const auto *hdr = (struct symcache_header *)cached_map->get_map();
+ const auto *hdr = (struct symcache_header *) cached_map->get_map();
if (memcmp(hdr->magic, symcache_magic,
sizeof(symcache_magic)) != 0) {
@@ -152,7 +152,7 @@ auto symcache::load_items() -> bool
}
auto *parser = ucl_parser_new(0);
- const auto *p = (const std::uint8_t *)(hdr + 1);
+ const auto *p = (const std::uint8_t *) (hdr + 1);
if (!ucl_parser_add_chunk(parser, p, cached_map->get_size() - sizeof(*hdr))) {
msg_info_cache ("cannot use file %s, cannot parse: %s", cfg->cache_filename,
@@ -222,15 +222,13 @@ auto symcache::load_items() -> bool
}
if (item->is_virtual() && !(item->type & SYMBOL_TYPE_GHOST)) {
- g_assert (item->specific.virtual.parent < (gint)cache->items_by_id->len);
- parent = g_ptr_array_index (cache->items_by_id,
- item->specific.virtual.parent);
- item->specific.virtual.parent_item = parent;
+ const auto &parent = item->get_parent(*this);
- if (parent->st->weight < item->st->weight) {
- parent->st->weight = item->st->weight;
+ if (parent) {
+ if (parent->st->weight < item->st->weight) {
+ parent->st->weight = item->st->weight;
+ }
}
-
/*
* We maintain avg_time for virtual symbols equal to the
* parent item avg_time
@@ -238,8 +236,8 @@ auto symcache::load_items() -> bool
item->st->avg_time = parent->st->avg_time;
}
- cache->total_weight += fabs(item->st->weight);
- cache->total_hits += item->st->total_hits;
+ total_weight += fabs(item->st->weight);
+ total_hits += item->st->total_hits;
}
}
@@ -249,41 +247,113 @@ auto symcache::load_items() -> bool
return true;
}
-auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item_ptr &
+template<typename T>
+static constexpr auto round_to_hundreds(T x)
+{
+ return (::floor(x) * 100.0) / 100.0;
+}
+
+bool symcache::save_items() const
+{
+ auto file_sink = util::raii_file_sink::create(cfg->cache_filename,
+ O_WRONLY | O_TRUNC, 00644);
+
+ if (!file_sink.has_value()) {
+ if (errno == EEXIST) {
+ /* Some other process is already writing data, give up silently */
+ return false;
+ }
+
+ msg_err_cache("%s", file_sink.error().c_str());
+
+ return false;
+ }
+
+ struct symcache_header hdr;
+ memset(&hdr, 0, sizeof(hdr));
+ memcpy(hdr.magic, symcache_magic, sizeof(symcache_magic));
+
+ if (write(file_sink->get_fd(), &hdr, sizeof(hdr)) == -1) {
+ msg_err_cache("cannot write to file %s, error %d, %s", cfg->cache_filename,
+ errno, strerror(errno));
+
+ return false;
+ }
+
+ auto *top = ucl_object_typed_new(UCL_OBJECT);
+
+ for (const auto &it : items_by_symbol) {
+ auto item = it.second;
+ auto elt = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(elt,
+ ucl_object_fromdouble(round_to_hundreds(item->st->weight)),
+ "weight", 0, false);
+ ucl_object_insert_key(elt,
+ ucl_object_fromdouble(round_to_hundreds(item->st->time_counter.mean)),
+ "time", 0, false);
+ ucl_object_insert_key(elt, ucl_object_fromint(item->st->total_hits),
+ "count", 0, false);
+
+ auto *freq = ucl_object_typed_new(UCL_OBJECT);
+ ucl_object_insert_key(freq,
+ ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.mean)),
+ "avg", 0, false);
+ ucl_object_insert_key(freq,
+ ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.stddev)),
+ "stddev", 0, false);
+ ucl_object_insert_key(elt, freq, "frequency", 0, false);
+
+ ucl_object_insert_key(top, elt, it.first.data(), 0, true);
+ }
+
+ auto fp = fdopen(file_sink->get_fd(), "a");
+ auto *efunc = ucl_object_emit_file_funcs(fp);
+ auto ret = ucl_object_emit_full(top, UCL_EMIT_JSON_COMPACT, efunc, nullptr);
+ ucl_object_emit_funcs_free(efunc);
+ ucl_object_unref(top);
+ fclose(fp);
+
+ return ret;
+}
+
+
+auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item *
{
if (id < 0 || id >= items_by_id.size()) {
+ g_error("internal error: requested item with id %d, when we have just %d items in the cache",
+ id, (int)items_by_id.size());
g_abort();
}
auto &ret = items_by_id[id];
if (!ret) {
- g_abort();
+ return nullptr;
}
if (resolve_parent && ret->is_virtual()) {
return ret->get_parent(*this);
}
- return ret;
+ return ret.get();
}
-auto cache_item::get_parent(const symcache &cache) const -> const cache_item_ptr &
+auto cache_item::get_parent(const symcache &cache) const -> const cache_item *
{
if (is_virtual()) {
const auto &virtual_sp = std::get<virtual_item>(specific);
- return virtual_sp.get_parent()
+ return virtual_sp.get_parent(cache);
}
- return cache_item_ptr{nullptr};
+ return nullptr;
}
-auto virtual_item::get_parent(const symcache &cache) const -> const cache_item_ptr &
+auto virtual_item::get_parent(const symcache &cache) const -> const cache_item *
{
if (parent) {
- return parent;
+ return parent.get();
}
return cache.get_item_by_id(parent_id, false);
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx
index a1207fc97..420004064 100644
--- a/src/libserver/symcache/symcache_internal.hxx
+++ b/src/libserver/symcache/symcache_internal.hxx
@@ -36,24 +36,24 @@
#include "lua/lua_common.h"
#define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
- cache->static_pool->tag.tagname, cache->cfg->checksum, \
- G_STRFUNC, \
+ static_pool->tag.tagname, cfg->checksum, \
+ RSPAMD_LOG_FUNC, \
__VA_ARGS__)
#define msg_warn_cache(...) rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
static_pool->tag.tagname, cfg->checksum, \
- G_STRFUNC, \
+ RSPAMD_LOG_FUNC, \
__VA_ARGS__)
#define msg_info_cache(...) rspamd_default_log_function (G_LOG_LEVEL_INFO, \
static_pool->tag.tagname, cfg->checksum, \
- G_STRFUNC, \
+ RSPAMD_LOG_FUNC, \
__VA_ARGS__)
#define msg_debug_cache(...) rspamd_conditional_debug_fast (NULL, NULL, \
rspamd_symcache_log_id, "symcache", cfg->checksum, \
- G_STRFUNC, \
+ RSPAMD_LOG_FUNC, \
__VA_ARGS__)
#define msg_debug_cache_task(...) rspamd_conditional_debug_fast (NULL, NULL, \
rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
- G_STRFUNC, \
+ RSPAMD_LOG_FUNC, \
__VA_ARGS__)
namespace rspamd::symcache {
@@ -214,7 +214,7 @@ public:
// TODO
}
- auto get_parent(const symcache &cache) const -> const cache_item_ptr&;
+ auto get_parent(const symcache &cache) const -> const cache_item *;
};
struct cache_item {
@@ -253,7 +253,7 @@ struct cache_item {
std::vector<cache_item_ptr> rdeps;
auto is_virtual() const -> bool { return std::holds_alternative<virtual_item>(specific); }
- auto get_parent(const symcache &cache) const -> const cache_item_ptr &;
+ auto get_parent(const symcache &cache) const -> const cache_item *;
};
struct delayed_cache_dependency {
@@ -306,6 +306,7 @@ private:
private:
/* Internal methods */
auto load_items() -> bool;
+ auto save_items() const -> bool;
public:
explicit symcache(struct rspamd_config *cfg) : cfg(cfg) {
@@ -326,7 +327,7 @@ public:
}
}
- auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item_ptr &;
+ auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item *;
/*
* Initialises the symbols cache, must be called after all symbols are added
More information about the Commits
mailing list