commit 562b5dd: [Rework] Reimplement saving/loading the cache items

Vsevolod Stakhov vsevolod at rspamd.com
Sat Apr 30 19:21:14 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-04-03 12:36:55 +0100
URL: https://github.com/rspamd/rspamd/commit/562b5dd0fd72065176ade3616e2bafe3618879a8

[Rework] Reimplement saving/loading the cache items

---
 src/libserver/symcache/symcache_impl.cxx     | 108 ++++++++++++++++++++++-----
 src/libserver/symcache/symcache_internal.hxx |  19 ++---
 2 files changed, 99 insertions(+), 28 deletions(-)

diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index aaf8b0cdd..7836c80b9 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -142,7 +142,7 @@ auto symcache::load_items() -> bool
 		return false;
 	}
 
-	const auto *hdr = (struct symcache_header *)cached_map->get_map();
+	const auto *hdr = (struct symcache_header *) cached_map->get_map();
 
 	if (memcmp(hdr->magic, symcache_magic,
 			sizeof(symcache_magic)) != 0) {
@@ -152,7 +152,7 @@ auto symcache::load_items() -> bool
 	}
 
 	auto *parser = ucl_parser_new(0);
-	const auto *p = (const std::uint8_t *)(hdr + 1);
+	const auto *p = (const std::uint8_t *) (hdr + 1);
 
 	if (!ucl_parser_add_chunk(parser, p, cached_map->get_size() - sizeof(*hdr))) {
 		msg_info_cache ("cannot use file %s, cannot parse: %s", cfg->cache_filename,
@@ -222,15 +222,13 @@ auto symcache::load_items() -> bool
 			}
 
 			if (item->is_virtual() && !(item->type & SYMBOL_TYPE_GHOST)) {
-				g_assert (item->specific.virtual.parent < (gint)cache->items_by_id->len);
-				parent = g_ptr_array_index (cache->items_by_id,
-						item->specific.virtual.parent);
-				item->specific.virtual.parent_item = parent;
+				const auto &parent = item->get_parent(*this);
 
-				if (parent->st->weight < item->st->weight) {
-					parent->st->weight = item->st->weight;
+				if (parent) {
+					if (parent->st->weight < item->st->weight) {
+						parent->st->weight = item->st->weight;
+					}
 				}
-
 				/*
 				 * We maintain avg_time for virtual symbols equal to the
 				 * parent item avg_time
@@ -238,8 +236,8 @@ auto symcache::load_items() -> bool
 				item->st->avg_time = parent->st->avg_time;
 			}
 
-			cache->total_weight += fabs(item->st->weight);
-			cache->total_hits += item->st->total_hits;
+			total_weight += fabs(item->st->weight);
+			total_hits += item->st->total_hits;
 		}
 	}
 
@@ -249,41 +247,113 @@ auto symcache::load_items() -> bool
 	return true;
 }
 
-auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item_ptr &
+template<typename T>
+static constexpr auto round_to_hundreds(T x)
+{
+	return (::floor(x) * 100.0) / 100.0;
+}
+
+bool symcache::save_items() const
+{
+	auto file_sink = util::raii_file_sink::create(cfg->cache_filename,
+			O_WRONLY | O_TRUNC, 00644);
+
+	if (!file_sink.has_value()) {
+		if (errno == EEXIST) {
+			/* Some other process is already writing data, give up silently */
+			return false;
+		}
+
+		msg_err_cache("%s", file_sink.error().c_str());
+
+		return false;
+	}
+
+	struct symcache_header hdr;
+	memset(&hdr, 0, sizeof(hdr));
+	memcpy(hdr.magic, symcache_magic, sizeof(symcache_magic));
+
+	if (write(file_sink->get_fd(), &hdr, sizeof(hdr)) == -1) {
+		msg_err_cache("cannot write to file %s, error %d, %s", cfg->cache_filename,
+				errno, strerror(errno));
+
+		return false;
+	}
+
+	auto *top = ucl_object_typed_new(UCL_OBJECT);
+
+	for (const auto &it : items_by_symbol) {
+		auto item = it.second;
+		auto elt = ucl_object_typed_new(UCL_OBJECT);
+		ucl_object_insert_key(elt,
+				ucl_object_fromdouble(round_to_hundreds(item->st->weight)),
+				"weight", 0, false);
+		ucl_object_insert_key(elt,
+				ucl_object_fromdouble(round_to_hundreds(item->st->time_counter.mean)),
+				"time", 0, false);
+		ucl_object_insert_key(elt, ucl_object_fromint(item->st->total_hits),
+				"count", 0, false);
+
+		auto *freq = ucl_object_typed_new(UCL_OBJECT);
+		ucl_object_insert_key(freq,
+				ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.mean)),
+				"avg", 0, false);
+		ucl_object_insert_key(freq,
+				ucl_object_fromdouble(round_to_hundreds(item->st->frequency_counter.stddev)),
+				"stddev", 0, false);
+		ucl_object_insert_key(elt, freq, "frequency", 0, false);
+
+		ucl_object_insert_key(top, elt, it.first.data(), 0, true);
+	}
+
+	auto fp = fdopen(file_sink->get_fd(), "a");
+	auto *efunc = ucl_object_emit_file_funcs(fp);
+	auto ret = ucl_object_emit_full(top, UCL_EMIT_JSON_COMPACT, efunc, nullptr);
+	ucl_object_emit_funcs_free(efunc);
+	ucl_object_unref(top);
+	fclose(fp);
+
+	return ret;
+}
+
+
+auto symcache::get_item_by_id(int id, bool resolve_parent) const -> const cache_item *
 {
 	if (id < 0 || id >= items_by_id.size()) {
+		g_error("internal error: requested item with id %d, when we have just %d items in the cache",
+				id, (int)items_by_id.size());
 		g_abort();
 	}
 
 	auto &ret = items_by_id[id];
 
 	if (!ret) {
-		g_abort();
+		return nullptr;
 	}
 
 	if (resolve_parent && ret->is_virtual()) {
 		return ret->get_parent(*this);
 	}
 
-	return ret;
+	return ret.get();
 }
 
 
-auto cache_item::get_parent(const symcache &cache) const -> const cache_item_ptr &
+auto cache_item::get_parent(const symcache &cache) const -> const cache_item *
 {
 	if (is_virtual()) {
 		const auto &virtual_sp = std::get<virtual_item>(specific);
 
-		return virtual_sp.get_parent()
+		return virtual_sp.get_parent(cache);
 	}
 
-	return cache_item_ptr{nullptr};
+	return nullptr;
 }
 
-auto virtual_item::get_parent(const symcache &cache) const -> const cache_item_ptr &
+auto virtual_item::get_parent(const symcache &cache) const -> const cache_item *
 {
 	if (parent) {
-		return parent;
+		return parent.get();
 	}
 
 	return cache.get_item_by_id(parent_id, false);
diff --git a/src/libserver/symcache/symcache_internal.hxx b/src/libserver/symcache/symcache_internal.hxx
index a1207fc97..420004064 100644
--- a/src/libserver/symcache/symcache_internal.hxx
+++ b/src/libserver/symcache/symcache_internal.hxx
@@ -36,24 +36,24 @@
 #include "lua/lua_common.h"
 
 #define msg_err_cache(...) rspamd_default_log_function (G_LOG_LEVEL_CRITICAL, \
-        cache->static_pool->tag.tagname, cache->cfg->checksum, \
-        G_STRFUNC, \
+        static_pool->tag.tagname, cfg->checksum, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 #define msg_warn_cache(...)   rspamd_default_log_function (G_LOG_LEVEL_WARNING, \
         static_pool->tag.tagname, cfg->checksum, \
-        G_STRFUNC, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 #define msg_info_cache(...)   rspamd_default_log_function (G_LOG_LEVEL_INFO, \
         static_pool->tag.tagname, cfg->checksum, \
-        G_STRFUNC, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 #define msg_debug_cache(...)  rspamd_conditional_debug_fast (NULL, NULL, \
         rspamd_symcache_log_id, "symcache", cfg->checksum, \
-        G_STRFUNC, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 #define msg_debug_cache_task(...)  rspamd_conditional_debug_fast (NULL, NULL, \
         rspamd_symcache_log_id, "symcache", task->task_pool->tag.uid, \
-        G_STRFUNC, \
+        RSPAMD_LOG_FUNC, \
         __VA_ARGS__)
 
 namespace rspamd::symcache {
@@ -214,7 +214,7 @@ public:
 		// TODO
 	}
 
-	auto get_parent(const symcache &cache) const -> const cache_item_ptr&;
+	auto get_parent(const symcache &cache) const -> const cache_item *;
 };
 
 struct cache_item {
@@ -253,7 +253,7 @@ struct cache_item {
 	std::vector<cache_item_ptr> rdeps;
 
 	auto is_virtual() const -> bool { return std::holds_alternative<virtual_item>(specific); }
-	auto get_parent(const symcache &cache) const -> const cache_item_ptr &;
+	auto get_parent(const symcache &cache) const -> const cache_item *;
 };
 
 struct delayed_cache_dependency {
@@ -306,6 +306,7 @@ private:
 private:
 	/* Internal methods */
 	auto load_items() -> bool;
+	auto save_items() const -> bool;
 
 public:
 	explicit symcache(struct rspamd_config *cfg) : cfg(cfg) {
@@ -326,7 +327,7 @@ public:
 		}
 	}
 
-	auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item_ptr &;
+	auto get_item_by_id(int id, bool resolve_parent) const -> const cache_item *;
 
 	/*
 	 * Initialises the symbols cache, must be called after all symbols are added


More information about the Commits mailing list