commit b3a343b: [Project] Cdb: Use shared data between cdb statfiles

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Oct 19 10:56:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-10-19 11:39:00 +0100
URL: https://github.com/rspamd/rspamd/commit/b3a343bf86468d88998c36c7363d13e3a0dc9cee

[Project] Cdb: Use shared data between cdb statfiles

---
 src/libstat/backends/cdb_backend.cxx | 114 +++++++++++++++++++++++++++++------
 1 file changed, 95 insertions(+), 19 deletions(-)

diff --git a/src/libstat/backends/cdb_backend.cxx b/src/libstat/backends/cdb_backend.cxx
index 899ad949a..46b1f8121 100644
--- a/src/libstat/backends/cdb_backend.cxx
+++ b/src/libstat/backends/cdb_backend.cxx
@@ -27,14 +27,81 @@
 #include <string>
 #include <optional>
 #include "contrib/expected/expected.hpp"
+#include "contrib/robin-hood/robin_hood.h"
 #include "fmt/core.h"
 
 namespace rspamd::stat::cdb {
 
+/*
+ * Utility class to share cdb instances among statfile instances, as each
+ * cdb has tokens for both ham and spam classes
+ */
+class cdb_shared_storage {
+public:
+	using cdb_element_t = std::shared_ptr<struct cdb>;
+	cdb_shared_storage() noexcept = default;
+	/* Return the cached cdb for path if its weak pointer is still alive, std::nullopt otherwise */
+	auto get_cdb(const char *path) const -> std::optional<cdb_element_t> {
+		auto found = elts.find(path);
+
+		if (found != elts.end()) {
+			if (!found->second.expired()) {
+				return found->second.lock();
+			}
+		}
+
+		return std::nullopt;
+	}
+	/* Create a new zero-filled cdb structure owned by a shared pointer that uses cdb_deleter */
+	static auto new_cdb() -> cdb_element_t {
+		auto ret = cdb_element_t(new struct cdb, cdb_deleter());
+		memset(ret.get(), 0, sizeof(struct cdb));
+		return ret;
+	}
+	/* Register cdbp under path; returns the live cached cdb if one exists, otherwise cdbp itself */
+	auto push_cdb(const char *path, cdb_element_t cdbp) -> cdb_element_t {
+		auto found = elts.find(path);
+
+		if (found != elts.end()) {
+			if (found->second.expired()) {
+				/* Entry is stale: replace the expired weak pointer with one observing cdbp */
+
+				found->second = cdbp;
+				return cdbp;
+			}
+			else {
+				/*
+				 * Existing and not expired, return the existing one (cdbp is not stored)
+				 */
+				return found->second.lock();
+			}
+		}
+		else {
+			/* No entry for this path yet: store a weak ptr and return the original */
+			elts.emplace(path,std::weak_ptr<struct cdb>(cdbp));
+			return cdbp;
+		}
+	}
+private:
+	/*
+	 * We store weak pointers here to allow owning cdb statfiles to free
+	 * expensive cdb before this cache is terminated (e.g. on dynamic cdb reload)
+	 */
+	robin_hood::unordered_flat_map<std::string, std::weak_ptr<struct cdb>> elts;
+	/* Deleter attached to the shared pointers created in new_cdb() */
+	struct cdb_deleter {
+		void operator()(struct cdb *c) const {
+			cdb_free(c); /* NOTE(review): struct comes from `new` in new_cdb() but is never `delete`d here; confirm cdb_free() releases it, otherwise it leaks */
+		}
+	};
+};
+
+static cdb_shared_storage cdb_shared_storage;
+
 class ro_backend final {
 public:
-	explicit ro_backend(struct rspamd_statfile *_st, std::unique_ptr<struct cdb> &&_db)
-			: st(_st), db(std::move(_db)) {}
+	explicit ro_backend(struct rspamd_statfile *_st, cdb_shared_storage::cdb_element_t _db)
+			: st(_st), db(_db) {}
 	ro_backend() = delete;
 	ro_backend(const ro_backend &) = delete;
 	ro_backend(ro_backend &&other) noexcept {
@@ -47,18 +114,13 @@ public:
 
 		return *this;
 	}
-	~ro_backend() {
-		if (db) {
-			// Might be worth to use unique ptr with a custom deleter
-			cdb_free(db.get());
-		}
-	}
+	~ro_backend() {}
 
 	auto load_cdb() -> tl::expected<bool, std::string>;
 	auto process_token(const rspamd_token_t *tok) const -> std::optional<float>;
 private:
 	struct rspamd_statfile *st;
-	std::unique_ptr<struct cdb> db;
+	cdb_shared_storage::cdb_element_t db;
 	bool loaded = false;
 	std::uint64_t learns_spam = 0;
 	std::uint64_t learns_ham = 0;
@@ -190,21 +252,35 @@ open_cdb(struct rspamd_statfile *st) -> tl::expected<ro_backend, std::string>
 	if (filename && ucl_object_type(filename) == UCL_STRING) {
 		const auto *path = ucl_object_tostring(filename);
 
-		auto fd = rspamd_file_xopen(path, O_RDONLY, 0, true);
+		auto cached_cdb_maybe = cdb_shared_storage.get_cdb(path);
+		cdb_shared_storage::cdb_element_t cdbp;
 
-		if (fd == -1) {
-			return tl::make_unexpected(fmt::format("cannot open {}: {}",
-					path, strerror(errno)));
-		}
+		if (!cached_cdb_maybe) {
+
+			auto fd = rspamd_file_xopen(path, O_RDONLY, 0, true);
+
+			if (fd == -1) {
+				return tl::make_unexpected(fmt::format("cannot open {}: {}",
+						path, strerror(errno)));
+			}
+
+			cdbp = cdb_shared_storage::new_cdb();
 
-		auto &&cdbs = std::make_unique<struct cdb>();
+			if (cdb_init(cdbp.get(), fd) == -1) {
+				return tl::make_unexpected(fmt::format("cannot init cdb in {}: {}",
+						path, strerror(errno)));
+			}
+		}
+		else {
+			cdbp = cached_cdb_maybe.value();
+		}
 
-		if (cdb_init(cdbs.get(), fd) == -1) {
-			return tl::make_unexpected(fmt::format("cannot init cdb in {}: {}",
-					path, strerror(errno)));
+		if (!cdbp) {
+			return tl::make_unexpected(fmt::format("cannot init cdb in {}: internal error",
+					path));
 		}
 
-		ro_backend bk{st, std::move(cdbs)};
+		ro_backend bk{st, cdbp};
 
 		auto res = bk.load_cdb();
 


More information about the Commits mailing list