commit f0f53c0: [Minor] Simplify id_list by using of the small vector

Vsevolod Stakhov vsevolod at rspamd.com
Sun Jul 17 19:42:07 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-07-17 18:16:05 +0100
URL: https://github.com/rspamd/rspamd/commit/f0f53c0fa6d0a80512df47a1fc4307c1933037e2

[Minor] Simplify id_list by using of the small vector

---
 src/libserver/symcache/symcache_c.cxx       |   4 +-
 src/libserver/symcache/symcache_id_list.hxx | 135 ++++++----------------------
 src/libserver/symcache/symcache_impl.cxx    |   8 +-
 src/libserver/symcache/symcache_item.cxx    |   4 +-
 src/libserver/symcache/symcache_item.hxx    |   6 +-
 5 files changed, 37 insertions(+), 120 deletions(-)

diff --git a/src/libserver/symcache/symcache_c.cxx b/src/libserver/symcache/symcache_c.cxx
index 580e0d137..e0d8711f1 100644
--- a/src/libserver/symcache/symcache_c.cxx
+++ b/src/libserver/symcache/symcache_c.cxx
@@ -380,7 +380,7 @@ rspamd_symcache_set_allowed_settings_ids(struct rspamd_symcache *cache,
 		return false;
 	}
 
-	item->allowed_ids.set_ids(ids, nids, real_cache->get_pool());
+	item->allowed_ids.set_ids(ids, nids);
 	return true;
 }
 
@@ -398,7 +398,7 @@ rspamd_symcache_set_forbidden_settings_ids(struct rspamd_symcache *cache,
 		return false;
 	}
 
-	item->forbidden_ids.set_ids(ids, nids, real_cache->get_pool());
+	item->forbidden_ids.set_ids(ids, nids);
 	return true;
 }
 
diff --git a/src/libserver/symcache/symcache_id_list.hxx b/src/libserver/symcache/symcache_id_list.hxx
index 10a3ba2b7..31da62abd 100644
--- a/src/libserver/symcache/symcache_id_list.hxx
+++ b/src/libserver/symcache/symcache_id_list.hxx
@@ -24,6 +24,7 @@
 
 #include "config.h"
 #include "libutil/mem_pool.h"
+#include "contrib/ankerl/svector.h"
 
 namespace rspamd::symcache {
 /*
@@ -31,22 +32,17 @@ namespace rspamd::symcache {
  * - If the first element is -1 then use dynamic part, else use static part
  * There is no std::variant to save space
  */
+
+constexpr const auto id_capacity = 4;
+constexpr const auto id_sort_threshold = 32;
+
 struct id_list {
-	union {
-		std::uint32_t st[4];
-		struct {
-			std::uint32_t e; /* First element */
-			std::uint16_t len;
-			std::uint16_t allocated;
-			std::uint32_t *n;
-		} dyn;
-	} data;
+	ankerl::svector<std::uint32_t, id_capacity> data;
 
 	id_list() = default;
 
-	auto reset()
-	{
-		std::memset(&data, 0, sizeof(data));
+	auto reset(){
+		data.clear();
 	}
 
 	/**
@@ -56,121 +52,42 @@ struct id_list {
 	 */
 	auto get_ids(unsigned &nids) const -> const std::uint32_t *
 	{
-		if (data.dyn.e == -1) {
-			/* Dynamic list */
-			nids = data.dyn.len;
-
-			return data.dyn.n;
-		}
-		else {
-			auto cnt = 0;
-
-			while (data.st[cnt] != 0 && cnt < G_N_ELEMENTS(data.st)) {
-				cnt++;
-			}
+		nids = data.size();
 
-			nids = cnt;
-
-			return data.st;
-		}
+		return data.data();
 	}
 
-	auto add_id(std::uint32_t id, rspamd_mempool_t *pool) -> void
+	auto add_id(std::uint32_t id) -> void
 	{
-		if (data.st[0] == -1) {
-			/* Dynamic array */
-			if (data.dyn.len < data.dyn.allocated) {
-				/* Trivial, append + sort */
-				data.dyn.n[data.dyn.len++] = id;
-			}
-			else {
-				/* Reallocate */
-				g_assert(data.dyn.allocated <= G_MAXINT16);
-				data.dyn.allocated *= 2;
-
-				auto *new_array = rspamd_mempool_alloc_array_type(pool,
-						data.dyn.allocated, std::uint32_t);
-				memcpy(new_array, data.dyn.n, data.dyn.len * sizeof(std::uint32_t));
-				data.dyn.n = new_array;
-				data.dyn.n[data.dyn.len++] = id;
-			}
-
-			std::sort(data.dyn.n, data.dyn.n + data.dyn.len);
-		}
-		else {
-			/* Static part */
-			auto cnt = 0u;
-			while (data.st[cnt] != 0 && cnt < G_N_ELEMENTS(data.st)) {
-				cnt++;
-			}
-
-			if (cnt < G_N_ELEMENTS(data.st)) {
-				data.st[cnt] = id;
-			}
-			else {
-				/* Switch to dynamic */
-				data.dyn.allocated = G_N_ELEMENTS(data.st) * 2;
-				auto *new_array = rspamd_mempool_alloc_array_type(pool,
-						data.dyn.allocated, std::uint32_t);
-				memcpy(new_array, data.st, sizeof(data.st));
-				data.dyn.n = new_array;
-				data.dyn.e = -1; /* Marker */
-				data.dyn.len = G_N_ELEMENTS(data.st);
-
-				/* Recursively jump to dynamic branch that will handle insertion + sorting */
-				add_id(id, pool); // tail call
-			}
+		data.push_back(id);
+
+		/* Check sort threshold */
+		if (data.size() > id_sort_threshold) {
+			std::sort(data.begin(), data.end());
 		}
 	}
 
-	auto set_ids(const std::uint32_t *ids, std::size_t nids, rspamd_mempool_t *pool) -> void
-	{
-		if (nids <= G_N_ELEMENTS(data.st)) {
-			/* Use static version */
-			reset();
+	auto set_ids(const std::uint32_t *ids, std::size_t nids) -> void {
+		data.resize(nids);
 
-			for (auto i = 0; i < nids; i++) {
-				data.st[i] = ids[i];
-			}
+		for (auto &id : data) {
+			id = *ids++;
 		}
-		else {
-			/* Need to use a separate list */
-			data.dyn.e = -1; /* Flag */
-			data.dyn.n = rspamd_mempool_alloc_array_type(pool, nids, std::uint32_t);
-			data.dyn.len = nids;
-			data.dyn.allocated = nids;
-
-			for (auto i = 0; i < nids; i++) {
-				data.dyn.n[i] = ids[i];
-			}
-
-			/* Keep sorted */
-			std::sort(data.dyn.n, data.dyn.n + data.dyn.len);
+
+		if (data.size() > id_sort_threshold) {
+			std::sort(data.begin(), data.end());
 		}
 	}
 
 	auto check_id(unsigned int id) const -> bool
 	{
-		if (data.dyn.e == -1) {
-			return std::binary_search(data.dyn.n, data.dyn.n + data.dyn.len, id);
+		if (data.size() > id_sort_threshold) {
+			return std::binary_search(data.begin(), data.end(), id);
 		}
-		else {
-			for (auto elt: data.st) {
-				if (elt == id) {
-					return true;
-				}
-				else if (elt == 0) {
-					return false;
-				}
-			}
-		}
-
-		return false;
+		return std::find(data.begin(), data.end(), id) != data.end();
 	}
 };
 
-static_assert(std::is_trivial_v<id_list>);
-
 }
 
 #endif //RSPAMD_SYMCACHE_ID_LIST_HXX
diff --git a/src/libserver/symcache/symcache_impl.cxx b/src/libserver/symcache/symcache_impl.cxx
index 0ec6d11ec..61a7347ac 100644
--- a/src/libserver/symcache/symcache_impl.cxx
+++ b/src/libserver/symcache/symcache_impl.cxx
@@ -1103,14 +1103,14 @@ symcache::process_settings_elt(struct rspamd_config_settings_elt *elt) -> void
 					 * we ignore them in symcache but prevent them from being
 					 * inserted.
 					 */
-					item->forbidden_ids.add_id(id, static_pool);
+					item->forbidden_ids.add_id(id);
 					msg_debug_cache("deny virtual symbol %s for settings %ud (%s); "
 									"parent can still be executed",
 							sym, id, elt->name);
 				}
 				else {
 					/* Normal symbol, disable it */
-					item->forbidden_ids.add_id(id, static_pool);
+					item->forbidden_ids.add_id(id);
 					msg_debug_cache ("deny symbol %s for settings %ud (%s)",
 							sym, id, elt->name);
 				}
@@ -1146,13 +1146,13 @@ symcache::process_settings_elt(struct rspamd_config_settings_elt *elt) -> void
 							continue;
 						}
 
-						parent->exec_only_ids.add_id(id, static_pool);
+						parent->exec_only_ids.add_id(id);
 						msg_debug_cache ("allow just execution of symbol %s for settings %ud (%s)",
 								parent->symbol.data(), id, elt->name);
 					}
 				}
 
-				item->allowed_ids.add_id(id, static_pool);
+				item->allowed_ids.add_id(id);
 				msg_debug_cache ("allow execution of symbol %s for settings %ud (%s)",
 						sym, id, elt->name);
 			}
diff --git a/src/libserver/symcache/symcache_item.cxx b/src/libserver/symcache/symcache_item.cxx
index 588d2e93f..01f089125 100644
--- a/src/libserver/symcache/symcache_item.cxx
+++ b/src/libserver/symcache/symcache_item.cxx
@@ -117,7 +117,7 @@ auto cache_item::process_deps(const symcache &cache) -> void
 					msg_debug_cache("propagate allowed ids from %s to %s",
 							dit->symbol.c_str(), symbol.c_str());
 
-					allowed_ids.set_ids(ids, nids, cache.get_pool());
+					allowed_ids.set_ids(ids, nids);
 				}
 
 				ids = dit->forbidden_ids.get_ids(nids);
@@ -126,7 +126,7 @@ auto cache_item::process_deps(const symcache &cache) -> void
 					msg_debug_cache("propagate forbidden ids from %s to %s",
 							dit->symbol.c_str(), symbol.c_str());
 
-					forbidden_ids.set_ids(ids, nids, cache.get_pool());
+					forbidden_ids.set_ids(ids, nids);
 				}
 			}
 		}
diff --git a/src/libserver/symcache/symcache_item.hxx b/src/libserver/symcache/symcache_item.hxx
index f0bedea5c..94e3dc57d 100644
--- a/src/libserver/symcache/symcache_item.hxx
+++ b/src/libserver/symcache/symcache_item.hxx
@@ -199,10 +199,10 @@ struct cache_item : std::enable_shared_from_this<cache_item> {
 	std::variant<normal_item, virtual_item> specific;
 
 	/* Settings ids */
-	id_list allowed_ids{};
+	id_list allowed_ids;
 	/* Allows execution but not symbols insertion */
-	id_list exec_only_ids{};
-	id_list forbidden_ids{};
+	id_list exec_only_ids;
+	id_list forbidden_ids;
 
 	/* Set of augmentations */
 	ankerl::unordered_dense::set<std::string, rspamd::smart_str_hash, rspamd::smart_str_equal> augmentations;


More information about the Commits mailing list