commit f6cbd5b: [Rework] Use faster hashing approach for memory pools variables
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Jan 27 16:07:06 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-01-27 16:03:08 +0000
URL: https://github.com/rspamd/rspamd/commit/f6cbd5ba48d9072b7dda7d3043c88b06b8296f46 (HEAD -> master)
[Rework] Use faster hashing approach for memory pools variables
---
src/libutil/mem_pool.c | 112 +++++++++++++++++++++++++++++++++++-----
src/libutil/mem_pool_internal.h | 13 ++++-
2 files changed, 112 insertions(+), 13 deletions(-)
diff --git a/src/libutil/mem_pool.c b/src/libutil/mem_pool.c
index d17c72bb8..9644485f4 100644
--- a/src/libutil/mem_pool.c
+++ b/src/libutil/mem_pool.c
@@ -773,7 +773,7 @@ rspamd_mempool_delete (rspamd_mempool_t * pool)
pool->priv->elt_len,
pool->priv->used_memory,
pool->priv->wasted_memory,
- pool->priv->variables ? g_hash_table_size (pool->priv->variables) : (gsize)0,
+ pool->priv->variables ? (gsize)kh_size (pool->priv->variables) : (gsize)0,
ndtor);
GHashTableIter it;
@@ -847,7 +847,45 @@ rspamd_mempool_delete (rspamd_mempool_t * pool)
}
if (pool->priv->variables) {
- g_hash_table_destroy (pool->priv->variables);
+ struct rspamd_mempool_variable *var;
+ kh_foreach_value_ptr (pool->priv->variables, var, {
+ if (var->dtor) {
+ var->dtor (var->data);
+ }
+ });
+
+ if (pool->priv->entry && pool->priv->entry->cur_vars <
+ kh_size (pool->priv->variables)) {
+ /*
+ * Increase preallocated size in two cases:
+ * 1) Our previous guess was zero
+ * 2) Our new variables count is not more than twice larger than
+ * previous count
+ * 3) Our variables count is less than some hard limit
+ */
+ static const guint max_preallocated_vars = 512;
+
+ guint cur_size = kh_size (pool->priv->variables);
+ guint old_guess = pool->priv->entry->cur_vars;
+ guint new_guess;
+
+ if (old_guess == 0) {
+ new_guess = MIN (cur_size, max_preallocated_vars);
+ }
+ else {
+ if (old_guess * 2 < cur_size) {
+ new_guess = MIN (cur_size, max_preallocated_vars);
+ }
+ else {
+ /* Too large step */
+ new_guess = MIN (old_guess * 2, max_preallocated_vars);
+ }
+ }
+
+ pool->priv->entry->cur_vars = new_guess;
+ }
+
+ kh_destroy (rspamd_mempool_vars_hash, pool->priv->variables);
}
if (pool->priv->trash_stack) {
@@ -1107,20 +1145,41 @@ rspamd_mempool_wunlock_rwlock (rspamd_mempool_rwlock_t * lock)
}
#endif
+#define RSPAMD_MEMPOOL_VARS_HASH_SEED 0xb32ad7c55eb2e647ULL
void
rspamd_mempool_set_variable (rspamd_mempool_t *pool,
- const gchar *name,
- gpointer value,
- rspamd_mempool_destruct_t destructor)
+ const gchar *name,
+ gpointer value,
+ rspamd_mempool_destruct_t destructor)
{
if (pool->priv->variables == NULL) {
- pool->priv->variables = g_hash_table_new (rspamd_str_hash, rspamd_str_equal);
+
+ pool->priv->variables = kh_init (rspamd_mempool_vars_hash);
+
+ if (pool->priv->entry->cur_vars > 0) {
+ /* Preallocate */
+ kh_resize (rspamd_mempool_vars_hash,
+ pool->priv->variables,
+ pool->priv->entry->cur_vars);
+ }
}
- g_hash_table_insert (pool->priv->variables, rspamd_mempool_strdup (pool,
- name), value);
- if (destructor != NULL) {
- rspamd_mempool_add_destructor (pool, destructor, value);
+ gint hv = rspamd_cryptobox_fast_hash (name, strlen (name),
+ RSPAMD_MEMPOOL_VARS_HASH_SEED);
+ khiter_t it;
+ gint r;
+
+ it = kh_put (rspamd_mempool_vars_hash, pool->priv->variables, hv, &r);
+
+ if (it == kh_end (pool->priv->variables)) {
+ g_assert_not_reached ();
+ }
+ else {
+ struct rspamd_mempool_variable *pvar;
+
+ pvar = &kh_val (pool->priv->variables, it);
+ pvar->data = value;
+ pvar->dtor = destructor;
}
}
@@ -1131,14 +1190,43 @@ rspamd_mempool_get_variable (rspamd_mempool_t *pool, const gchar *name)
return NULL;
}
- return g_hash_table_lookup (pool->priv->variables, name);
+ khiter_t it;
+ gint hv = rspamd_cryptobox_fast_hash (name, strlen (name),
+ RSPAMD_MEMPOOL_VARS_HASH_SEED);
+
+ it = kh_get (rspamd_mempool_vars_hash, pool->priv->variables, hv);
+
+ if (it != kh_end (pool->priv->variables)) {
+ struct rspamd_mempool_variable *pvar;
+
+ pvar = &kh_val (pool->priv->variables, it);
+ return pvar->data;
+ }
+
+ return NULL;
}
void
rspamd_mempool_remove_variable (rspamd_mempool_t *pool, const gchar *name)
{
if (pool->priv->variables != NULL) {
- g_hash_table_remove (pool->priv->variables, name);
+ khiter_t it;
+ gint hv = rspamd_cryptobox_fast_hash (name, strlen (name),
+ RSPAMD_MEMPOOL_VARS_HASH_SEED);
+
+ it = kh_get (rspamd_mempool_vars_hash, pool->priv->variables, hv);
+
+ if (it != kh_end (pool->priv->variables)) {
+ struct rspamd_mempool_variable *pvar;
+
+ pvar = &kh_val (pool->priv->variables, it);
+
+ if (pvar->dtor) {
+ pvar->dtor (pvar->data);
+ }
+
+ kh_del (rspamd_mempool_vars_hash, pool->priv->variables, it);
+ }
}
}
diff --git a/src/libutil/mem_pool_internal.h b/src/libutil/mem_pool_internal.h
index 1f253e790..24bd77a71 100644
--- a/src/libutil/mem_pool_internal.h
+++ b/src/libutil/mem_pool_internal.h
@@ -41,6 +41,7 @@ struct rspamd_mempool_entry_point {
gchar src[ENTRY_LEN];
guint32 cur_suggestion;
guint32 cur_elts;
+ guint32 cur_vars;
struct entry_elt elts[ENTRY_NELTS];
};
@@ -55,11 +56,21 @@ struct _pool_destructors {
struct _pool_destructors *next;
};
+
+struct rspamd_mempool_variable {
+ gpointer data;
+ rspamd_mempool_destruct_t dtor;
+};
+
+KHASH_INIT (rspamd_mempool_vars_hash,
+ guint32, struct rspamd_mempool_variable, 1,
+ kh_int_hash_func, kh_int_hash_equal);
+
struct rspamd_mempool_specific {
struct _pool_chain *pools[RSPAMD_MEMPOOL_MAX];
struct _pool_destructors *dtors_head, *dtors_tail;
GPtrArray *trash_stack;
- GHashTable *variables; /**< private memory pool variables */
+ khash_t(rspamd_mempool_vars_hash) *variables;
struct rspamd_mempool_entry_point *entry;
gsize elt_len; /**< size of an element */
gsize used_memory;
More information about the Commits
mailing list