commit 1e02b32: [Rework] Use a more sane data structure and refactor

Vsevolod Stakhov vsevolod at rspamd.com
Mon Jul 29 17:49:10 UTC 2024


Author: Vsevolod Stakhov
Date: 2023-11-21 15:08:08 +0000
URL: https://github.com/rspamd/rspamd/commit/1e02b32deb13a96181950e6555bf64b2129f3989

[Rework] Use a more sane data structure and refactor

---
 src/controller.c                      | 16 +++++++++------
 src/fuzzy_storage.c                   |  2 +-
 src/libserver/fuzzy_wire.h            |  2 +-
 src/libserver/mempool_vars_internal.h |  7 ++++---
 src/plugins/fuzzy_check.c             | 38 +++++++++++++++++++++++------------
 5 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/src/controller.c b/src/controller.c
index 13e6794af..eb58db211 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -33,6 +33,7 @@
 #include "unix-std.h"
 #include "utlist.h"
 #include "libmime/lang_detection.h"
+#include "mempool_vars_internal.h"
 #include <math.h>
 
 /* 60 seconds for worker's IO */
@@ -2602,14 +2603,15 @@ rspamd_controller_stat_fin_task(void *ud)
 		ucl_object_insert_key(top, cbdata->stat, "statfiles", 0, false);
 	}
 
-	GList *fuzzy_elts = rspamd_mempool_get_variable(cbdata->task->task_pool, "fuzzy_stat");
+	GHashTable *fuzzy_elts = rspamd_mempool_get_variable(cbdata->task->task_pool, RSPAMD_MEMPOOL_FUZZY_STAT);
 
 	if (fuzzy_elts) {
 		ar = ucl_object_typed_new(UCL_OBJECT);
 
-		for (GList *cur = fuzzy_elts; cur != NULL; cur = g_list_next(cur)) {
-			entry = cur->data;
+		GHashTableIter it;
 
+		g_hash_table_iter_init(&it, fuzzy_elts);
+		while (g_hash_table_iter_next(&it, NULL, (gpointer *) &entry)) {
 			if (entry->name) {
 				ucl_object_insert_key(ar, ucl_object_fromint(entry->fuzzy_cnt),
 									  entry->name, 0, true);
@@ -3053,14 +3055,16 @@ rspamd_controller_metrics_fin_task(void *ud)
 		rspamd_fstring_free(users);
 	}
 
-	GList *fuzzy_elts = rspamd_mempool_get_variable(cbdata->task->task_pool, "fuzzy_stat");
+	GHashTable *fuzzy_elts = rspamd_mempool_get_variable(cbdata->task->task_pool, RSPAMD_MEMPOOL_FUZZY_STAT);
 
 	if (fuzzy_elts) {
 		rspamd_printf_fstring(&output, "# HELP rspamd_fuzzy_stat Fuzzy stat labelled by storage.\n");
 		rspamd_printf_fstring(&output, "# TYPE rspamd_fuzzy_stat gauge\n");
-		for (GList *cur = fuzzy_elts; cur != NULL; cur = g_list_next(cur)) {
-			entry = cur->data;
 
+		GHashTableIter it;
+
+		g_hash_table_iter_init(&it, fuzzy_elts);
+		while (g_hash_table_iter_next(&it, NULL, (gpointer *) &entry)) {
 			if (entry->name) {
 				rspamd_printf_fstring(&output, "rspamd_fuzzy_stat{storage=\"%s\"} %ud\n",
 									  entry->name, entry->fuzzy_cnt);
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index 99d2ef1a1..569889660 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -1335,7 +1335,7 @@ rspamd_fuzzy_process_command(struct fuzzy_session *session)
 		result.v1.prob = 1.0f;
 		/* Store high qword in value and low qword in flag */
 		result.v1.value = (gint32) ((guint64) session->ctx->stat.fuzzy_hashes >> 32);
-		result.v1.flag = session->ctx->stat.fuzzy_hashes & G_MAXUINT32;
+		result.v1.flag = (guint32) (session->ctx->stat.fuzzy_hashes & G_MAXUINT32);
 		rspamd_fuzzy_make_reply(cmd, &result, session, send_flags);
 	}
 	else if (cmd->cmd == FUZZY_PING) {
diff --git a/src/libserver/fuzzy_wire.h b/src/libserver/fuzzy_wire.h
index 989a31eb4..c2f93b8dc 100644
--- a/src/libserver/fuzzy_wire.h
+++ b/src/libserver/fuzzy_wire.h
@@ -135,7 +135,7 @@ struct rspamd_fuzzy_cmd_extension {
 
 struct rspamd_fuzzy_stat_entry {
 	const gchar *name;
-	guint32 fuzzy_cnt;
+	guint64 fuzzy_cnt;
 };
 
 RSPAMD_PACKED(fuzzy_peer_cmd)
diff --git a/src/libserver/mempool_vars_internal.h b/src/libserver/mempool_vars_internal.h
index 72cf1b095..6c9553868 100644
--- a/src/libserver/mempool_vars_internal.h
+++ b/src/libserver/mempool_vars_internal.h
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
+/*
+ * Copyright 2023 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -42,5 +42,6 @@
 #define RSPAMD_MEMPOOL_HAM_LEARNS "ham_learns"
 #define RSPAMD_MEMPOOL_RE_MAPS_CACHE "re_maps_cache"
 #define RSPAMD_MEMPOOL_HTTP_STAT_BACKEND_RUNTIME "stat_http_runtime"
+#define RSPAMD_MEMPOOL_FUZZY_STAT "fuzzy_stat"
 
 #endif
diff --git a/src/plugins/fuzzy_check.c b/src/plugins/fuzzy_check.c
index ed85d7933..85db83d08 100644
--- a/src/plugins/fuzzy_check.c
+++ b/src/plugins/fuzzy_check.c
@@ -2448,24 +2448,36 @@ fuzzy_check_try_read(struct fuzzy_client_session *session)
 					fuzzy_insert_result(session, rep, cmd, io, rep->v1.flag);
 				}
 				else if (cmd->cmd == FUZZY_STAT) {
-					/* Just set pool variable to extract it in further */
+					/*
+					 * We store fuzzy stat in the following way:
+					 * 1) We store fuzzy hashes as a hash of rspamd_fuzzy_stat_entry
+					 * 2) We store the resulting hash table inside pool variable `fuzzy_stat`
+					 */
 					struct rspamd_fuzzy_stat_entry *pval;
-					GList *res;
+					GHashTable *stats_hash;
 
-					pval = rspamd_mempool_alloc(task->task_pool, sizeof(*pval));
-					pval->fuzzy_cnt = rep->v1.flag;
-					pval->name = session->rule->name;
+					stats_hash = (GHashTable *) rspamd_mempool_get_variable(task->task_pool,
+																			RSPAMD_MEMPOOL_FUZZY_STAT);
 
-					res = rspamd_mempool_get_variable(task->task_pool, "fuzzy_stat");
-
-					if (res == NULL) {
-						res = g_list_append(NULL, pval);
-						rspamd_mempool_set_variable(task->task_pool, "fuzzy_stat",
-													res, (rspamd_mempool_destruct_t) g_list_free);
+					if (stats_hash == NULL) {
+						stats_hash = g_hash_table_new(rspamd_str_hash, rspamd_str_equal);
+						rspamd_mempool_set_variable(task->task_pool, RSPAMD_MEMPOOL_FUZZY_STAT,
+													stats_hash,
+													(rspamd_mempool_destruct_t) g_hash_table_destroy);
 					}
-					else {
-						res = g_list_append(res, pval);
+
+					pval = g_hash_table_lookup(stats_hash, session->rule->name);
+
+					if (pval == NULL) {
+						pval = rspamd_mempool_alloc(task->task_pool,
+													sizeof(*pval));
+						pval->name = rspamd_mempool_strdup(task->task_pool,
+														   session->rule->name);
+						/* Safe, as pval->name is owned by the pool */
+						g_hash_table_insert(stats_hash, (char *) pval->name, pval);
 					}
+
+					pval->fuzzy_cnt = (((guint64) rep->v1.value) << 32) + rep->v1.flag;
 				}
 			}
 			else if (rep->v1.value == 403) {


More information about the Commits mailing list