commit 3242844: [Feature] Allow to have weak flags in fuzzy storage

Vsevolod Stakhov vsevolod at rspamd.com
Sat May 20 12:21:03 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-05-20 13:19:24 +0100
URL: https://github.com/rspamd/rspamd/commit/32428449bd7a6035cecaaf4a684df997f2a9dab7 (HEAD -> master)

[Feature] Allow to have weak flags in fuzzy storage

---
 src/fuzzy_storage.c                               | 58 +++++++++++++++--------
 src/libserver/fuzzy_backend/fuzzy_backend_redis.c | 14 ++++--
 src/libserver/fuzzy_wire.h                        |  3 ++
 3 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index 8a7a3de4e..fe9160880 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -107,8 +107,7 @@ struct rspamd_leaky_bucket_elt {
 };
 
 static const guint64 rspamd_fuzzy_storage_magic = 0x291a3253eb1b3ea5ULL;
-KHASH_SET_INIT_INT(fuzzy_key_forbidden_ids);
-
+KHASH_SET_INIT_INT(fuzzy_key_ids_set);
 
 struct rspamd_fuzzy_storage_ctx {
 	guint64 magic;
@@ -171,7 +170,9 @@ struct rspamd_fuzzy_storage_ctx {
 	gint lua_pre_handler_cbref;
 	gint lua_post_handler_cbref;
 	gint lua_blacklist_cbref;
-	khash_t(fuzzy_key_forbidden_ids) *default_forbidden_ids;
+	khash_t(fuzzy_key_ids_set) *default_forbidden_ids;
+	/* Ids that should not override other ids */
+	khash_t(fuzzy_key_ids_set) *weak_ids;
 };
 
 enum fuzzy_cmd_type {
@@ -213,7 +214,7 @@ struct fuzzy_key {
 	struct rspamd_cryptobox_pubkey *pk;
 	struct fuzzy_key_stat *stat;
 	khash_t(fuzzy_key_flag_stat) *flags_stat;
-	khash_t(fuzzy_key_forbidden_ids) *forbidden_ids;
+	khash_t(fuzzy_key_ids_set) *forbidden_ids;
 };
 
 struct rspamd_updates_cbdata {
@@ -449,7 +450,7 @@ fuzzy_key_dtor (gpointer p)
 		}
 
 		if (key->forbidden_ids) {
-			kh_destroy(fuzzy_key_forbidden_ids, key->forbidden_ids);
+			kh_destroy(fuzzy_key_ids_set, key->forbidden_ids);
 		}
 
 		g_free (key);
@@ -812,7 +813,7 @@ rspamd_fuzzy_make_reply (struct rspamd_fuzzy_cmd *cmd,
 		{
 			khiter_t k;
 
-			k = kh_get(fuzzy_key_forbidden_ids, session->ctx->default_forbidden_ids, session->reply.rep.v1.flag);
+			k = kh_get(fuzzy_key_ids_set, session->ctx->default_forbidden_ids, session->reply.rep.v1.flag);
 
 			if (k != kh_end(session->ctx->default_forbidden_ids)) {
 				/* Hash is from a forbidden flag by default */
@@ -825,7 +826,7 @@ rspamd_fuzzy_make_reply (struct rspamd_fuzzy_cmd *cmd,
 			if (session->reply.rep.v1.prob > 0 && session->key && session->key->forbidden_ids) {
 				khiter_t k;
 
-				k = kh_get(fuzzy_key_forbidden_ids, session->key->forbidden_ids, session->reply.rep.v1.flag);
+				k = kh_get(fuzzy_key_ids_set, session->key->forbidden_ids, session->reply.rep.v1.flag);
 
 				if (k != kh_end (session->key->forbidden_ids)) {
 					/* Hash is from a forbidden flag for this key */
@@ -1320,6 +1321,11 @@ rspamd_fuzzy_process_command (struct fuzzy_session *session)
 				}
 			}
 
+			if (session->ctx->weak_ids && kh_get(fuzzy_key_ids_set, session->ctx->weak_ids, cmd->flag) != kh_end(session->ctx->weak_ids)) {
+				/* Flag command as weak */
+				cmd->version |= RSPAMD_FUZZY_FLAG_WEAK;
+			}
+
 			if (session->worker->index == 0 || session->ctx->peer_fd == -1) {
 				/* Just add to the queue */
 				up_cmd.is_shingle = is_shingle;
@@ -1367,7 +1373,7 @@ rspamd_fuzzy_command_valid (struct rspamd_fuzzy_cmd *cmd, gint r)
 {
 	enum rspamd_fuzzy_epoch ret = RSPAMD_FUZZY_EPOCH_MAX;
 
-	switch (cmd->version) {
+	switch (cmd->version & RSPAMD_FUZZY_VERSION_MASK) {
 	case 4:
 		if (cmd->shingles_count > 0) {
 			if (r >= sizeof (struct rspamd_fuzzy_shingle_cmd)) {
@@ -2281,16 +2287,16 @@ rspamd_fuzzy_storage_stat (struct rspamd_main *rspamd_main,
 }
 
 static gboolean
-	fuzzy_parse_forbidden_ids (rspamd_mempool_t *pool,
+fuzzy_parse_ids (rspamd_mempool_t *pool,
 		const ucl_object_t *obj,
 		gpointer ud,
 		struct rspamd_rcl_section *section,
 		GError **err)
 {
 	struct rspamd_rcl_struct_parser *pd = (struct rspamd_rcl_struct_parser *)ud;
-	struct rspamd_fuzzy_storage_ctx *ctx;
+	khash_t(fuzzy_key_ids_set) *target;
 
-	ctx = (struct rspamd_fuzzy_storage_ctx *)pd->user_struct;
+	target = (khash_t(fuzzy_key_ids_set) *)pd->user_struct;
 
 	if (ucl_object_type (obj) == UCL_ARRAY) {
 		const ucl_object_t *cur;
@@ -2301,7 +2307,7 @@ static gboolean
 			if (ucl_object_toint_safe (cur, &id)) {
 				int r;
 
-				kh_put(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids, id, &r);
+				kh_put(fuzzy_key_ids_set, target, id, &r);
 			}
 			else {
 				return FALSE;
@@ -2312,7 +2318,7 @@ static gboolean
 	}
 	else if (ucl_object_type (obj) == UCL_INT) {
 		int r;
-		kh_put(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids, ucl_object_toint (obj), &r);
+		kh_put(fuzzy_key_ids_set, target, ucl_object_toint (obj), &r);
 
 		return TRUE;
 	}
@@ -2389,13 +2395,13 @@ fuzzy_parse_keypair (rspamd_mempool_t *pool,
 			const ucl_object_t *forbidden_ids = ucl_object_lookup (extensions, "forbidden_ids");
 
 			if (forbidden_ids && ucl_object_type (forbidden_ids) == UCL_ARRAY) {
-				key->forbidden_ids = kh_init(fuzzy_key_forbidden_ids);
+				key->forbidden_ids = kh_init(fuzzy_key_ids_set);
 				while ((cur = ucl_object_iterate (forbidden_ids, &it, true)) != NULL) {
 					if (ucl_object_type(cur) == UCL_INT || ucl_object_type(cur) == UCL_FLOAT) {
 						int id = ucl_object_toint(cur);
 						int r;
 
-						kh_put(fuzzy_key_forbidden_ids, key->forbidden_ids, id, &r);
+						kh_put(fuzzy_key_ids_set, key->forbidden_ids, id, &r);
 					}
 				}
 			}
@@ -2462,7 +2468,8 @@ init_fuzzy (struct rspamd_config *cfg)
 	ctx->leaky_bucket_burst = NAN;
 	ctx->leaky_bucket_rate = NAN;
 	ctx->delay = NAN;
-	ctx->default_forbidden_ids = kh_init(fuzzy_key_forbidden_ids);
+	ctx->default_forbidden_ids = kh_init(fuzzy_key_ids_set);
+	ctx->weak_ids = kh_init(fuzzy_key_ids_set);
 
 	rspamd_rcl_register_worker_option (cfg,
 			type,
@@ -2535,12 +2542,21 @@ init_fuzzy (struct rspamd_config *cfg)
 	rspamd_rcl_register_worker_option (cfg,
 			type,
 			"forbidden_ids",
-			fuzzy_parse_forbidden_ids,
-			ctx,
+			fuzzy_parse_ids,
+			ctx->default_forbidden_ids,
 			0,
 			0,
 			"Deny specific flags by default");
 
+	rspamd_rcl_register_worker_option (cfg,
+		type,
+		"weak_ids",
+		fuzzy_parse_ids,
+		ctx->weak_ids,
+		0,
+		0,
+		"Treat these flags as weak (i.e. they do not overwrite strong flags)");
+
 	rspamd_rcl_register_worker_option (cfg,
 			type,
 			"keypair_cache_size",
@@ -3019,7 +3035,11 @@ start_fuzzy (struct rspamd_worker *worker)
 	}
 
 	if (ctx->default_forbidden_ids) {
-		kh_destroy(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids);
+		kh_destroy(fuzzy_key_ids_set, ctx->default_forbidden_ids);
+	}
+
+	if (ctx->weak_ids) {
+		kh_destroy(fuzzy_key_ids_set, ctx->weak_ids);
 	}
 
 	REF_RELEASE (ctx->cfg);
diff --git a/src/libserver/fuzzy_backend/fuzzy_backend_redis.c b/src/libserver/fuzzy_backend/fuzzy_backend_redis.c
index 0b7953bfe..390119fbc 100644
--- a/src/libserver/fuzzy_backend/fuzzy_backend_redis.c
+++ b/src/libserver/fuzzy_backend/fuzzy_backend_redis.c
@@ -1049,8 +1049,16 @@ rspamd_fuzzy_update_append_command (struct rspamd_fuzzy_backend *bk,
 		g_string_append_len (key, cmd->digest, sizeof (cmd->digest));
 		value = g_string_sized_new (sizeof ("4294967296"));
 		rspamd_printf_gstring (value, "%d", cmd->flag);
-		session->argv[cur_shift] = g_strdup ("HSET");
-		session->argv_lens[cur_shift++] = sizeof ("HSET") - 1;
+
+		if (cmd->version & RSPAMD_FUZZY_FLAG_WEAK) {
+			session->argv[cur_shift] = g_strdup ("HSETNX");
+			session->argv_lens[cur_shift++] = sizeof ("HSETNX") - 1;
+		}
+		else {
+			session->argv[cur_shift] = g_strdup ("HSET");
+			session->argv_lens[cur_shift++] = sizeof ("HSET") - 1;
+		}
+
 		session->argv[cur_shift] = key->str;
 		session->argv_lens[cur_shift++] = key->len;
 		session->argv[cur_shift] = g_strdup ("F");
@@ -1445,7 +1453,7 @@ rspamd_fuzzy_backend_update_redis (struct rspamd_fuzzy_backend *bk,
 
 	/*
 	 * For each normal hash addition we do 3 redis commands:
-	 * HSET <key> F <flag>
+	 * HSET <key> F <flag> **OR** HSETNX <key> F <flag> when flag is weak
 	 * HINCRBY <key> V <weight>
 	 * EXPIRE <key> <expire>
 	 * INCR <prefix||fuzzy_count>
diff --git a/src/libserver/fuzzy_wire.h b/src/libserver/fuzzy_wire.h
index 8680e16fb..fd65ffb38 100644
--- a/src/libserver/fuzzy_wire.h
+++ b/src/libserver/fuzzy_wire.h
@@ -13,6 +13,9 @@ extern "C" {
 #define RSPAMD_FUZZY_VERSION 4
 #define RSPAMD_FUZZY_KEYLEN 8
 
+#define RSPAMD_FUZZY_FLAG_WEAK (1u << 7u)
+/* Use lower 4 bits for the version */
+#define RSPAMD_FUZZY_VERSION_MASK 0x0fu
 /* Commands for fuzzy storage */
 #define FUZZY_CHECK 0
 #define FUZZY_WRITE 1


More information about the Commits mailing list