commit 3242844: [Feature] Allow to have weak flags in fuzzy storage
Vsevolod Stakhov
vsevolod at rspamd.com
Sat May 20 12:21:03 UTC 2023
Author: Vsevolod Stakhov
Date: 2023-05-20 13:19:24 +0100
URL: https://github.com/rspamd/rspamd/commit/32428449bd7a6035cecaaf4a684df997f2a9dab7 (HEAD -> master)
[Feature] Allow to have weak flags in fuzzy storage
---
src/fuzzy_storage.c | 58 +++++++++++++++--------
src/libserver/fuzzy_backend/fuzzy_backend_redis.c | 14 ++++--
src/libserver/fuzzy_wire.h | 3 ++
3 files changed, 53 insertions(+), 22 deletions(-)
diff --git a/src/fuzzy_storage.c b/src/fuzzy_storage.c
index 8a7a3de4e..fe9160880 100644
--- a/src/fuzzy_storage.c
+++ b/src/fuzzy_storage.c
@@ -107,8 +107,7 @@ struct rspamd_leaky_bucket_elt {
};
static const guint64 rspamd_fuzzy_storage_magic = 0x291a3253eb1b3ea5ULL;
-KHASH_SET_INIT_INT(fuzzy_key_forbidden_ids);
-
+KHASH_SET_INIT_INT(fuzzy_key_ids_set);
struct rspamd_fuzzy_storage_ctx {
guint64 magic;
@@ -171,7 +170,9 @@ struct rspamd_fuzzy_storage_ctx {
gint lua_pre_handler_cbref;
gint lua_post_handler_cbref;
gint lua_blacklist_cbref;
- khash_t(fuzzy_key_forbidden_ids) *default_forbidden_ids;
+ khash_t(fuzzy_key_ids_set) *default_forbidden_ids;
+ /* Ids that should not override other ids */
+ khash_t(fuzzy_key_ids_set) *weak_ids;
};
enum fuzzy_cmd_type {
@@ -213,7 +214,7 @@ struct fuzzy_key {
struct rspamd_cryptobox_pubkey *pk;
struct fuzzy_key_stat *stat;
khash_t(fuzzy_key_flag_stat) *flags_stat;
- khash_t(fuzzy_key_forbidden_ids) *forbidden_ids;
+ khash_t(fuzzy_key_ids_set) *forbidden_ids;
};
struct rspamd_updates_cbdata {
@@ -449,7 +450,7 @@ fuzzy_key_dtor (gpointer p)
}
if (key->forbidden_ids) {
- kh_destroy(fuzzy_key_forbidden_ids, key->forbidden_ids);
+ kh_destroy(fuzzy_key_ids_set, key->forbidden_ids);
}
g_free (key);
@@ -812,7 +813,7 @@ rspamd_fuzzy_make_reply (struct rspamd_fuzzy_cmd *cmd,
{
khiter_t k;
- k = kh_get(fuzzy_key_forbidden_ids, session->ctx->default_forbidden_ids, session->reply.rep.v1.flag);
+ k = kh_get(fuzzy_key_ids_set, session->ctx->default_forbidden_ids, session->reply.rep.v1.flag);
if (k != kh_end(session->ctx->default_forbidden_ids)) {
/* Hash is from a forbidden flag by default */
@@ -825,7 +826,7 @@ rspamd_fuzzy_make_reply (struct rspamd_fuzzy_cmd *cmd,
if (session->reply.rep.v1.prob > 0 && session->key && session->key->forbidden_ids) {
khiter_t k;
- k = kh_get(fuzzy_key_forbidden_ids, session->key->forbidden_ids, session->reply.rep.v1.flag);
+ k = kh_get(fuzzy_key_ids_set, session->key->forbidden_ids, session->reply.rep.v1.flag);
if (k != kh_end (session->key->forbidden_ids)) {
/* Hash is from a forbidden flag for this key */
@@ -1320,6 +1321,11 @@ rspamd_fuzzy_process_command (struct fuzzy_session *session)
}
}
+ if (session->ctx->weak_ids && kh_get(fuzzy_key_ids_set, session->ctx->weak_ids, cmd->flag) != kh_end(session->ctx->weak_ids)) {
+ /* Flag command as weak */
+ cmd->version |= RSPAMD_FUZZY_FLAG_WEAK;
+ }
+
if (session->worker->index == 0 || session->ctx->peer_fd == -1) {
/* Just add to the queue */
up_cmd.is_shingle = is_shingle;
@@ -1367,7 +1373,7 @@ rspamd_fuzzy_command_valid (struct rspamd_fuzzy_cmd *cmd, gint r)
{
enum rspamd_fuzzy_epoch ret = RSPAMD_FUZZY_EPOCH_MAX;
- switch (cmd->version) {
+ switch (cmd->version & RSPAMD_FUZZY_VERSION_MASK) {
case 4:
if (cmd->shingles_count > 0) {
if (r >= sizeof (struct rspamd_fuzzy_shingle_cmd)) {
@@ -2281,16 +2287,16 @@ rspamd_fuzzy_storage_stat (struct rspamd_main *rspamd_main,
}
static gboolean
- fuzzy_parse_forbidden_ids (rspamd_mempool_t *pool,
+fuzzy_parse_ids (rspamd_mempool_t *pool,
const ucl_object_t *obj,
gpointer ud,
struct rspamd_rcl_section *section,
GError **err)
{
struct rspamd_rcl_struct_parser *pd = (struct rspamd_rcl_struct_parser *)ud;
- struct rspamd_fuzzy_storage_ctx *ctx;
+ khash_t(fuzzy_key_ids_set) *target;
- ctx = (struct rspamd_fuzzy_storage_ctx *)pd->user_struct;
+ target = (khash_t(fuzzy_key_ids_set) *)pd->user_struct;
if (ucl_object_type (obj) == UCL_ARRAY) {
const ucl_object_t *cur;
@@ -2301,7 +2307,7 @@ static gboolean
if (ucl_object_toint_safe (cur, &id)) {
int r;
- kh_put(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids, id, &r);
+ kh_put(fuzzy_key_ids_set, target, id, &r);
}
else {
return FALSE;
@@ -2312,7 +2318,7 @@ static gboolean
}
else if (ucl_object_type (obj) == UCL_INT) {
int r;
- kh_put(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids, ucl_object_toint (obj), &r);
+ kh_put(fuzzy_key_ids_set, target, ucl_object_toint (obj), &r);
return TRUE;
}
@@ -2389,13 +2395,13 @@ fuzzy_parse_keypair (rspamd_mempool_t *pool,
const ucl_object_t *forbidden_ids = ucl_object_lookup (extensions, "forbidden_ids");
if (forbidden_ids && ucl_object_type (forbidden_ids) == UCL_ARRAY) {
- key->forbidden_ids = kh_init(fuzzy_key_forbidden_ids);
+ key->forbidden_ids = kh_init(fuzzy_key_ids_set);
while ((cur = ucl_object_iterate (forbidden_ids, &it, true)) != NULL) {
if (ucl_object_type(cur) == UCL_INT || ucl_object_type(cur) == UCL_FLOAT) {
int id = ucl_object_toint(cur);
int r;
- kh_put(fuzzy_key_forbidden_ids, key->forbidden_ids, id, &r);
+ kh_put(fuzzy_key_ids_set, key->forbidden_ids, id, &r);
}
}
}
@@ -2462,7 +2468,8 @@ init_fuzzy (struct rspamd_config *cfg)
ctx->leaky_bucket_burst = NAN;
ctx->leaky_bucket_rate = NAN;
ctx->delay = NAN;
- ctx->default_forbidden_ids = kh_init(fuzzy_key_forbidden_ids);
+ ctx->default_forbidden_ids = kh_init(fuzzy_key_ids_set);
+ ctx->weak_ids = kh_init(fuzzy_key_ids_set);
rspamd_rcl_register_worker_option (cfg,
type,
@@ -2535,12 +2542,21 @@ init_fuzzy (struct rspamd_config *cfg)
rspamd_rcl_register_worker_option (cfg,
type,
"forbidden_ids",
- fuzzy_parse_forbidden_ids,
- ctx,
+ fuzzy_parse_ids,
+ ctx->default_forbidden_ids,
0,
0,
"Deny specific flags by default");
+ rspamd_rcl_register_worker_option (cfg,
+ type,
+ "weak_ids",
+ fuzzy_parse_ids,
+ ctx->weak_ids,
+ 0,
+ 0,
+ "Treat these flags as weak (i.e. they do not overwrite strong flags)");
+
rspamd_rcl_register_worker_option (cfg,
type,
"keypair_cache_size",
@@ -3019,7 +3035,11 @@ start_fuzzy (struct rspamd_worker *worker)
}
if (ctx->default_forbidden_ids) {
- kh_destroy(fuzzy_key_forbidden_ids, ctx->default_forbidden_ids);
+ kh_destroy(fuzzy_key_ids_set, ctx->default_forbidden_ids);
+ }
+
+ if (ctx->weak_ids) {
+ kh_destroy(fuzzy_key_ids_set, ctx->weak_ids);
}
REF_RELEASE (ctx->cfg);
diff --git a/src/libserver/fuzzy_backend/fuzzy_backend_redis.c b/src/libserver/fuzzy_backend/fuzzy_backend_redis.c
index 0b7953bfe..390119fbc 100644
--- a/src/libserver/fuzzy_backend/fuzzy_backend_redis.c
+++ b/src/libserver/fuzzy_backend/fuzzy_backend_redis.c
@@ -1049,8 +1049,16 @@ rspamd_fuzzy_update_append_command (struct rspamd_fuzzy_backend *bk,
g_string_append_len (key, cmd->digest, sizeof (cmd->digest));
value = g_string_sized_new (sizeof ("4294967296"));
rspamd_printf_gstring (value, "%d", cmd->flag);
- session->argv[cur_shift] = g_strdup ("HSET");
- session->argv_lens[cur_shift++] = sizeof ("HSET") - 1;
+
+ if (cmd->version & RSPAMD_FUZZY_FLAG_WEAK) {
+ session->argv[cur_shift] = g_strdup ("HSETNX");
+ session->argv_lens[cur_shift++] = sizeof ("HSETNX") - 1;
+ }
+ else {
+ session->argv[cur_shift] = g_strdup ("HSET");
+ session->argv_lens[cur_shift++] = sizeof ("HSET") - 1;
+ }
+
session->argv[cur_shift] = key->str;
session->argv_lens[cur_shift++] = key->len;
session->argv[cur_shift] = g_strdup ("F");
@@ -1445,7 +1453,7 @@ rspamd_fuzzy_backend_update_redis (struct rspamd_fuzzy_backend *bk,
/*
* For each normal hash addition we do 3 redis commands:
- * HSET <key> F <flag>
+ * HSET <key> F <flag> **OR** HSETNX <key> F <flag> when flag is weak
* HINCRBY <key> V <weight>
* EXPIRE <key> <expire>
* INCR <prefix||fuzzy_count>
diff --git a/src/libserver/fuzzy_wire.h b/src/libserver/fuzzy_wire.h
index 8680e16fb..fd65ffb38 100644
--- a/src/libserver/fuzzy_wire.h
+++ b/src/libserver/fuzzy_wire.h
@@ -13,6 +13,9 @@ extern "C" {
#define RSPAMD_FUZZY_VERSION 4
#define RSPAMD_FUZZY_KEYLEN 8
+#define RSPAMD_FUZZY_FLAG_WEAK (1u << 7u)
+/* Use lower 4 bits for the version */
+#define RSPAMD_FUZZY_VERSION_MASK 0x0fu
/* Commands for fuzzy storage */
#define FUZZY_CHECK 0
#define FUZZY_WRITE 1
More information about the Commits mailing list