commit 7ad02fc: [Rework] Use xxh3 as a default hash and fix memory/alignment issues

Vsevolod Stakhov vsevolod at highsecure.ru
Sun Dec 5 18:21:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-12-05 18:15:36 +0000
URL: https://github.com/rspamd/rspamd/commit/7ad02fc1f7e786a0db9f1e25f34d2771997a1c57 (HEAD -> master)

[Rework] Use xxh3 as a default hash and fix memory/alignment issues

---
 contrib/xxhash/xxh_x86dispatch.h |  1 -
 src/libcryptobox/cryptobox.c     | 56 +++++++++++++++++++++++++++++++++++-----
 src/libcryptobox/cryptobox.h     | 13 ++++++++--
 src/libserver/cfg_utils.c        |  2 +-
 src/libutil/util.c               |  1 -
 src/lua/lua_cryptobox.c          | 12 ++++-----
 6 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/contrib/xxhash/xxh_x86dispatch.h b/contrib/xxhash/xxh_x86dispatch.h
index 6bc17bcbb..8e91fcf74 100644
--- a/contrib/xxhash/xxh_x86dispatch.h
+++ b/contrib/xxhash/xxh_x86dispatch.h
@@ -71,7 +71,6 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update_dispatch(XXH3_state_t* state, c
 
 # undef  XXH128
 # define XXH128 XXH3_128bits_withSeed_dispatch
-# define XXH3_128bits XXH3_128bits_dispatch
 # undef  XXH3_128bits
 # define XXH3_128bits XXH3_128bits_dispatch
 # undef  XXH3_128bits_withSeed
diff --git a/src/libcryptobox/cryptobox.c b/src/libcryptobox/cryptobox.c
index d48cee16b..fe4d3df29 100644
--- a/src/libcryptobox/cryptobox.c
+++ b/src/libcryptobox/cryptobox.c
@@ -27,6 +27,8 @@
 #include "base64/base64.h"
 #include "ottery.h"
 #include "printf.h"
+#define XXH_INLINE_ALL
+#define XXH_PRIVATE_API
 #include "xxhash.h"
 #define MUM_TARGET_INDEPENDENT_HASH 1 /* For 32/64 bit equal hashes */
 #include "../../contrib/mumhash/mum.h"
@@ -1461,7 +1463,7 @@ void
 rspamd_cryptobox_hash_final (rspamd_cryptobox_hash_state_t *p, guchar *out)
 {
 	crypto_generichash_blake2b_state *st = cryptobox_align_ptr (p,
-			_Alignof(crypto_generichash_blake2b_state));
+			RSPAMD_ALIGNOF(crypto_generichash_blake2b_state));
 	crypto_generichash_blake2b_final (st, out, crypto_generichash_blake2b_BYTES_MAX);
 }
 
@@ -1480,6 +1482,8 @@ void rspamd_cryptobox_hash (guchar *out,
 
 G_STATIC_ASSERT (sizeof (t1ha_context_t) <=
 		sizeof (((rspamd_cryptobox_fast_hash_state_t *)NULL)->opaque));
+G_STATIC_ASSERT (sizeof (struct XXH3_state_s) <=
+				 sizeof (((rspamd_cryptobox_fast_hash_state_t *)NULL)->opaque));
 
 
 struct RSPAMD_ALIGNED(16) _mum_iuf {
@@ -1491,13 +1495,33 @@ struct RSPAMD_ALIGNED(16) _mum_iuf {
 	unsigned rem;
 };
 
+rspamd_cryptobox_fast_hash_state_t*
+rspamd_cryptobox_fast_hash_new(void)
+{
+	rspamd_cryptobox_fast_hash_state_t *nst;
+	int ret = posix_memalign ((void **)&nst, RSPAMD_ALIGNOF(rspamd_cryptobox_fast_hash_state_t),
+			sizeof(rspamd_cryptobox_fast_hash_state_t));
+
+	if (ret != 0) {
+		abort();
+	}
+
+	return nst;
+}
+
+void
+rspamd_cryptobox_fast_hash_free(rspamd_cryptobox_fast_hash_state_t *st)
+{
+	free(st);
+}
+
 void
 rspamd_cryptobox_fast_hash_init (rspamd_cryptobox_fast_hash_state_t *st,
 		guint64 seed)
 {
-	t1ha_context_t *rst = (t1ha_context_t *)st->opaque;
-	st->type = RSPAMD_CRYPTOBOX_T1HA;
-	t1ha2_init (rst, seed, 0);
+	XXH3_state_t *rst = (XXH3_state_t *)st->opaque;
+	st->type = RSPAMD_CRYPTOBOX_XXHASH3;
+	XXH3_64bits_reset_withSeed (rst, seed);
 }
 
 void
@@ -1527,6 +1551,13 @@ rspamd_cryptobox_fast_hash_init_specific (rspamd_cryptobox_fast_hash_state_t *st
 		XXH32_reset (xst, seed);
 		break;
 	}
+	case RSPAMD_CRYPTOBOX_XXHASH3:
+	{
+		XXH3_state_t *xst = (XXH3_state_t *)  st->opaque;
+		st->type = RSPAMD_CRYPTOBOX_XXHASH3;
+		XXH3_64bits_reset_withSeed (xst, seed);
+		break;
+	}
 	case RSPAMD_CRYPTOBOX_MUMHASH: {
 		struct _mum_iuf *iuf = (struct _mum_iuf *)  st->opaque;
 		st->type = RSPAMD_CRYPTOBOX_MUMHASH;
@@ -1559,6 +1590,12 @@ rspamd_cryptobox_fast_hash_update (rspamd_cryptobox_fast_hash_state_t *st,
 			XXH32_update (xst, data, len);
 			break;
 		}
+		case RSPAMD_CRYPTOBOX_XXHASH3:
+		{
+			XXH3_state_t *xst = (XXH3_state_t *)  st->opaque;
+			XXH3_64bits_update (xst, data, len);
+			break;
+		}
 		case RSPAMD_CRYPTOBOX_MUMHASH: {
 			struct _mum_iuf *iuf = (struct _mum_iuf *)  st->opaque;
 			gsize drem = len;
@@ -1629,6 +1666,11 @@ rspamd_cryptobox_fast_hash_final (rspamd_cryptobox_fast_hash_state_t *st)
 			ret = XXH32_digest (xst);
 			break;
 		}
+		case RSPAMD_CRYPTOBOX_XXHASH3: {
+			XXH3_state_t *xst = (XXH3_state_t *)  st->opaque;
+			ret = XXH3_64bits_digest (xst);
+			break;
+		}
 		case RSPAMD_CRYPTOBOX_MUMHASH: {
 			struct _mum_iuf *iuf = (struct _mum_iuf *)  st->opaque;
 			iuf->h = mum_hash_step (iuf->h, iuf->buf.ll);
@@ -1656,14 +1698,14 @@ static inline guint64
 rspamd_cryptobox_fast_hash_machdep (const void *data,
 		gsize len, guint64 seed)
 {
-	return t1ha2_atonce (data, len, seed);
+	return XXH3_64bits_withSeed(data, len, seed);
 }
 
 static inline guint64
 rspamd_cryptobox_fast_hash_indep (const void *data,
 		gsize len, guint64 seed)
 {
-	return t1ha2_atonce (data, len, seed);
+	return XXH3_64bits_withSeed(data, len, seed);
 }
 
 guint64
@@ -1682,6 +1724,8 @@ rspamd_cryptobox_fast_hash_specific (
 	switch (type) {
 	case RSPAMD_CRYPTOBOX_XXHASH32:
 		return XXH32 (data, len, seed);
+	case RSPAMD_CRYPTOBOX_XXHASH3:
+		return XXH3_64bits_withSeed (data, len, seed);
 	case RSPAMD_CRYPTOBOX_XXHASH64:
 		return XXH64 (data, len, seed);
 	case RSPAMD_CRYPTOBOX_MUMHASH:
diff --git a/src/libcryptobox/cryptobox.h b/src/libcryptobox/cryptobox.h
index aa93f8972..e7d2dc79a 100644
--- a/src/libcryptobox/cryptobox.h
+++ b/src/libcryptobox/cryptobox.h
@@ -348,6 +348,7 @@ void rspamd_cryptobox_hash (guchar *out,
 enum rspamd_cryptobox_fast_hash_type {
 	RSPAMD_CRYPTOBOX_XXHASH64 = 0,
 	RSPAMD_CRYPTOBOX_XXHASH32,
+	RSPAMD_CRYPTOBOX_XXHASH3,
 	RSPAMD_CRYPTOBOX_MUMHASH,
 	RSPAMD_CRYPTOBOX_T1HA,
 	RSPAMD_CRYPTOBOX_HASHFAST,
@@ -355,11 +356,19 @@ enum rspamd_cryptobox_fast_hash_type {
 };
 
 /* Non crypto hash IUF interface */
-typedef struct rspamd_cryptobox_fast_hash_state_s {
-	guint64 opaque[11];
+typedef struct CRYPTO_ALIGN(64) rspamd_cryptobox_fast_hash_state_s {
+	guchar opaque[576]; /* Required for xxhash3 */
 	enum rspamd_cryptobox_fast_hash_type type;
 } rspamd_cryptobox_fast_hash_state_t;
 
+
+/**
+ * Creates a new cryptobox state properly aligned
+ * @return
+ */
+rspamd_cryptobox_fast_hash_state_t* rspamd_cryptobox_fast_hash_new(void);
+void rspamd_cryptobox_fast_hash_free(rspamd_cryptobox_fast_hash_state_t *st);
+
 /**
  * Init cryptobox hash state using key if needed, `st` must point to the buffer
  * with at least rspamd_cryptobox_HASHSTATEBYTES bytes length. If keylen == 0, then
diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index 3f699a843..67ceb5df8 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -164,7 +164,7 @@ rspamd_config_new (enum rspamd_config_init_flags flags)
 	rspamd_mempool_t *pool;
 
 	pool = rspamd_mempool_new (8 * 1024 * 1024, "cfg", 0);
-	cfg = rspamd_mempool_alloc0 (pool, sizeof (*cfg));
+	cfg = rspamd_mempool_alloc0_type(pool, struct rspamd_config);
 	/* Allocate larger pool for cfg */
 	cfg->cfg_pool = pool;
 	cfg->dns_timeout = 1.0;
diff --git a/src/libutil/util.c b/src/libutil/util.c
index 27631ae65..2b0dfa9c1 100644
--- a/src/libutil/util.c
+++ b/src/libutil/util.c
@@ -17,7 +17,6 @@
 #include "util.h"
 #include "unix-std.h"
 
-#include "xxhash.h"
 #include "ottery.h"
 #include "cryptobox.h"
 
diff --git a/src/lua/lua_cryptobox.c b/src/lua/lua_cryptobox.c
index f16fd8b67..7d1b8e4a9 100644
--- a/src/lua/lua_cryptobox.c
+++ b/src/lua/lua_cryptobox.c
@@ -1011,7 +1011,7 @@ lua_cryptobox_hash_dtor (struct rspamd_lua_cryptobox_hash *h)
 		free (h->content.h); /* Allocated by posix_memalign */
 	}
 	else {
-		g_free (h->content.fh);
+		rspamd_cryptobox_fast_hash_free (h->content.fh);
 	}
 
 	g_free (h);
@@ -1023,7 +1023,7 @@ rspamd_lua_hash_init_default (struct rspamd_lua_cryptobox_hash *h,
 {
 	h->type = LUA_CRYPTOBOX_HASH_BLAKE2;
 	if (posix_memalign ((void **)&h->content.h,
-			_Alignof (rspamd_cryptobox_hash_state_t),
+			RSPAMD_ALIGNOF(rspamd_cryptobox_hash_state_t),
 			sizeof (*h->content.h)) != 0) {
 		g_assert_not_reached ();
 	}
@@ -1128,28 +1128,28 @@ rspamd_lua_hash_create (const gchar *type, const gchar *key, gsize keylen)
 		}
 		else if (g_ascii_strcasecmp (type, "xxh64") == 0) {
 			h->type = LUA_CRYPTOBOX_HASH_XXHASH64;
-			h->content.fh = g_malloc0 (sizeof (*h->content.fh));
+			h->content.fh = rspamd_cryptobox_fast_hash_new ();
 			rspamd_cryptobox_fast_hash_init_specific (h->content.fh,
 					RSPAMD_CRYPTOBOX_XXHASH64, 0);
 			h->out_len = sizeof (guint64);
 		}
 		else if (g_ascii_strcasecmp (type, "xxh32") == 0) {
 			h->type = LUA_CRYPTOBOX_HASH_XXHASH32;
-			h->content.fh = g_malloc0 (sizeof (*h->content.fh));
+			h->content.fh = rspamd_cryptobox_fast_hash_new ();
 			rspamd_cryptobox_fast_hash_init_specific (h->content.fh,
 					RSPAMD_CRYPTOBOX_XXHASH32, 0);
 			h->out_len = sizeof (guint32);
 		}
 		else if (g_ascii_strcasecmp (type, "mum") == 0) {
 			h->type = LUA_CRYPTOBOX_HASH_MUM;
-			h->content.fh = g_malloc0 (sizeof (*h->content.fh));
+			h->content.fh = rspamd_cryptobox_fast_hash_new ();
 			rspamd_cryptobox_fast_hash_init_specific (h->content.fh,
 					RSPAMD_CRYPTOBOX_MUMHASH, 0);
 			h->out_len = sizeof (guint64);
 		}
 		else if (g_ascii_strcasecmp (type, "t1ha") == 0) {
 			h->type = LUA_CRYPTOBOX_HASH_T1HA;
-			h->content.fh = g_malloc0 (sizeof (*h->content.fh));
+			h->content.fh = rspamd_cryptobox_fast_hash_new ();
 			rspamd_cryptobox_fast_hash_init_specific (h->content.fh,
 					RSPAMD_CRYPTOBOX_T1HA, 0);
 			h->out_len = sizeof (guint64);


More information about the Commits mailing list