commit 9ca1941: [Feature] Improve base64 usage

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Nov 12 16:49:07 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-11-12 16:24:33 +0000
URL: https://github.com/rspamd/rspamd/commit/9ca194183cbbc5c1f0e1c819de6b62ccb00d3e95

[Feature] Improve base64 usage

---
 src/libcryptobox/base64/avx2.c   |  7 ++++++-
 src/libcryptobox/base64/base64.c | 43 +++++++++++++++++++++++-----------------
 src/libcryptobox/base64/sse42.c  |  7 ++++++-
 3 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/src/libcryptobox/base64/avx2.c b/src/libcryptobox/base64/avx2.c
index 80f3b9972..432149a29 100644
--- a/src/libcryptobox/base64/avx2.c
+++ b/src/libcryptobox/base64/avx2.c
@@ -144,6 +144,7 @@ dec_reshuffle (__m256i in)
 		const __m256i eq_2F       = _mm256_cmpeq_epi8(str, mask_2F); \
 		const __m256i roll        = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2F, hi_nibbles)); \
 		if (!_mm256_testz_si256(lo, hi)) { \
+			seen_error = true; \
 			break; \
 		} \
 		str = _mm256_add_epi8(str, roll); \
@@ -168,12 +169,15 @@ base64_decode_avx2 (const char *in, size_t inlen,
 	uint8_t q, carry;
 	size_t outl = 0;
 	size_t leftover = 0;
+	bool seen_error = false;
 
 repeat:
 	switch (leftover) {
 		for (;;) {
 		case 0:
-			INNER_LOOP_AVX2
+			if (G_LIKELY (!seen_error)) {
+				INNER_LOOP_AVX2
+			}
 
 			if (inlen-- == 0) {
 				ret = 1;
@@ -267,6 +271,7 @@ repeat:
 		}
 
 		if (inlen > 0) {
+			seen_error = false;
 			goto repeat;
 		}
 	}
diff --git a/src/libcryptobox/base64/base64.c b/src/libcryptobox/base64/base64.c
index 03ca99786..f3759110b 100644
--- a/src/libcryptobox/base64/base64.c
+++ b/src/libcryptobox/base64/base64.c
@@ -19,6 +19,7 @@
 #include "base64.h"
 #include "platform_config.h"
 #include "str_util.h"
+#include "util.h"
 #include "contrib/libottery/ottery.h"
 
 extern unsigned long cpu_config;
@@ -116,20 +117,13 @@ gboolean
 rspamd_cryptobox_base64_decode (const gchar *in, gsize inlen,
 		guchar *out, gsize *outlen)
 {
-	if (inlen > 256) {
+	if (inlen > 128) {
 		/*
 		 * For SIMD base64 decoding we need really large inputs with no
 		 * garbadge such as newlines
-		 * Otherwise, naive version is MUCH faster
+		 * Otherwise, naive version is faster
 		 */
-
-		if (rspamd_memcspn (in, base64_alphabet, 256) == 256) {
-			return base64_opt->decode (in, inlen, out, outlen);
-		}
-		else {
-			/* Garbage found */
-			return base64_ref->decode (in, inlen, out, outlen);
-		}
+		return base64_opt->decode (in, inlen, out, outlen);
 	}
 	else {
 		/* Small input, use reference version */
@@ -139,12 +133,12 @@ rspamd_cryptobox_base64_decode (const gchar *in, gsize inlen,
 	g_assert_not_reached ();
 }
 
-size_t
-base64_test (bool generic, size_t niters, size_t len)
+double
+base64_test (bool generic, size_t niters, size_t len, size_t str_len)
 {
 	size_t cycles;
 	guchar *in, *out, *tmp;
-	const base64_impl_t *impl;
+	gdouble t1, t2, total = 0;
 	gsize outlen;
 
 	g_assert (len > 0);
@@ -152,22 +146,35 @@ base64_test (bool generic, size_t niters, size_t len)
 	tmp = g_malloc (len);
 	ottery_rand_bytes (in, len);
 
-	impl = generic ? &base64_list[0] : base64_opt;
+	out = rspamd_encode_base64_fold (in, len, str_len, &outlen,
+			RSPAMD_TASK_NEWLINES_CRLF);
 
-	out = rspamd_encode_base64 (in, len, 0, &outlen);
-	impl->decode (out, outlen, tmp, &len);
+	if (generic) {
+		base64_list[0].decode (out, outlen, tmp, &len);
+	}
+	else {
+		rspamd_cryptobox_base64_decode (out, outlen, tmp, &len);
+	}
 
 	g_assert (memcmp (in, tmp, len) == 0);
 
 	for (cycles = 0; cycles < niters; cycles ++) {
-		impl->decode (out, outlen, in, &len);
+		t1 = rspamd_get_ticks (TRUE);
+		if (generic) {
+			base64_list[0].decode (out, outlen, tmp, &len);
+		}
+		else {
+			rspamd_cryptobox_base64_decode (out, outlen, tmp, &len);
+		}
+		t2 = rspamd_get_ticks (TRUE);
+		total += t2 - t1;
 	}
 
 	g_free (in);
 	g_free (tmp);
 	g_free (out);
 
-	return cycles;
+	return total;
 }
 
 
diff --git a/src/libcryptobox/base64/sse42.c b/src/libcryptobox/base64/sse42.c
index 1d1287ad2..806dd5298 100644
--- a/src/libcryptobox/base64/sse42.c
+++ b/src/libcryptobox/base64/sse42.c
@@ -118,6 +118,7 @@ static inline __m128i dec_reshuffle (__m128i in)
 			'A','Z', \
 			'a','z'); \
 		if (_mm_cmpistrc(range, str, _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | _SIDD_NEGATIVE_POLARITY)) { \
+			seen_error = true; \
 			break; \
 		} \
 		__m128i indices = _mm_subs_epu8(str, _mm_set1_epi8(46)); \
@@ -150,12 +151,15 @@ base64_decode_sse42 (const char *in, size_t inlen,
 	uint8_t q, carry;
 	size_t outl = 0;
 	size_t leftover = 0;
+	bool seen_error = false;
 
 repeat:
 	switch (leftover) {
 		for (;;) {
 		case 0:
-			INNER_LOOP_SSE42
+			if (G_LIKELY (!seen_error)) {
+				INNER_LOOP_SSE42
+			}
 
 			if (inlen-- == 0) {
 				ret = 1;
@@ -249,6 +253,7 @@ repeat:
 		}
 
 		if (inlen > 0) {
+			seen_error = false;
 			goto repeat;
 		}
 	}


More information about the Commits mailing list