commit fb55aca: [Minor] Add sse2 accelerated function for lowercasing and copying a string

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Jun 16 14:28:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-16 14:39:06 +0100
URL: https://github.com/rspamd/rspamd/commit/fb55acaa1aa930df113936aef303345932168997

[Minor] Add sse2 accelerated function for lowercasing and copying a string

---
 src/libutil/str_util.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 src/libutil/str_util.h |  8 ++++++++
 2 files changed, 50 insertions(+)

diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 1e92c8e54..6b0cc3b68 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -27,6 +27,10 @@
 #endif
 #include <math.h>
 
+#ifdef __x86_64__
+#include <immintrin.h>
+#endif
+
 #include "contrib/fastutf8/fastutf8.h"
 
 const guchar lc_map[256] = {
@@ -98,6 +102,44 @@ rspamd_str_lc (gchar *str, guint size)
 	return size;
 }
 
+gsize
+rspamd_str_copy_lc (const gchar *src, gchar *dst, gsize size)
+{
+	gchar *d = dst;
+	/* Copies size bytes from src to dst, lowercasing on the way; dst is not NUL-terminated */
+	/* Scalar prologue: convert byte-by-byte via lc_map until src is 16-byte aligned (or size runs out) */
+	while ((0xf & (uintptr_t)src) && size > 0) {
+		*d++ = lc_map[(guchar)*src++];
+		size --;
+	}
+	/* src is now 16-byte aligned, so the aligned load below is valid; dst alignment is unknown, hence the unaligned store */
+	/* NOTE(review): this path only folds 'A'..'Z'; it matches the lc_map paths only if lc_map is ASCII-only - confirm */
+#ifdef __x86_64__
+	while (size >= 16) {
+		__m128i sv = _mm_load_si128((const __m128i*)src);
+		/* Subtract 'A'+128 (mod 256) so that 'A'..'Z' lands on signed -128..-103 and everything else lands above */
+		__m128i rangeshift = _mm_sub_epi8(sv, _mm_set1_epi8((char)('A'+128)));
+		/* Signed compare against -128+25 ('Z'): all-ones in every lane that is NOT an uppercase ASCII letter */
+		__m128i nomodify = _mm_cmpgt_epi8(rangeshift, _mm_set1_epi8(-128 + 25));
+		/* 0x20 (the ASCII case bit, same value as ' ') in uppercase lanes only, zero elsewhere */
+		__m128i flip  = _mm_andnot_si128(nomodify, _mm_set1_epi8(0x20));
+		__m128i uc = _mm_xor_si128(sv, flip); /* sets the case bit: despite the name, uc holds the lowercased bytes */
+		_mm_storeu_si128((__m128i*)d, uc);
+		d += 16;
+		src += 16;
+		size -= 16;
+	}
+#endif
+
+	/* Leftover: fewer than 16 bytes on x86_64, the whole remainder on other targets */
+	while (size > 0) {
+		*d++ = lc_map[(guchar)*src++];
+		size --;
+	}
+
+	return (d - dst); /* number of bytes written; equals the size passed in */
+}
+
 gint
 rspamd_lc_cmp (const gchar *s, const gchar *d, gsize l)
 {
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index cfa37848f..e5e4cfb76 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -43,6 +43,14 @@ gint rspamd_lc_cmp (const gchar *s, const gchar *d, gsize l);
  */
 guint rspamd_str_lc (gchar *str, guint size);
 
+/** Copies `size` bytes from `src` to `dst`, converting ASCII uppercase letters to lowercase
+ * @param src source bytes to read (`size` bytes)
+ * @param dst destination buffer; receives `size` bytes, no NUL terminator is appended
+ * @param size number of bytes to copy
+ * @return number of bytes written to `dst`
+ */
+gsize rspamd_str_copy_lc (const gchar *src, gchar *dst, gsize size);
+
 /**
  * Convert string to lowercase in-place using utf (limited) conversion
  */


More information about the Commits mailing list