commit cec3e89: [Feature] Speed up is_ascii function
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Jan 27 18:28:06 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-01-27 18:05:15 +0000
URL: https://github.com/rspamd/rspamd/commit/cec3e89b046705b1ce488626ad059ce4b4ea9b6b
[Feature] Speed up is_ascii function
---
src/libutil/str_util.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++-
src/libutil/str_util.h | 34 +----------------------
2 files changed, 73 insertions(+), 34 deletions(-)
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 8fcaca484..5cee63baf 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -3324,4 +3324,75 @@ rspamd_string_len_split (const gchar *in, gsize len, const gchar *spill,
}
return res;
-}
\ No newline at end of file
+}
+
+#include <string.h>
+#if defined(__x86_64__)
+#include <x86intrin.h>
+#endif
+
+/* TRUE if any of the len bytes at beg has its top bit set (non-ASCII). */
+static inline gboolean
+rspamd_str_has_8bit_u64 (const guchar *beg, gsize len)
+{
+	guint64 acc1 = 0, acc2 = 0;
+	guint8 orb = 0;
+
+	/* Main loop: OR two 64-bit words per step into independent accumulators */
+	while (len >= 16) {
+		guint64 w1, w2;
+
+		/*
+		 * memcpy, not *(const guint64 *)beg: beg may be unaligned, so a
+		 * cast load is UB (alignment + strict aliasing) and can fault on
+		 * strict-alignment CPUs; an 8-byte memcpy compiles to one load.
+		 */
+		memcpy (&w1, beg, sizeof (w1));
+		memcpy (&w2, beg + sizeof (w1), sizeof (w2));
+		acc1 |= w1;
+		acc2 |= w2;
+		beg += 16;
+		len -= 16;
+	}
+
+	/* Fewer than 16 bytes left: fold them in one at a time */
+	while (len--) {
+		orb |= *beg++;
+	}
+
+	return ((acc1 | acc2) & 0x8080808080808080ULL) != 0 || orb >= 0x80;
+}
+
+/*
+ * Returns TRUE if any of the len bytes at beg is >= 0x80, FALSE for pure
+ * 7-bit input (including len == 0).
+ */
+gboolean
+rspamd_str_has_8bit (const guchar *beg, gsize len)
+{
+#if defined(__x86_64__)
+	if (len >= 32) {
+		__m128i acc1 = _mm_setzero_si128 (), acc2 = _mm_setzero_si128 ();
+
+		do {
+			/*
+			 * _mm_loadu_si128 is SSE2, which every x86-64 CPU has;
+			 * _mm_lddqu_si128 is SSE3 and needs -msse3 to compile.
+			 */
+			acc1 = _mm_or_si128 (acc1,
+					_mm_loadu_si128 ((const __m128i *)beg));
+			acc2 = _mm_or_si128 (acc2,
+					_mm_loadu_si128 ((const __m128i *)(beg + 16)));
+			beg += 32;
+			len -= 32;
+		} while (len >= 32);
+
+		/* movemask collects the top bit of each of the 16 bytes */
+		if (_mm_movemask_epi8 (_mm_or_si128 (acc1, acc2))) {
+			return TRUE;
+		}
+	}
+#endif
+
+	/* Scalar path: whole input on non-x86-64, else the < 32 byte tail */
+	return rspamd_str_has_8bit_u64 (beg, len);
+}
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index c08dd55bb..22643176b 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -440,39 +440,7 @@ gsize rspamd_memspn (const gchar *s, const gchar *e, gsize len);
*/
#define rspamd_is_aligned(p, n) (((uintptr_t)(p) & ((uintptr_t)(n) - 1)) == 0)
#define rspamd_is_aligned_as(p, v) rspamd_is_aligned(p, _Alignof(__typeof((v))))
-
-static inline gboolean
-rspamd_str_has_8bit (const guchar *beg, gsize len)
-{
- unsigned long *w;
- gsize i, leftover;
-
- if (rspamd_is_aligned_as (beg, *w)) {
- leftover = len % sizeof (*w);
- w = (unsigned long *) beg;
-
- for (i = 0; i < len / sizeof (*w); i++) {
- if (rspamd_str_hasmore (*w, 127)) {
- return TRUE;
- }
-
- w++;
- }
-
- beg = (const guchar *) w;
- }
- else {
- leftover = len;
- }
-
- for (i = 0; i < leftover; i++) {
- if (beg[i] > 127) {
- return TRUE;
- }
- }
-
- return FALSE;
-}
+gboolean rspamd_str_has_8bit (const guchar *beg, gsize len);
struct UConverter;
More information about the Commits
mailing list