commit d0974f0: [Fix] Fix trie code when there are regexps and Hyperscan is absent
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Sep 10 11:42:04 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-09-10 12:34:52 +0100
URL: https://github.com/rspamd/rspamd/commit/d0974f01f91da985d7646f6ef64fed1e053c64b2 (HEAD -> master)
[Fix] Fix trie code when there are regexps and Hyperscan is absent
---
src/libutil/multipattern.c | 74 +++++++++++++++++++++++++++++++++++++++-------
1 file changed, 63 insertions(+), 11 deletions(-)
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
index b2cdc6645..0fc028969 100644
--- a/src/libutil/multipattern.c
+++ b/src/libutil/multipattern.c
@@ -25,6 +25,7 @@
#include "hs.h"
#endif
#include "acism.h"
+#include "libutil/regexp.h"
#include <stdalign.h>
#define MAX_SCRATCH 4
@@ -51,6 +52,7 @@ struct RSPAMD_ALIGNED(64) rspamd_multipattern {
#endif
ac_trie_t *t;
GArray *pats;
+ GArray *res;
gboolean compiled;
guint cnt;
@@ -192,14 +194,14 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
gsize *dst_len)
{
gchar *ret = NULL;
-#ifdef WITH_HYPERSCAN
- if (rspamd_hs_check ()) {
- gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
+ gint gl_flags = RSPAMD_REGEXP_ESCAPE_ASCII;
- if (flags & RSPAMD_MULTIPATTERN_UTF8) {
- gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
- }
+ if (flags & RSPAMD_MULTIPATTERN_UTF8) {
+ gl_flags |= RSPAMD_REGEXP_ESCAPE_UTF;
+ }
+#ifdef WITH_HYPERSCAN
+ if (rspamd_hs_check ()) {
if (flags & RSPAMD_MULTIPATTERN_TLD) {
gchar *tmp;
gsize tlen;
@@ -228,6 +230,14 @@ rspamd_multipattern_pattern_filter (const gchar *pattern, gsize len,
if (flags & RSPAMD_MULTIPATTERN_TLD) {
ret = rspamd_multipattern_escape_tld_acism (pattern, len, dst_len);
}
+ else if (flags & RSPAMD_MULTIPATTERN_RE) {
+ ret = rspamd_str_regexp_escape (pattern, len, dst_len, gl_flags |
+ RSPAMD_REGEXP_ESCAPE_RE);
+ }
+ else if (flags & RSPAMD_MULTIPATTERN_GLOB) {
+ ret = rspamd_str_regexp_escape (pattern, len, dst_len,
+ gl_flags | RSPAMD_REGEXP_ESCAPE_GLOB);
+ }
else {
ret = malloc (len + 1);
*dst_len = rspamd_strlcpy (ret, pattern, len + 1);
@@ -496,7 +506,30 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
#endif
if (mp->cnt > 0) {
- mp->t = acism_create ((const ac_trie_pat_t *)mp->pats->data, mp->cnt);
+
+ if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
+ /* Fallback to pcre... */
+ rspamd_regexp_t *re;
+ mp->res = g_array_sized_new (FALSE, TRUE,
+ sizeof (rspamd_regexp_t *), mp->cnt);
+
+ for (guint i = 0; i < mp->cnt; i ++) {
+ const ac_trie_pat_t *pat;
+
+ pat = &g_array_index (mp->pats, ac_trie_pat_t, i);
+
+ re = rspamd_regexp_new (pat->ptr, NULL, err);
+
+ if (re == NULL) {
+ return FALSE;
+ }
+
+ g_array_append_val (mp->res, re);
+ }
+ }
+ else {
+ mp->t = acism_create ((const ac_trie_pat_t *) mp->pats->data, mp->cnt);
+ }
}
mp->compiled = TRUE;
@@ -617,11 +650,30 @@ rspamd_multipattern_lookup (struct rspamd_multipattern *mp,
gint state = 0;
- ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
- &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
+ if (mp->flags & (RSPAMD_MULTIPATTERN_GLOB|RSPAMD_MULTIPATTERN_RE)) {
+ /* Terribly inefficient, but who cares - just use hyperscan */
+ for (guint i = 0; i < mp->cnt; i ++) {
+ rspamd_regexp_t *re = g_array_index (mp->res, rspamd_regexp_t *, i);
+ const gchar *start = NULL, *end = NULL;
+
+ while (rspamd_regexp_search (re,
+ in,
+ len,
+ &start,
+ &end,
+ TRUE,
+ NULL)) {
+ ret = rspamd_multipattern_acism_cb (i, end - in, &cbd);
+ }
+ }
+ }
+ else {
+ ret = acism_lookup (mp->t, in, len, rspamd_multipattern_acism_cb, &cbd,
+ &state, mp->flags & RSPAMD_MULTIPATTERN_ICASE);
- if (pnfound) {
- *pnfound = cbd.nfound;
+ if (pnfound) {
+ *pnfound = cbd.nfound;
+ }
}
return ret;
More information about the Commits mailing list