commit 9ec0dbf: [Minor] Fix loading of unicode multipatterns
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Feb 14 17:35:12 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-02-14 17:27:45 +0000
URL: https://github.com/rspamd/rspamd/commit/9ec0dbfd35d3fd9bd61ae74e30214089f95305c4 (HEAD -> master)
[Minor] Fix loading of unicode multipatterns
---
src/libmime/lang_detection.c | 14 ++++++++++++++
src/libserver/cfg_rcl.c | 4 +++-
src/libserver/url.c | 6 +++---
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 72964a93a..9c29212dd 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -459,9 +459,16 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg,
const char *word = ucl_object_tolstring (w, &wlen);
const char *saved;
+#ifdef WITH_HYPERSCAN
+ rspamd_multipattern_add_pattern_len (d->stop_words[cat].mp,
+ word, wlen,
+ RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8
+ |RSPAMD_MULTIPATTERN_RE);
+#else
rspamd_multipattern_add_pattern_len (d->stop_words[cat].mp,
word, wlen,
RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
+#endif
nelt->stop_words ++;
nstop ++;
@@ -817,8 +824,15 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
/* Map from ngramm in ucs32 to GPtrArray of rspamd_language_elt */
for (i = 0; i < RSPAMD_LANGUAGE_MAX; i ++) {
ret->trigramms[i] = kh_init (rspamd_trigram_hash);
+#ifdef WITH_HYPERSCAN
+ ret->stop_words[i].mp = rspamd_multipattern_create (
+ RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8|
+ RSPAMD_MULTIPATTERN_RE);
+#else
ret->stop_words[i].mp = rspamd_multipattern_create (
RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
+#endif
+
ret->stop_words[i].ranges = g_array_new (FALSE, FALSE,
sizeof (struct rspamd_stop_word_range));
}
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index fa1c07f6e..21a89c06e 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -1314,7 +1314,9 @@ rspamd_rcl_composite_handler (rspamd_mempool_t *pool,
}
rspamd_config_add_symbol (cfg, composite_name, score,
- description, group, FALSE, FALSE,
+ description, group,
+ 0,
+ ucl_object_get_priority (obj) + 1,
1);
elt = ucl_object_lookup (obj, "groups");
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 421c8a181..0effe4d6b 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -454,7 +454,7 @@ rspamd_url_parse_tld_file (const gchar *fname,
m.flags = flags;
rspamd_multipattern_add_pattern (url_scanner->search_trie, p,
- RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+ RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
m.pattern = rspamd_multipattern_get_pattern (url_scanner->search_trie,
rspamd_multipattern_get_npatterns (url_scanner->search_trie) - 1);
m.patlen = strlen (m.pattern);
@@ -517,13 +517,13 @@ rspamd_url_init (const gchar *tld_file)
url_scanner->matchers = g_array_sized_new (FALSE, TRUE,
sizeof (struct url_matcher), 13000);
url_scanner->search_trie = rspamd_multipattern_create_sized (13000,
- RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+ RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
}
else {
url_scanner->matchers = g_array_sized_new (FALSE, TRUE,
sizeof (struct url_matcher), 128);
url_scanner->search_trie = rspamd_multipattern_create_sized (128,
- RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+ RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
}
rspamd_url_add_static_matchers (url_scanner);
More information about the Commits mailing list