commit 9ec0dbf: [Minor] Fix loading of Unicode multipatterns

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Feb 14 17:35:12 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-02-14 17:27:45 +0000
URL: https://github.com/rspamd/rspamd/commit/9ec0dbfd35d3fd9bd61ae74e30214089f95305c4 (HEAD -> master)

[Minor] Fix loading of Unicode multipatterns

---
 src/libmime/lang_detection.c | 14 ++++++++++++++
 src/libserver/cfg_rcl.c      |  4 +++-
 src/libserver/url.c          |  6 +++---
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 72964a93a..9c29212dd 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -459,9 +459,16 @@ rspamd_language_detector_read_file (struct rspamd_config *cfg,
 				const char *word = ucl_object_tolstring (w, &wlen);
 				const char *saved;
 
+#ifdef WITH_HYPERSCAN
+				rspamd_multipattern_add_pattern_len (d->stop_words[cat].mp,
+						word, wlen,
+						RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8
+						|RSPAMD_MULTIPATTERN_RE);
+#else
 				rspamd_multipattern_add_pattern_len (d->stop_words[cat].mp,
 						word, wlen,
 						RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
+#endif
 				nelt->stop_words ++;
 				nstop ++;
 
@@ -817,8 +824,15 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
 	/* Map from ngramm in ucs32 to GPtrArray of rspamd_language_elt */
 	for (i = 0; i < RSPAMD_LANGUAGE_MAX; i ++) {
 		ret->trigramms[i] = kh_init (rspamd_trigram_hash);
+#ifdef WITH_HYPERSCAN
+		ret->stop_words[i].mp = rspamd_multipattern_create (
+				RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8|
+				RSPAMD_MULTIPATTERN_RE);
+#else
 		ret->stop_words[i].mp = rspamd_multipattern_create (
 				RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
+#endif
+
 		ret->stop_words[i].ranges = g_array_new (FALSE, FALSE,
 				sizeof (struct rspamd_stop_word_range));
 	}
diff --git a/src/libserver/cfg_rcl.c b/src/libserver/cfg_rcl.c
index fa1c07f6e..21a89c06e 100644
--- a/src/libserver/cfg_rcl.c
+++ b/src/libserver/cfg_rcl.c
@@ -1314,7 +1314,9 @@ rspamd_rcl_composite_handler (rspamd_mempool_t *pool,
 		}
 
 		rspamd_config_add_symbol (cfg, composite_name, score,
-				description, group, FALSE, FALSE,
+				description, group,
+				0,
+				ucl_object_get_priority (obj) + 1,
 				1);
 
 		elt = ucl_object_lookup (obj, "groups");
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 421c8a181..0effe4d6b 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -454,7 +454,7 @@ rspamd_url_parse_tld_file (const gchar *fname,
 
 		m.flags = flags;
 		rspamd_multipattern_add_pattern (url_scanner->search_trie, p,
-				RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+				RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
 		m.pattern = rspamd_multipattern_get_pattern (url_scanner->search_trie,
 				rspamd_multipattern_get_npatterns (url_scanner->search_trie) - 1);
 		m.patlen = strlen (m.pattern);
@@ -517,13 +517,13 @@ rspamd_url_init (const gchar *tld_file)
 		url_scanner->matchers = g_array_sized_new (FALSE, TRUE,
 				sizeof (struct url_matcher), 13000);
 		url_scanner->search_trie = rspamd_multipattern_create_sized (13000,
-				RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+				RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
 	}
 	else {
 		url_scanner->matchers = g_array_sized_new (FALSE, TRUE,
 				sizeof (struct url_matcher), 128);
 		url_scanner->search_trie = rspamd_multipattern_create_sized (128,
-				RSPAMD_MULTIPATTERN_TLD | RSPAMD_MULTIPATTERN_ICASE);
+				RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
 	}
 
 	rspamd_url_add_static_matchers (url_scanner);


More information about the Commits mailing list