commit 92e1b61: [Minor] Langdet: Add threshold for stop words
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Aug 2 17:35:06 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-08-02 18:28:29 +0100
URL: https://github.com/rspamd/rspamd/commit/92e1b614db2bba173c3352455c3454249d357c9d (HEAD -> master)
[Minor] Langdet: Add threshold for stop words
---
src/libmime/lang_detection.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 74c6f7247..9ccd7bef5 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -1650,6 +1650,7 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
struct rspamd_stop_word_elt *elt;
struct rspamd_sw_cbdata cbdata;
gboolean ret = FALSE;
+ static const int stop_words_threshold = 4;
elt = &d->stop_words[cat];
cbdata.res = kh_init (rspamd_sw_hash);
@@ -1667,7 +1668,12 @@ rspamd_language_detector_try_stop_words (struct rspamd_task *task,
struct rspamd_language_elt *cur_lang;
kh_foreach (cbdata.res, cur_lang, cur_matches, {
+ if (cur_matches < stop_words_threshold) {
+ continue;
+ }
+
double rate = (double)cur_matches / (double)cur_lang->stop_words;
+
if (rate > max_rate) {
max_rate = rate;
sel = cur_lang->name;
More information about the Commits mailing list