commit 747afaa: [Feature] Langdet: Limit number of stop words to be checked

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jul 25 10:28:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-07-25 11:20:40 +0100
URL: https://github.com/rspamd/rspamd/commit/747afaaa805f90d58b4330eb32119480fe40d3db (HEAD -> master)

[Feature] Langdet: Limit number of stop words to be checked

---
 src/libmime/lang_detection.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 7096da585..0f1563d69 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -1590,6 +1590,7 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp,
 	struct rspamd_stop_word_range *r;
 	struct rspamd_sw_cbdata *cbdata = (struct rspamd_sw_cbdata *)context;
 	khiter_t k;
+	static const gsize max_stop_words = 80;
 
 	if (match_start > 0) {
 		prev = text + match_start - 1;
@@ -1616,6 +1617,10 @@ rspamd_language_detector_sw_cb (struct rspamd_multipattern *mp,
 
 	if (k != kh_end (cbdata->res)) {
 		kh_value (cbdata->res, k) ++;
+
+		if (kh_value (cbdata->res, k) > max_stop_words) {
+			return 1;
+		}
 	}
 	else {
 		gint tt;


More information about the Commits mailing list