commit ffdde52: [Minor] Do not count empty words

Vsevolod Stakhov <vsevolod at highsecure.ru>
Mon Sep 28 10:07:06 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-09-28 11:00:29 +0100
URL: https://github.com/rspamd/rspamd/commit/ffdde528bb3981325cdbc69600ffbaff024a6ad3

[Minor] Do not count empty words

---
 src/libmime/message.c | 6 +++---
 src/libmime/message.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/libmime/message.c b/src/libmime/message.c
index f2f22f6fe..d6a5fb5ce 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -93,10 +93,10 @@ rspamd_mime_part_extract_words (struct rspamd_task *task,
 				if (w->stemmed.len <= 3) {
 					short_len++;
 				}
-			}
 
-			if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
-				part->nwords ++;
+				if (w->flags & RSPAMD_STAT_TOKEN_FLAG_TEXT) {
+					part->nwords ++;
+				}
 			}
 
 			if (w->flags & (RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE|
diff --git a/src/libmime/message.h b/src/libmime/message.h
index a921d6f38..d6f1b76c0 100644
--- a/src/libmime/message.h
+++ b/src/libmime/message.h
@@ -140,8 +140,8 @@ struct rspamd_mime_text_part {
 	GByteArray *utf_content; /* utf8 encoded processed content */
 	GByteArray *utf_raw_content; /* utf raw content */
 	GByteArray *utf_stripped_content; /* utf content with no newlines */
-	GArray *normalized_hashes;
-	GArray *utf_words;
+	GArray *normalized_hashes; /* Array of guint64 */
+	GArray *utf_words; /* Array of rspamd_stat_token_t */
 	UText utf_stripped_text; /* Used by libicu to represent the utf8 content */
 
 	GPtrArray *newlines;    /**< positions of newlines in text, relative to content*/


More information about the Commits mailing list