commit 3389533: [Fix] Fix format string and some length issues

Vsevolod Stakhov vsevolod at rspamd.com
Tue Sep 26 13:35:03 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-09-26 14:29:30 +0100
URL: https://github.com/rspamd/rspamd/commit/3389533f18e1a5b4fe48c8f1fcb87a0e5b9bcaae (HEAD -> master)

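[Fix] Fix format string and some length issues

Drop the stray "<%s>" placeholder from the lemmatizer debug message (only the
language argument is passed), take the stemmed length from sb_stemmer_length()
instead of strlen() and copy the stem without a trailing NUL, rename the `d`
parameter of rspamd_stem_words() to `lang_detector`, and add the license
header to tokenizers.h.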

---
 src/libstat/tokenizers/tokenizers.c | 21 ++++++++++-----------
 src/libstat/tokenizers/tokenizers.h | 18 +++++++++++++++++-
 2 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index 6e55a33a6..ee7234df7 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
+/*
+ * Copyright 2023 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -871,7 +871,7 @@ void rspamd_normalize_words(GArray *words, rspamd_mempool_t *pool)
 
 void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
 					   const gchar *language,
-					   struct rspamd_lang_detector *d)
+					   struct rspamd_lang_detector *lang_detector)
 {
 	static GHashTable *stemmers = NULL;
 	struct sb_stemmer *stem = NULL;
@@ -894,7 +894,7 @@ void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
 
 			if (stem == NULL) {
 				msg_debug_pool(
-					"<%s> cannot create lemmatizer for %s language",
+					"cannot create lemmatizer for %s language",
 					language);
 				g_hash_table_insert(stemmers, g_strdup(language),
 									GINT_TO_POINTER(-1));
@@ -919,12 +919,11 @@ void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
 				stemmed = sb_stemmer_stem(stem,
 										  tok->normalized.begin, tok->normalized.len);
 
-				dlen = stemmed ? strlen(stemmed) : 0;
+				dlen = sb_stemmer_length(stem);
 
-				if (dlen > 0) {
-					dest = rspamd_mempool_alloc(pool, dlen + 1);
+				if (stemmed != NULL && dlen > 0) {
+					dest = rspamd_mempool_alloc(pool, dlen);
 					memcpy(dest, stemmed, dlen);
-					dest[dlen] = '\0';
 					tok->stemmed.len = dlen;
 					tok->stemmed.begin = dest;
 					tok->flags |= RSPAMD_STAT_TOKEN_FLAG_STEMMED;
@@ -940,8 +939,8 @@ void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
 				tok->stemmed.begin = tok->normalized.begin;
 			}
 
-			if (tok->stemmed.len > 0 && d != NULL &&
-				rspamd_language_detector_is_stop_word(d, tok->stemmed.begin, tok->stemmed.len)) {
+			if (tok->stemmed.len > 0 && lang_detector != NULL &&
+				rspamd_language_detector_is_stop_word(lang_detector, tok->stemmed.begin, tok->stemmed.len)) {
 				tok->flags |= RSPAMD_STAT_TOKEN_FLAG_STOP_WORD;
 			}
 		}
diff --git a/src/libstat/tokenizers/tokenizers.h b/src/libstat/tokenizers/tokenizers.h
index e908c359d..d696364e2 100644
--- a/src/libstat/tokenizers/tokenizers.h
+++ b/src/libstat/tokenizers/tokenizers.h
@@ -1,3 +1,19 @@
+/*
+ * Copyright 2023 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #ifndef TOKENIZERS_H
 #define TOKENIZERS_H
 
@@ -73,7 +89,7 @@ void rspamd_normalize_words(GArray *words, rspamd_mempool_t *pool);
 
 void rspamd_stem_words(GArray *words, rspamd_mempool_t *pool,
 					   const gchar *language,
-					   struct rspamd_lang_detector *d);
+					   struct rspamd_lang_detector *lang_detector);
 
 void rspamd_tokenize_meta_words(struct rspamd_task *task);
 


More information about the Commits mailing list