commit 2426e04: [Project] Show fasttext info

Vsevolod Stakhov vsevolod at rspamd.com
Sat Apr 29 17:14:04 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-04-29 14:46:55 +0100
URL: https://github.com/rspamd/rspamd/commit/2426e04a9aa304ad1d24cbceb91493f205bf5b57

[Project] Show fasttext info

---
 src/libmime/lang_detection.c            | 11 +++++++++--
 src/libmime/lang_detection_fasttext.cxx | 23 +++++++++++++++++++++++
 src/libmime/lang_detection_fasttext.h   |  7 +++++++
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 57d2f301d..09591438e 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -15,6 +15,7 @@
  */
 
 #include "lang_detection.h"
+#include "lang_detection_fasttext.h"
 #include "libserver/logger.h"
 #include "libcryptobox/cryptobox.h"
 #include "libutil/multipattern.h"
@@ -181,6 +182,7 @@ struct rspamd_lang_detector {
 	UConverter *uchar_converter;
 	gsize short_text_limit;
 	gsize total_occurrences; /* number of all languages found */
+	gpointer fasttext_detector;
 	ref_entry_t ref;
 };
 
@@ -766,6 +768,7 @@ rspamd_language_detector_dtor (struct rspamd_lang_detector *d)
 		}
 
 		kh_destroy (rspamd_stopwords_hash, d->stop_words_norm);
+		rspamd_lang_detection_fasttext_destroy(d->fasttext_detector);
 	}
 }
 
@@ -886,10 +889,14 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
 		total += kh_size (ret->trigrams[i]);
 	}
 
+	ret->fasttext_detector = rspamd_lang_detection_fasttext_init(cfg);
+	char *fasttext_status = rspamd_lang_detection_fasttext_show_info(ret->fasttext_detector);
+
 	msg_info_config ("loaded %d languages, "
-			"%d trigrams",
+			"%d trigrams; %s",
 			(gint)ret->languages->len,
-			(gint)total);
+			(gint)total, fasttext_status);
+	g_free (fasttext_status);
 
 	if (stop_words) {
 		ucl_object_unref (stop_words);
diff --git a/src/libmime/lang_detection_fasttext.cxx b/src/libmime/lang_detection_fasttext.cxx
index cf6b5c852..d6bd96ca1 100644
--- a/src/libmime/lang_detection_fasttext.cxx
+++ b/src/libmime/lang_detection_fasttext.cxx
@@ -33,6 +33,7 @@ namespace rspamd::langdet {
 class fasttext_langdet {
 private:
 	fasttext::FastText ft;
+	std::string model_fname;
 	bool loaded;
 
 	struct one_shot_buf : public std::streambuf {
@@ -53,6 +54,7 @@ public:
 				try {
 					ft.loadModel(ucl_object_tostring(model));
 					loaded = true;
+					model_fname = std::string{ucl_object_tostring(model)};
 				}
 				catch (std::exception &e) {
 					auto err_message = fmt::format("cannot load fasttext model: {}", e.what());
@@ -93,6 +95,16 @@ public:
 
 		return nullptr;
 	}
+
+	auto model_info(void) const -> std::string { // builds a one-line, human-readable status text for the model
+		if (!loaded) { // `loaded` is set to true in the constructor only after ft.loadModel() returned without throwing
+			return "fasttext model is not loaded";
+		}
+		else {
+			return fmt::format("fasttext model {}: {} languages, {} tokens", model_fname, // same path string that was given to loadModel()
+				ft.getDictionary()->nlabels(), ft.getDictionary()->ntokens()); // nlabels() is printed as "languages", ntokens() as "tokens"
+		}
+	}
 };
 }
 #endif
@@ -112,6 +124,17 @@ void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg)
 #endif
 }
 
+char *rspamd_lang_detection_fasttext_show_info(void *ud) /* returns a g_strdup'ed description of the detector state */
+{
+#ifndef WITH_FASTTEXT
+	return g_strdup("fasttext is not compiled in"); /* `ud` is not used in this build */
+#else
+	auto model_info = FASTTEXT_MODEL_TO_C_API(ud)->model_info(); /* no null check here; NOTE(review): presumably `ud` must come from rspamd_lang_detection_fasttext_init - confirm */
+	/* Duplicate the text: the local std::string is destroyed when this function returns */
+	return g_strdup(model_info.c_str());
+#endif
+}
+
 rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
 											   const char *in, size_t len, int k)
 {
diff --git a/src/libmime/lang_detection_fasttext.h b/src/libmime/lang_detection_fasttext.h
index 44bc8bf71..71e253940 100644
--- a/src/libmime/lang_detection_fasttext.h
+++ b/src/libmime/lang_detection_fasttext.h
@@ -27,6 +27,13 @@ struct rspamd_config;
  */
 void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg);
 
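+/**
+ * Show info about fasttext language detector
+ * @param ud detector handle as returned by rspamd_lang_detection_fasttext_init
+ * @return newly allocated description string; the caller must release it with g_free
+ */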
+char *rspamd_lang_detection_fasttext_show_info(void *ud);
+
 
 typedef  void * rspamd_fasttext_predict_result_t;
 /**


More information about the Commits mailing list