commit 2426e04: [Project] Show fasttext info
Vsevolod Stakhov
vsevolod at rspamd.com
Sat Apr 29 17:14:04 UTC 2023
Author: Vsevolod Stakhov
Date: 2023-04-29 14:46:55 +0100
URL: https://github.com/rspamd/rspamd/commit/2426e04a9aa304ad1d24cbceb91493f205bf5b57
[Project] Show fasttext info
---
src/libmime/lang_detection.c | 11 +++++++++--
src/libmime/lang_detection_fasttext.cxx | 23 +++++++++++++++++++++++
src/libmime/lang_detection_fasttext.h | 7 +++++++
3 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/src/libmime/lang_detection.c b/src/libmime/lang_detection.c
index 57d2f301d..09591438e 100644
--- a/src/libmime/lang_detection.c
+++ b/src/libmime/lang_detection.c
@@ -15,6 +15,7 @@
*/
#include "lang_detection.h"
+#include "lang_detection_fasttext.h"
#include "libserver/logger.h"
#include "libcryptobox/cryptobox.h"
#include "libutil/multipattern.h"
@@ -181,6 +182,7 @@ struct rspamd_lang_detector {
UConverter *uchar_converter;
gsize short_text_limit;
gsize total_occurrences; /* number of all languages found */
+ gpointer fasttext_detector;
ref_entry_t ref;
};
@@ -766,6 +768,7 @@ rspamd_language_detector_dtor (struct rspamd_lang_detector *d)
}
kh_destroy (rspamd_stopwords_hash, d->stop_words_norm);
+ rspamd_lang_detection_fasttext_destroy(d->fasttext_detector);
}
}
@@ -886,10 +889,14 @@ rspamd_language_detector_init (struct rspamd_config *cfg)
total += kh_size (ret->trigrams[i]);
}
+ ret->fasttext_detector = rspamd_lang_detection_fasttext_init(cfg);
+ char *fasttext_status = rspamd_lang_detection_fasttext_show_info(ret->fasttext_detector);
+
msg_info_config ("loaded %d languages, "
- "%d trigrams",
+ "%d trigrams; %s",
(gint)ret->languages->len,
- (gint)total);
+ (gint)total, fasttext_status);
+ g_free (fasttext_status);
if (stop_words) {
ucl_object_unref (stop_words);
diff --git a/src/libmime/lang_detection_fasttext.cxx b/src/libmime/lang_detection_fasttext.cxx
index cf6b5c852..d6bd96ca1 100644
--- a/src/libmime/lang_detection_fasttext.cxx
+++ b/src/libmime/lang_detection_fasttext.cxx
@@ -33,6 +33,7 @@ namespace rspamd::langdet {
class fasttext_langdet {
private:
fasttext::FastText ft;
+ std::string model_fname;
bool loaded;
struct one_shot_buf : public std::streambuf {
@@ -53,6 +54,7 @@ public:
try {
ft.loadModel(ucl_object_tostring(model));
loaded = true;
+ model_fname = std::string{ucl_object_tostring(model)};
}
catch (std::exception &e) {
auto err_message = fmt::format("cannot load fasttext model: {}", e.what());
@@ -93,6 +95,16 @@ public:
return nullptr;
}
+
+ auto model_info(void) const -> std::string { /* builds a one-line textual summary of the model state */
+ if (!loaded) { /* `loaded` is only set after a successful loadModel() call */
+ return "fasttext model is not loaded";
+ }
+ else {
+ return fmt::format("fasttext model {}: {} languages, {} tokens", model_fname, /* model_fname is the path recorded at load time */
+ ft.getDictionary()->nlabels(), ft.getDictionary()->ntokens()); /* label count is reported as "languages" */
+ }
+ }
};
}
#endif
@@ -112,6 +124,17 @@ void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg)
#endif
}
+char *rspamd_lang_detection_fasttext_show_info(void *ud) /* returns a g_strdup()-allocated status string in both build variants */
+{
+#ifndef WITH_FASTTEXT
+ return g_strdup("fasttext is not compiled in"); /* ud is not used in this build variant */
+#else /* WITH_FASTTEXT */
+ auto model_info = FASTTEXT_MODEL_TO_C_API(ud)->model_info(); /* NOTE(review): ud is dereferenced unchecked; presumably never NULL in this build - confirm against the init function */
+
+ return g_strdup(model_info.c_str()); /* copy the text so the result outlives the local model_info object */
+#endif /* WITH_FASTTEXT */
+}
+
rspamd_fasttext_predict_result_t rspamd_lang_detection_fasttext_detect(void *ud,
const char *in, size_t len, int k)
{
diff --git a/src/libmime/lang_detection_fasttext.h b/src/libmime/lang_detection_fasttext.h
index 44bc8bf71..71e253940 100644
--- a/src/libmime/lang_detection_fasttext.h
+++ b/src/libmime/lang_detection_fasttext.h
@@ -27,6 +27,13 @@ struct rspamd_config;
*/
void* rspamd_lang_detection_fasttext_init(struct rspamd_config *cfg);
+/**
+ * Show info about fasttext language detector
+ * @param ud opaque detector handle, as produced by rspamd_lang_detection_fasttext_init
+ * @return newly allocated string describing the detector; the caller releases it with g_free
+ */
+char *rspamd_lang_detection_fasttext_show_info(void *ud);
+
typedef void * rspamd_fasttext_predict_result_t;
/**
More information about the Commits
mailing list