commit e1ab42c: [Rework] Add preliminary support of hyperscan caching for re maps
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Nov 25 13:35:08 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-11-24 16:07:45 +0000
URL: https://github.com/rspamd/rspamd/commit/e1ab42c4cfaf611759d01b4a813626afb48475ba
[Rework] Add preliminary support of hyperscan caching for re maps
---
src/libserver/maps/map_helpers.c | 166 +++++++++++++++++++++++++++++-----
src/libserver/mempool_vars_internal.h | 1 +
2 files changed, 145 insertions(+), 22 deletions(-)
diff --git a/src/libserver/maps/map_helpers.c b/src/libserver/maps/map_helpers.c
index 7eacdf61a..084806573 100644
--- a/src/libserver/maps/map_helpers.c
+++ b/src/libserver/maps/map_helpers.c
@@ -20,6 +20,7 @@
#include "radix.h"
#include "rspamd.h"
#include "cryptobox.h"
+#include "mempool_vars_internal.h"
#include "contrib/fastutf8/fastutf8.h"
#include "contrib/cdb/cdb.h"
@@ -1029,6 +1030,120 @@ rspamd_radix_dtor (struct map_cb_data *data)
}
}
+#ifdef WITH_HYPERSCAN
+
+/*
+ * Remembers `fname` in a hash table kept as the RSPAMD_MEMPOOL_RE_MAPS_CACHE
+ * variable of `cfg->cfg_pool`. The table is created on first use and is
+ * released through g_hash_table_unref, registered as the variable destructor.
+ *
+ * Keys are private copies of `fname` (freed by the table with g_free); the
+ * value is a constant marker string, so the table is effectively a set.
+ * NOTE(review): presumably this set lists cache files that are still valid -
+ * confirm against the consumer of RSPAMD_MEMPOOL_RE_MAPS_CACHE.
+ */
+static void
+rspamd_re_map_cache_update (const gchar *fname, struct rspamd_config *cfg)
+{
+	GHashTable *known_files = rspamd_mempool_get_variable (cfg->cfg_pool,
+			RSPAMD_MEMPOOL_RE_MAPS_CACHE);
+
+	if (known_files == NULL) {
+		/* First cached map for this config: create and register the set */
+		known_files = g_hash_table_new_full (g_str_hash, g_str_equal,
+				g_free, NULL);
+		rspamd_mempool_set_variable (cfg->cfg_pool,
+				RSPAMD_MEMPOOL_RE_MAPS_CACHE,
+				known_files,
+				(rspamd_mempool_destruct_t)g_hash_table_unref);
+	}
+
+	g_hash_table_insert (known_files, g_strdup (fname), "1");
+}
+
+/*
+ * Tries to restore the hyperscan database of `re_map` from the cache file
+ * "<hs_cache_dir>/<hex of re_digest>.hsmc".
+ *
+ * Returns TRUE when the file was mapped and deserialized into
+ * `re_map->hs_db`; the file name is then recorded through
+ * rspamd_re_map_cache_update. Returns FALSE when no cache directory is
+ * configured, the file cannot be mapped, or deserialization fails (in the
+ * latter case the file is unlinked).
+ */
+static gboolean
+rspamd_try_load_re_map_cache (struct rspamd_regexp_map_helper *re_map)
+{
+	gchar cache_path[PATH_MAX];
+	gpointer mapped;
+	gsize mapped_len;
+
+	if (re_map->map->cfg->hs_cache_dir == NULL) {
+		return FALSE;
+	}
+
+	rspamd_snprintf (cache_path, sizeof (cache_path), "%s/%*xs.hsmc",
+			re_map->map->cfg->hs_cache_dir,
+			(gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+	mapped = rspamd_file_xmap (cache_path, PROT_READ, &mapped_len, TRUE);
+
+	if (mapped == NULL) {
+		return FALSE;
+	}
+
+	if (hs_deserialize_database (mapped, mapped_len,
+			&re_map->hs_db) != HS_SUCCESS) {
+		munmap (mapped, mapped_len);
+		/* Remove stale file */
+		(void)unlink (cache_path);
+
+		return FALSE;
+	}
+
+	rspamd_re_map_cache_update (cache_path, re_map->map->cfg);
+	munmap (mapped, mapped_len);
+
+	return TRUE;
+}
+
+/*
+ * Serializes the compiled hyperscan database of `re_map` and stores it as
+ * "<hs_cache_dir>/<hex of re_digest>.hsmc".
+ *
+ * The data is written to a "<...>.hsmc.tmp" file opened with O_EXCL and then
+ * renamed to the final name, so an incomplete file never appears under the
+ * final name. The temporary file is unlinked on every failure path.
+ *
+ * Returns TRUE when the cache file has been fully written, renamed and
+ * recorded through rspamd_re_map_cache_update; FALSE otherwise, including
+ * the case when no cache directory is configured.
+ */
+static gboolean
+rspamd_try_save_re_map_cache (struct rspamd_regexp_map_helper *re_map)
+{
+	gchar fp[PATH_MAX], np[PATH_MAX];
+	gsize len = 0, remain;
+	gssize r;
+	gint fd, hs_ret;
+	char *bytes = NULL;
+	const char *pos;
+	gboolean ret = FALSE;
+	/* The msg_*_map logging calls below are used with this local in scope */
+	struct rspamd_map *map;
+
+	map = re_map->map;
+
+	if (!map->cfg->hs_cache_dir) {
+		return FALSE;
+	}
+
+	rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc.tmp",
+			map->cfg->hs_cache_dir,
+			(gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+	fd = rspamd_file_xopen (fp, O_WRONLY | O_CREAT | O_EXCL, 00644, 0);
+
+	if (fd == -1) {
+		return FALSE;
+	}
+
+	hs_ret = hs_serialize_database (re_map->hs_db, &bytes, &len);
+
+	if (hs_ret != HS_SUCCESS) {
+		/* Hyperscan reports failures via its return code, not errno */
+		msg_warn_map ("cannot serialize hyperscan cache to %s: error code %d",
+				fp, hs_ret);
+		unlink (fp);
+		close (fd);
+
+		return FALSE;
+	}
+
+	/* write() may be interrupted or store fewer bytes than requested */
+	pos = bytes;
+	remain = len;
+
+	while (remain > 0) {
+		r = write (fd, pos, remain);
+
+		if (r == -1 && errno == EINTR) {
+			continue;
+		}
+
+		if (r <= 0) {
+			break;
+		}
+
+		pos += r;
+		remain -= r;
+	}
+
+	if (remain > 0) {
+		msg_warn_map ("cannot write hyperscan cache to %s: %s",
+				fp, strerror (errno));
+		unlink (fp);
+	}
+	else {
+		fsync (fd);
+
+		rspamd_snprintf (np, sizeof (np), "%s/%*xs.hsmc",
+				map->cfg->hs_cache_dir,
+				(gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+		if (rename (fp, np) == -1) {
+			msg_warn_map ("cannot rename hyperscan cache from %s to %s: %s",
+					fp, np, strerror (errno));
+			unlink (fp);
+		}
+		else {
+			msg_info_map ("written cached hyperscan data for %s to %s",
+					map->name, np);
+
+			rspamd_re_map_cache_update (np, map->cfg);
+			ret = TRUE;
+		}
+	}
+
+	free (bytes);
+	close (fd);
+
+	return ret;
+}
+#endif
+
static void
rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
{
@@ -1106,25 +1221,36 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
}
if (re_map->regexps->len > 0 && re_map->patterns) {
- gdouble ts1 = rspamd_get_ticks (FALSE);
-
- if (hs_compile_multi ((const gchar **)re_map->patterns,
- re_map->flags,
- re_map->ids,
- re_map->regexps->len,
- HS_MODE_BLOCK,
- &plt,
- &re_map->hs_db,
- &err) != HS_SUCCESS) {
-
- msg_err_map ("cannot create tree of regexp when processing '%s': %s",
- err->expression >= 0 ?
- re_map->patterns[err->expression] :
- "unknown regexp", err->message);
- re_map->hs_db = NULL;
- hs_free_compile_error (err);
- return;
+ if (!rspamd_try_load_re_map_cache (re_map)) {
+ gdouble ts1 = rspamd_get_ticks (FALSE);
+
+ if (hs_compile_multi ((const gchar **) re_map->patterns,
+ re_map->flags,
+ re_map->ids,
+ re_map->regexps->len,
+ HS_MODE_BLOCK,
+ &plt,
+ &re_map->hs_db,
+ &err) != HS_SUCCESS) {
+
+ msg_err_map ("cannot create tree of regexp when processing '%s': %s",
+ err->expression >= 0 ?
+ re_map->patterns[err->expression] :
+ "unknown regexp", err->message);
+ re_map->hs_db = NULL;
+ hs_free_compile_error (err);
+
+ return;
+ }
+
+ ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
+ msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
+ re_map->regexps->len, re_map->map->name, ts1);
+ }
+ else {
+ msg_info_map ("hyperscan read %d cached regular expressions from %s",
+ re_map->regexps->len, re_map->map->name);
}
if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
@@ -1132,10 +1258,6 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
hs_free_database (re_map->hs_db);
re_map->hs_db = NULL;
}
-
- ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
- msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
- re_map->regexps->len, re_map->map->name, ts1);
}
else {
msg_err_map ("regexp map is empty");
diff --git a/src/libserver/mempool_vars_internal.h b/src/libserver/mempool_vars_internal.h
index 576635a9b..6b68dd5a5 100644
--- a/src/libserver/mempool_vars_internal.h
+++ b/src/libserver/mempool_vars_internal.h
@@ -40,5 +40,6 @@
#define RSPAMD_MEMPOOL_FUZZY_RESULT "fuzzy_hashes"
#define RSPAMD_MEMPOOL_SPAM_LEARNS "spam_learns"
#define RSPAMD_MEMPOOL_HAM_LEARNS "ham_learns"
+#define RSPAMD_MEMPOOL_RE_MAPS_CACHE "re_maps_cache"
#endif
More information about the Commits
mailing list