commit e1ab42c: [Rework] Add preliminary support of hyperscan caching for re maps

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Nov 25 13:35:08 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-11-24 16:07:45 +0000
URL: https://github.com/rspamd/rspamd/commit/e1ab42c4cfaf611759d01b4a813626afb48475ba

[Rework] Add preliminary support of hyperscan caching for re maps

---
 src/libserver/maps/map_helpers.c      | 166 +++++++++++++++++++++++++++++-----
 src/libserver/mempool_vars_internal.h |   1 +
 2 files changed, 145 insertions(+), 22 deletions(-)

diff --git a/src/libserver/maps/map_helpers.c b/src/libserver/maps/map_helpers.c
index 7eacdf61a..084806573 100644
--- a/src/libserver/maps/map_helpers.c
+++ b/src/libserver/maps/map_helpers.c
@@ -20,6 +20,7 @@
 #include "radix.h"
 #include "rspamd.h"
 #include "cryptobox.h"
+#include "mempool_vars_internal.h"
 #include "contrib/fastutf8/fastutf8.h"
 #include "contrib/cdb/cdb.h"
 
@@ -1029,6 +1030,120 @@ rspamd_radix_dtor (struct map_cb_data *data)
 	}
 }
 
+#ifdef WITH_HYPERSCAN
+
+/* Records fname in the table of re map cache files stored in cfg->cfg_pool */
+static void
+rspamd_re_map_cache_update (const gchar *fname, struct rspamd_config *cfg)
+{
+	GHashTable *known_files = rspamd_mempool_get_variable (cfg->cfg_pool,
+			RSPAMD_MEMPOOL_RE_MAPS_CACHE);
+
+	if (known_files == NULL) {
+		/* Lazily create the table: keys are g_free'd, unref is the pool dtor */
+		known_files = g_hash_table_new_full (g_str_hash, g_str_equal,
+				g_free, NULL);
+		rspamd_mempool_set_variable (cfg->cfg_pool,
+				RSPAMD_MEMPOOL_RE_MAPS_CACHE, known_files,
+				(rspamd_mempool_destruct_t)g_hash_table_unref);
+	}
+
+	g_hash_table_insert (known_files, g_strdup (fname), "1");
+}
+
+/* Tries to load re_map->hs_db from its cache file in cfg->hs_cache_dir */
+static gboolean
+rspamd_try_load_re_map_cache (struct rspamd_regexp_map_helper *re_map)
+{
+	struct rspamd_config *cfg = re_map->map->cfg;
+	gchar cache_path[PATH_MAX];
+	gpointer mapped;
+	gsize mapped_len;
+
+	if (cfg->hs_cache_dir == NULL) {
+		return FALSE;
+	}
+
+	rspamd_snprintf (cache_path, sizeof (cache_path), "%s/%*xs.hsmc",
+			cfg->hs_cache_dir, (gint)rspamd_cryptobox_HASHBYTES / 2,
+			re_map->re_digest);
+	mapped = rspamd_file_xmap (cache_path, PROT_READ, &mapped_len, TRUE);
+	if (mapped == NULL) {
+		return FALSE;
+	}
+
+	if (hs_deserialize_database (mapped, mapped_len, &re_map->hs_db) != HS_SUCCESS) {
+		munmap (mapped, mapped_len);
+		(void)unlink (cache_path); /* Remove stale file */
+		return FALSE;
+	}
+	rspamd_re_map_cache_update (cache_path, cfg);
+	munmap (mapped, mapped_len);
+	return TRUE;
+}
+
+/* Saves re_map->hs_db to hs_cache_dir through a ".tmp" file renamed into
+ * place; returns TRUE only if the complete file was written and renamed */
+static gboolean
+rspamd_try_save_re_map_cache (struct rspamd_regexp_map_helper *re_map)
+{
+	gchar fp[PATH_MAX], np[PATH_MAX];
+	gsize len = 0, done = 0;
+	gssize r = 0;
+	gint fd, hs_ret;
+	char *bytes = NULL;
+	struct rspamd_map *map = re_map->map;
+	gboolean ret = FALSE;
+
+	if (!map->cfg->hs_cache_dir) {
+		return FALSE;
+	}
+
+	rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmc.tmp", map->cfg->hs_cache_dir,
+			(gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+	rspamd_snprintf (np, sizeof (np), "%s/%*xs.hsmc", map->cfg->hs_cache_dir,
+			(gint)rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+
+	if ((fd = rspamd_file_xopen (fp, O_WRONLY | O_CREAT | O_EXCL, 00644, 0)) == -1) {
+		return FALSE;
+	}
+
+	hs_ret = hs_serialize_database (re_map->hs_db, &bytes, &len);
+	if (hs_ret != HS_SUCCESS) {
+		msg_warn_map ("cannot serialize hyperscan cache to %s: error code %d",
+				fp, hs_ret);
+		goto out; /* hyperscan reports failures via its return code, not errno */
+	}
+
+	/* write() may be partial; a truncated cache must never be renamed */
+	while (done < len && (r = write (fd, bytes + done, len - done)) > 0) {
+		done += r;
+	}
+	if (done < len || fsync (fd) == -1) {
+		msg_warn_map ("cannot write hyperscan cache to %s: %s",
+				fp, strerror (errno));
+	}
+	else if (rename (fp, np) == -1) {
+		msg_warn_map ("cannot rename hyperscan cache from %s to %s: %s",
+				fp, np, strerror (errno));
+	}
+	else {
+		msg_info_map ("written cached hyperscan data for %s to %s",
+				map->name, np);
+		rspamd_re_map_cache_update (np, map->cfg);
+		ret = TRUE;
+	}
+
+out:
+	free (bytes);
+	close (fd);
+	if (!ret) {
+		(void)unlink (fp);
+	}
+	return ret;
+}
+#endif
+
 static void
 rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
 {
@@ -1106,25 +1221,36 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
 	}
 
 	if (re_map->regexps->len > 0 && re_map->patterns) {
-		gdouble ts1 = rspamd_get_ticks (FALSE);
-
-		if (hs_compile_multi ((const gchar **)re_map->patterns,
-				re_map->flags,
-				re_map->ids,
-				re_map->regexps->len,
-				HS_MODE_BLOCK,
-				&plt,
-				&re_map->hs_db,
-				&err) != HS_SUCCESS) {
-
-			msg_err_map ("cannot create tree of regexp when processing '%s': %s",
-					err->expression >= 0 ?
-							re_map->patterns[err->expression] :
-							"unknown regexp", err->message);
-			re_map->hs_db = NULL;
-			hs_free_compile_error (err);
 
-			return;
+		if (!rspamd_try_load_re_map_cache (re_map)) {
+			gdouble ts1 = rspamd_get_ticks (FALSE);
+
+			if (hs_compile_multi ((const gchar **) re_map->patterns,
+					re_map->flags,
+					re_map->ids,
+					re_map->regexps->len,
+					HS_MODE_BLOCK,
+					&plt,
+					&re_map->hs_db,
+					&err) != HS_SUCCESS) {
+
+				msg_err_map ("cannot create tree of regexp when processing '%s': %s",
+						err->expression >= 0 ?
+						re_map->patterns[err->expression] :
+						"unknown regexp", err->message);
+				re_map->hs_db = NULL;
+				hs_free_compile_error (err);
+
+				return;
+			}
+
+			ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
+			msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
+					re_map->regexps->len, re_map->map->name, ts1);
+		}
+		else {
+			msg_info_map ("hyperscan read %d cached regular expressions from %s",
+					re_map->regexps->len, re_map->map->name);
 		}
 
 		if (hs_alloc_scratch (re_map->hs_db, &re_map->hs_scratch) != HS_SUCCESS) {
@@ -1132,10 +1258,6 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
 			hs_free_database (re_map->hs_db);
 			re_map->hs_db = NULL;
 		}
-
-		ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
-		msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
-				re_map->regexps->len, re_map->map->name, ts1);
 	}
 	else {
 		msg_err_map ("regexp map is empty");
diff --git a/src/libserver/mempool_vars_internal.h b/src/libserver/mempool_vars_internal.h
index 576635a9b..6b68dd5a5 100644
--- a/src/libserver/mempool_vars_internal.h
+++ b/src/libserver/mempool_vars_internal.h
@@ -40,5 +40,6 @@
 #define RSPAMD_MEMPOOL_FUZZY_RESULT "fuzzy_hashes"
 #define RSPAMD_MEMPOOL_SPAM_LEARNS "spam_learns"
 #define RSPAMD_MEMPOOL_HAM_LEARNS "ham_learns"
+#define RSPAMD_MEMPOOL_RE_MAPS_CACHE "re_maps_cache"
 
 #endif


More information about the Commits mailing list