commit 64205e2: [Rework] Re cache: Load hyperscan early

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Mar 23 15:35:06 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-03-23 15:34:04 +0000
URL: https://github.com/rspamd/rspamd/commit/64205e24d42f4c9a19ed48579141f619aa792c74 (HEAD -> master)

[Rework] Re cache: Load hyperscan early

---
 src/libserver/cfg_utils.c   |  4 ++++
 src/libserver/re_cache.c    | 39 ++++++++++++++++++++++++++++-----------
 src/libserver/re_cache.h    | 16 ++++++++++++----
 src/libserver/worker_util.c |  6 ++++--
 4 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/src/libserver/cfg_utils.c b/src/libserver/cfg_utils.c
index 8486a029f..23ad382b0 100644
--- a/src/libserver/cfg_utils.c
+++ b/src/libserver/cfg_utils.c
@@ -888,6 +888,10 @@ rspamd_config_post_load (struct rspamd_config *cfg,
 
 		/* Init re cache */
 		rspamd_re_cache_init (cfg->re_cache, cfg);
+
+		/* Try to load Hyperscan */
+		rspamd_re_cache_load_hyperscan (cfg->re_cache,
+				cfg->hs_cache_dir ? cfg->hs_cache_dir :  RSPAMD_DBDIR "/");
 	}
 
 	if (opts & RSPAMD_CONFIG_INIT_LIBS) {
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 257428720..29edbe0f7 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -124,7 +124,7 @@ struct rspamd_re_cache {
 	gchar hash[rspamd_cryptobox_HASHBYTES + 1];
 	lua_State *L;
 #ifdef WITH_HYPERSCAN
-	gboolean hyperscan_loaded;
+	enum rspamd_hyperscan_status hyperscan_loaded;
 	gboolean disable_hyperscan;
 	gboolean vectorized_hyperscan;
 	hs_platform_info_t plt;
@@ -241,14 +241,14 @@ rspamd_re_cache_new (void)
 	cache->re = g_ptr_array_new_full (256, rspamd_re_cache_elt_dtor);
 	cache->selectors = kh_init (lua_selectors_hash);
 #ifdef WITH_HYPERSCAN
-	cache->hyperscan_loaded = FALSE;
+	cache->hyperscan_loaded = RSPAMD_HYPERSCAN_UNKNOWN;
 #endif
 	REF_INIT_RETAIN (cache, rspamd_re_cache_destroy);
 
 	return cache;
 }
 
-gboolean
+enum rspamd_hyperscan_status
 rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache)
 {
 	g_assert (cache != NULL);
@@ -256,7 +256,7 @@ rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache)
 #ifdef WITH_HYPERSCAN
 	return cache->hyperscan_loaded;
 #else
-	return FALSE;
+	return RSPAMD_HYPERSCAN_UNSUPPORTED;
 #endif
 }
 
@@ -2364,7 +2364,7 @@ rspamd_re_cache_is_valid_hyperscan_file (struct rspamd_re_cache *cache,
 }
 
 
-gboolean
+enum rspamd_hyperscan_status
 rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 		const char *cache_dir)
 {
@@ -2372,7 +2372,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 	g_assert (cache_dir != NULL);
 
 #ifndef WITH_HYPERSCAN
-	return FALSE;
+	return RSPAMD_HYPERSCAN_UNSUPPORTED;
 #else
 	gchar path[PATH_MAX];
 	gint fd, i, n, *hs_ids = NULL, *hs_flags = NULL, total = 0, ret;
@@ -2382,7 +2382,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 	struct rspamd_re_class *re_class;
 	struct rspamd_re_cache_elt *elt;
 	struct stat st;
-	gboolean has_valid = FALSE;
+	gboolean has_valid = FALSE, all_valid = FALSE;
 
 	g_hash_table_iter_init (&it, cache->re_classes);
 
@@ -2406,6 +2406,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 			if (map == MAP_FAILED) {
 				msg_err_re_cache ("cannot mmap %s: %s", path, strerror (errno));
 				close (fd);
+				all_valid = FALSE;
 				continue;
 			}
 
@@ -2422,6 +2423,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 				msg_err_re_cache ("bad number of expressions in %s: %d",
 						path, n);
 				munmap (map, st.st_size);
+				all_valid = FALSE;
 				continue;
 			}
 
@@ -2463,6 +2465,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 				re_class->hs_ids = NULL;
 				re_class->hs_scratch = NULL;
 				re_class->hs_db = NULL;
+				all_valid = FALSE;
 
 				continue;
 			}
@@ -2491,24 +2494,38 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 			re_class->hs_ids = hs_ids;
 			g_free (hs_flags);
 			re_class->nhs = n;
-			has_valid = TRUE;
+
+			if (!has_valid) {
+				has_valid = TRUE;
+				all_valid = TRUE;
+			}
 		}
 		else {
 			msg_err_re_cache ("invalid hyperscan hash file '%s'",
 					path);
+			all_valid = FALSE;
 			continue;
 		}
 	}
 
 	if (has_valid) {
-		msg_info_re_cache ("hyperscan database of %d regexps has been loaded", total);
+		if (all_valid) {
+			msg_info_re_cache ("full hyperscan database of %d regexps has been loaded", total);
+			cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_FULL;
+		}
+		else {
+			msg_info_re_cache ("partial hyperscan database of %d regexps has been loaded", total);
+			cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOADED_PARTIAL;
+		}
 	}
 	else {
 		msg_info_re_cache ("hyperscan database has NOT been loaded; no valid expressions");
+		cache->hyperscan_loaded = RSPAMD_HYPERSCAN_LOAD_ERROR;
 	}
-	cache->hyperscan_loaded = has_valid;
 
-	return has_valid;
+
+
+	return cache->hyperscan_loaded;
 #endif
 }
 
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
index 75cee0235..79bcaca16 100644
--- a/src/libserver/re_cache.h
+++ b/src/libserver/re_cache.h
@@ -90,12 +90,20 @@ void rspamd_re_cache_replace (struct rspamd_re_cache *cache,
 void rspamd_re_cache_init (struct rspamd_re_cache *cache,
 						   struct rspamd_config *cfg);
 
+enum rspamd_hyperscan_status {
+	RSPAMD_HYPERSCAN_UNKNOWN = 0, /* initial state assigned in rspamd_re_cache_new() */
+	RSPAMD_HYPERSCAN_UNSUPPORTED, /* returned when built without WITH_HYPERSCAN */
+	RSPAMD_HYPERSCAN_LOADED_PARTIAL, /* load: some classes loaded, all_valid flag cleared */
+	RSPAMD_HYPERSCAN_LOADED_FULL, /* load: classes loaded with all_valid flag still set */
+	RSPAMD_HYPERSCAN_LOAD_ERROR, /* load: no valid class database was loaded */
+};
+
 /**
  * Returns true when hyperscan is loaded
  * @param cache
  * @return
  */
-gboolean rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache);
+enum rspamd_hyperscan_status rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache);
 
 /**
  * Get runtime data for a cache
@@ -173,7 +181,6 @@ gint rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
 										void (*cb)(guint ncompiled, GError *err, void *cbd),
 										void *cbd);
 
-
 /**
  * Returns TRUE if the specified file is valid hyperscan cache
  */
@@ -183,8 +190,9 @@ gboolean rspamd_re_cache_is_valid_hyperscan_file (struct rspamd_re_cache *cache,
 /**
  * Loads all hyperscan regexps precompiled
  */
-gboolean rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
-										 const char *cache_dir);
+enum rspamd_hyperscan_status rspamd_re_cache_load_hyperscan (
+		struct rspamd_re_cache *cache,
+		const char *cache_dir);
 
 /**
  * Registers lua selector in the cache
diff --git a/src/libserver/worker_util.c b/src/libserver/worker_util.c
index ceb2f1103..5796b8d7f 100644
--- a/src/libserver/worker_util.c
+++ b/src/libserver/worker_util.c
@@ -1589,10 +1589,12 @@ rspamd_worker_hyperscan_ready (struct rspamd_main *rspamd_main,
 	memset (&rep, 0, sizeof (rep));
 	rep.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
 
-	if (!rspamd_re_cache_is_hs_loaded (cache) || cmd->cmd.hs_loaded.forced) {
+	if (rspamd_re_cache_is_hs_loaded (cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
+		cmd->cmd.hs_loaded.forced) {
+
 		msg_info ("loading hyperscan expressions after receiving compilation "
 				  "notice: %s",
-				(!rspamd_re_cache_is_hs_loaded (cache)) ?
+				(rspamd_re_cache_is_hs_loaded (cache) != RSPAMD_HYPERSCAN_LOADED_FULL) ?
 				"new db" : "forced update");
 		rep.reply.hs_loaded.status = rspamd_re_cache_load_hyperscan (
 				worker->srv->cfg->re_cache, cmd->cmd.hs_loaded.cache_dir);


More information about the Commits mailing list