commit 3a4c774: [Rework] Allow execution of async events when hs compiles regexps

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Oct 2 17:49:07 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-10-02 18:41:37 +0100
URL: https://github.com/rspamd/rspamd/commit/3a4c7742435d93b617c8fd8239425d56f95a6639 (HEAD -> master)

[Rework] Allow execution of async events when hs compiles regexps

---
 src/hs_helper.c          |  82 ++++---
 src/libserver/re_cache.c | 563 ++++++++++++++++++++++++++---------------------
 src/libserver/re_cache.h |   9 +-
 3 files changed, 379 insertions(+), 275 deletions(-)

diff --git a/src/hs_helper.c b/src/hs_helper.c
index f83a9d429..3cdc2a439 100644
--- a/src/hs_helper.c
+++ b/src/hs_helper.c
@@ -178,37 +178,44 @@ rspamd_hs_helper_cleanup_dir (struct hs_helper_ctx *ctx, gboolean forced)
 	return ret;
 }
 
-static gboolean
-rspamd_rs_compile (struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
-		gboolean forced)
+/* Bad hack, but who cares */
+static gboolean hack_global_forced;
+
+static void
+rspamd_rs_delayed_cb (EV_P_ ev_timer *w, int revents)
 {
-	GError *err = NULL;
+	struct rspamd_worker *worker = (struct rspamd_worker *)w->data;
 	static struct rspamd_srv_command srv_cmd;
-	gint ncompiled;
+	struct hs_helper_ctx *ctx;
 
-	if (!(ctx->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
-		msg_warn ("CPU doesn't have SSSE3 instructions set "
-				"required for hyperscan, disable hyperscan compilation");
-		return FALSE;
-	}
+	ctx = (struct hs_helper_ctx *)worker->ctx;
+	memset (&srv_cmd, 0, sizeof (srv_cmd));
+	srv_cmd.type = RSPAMD_SRV_HYPERSCAN_LOADED;
+	rspamd_strlcpy (srv_cmd.cmd.hs_loaded.cache_dir, ctx->hs_dir,
+			sizeof (srv_cmd.cmd.hs_loaded.cache_dir));
+	srv_cmd.cmd.hs_loaded.forced = hack_global_forced;
+	hack_global_forced = FALSE;
 
-	if (!rspamd_hs_helper_cleanup_dir (ctx, forced)) {
-		msg_warn ("cannot cleanup cache dir '%s'", ctx->hs_dir);
-	}
+	rspamd_srv_send_command (worker,
+			ctx->event_loop, &srv_cmd, -1, NULL, NULL);
+	ev_timer_stop (EV_A_ w);
+	g_free (w);
+}
 
-	if ((ncompiled = rspamd_re_cache_compile_hyperscan (ctx->cfg->re_cache,
-			ctx->hs_dir, ctx->max_time, !forced,
-			&err)) == -1) {
-		msg_err ("failed to compile re cache: %e", err);
-		g_error_free (err);
+static void
+rspamd_rs_compile_cb (guint ncompiled, GError *err, void *cbd)
+{
+	struct rspamd_worker *worker = (struct rspamd_worker *)cbd;
+	ev_timer *tm;
+	ev_tstamp when = 0.0;
+	struct hs_helper_ctx *ctx;
 
-		return FALSE;
-	}
+	ctx = (struct hs_helper_ctx *)worker->ctx;
 
 	if (ncompiled > 0) {
 		msg_info ("compiled %d regular expressions to the hyperscan tree",
 				ncompiled);
-		forced = TRUE;
+		hack_global_forced = TRUE;
 	}
 
 	/*
@@ -216,17 +223,36 @@ rspamd_rs_compile (struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
 	 * XXX: now we just sleep for 5 seconds to ensure that
 	 */
 	if (!ctx->loaded) {
-		ev_sleep (5.0);
+		when = 5.0; /* Postpone */
 		ctx->loaded = TRUE;
 	}
 
-	memset (&srv_cmd, 0, sizeof (srv_cmd));
-	srv_cmd.type = RSPAMD_SRV_HYPERSCAN_LOADED;
-	rspamd_strlcpy (srv_cmd.cmd.hs_loaded.cache_dir, ctx->hs_dir,
-			sizeof (srv_cmd.cmd.hs_loaded.cache_dir));
-	srv_cmd.cmd.hs_loaded.forced = forced;
+	tm = g_malloc0 (sizeof (*tm));
+	tm->data = (void *)worker;
+	ev_timer_init (tm, rspamd_rs_delayed_cb, when, 0);
+	ev_timer_start (ctx->event_loop, tm);
+}
 
-	rspamd_srv_send_command (worker, ctx->event_loop, &srv_cmd, -1, NULL, NULL);
+static gboolean
+rspamd_rs_compile (struct hs_helper_ctx *ctx, struct rspamd_worker *worker,
+		gboolean forced)
+{
+	if (!(ctx->cfg->libs_ctx->crypto_ctx->cpu_config & CPUID_SSSE3)) {
+		msg_warn ("CPU doesn't have SSSE3 instructions set "
+				"required for hyperscan, disable hyperscan compilation");
+		return FALSE;
+	}
+
+	if (!rspamd_hs_helper_cleanup_dir (ctx, forced)) {
+		msg_warn ("cannot cleanup cache dir '%s'", ctx->hs_dir);
+	}
+
+	hack_global_forced = forced; /* killmeplease */
+	rspamd_re_cache_compile_hyperscan (ctx->cfg->re_cache,
+			ctx->hs_dir, ctx->max_time, !forced,
+			ctx->event_loop,
+			rspamd_rs_compile_cb,
+			(void *)worker);
 
 	return TRUE;
 }
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 617322926..d93cb8f13 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -33,6 +33,7 @@
 #include "unix-std.h"
 #include <signal.h>
 #include <stdalign.h>
+#include "contrib/libev/ev.h"
 
 #ifndef WITH_PCRE2
 #include <pcre.h>
@@ -1678,19 +1679,35 @@ rspamd_re_cache_is_finite (struct rspamd_re_cache *cache,
 }
 #endif
 
-gint
-rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
-		const char *cache_dir, gdouble max_time, gboolean silent,
-		GError **err)
+#ifdef WITH_HYPERSCAN
+struct rspamd_re_cache_hs_compile_cbdata {
+	GHashTableIter it;
+	struct rspamd_re_cache *cache;
+	const char *cache_dir;
+	gdouble max_time;
+	gboolean silent;
+	guint total;
+	void (*cb)(guint ncompiled, GError *err, void *cbd);
+	void *cbd;
+};
+
+static void
+rspamd_re_cache_compile_err (EV_P_ ev_timer *w, GError *err,
+		struct rspamd_re_cache_hs_compile_cbdata *cbdata)
 {
-	g_assert (cache != NULL);
-	g_assert (cache_dir != NULL);
+	ev_timer_stop (EV_A_ w);
+	cbdata->cb (cbdata->total, err, cbdata->cb);
+	g_free (w);
+	g_free (cbdata);
+	g_error_free (err);
+}
 
-#ifndef WITH_HYPERSCAN
-	g_set_error (err, rspamd_re_cache_quark (), EINVAL, "hyperscan is disabled");
-	return -1;
-#else
-	GHashTableIter it, cit;
+static void
+rspamd_re_cache_compile_timer_cb (EV_P_ ev_timer *w, int revents )
+{
+	struct rspamd_re_cache_hs_compile_cbdata *cbdata =
+			(struct rspamd_re_cache_hs_compile_cbdata *)w->data;
+	GHashTableIter cit;
 	gpointer k, v;
 	struct rspamd_re_class *re_class;
 	gchar path[PATH_MAX], npath[PATH_MAX];
@@ -1704,298 +1721,354 @@ rspamd_re_cache_compile_hyperscan (struct rspamd_re_cache *cache,
 	const hs_expr_ext_t **hs_exts = NULL;
 	gchar **hs_pats = NULL;
 	gchar *hs_serialized;
-	gsize serialized_len, total = 0;
+	gsize serialized_len;
 	struct iovec iov[7];
+	struct rspamd_re_cache *cache;
+	GError *err;
 
-	g_hash_table_iter_init (&it, cache->re_classes);
+	cache = cbdata->cache;
 
-	while (g_hash_table_iter_next (&it, &k, &v)) {
-		re_class = v;
-		rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cache_dir,
-				G_DIR_SEPARATOR, re_class->hash);
+	if (!g_hash_table_iter_next (&cbdata->it, &k, &v)) {
+		/* All done */
+		ev_timer_stop (EV_A_ w);
+		cbdata->cb (cbdata->total, NULL, cbdata->cbd);
+		g_free (w);
+		g_free (cbdata);
 
-		if (rspamd_re_cache_is_valid_hyperscan_file (cache, path, TRUE, TRUE)) {
+		return;
+	}
 
-			fd = open (path, O_RDONLY, 00600);
+	re_class = v;
+	rspamd_snprintf (path, sizeof (path), "%s%c%s.hs", cbdata->cache_dir,
+			G_DIR_SEPARATOR, re_class->hash);
 
-			/* Read number of regexps */
-			g_assert (fd != -1);
-			lseek (fd, RSPAMD_HS_MAGIC_LEN + sizeof (cache->plt), SEEK_SET);
-			g_assert (read (fd, &n, sizeof (n)) == sizeof (n));
-			close (fd);
+	if (rspamd_re_cache_is_valid_hyperscan_file (cache, path, TRUE, TRUE)) {
 
-			if (re_class->type_len > 0) {
-				if (!silent) {
-					msg_info_re_cache (
-							"skip already valid class %s(%*s) to cache %6s, %d regexps",
-							rspamd_re_cache_type_to_string (re_class->type),
-							(gint) re_class->type_len - 1,
-							re_class->type_data,
-							re_class->hash,
-							n);
-				}
+		fd = open (path, O_RDONLY, 00600);
+
+		/* Read number of regexps */
+		g_assert (fd != -1);
+		lseek (fd, RSPAMD_HS_MAGIC_LEN + sizeof (cache->plt), SEEK_SET);
+		g_assert (read (fd, &n, sizeof (n)) == sizeof (n));
+		close (fd);
+
+		if (re_class->type_len > 0) {
+			if (!cbdata->silent) {
+				msg_info_re_cache (
+						"skip already valid class %s(%*s) to cache %6s, %d regexps",
+						rspamd_re_cache_type_to_string (re_class->type),
+						(gint) re_class->type_len - 1,
+						re_class->type_data,
+						re_class->hash,
+						n);
 			}
-			else {
-				if (!silent) {
-					msg_info_re_cache (
-							"skip already valid class %s to cache %6s, %d regexps",
-							rspamd_re_cache_type_to_string (re_class->type),
-							re_class->hash,
-							n);
-				}
+		}
+		else {
+			if (!cbdata->silent) {
+				msg_info_re_cache (
+						"skip already valid class %s to cache %6s, %d regexps",
+						rspamd_re_cache_type_to_string (re_class->type),
+						re_class->hash,
+						n);
 			}
-
-			continue;
 		}
 
-		rspamd_snprintf (path, sizeof (path), "%s%c%s.hs.new", cache_dir,
-						G_DIR_SEPARATOR, re_class->hash);
-		fd = open (path, O_CREAT|O_TRUNC|O_EXCL|O_WRONLY, 00600);
+		ev_timer_again (EV_A_ w);
+		return;
+	}
 
-		if (fd == -1) {
-			g_set_error (err, rspamd_re_cache_quark (), errno, "cannot open file "
-					"%s: %s", path, strerror (errno));
-			return -1;
-		}
+	rspamd_snprintf (path, sizeof (path), "%s%c%s.hs.new", cbdata->cache_dir,
+			G_DIR_SEPARATOR, re_class->hash);
+	fd = open (path, O_CREAT|O_TRUNC|O_EXCL|O_WRONLY, 00600);
 
-		g_hash_table_iter_init (&cit, re_class->re);
-		n = g_hash_table_size (re_class->re);
-		hs_flags = g_malloc0 (sizeof (*hs_flags) * n);
-		hs_ids = g_malloc (sizeof (*hs_ids) * n);
-		hs_pats = g_malloc (sizeof (*hs_pats) * n);
-		hs_exts = g_malloc0 (sizeof (*hs_exts) * n);
-		i = 0;
+	if (fd == -1) {
+		err = g_error_new (rspamd_re_cache_quark (), errno,
+				"cannot open file %s: %s", path, strerror (errno));
+		rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
+		return;
+	}
 
-		while (g_hash_table_iter_next (&cit, &k, &v)) {
-			re = v;
+	g_hash_table_iter_init (&cit, re_class->re);
+	n = g_hash_table_size (re_class->re);
+	hs_flags = g_malloc0 (sizeof (*hs_flags) * n);
+	hs_ids = g_malloc (sizeof (*hs_ids) * n);
+	hs_pats = g_malloc (sizeof (*hs_pats) * n);
+	hs_exts = g_malloc0 (sizeof (*hs_exts) * n);
+	i = 0;
 
-			pcre_flags = rspamd_regexp_get_pcre_flags (re);
-			re_flags = rspamd_regexp_get_flags (re);
+	while (g_hash_table_iter_next (&cit, &k, &v)) {
+		re = v;
 
-			if (re_flags & RSPAMD_REGEXP_FLAG_PCRE_ONLY) {
-				/* Do not try to compile bad regexp */
-				msg_info_re_cache (
-						"do not try compile %s to hyperscan as it is PCRE only",
-						rspamd_regexp_get_pattern (re));
-				continue;
-			}
+		pcre_flags = rspamd_regexp_get_pcre_flags (re);
+		re_flags = rspamd_regexp_get_flags (re);
+
+		if (re_flags & RSPAMD_REGEXP_FLAG_PCRE_ONLY) {
+			/* Do not try to compile bad regexp */
+			msg_info_re_cache (
+					"do not try compile %s to hyperscan as it is PCRE only",
+					rspamd_regexp_get_pattern (re));
+			continue;
+		}
 
-			hs_flags[i] = 0;
-			hs_exts[i] = NULL;
+		hs_flags[i] = 0;
+		hs_exts[i] = NULL;
 #ifndef WITH_PCRE2
-			if (pcre_flags & PCRE_FLAG(UTF8)) {
-				hs_flags[i] |= HS_FLAG_UTF8;
-			}
+		if (pcre_flags & PCRE_FLAG(UTF8)) {
+			hs_flags[i] |= HS_FLAG_UTF8;
+		}
 #else
-			if (pcre_flags & PCRE_FLAG(UTF)) {
+		if (pcre_flags & PCRE_FLAG(UTF)) {
 				hs_flags[i] |= HS_FLAG_UTF8;
 			}
 #endif
-			if (pcre_flags & PCRE_FLAG(CASELESS)) {
-				hs_flags[i] |= HS_FLAG_CASELESS;
-			}
-			if (pcre_flags & PCRE_FLAG(MULTILINE)) {
-				hs_flags[i] |= HS_FLAG_MULTILINE;
-			}
-			if (pcre_flags & PCRE_FLAG(DOTALL)) {
-				hs_flags[i] |= HS_FLAG_DOTALL;
-			}
-			if (rspamd_regexp_get_maxhits (re) == 1) {
-				hs_flags[i] |= HS_FLAG_SINGLEMATCH;
-			}
+		if (pcre_flags & PCRE_FLAG(CASELESS)) {
+			hs_flags[i] |= HS_FLAG_CASELESS;
+		}
+		if (pcre_flags & PCRE_FLAG(MULTILINE)) {
+			hs_flags[i] |= HS_FLAG_MULTILINE;
+		}
+		if (pcre_flags & PCRE_FLAG(DOTALL)) {
+			hs_flags[i] |= HS_FLAG_DOTALL;
+		}
+		if (rspamd_regexp_get_maxhits (re) == 1) {
+			hs_flags[i] |= HS_FLAG_SINGLEMATCH;
+		}
 
-			gchar *pat = rspamd_re_cache_hs_pattern_from_pcre (re);
+		gchar *pat = rspamd_re_cache_hs_pattern_from_pcre (re);
 
-			if (hs_compile (pat,
-					hs_flags[i],
-					cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
-					&cache->plt,
-					&test_db,
-					&hs_errors) != HS_SUCCESS) {
-				msg_info_re_cache ("cannot compile %s to hyperscan, try prefilter match",
-						pat);
-				hs_free_compile_error (hs_errors);
+		if (hs_compile (pat,
+				hs_flags[i],
+				cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
+				&cache->plt,
+				&test_db,
+				&hs_errors) != HS_SUCCESS) {
+			msg_info_re_cache ("cannot compile %s to hyperscan, try prefilter match",
+					pat);
+			hs_free_compile_error (hs_errors);
 
-				/* The approximation operation might take a significant
-				 * amount of time, so we need to check if it's finite
-				 */
-				if (rspamd_re_cache_is_finite (cache, re, hs_flags[i], max_time)) {
-					hs_flags[i] |= HS_FLAG_PREFILTER;
-					hs_ids[i] = rspamd_regexp_get_cache_id (re);
-					hs_pats[i] = pat;
-					i++;
-				}
-				else {
-					g_free (pat); /* Avoid leak */
-				}
-			}
-			else {
+			/* The approximation operation might take a significant
+			 * amount of time, so we need to check if it's finite
+			 */
+			if (rspamd_re_cache_is_finite (cache, re, hs_flags[i], cbdata->max_time)) {
+				hs_flags[i] |= HS_FLAG_PREFILTER;
 				hs_ids[i] = rspamd_regexp_get_cache_id (re);
 				hs_pats[i] = pat;
-				i ++;
-				hs_free_database (test_db);
+				i++;
+			}
+			else {
+				g_free (pat); /* Avoid leak */
 			}
 		}
-		/* Adjust real re number */
-		n = i;
-
-		if (n > 0) {
-			/* Create the hs tree */
-			if (hs_compile_ext_multi ((const char **)hs_pats,
-					hs_flags,
-					hs_ids,
-					hs_exts,
-					n,
-					cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
-					&cache->plt,
-					&test_db,
-					&hs_errors) != HS_SUCCESS) {
-
-				g_set_error (err, rspamd_re_cache_quark (), EINVAL,
-						"cannot create tree of regexp when processing '%s': %s",
-						hs_pats[hs_errors->expression], hs_errors->message);
-				g_free (hs_flags);
-				g_free (hs_ids);
-
-				for (guint j = 0; j < i; j ++) {
-					g_free (hs_pats[j]);
-				}
+		else {
+			hs_ids[i] = rspamd_regexp_get_cache_id (re);
+			hs_pats[i] = pat;
+			i ++;
+			hs_free_database (test_db);
+		}
+	}
+	/* Adjust real re number */
+	n = i;
+
+	if (n > 0) {
+		/* Create the hs tree */
+		if (hs_compile_ext_multi ((const char **)hs_pats,
+				hs_flags,
+				hs_ids,
+				hs_exts,
+				n,
+				cache->vectorized_hyperscan ? HS_MODE_VECTORED : HS_MODE_BLOCK,
+				&cache->plt,
+				&test_db,
+				&hs_errors) != HS_SUCCESS) {
 
-				g_free (hs_pats);
-				g_free (hs_exts);
-				close (fd);
-				unlink (path);
-				hs_free_compile_error (hs_errors);
 
-				return -1;
-			}
+			g_free (hs_flags);
+			g_free (hs_ids);
 
 			for (guint j = 0; j < i; j ++) {
 				g_free (hs_pats[j]);
 			}
+
 			g_free (hs_pats);
 			g_free (hs_exts);
+			close (fd);
+			unlink (path);
+			hs_free_compile_error (hs_errors);
 
-			if (hs_serialize_database (test_db, &hs_serialized,
-					&serialized_len) != HS_SUCCESS) {
-				g_set_error (err,
-						rspamd_re_cache_quark (),
-						errno,
-						"cannot serialize tree of regexp for %s",
-						re_class->hash);
+			err = g_error_new (rspamd_re_cache_quark (), EINVAL,
+					"cannot create tree of regexp when processing '%s': %s",
+					hs_pats[hs_errors->expression], hs_errors->message);
+			rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
 
-				close (fd);
-				unlink (path);
-				g_free (hs_ids);
-				g_free (hs_flags);
-				hs_free_database (test_db);
+			return;
+		}
 
-				return -1;
-			}
+		for (guint j = 0; j < i; j ++) {
+			g_free (hs_pats[j]);
+		}
 
-			hs_free_database (test_db);
+		g_free (hs_pats);
+		g_free (hs_exts);
 
-			/*
-			 * Magic - 8 bytes
-			 * Platform - sizeof (platform)
-			 * n - number of regexps
-			 * n * <regexp ids>
-			 * n * <regexp flags>
-			 * crc - 8 bytes checksum
-			 * <hyperscan blob>
-			 */
-			rspamd_cryptobox_fast_hash_init (&crc_st, 0xdeadbabe);
-			/* IDs -> Flags -> Hs blob */
-			rspamd_cryptobox_fast_hash_update (&crc_st,
-					hs_ids, sizeof (*hs_ids) * n);
-			rspamd_cryptobox_fast_hash_update (&crc_st,
-					hs_flags, sizeof (*hs_flags) * n);
-			rspamd_cryptobox_fast_hash_update (&crc_st,
-					hs_serialized, serialized_len);
-			crc = rspamd_cryptobox_fast_hash_final (&crc_st);
+		if (hs_serialize_database (test_db, &hs_serialized,
+				&serialized_len) != HS_SUCCESS) {
+			err = g_error_new (rspamd_re_cache_quark (),
+					errno,
+					"cannot serialize tree of regexp for %s",
+					re_class->hash);
 
-			if (cache->vectorized_hyperscan) {
-				iov[0].iov_base = (void *) rspamd_hs_magic_vector;
-			}
-			else {
-				iov[0].iov_base = (void *) rspamd_hs_magic;
-			}
+			close (fd);
+			unlink (path);
+			g_free (hs_ids);
+			g_free (hs_flags);
+			hs_free_database (test_db);
 
-			iov[0].iov_len = RSPAMD_HS_MAGIC_LEN;
-			iov[1].iov_base = &cache->plt;
-			iov[1].iov_len = sizeof (cache->plt);
-			iov[2].iov_base = &n;
-			iov[2].iov_len = sizeof (n);
-			iov[3].iov_base = hs_ids;
-			iov[3].iov_len = sizeof (*hs_ids) * n;
-			iov[4].iov_base = hs_flags;
-			iov[4].iov_len = sizeof (*hs_flags) * n;
-			iov[5].iov_base = &crc;
-			iov[5].iov_len = sizeof (crc);
-			iov[6].iov_base = hs_serialized;
-			iov[6].iov_len = serialized_len;
-
-			if (writev (fd, iov, G_N_ELEMENTS (iov)) == -1) {
-				g_set_error (err,
-						rspamd_re_cache_quark (),
-						errno,
-						"cannot serialize tree of regexp to %s: %s",
-						path, strerror (errno));
-				close (fd);
-				unlink (path);
-				g_free (hs_ids);
-				g_free (hs_flags);
-				g_free (hs_serialized);
+			rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
+			return;
+		}
 
-				return -1;
-			}
+		hs_free_database (test_db);
 
-			if (re_class->type_len > 0) {
-				msg_info_re_cache (
-						"compiled class %s(%*s) to cache %6s, %d regexps",
-						rspamd_re_cache_type_to_string (re_class->type),
-						(gint) re_class->type_len - 1,
-						re_class->type_data,
-						re_class->hash,
-						n);
-			}
-			else {
-				msg_info_re_cache (
-						"compiled class %s to cache %6s, %d regexps",
-						rspamd_re_cache_type_to_string (re_class->type),
-						re_class->hash,
-						n);
-			}
-
-			total += n;
+		/*
+		 * Magic - 8 bytes
+		 * Platform - sizeof (platform)
+		 * n - number of regexps
+		 * n * <regexp ids>
+		 * n * <regexp flags>
+		 * crc - 8 bytes checksum
+		 * <hyperscan blob>
+		 */
+		rspamd_cryptobox_fast_hash_init (&crc_st, 0xdeadbabe);
+		/* IDs -> Flags -> Hs blob */
+		rspamd_cryptobox_fast_hash_update (&crc_st,
+				hs_ids, sizeof (*hs_ids) * n);
+		rspamd_cryptobox_fast_hash_update (&crc_st,
+				hs_flags, sizeof (*hs_flags) * n);
+		rspamd_cryptobox_fast_hash_update (&crc_st,
+				hs_serialized, serialized_len);
+		crc = rspamd_cryptobox_fast_hash_final (&crc_st);
+
+		if (cache->vectorized_hyperscan) {
+			iov[0].iov_base = (void *) rspamd_hs_magic_vector;
+		}
+		else {
+			iov[0].iov_base = (void *) rspamd_hs_magic;
+		}
 
-			g_free (hs_serialized);
+		iov[0].iov_len = RSPAMD_HS_MAGIC_LEN;
+		iov[1].iov_base = &cache->plt;
+		iov[1].iov_len = sizeof (cache->plt);
+		iov[2].iov_base = &n;
+		iov[2].iov_len = sizeof (n);
+		iov[3].iov_base = hs_ids;
+		iov[3].iov_len = sizeof (*hs_ids) * n;
+		iov[4].iov_base = hs_flags;
+		iov[4].iov_len = sizeof (*hs_flags) * n;
+		iov[5].iov_base = &crc;
+		iov[5].iov_len = sizeof (crc);
+		iov[6].iov_base = hs_serialized;
+		iov[6].iov_len = serialized_len;
+
+		if (writev (fd, iov, G_N_ELEMENTS (iov)) == -1) {
+			err = g_error_new (rspamd_re_cache_quark (),
+					errno,
+					"cannot serialize tree of regexp to %s: %s",
+					path, strerror (errno));
+			close (fd);
+			unlink (path);
 			g_free (hs_ids);
 			g_free (hs_flags);
+			g_free (hs_serialized);
+
+			rspamd_re_cache_compile_err (EV_A_ w, err, cbdata);
+			return;
 		}
 
-		fsync (fd);
+		if (re_class->type_len > 0) {
+			msg_info_re_cache (
+					"compiled class %s(%*s) to cache %6s, %d regexps",
+					rspamd_re_cache_type_to_string (re_class->type),
+					(gint) re_class->type_len - 1,
+					re_class->type_data,
+					re_class->hash,
+					n);
+		}
+		else {
+			msg_info_re_cache (
+					"compiled class %s to cache %6s, %d regexps",
+					rspamd_re_cache_type_to_string (re_class->type),
+					re_class->hash,
+					n);
+		}
 
-		/* Now rename temporary file to the new .hs file */
-		rspamd_snprintf (npath, sizeof (path), "%s%c%s.hs", cache_dir,
-				G_DIR_SEPARATOR, re_class->hash);
+		cbdata->total += n;
 
-		if (rename (path, npath) == -1) {
-			g_set_error (err,
-					rspamd_re_cache_quark (),
-					errno,
-					"cannot rename %s to %s: %s",
-					path, npath, strerror (errno));
*** OUTPUT TRUNCATED, 98 LINES SKIPPED ***


More information about the Commits mailing list