commit be7504e: [Fix] Do not cleanup hyperscan files unless new ones are loaded

Vsevolod Stakhov vsevolod at rspamd.com
Mon Oct 30 17:14:03 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-10-30 17:11:13 +0000
URL: https://github.com/rspamd/rspamd/commit/be7504e39329d94f1fae1fbeca36906cfcd12a7c (HEAD -> master)

[Fix] Do not cleanup hyperscan files unless new ones are loaded

---
 src/libserver/hyperscan_tools.cxx | 16 +++++++++-
 src/libserver/hyperscan_tools.h   | 11 +++++--
 src/libserver/rspamd_control.c    | 65 +++++++++++++++++++++------------------
 3 files changed, 58 insertions(+), 34 deletions(-)

diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx
index 4bcc68e4a..3fce3ce26 100644
--- a/src/libserver/hyperscan_tools.cxx
+++ b/src/libserver/hyperscan_tools.cxx
@@ -94,6 +94,7 @@ private:
 	ankerl::svector<std::string, 4> cache_dirs;
 	ankerl::svector<std::string, 8> cache_extensions;
 	ankerl::unordered_dense::set<std::string> known_cached_files;
+	bool loaded = false;
 
 private:
 	hs_known_files_cache() = default;
@@ -203,7 +204,7 @@ public:
 	{
 		auto env_cleanup_disable = std::getenv("RSPAMD_NO_CLEANUP");
 		/* We clean dir merely if we are running from the main process */
-		if (rspamd_current_worker == nullptr && env_cleanup_disable == nullptr) {
+		if (rspamd_current_worker == nullptr && env_cleanup_disable == nullptr && loaded) {
 			const auto *log_func = RSPAMD_LOG_FUNC;
 			auto cleanup_dir = [&](std::string_view dir) -> void {
 				for (const auto &ext: cache_extensions) {
@@ -259,6 +260,14 @@ public:
 		else if (rspamd_current_worker == nullptr && env_cleanup_disable != nullptr) {
 			msg_debug_hyperscan("disable hyperscan cleanup: env variable RSPAMD_NO_CLEANUP is set");
 		}
+		else if (!loaded) {
+			msg_debug_hyperscan("disable hyperscan cleanup: not loaded");
+		}
+	}
+
+	auto notice_loaded() -> void
+	{
+		loaded = true;
 	}
 };
 
@@ -601,4 +610,9 @@ void rspamd_hyperscan_cleanup_maybe(void)
 	rspamd::util::hs_known_files_cache::get().cleanup_maybe();
 }
 
+void rspamd_hyperscan_notice_loaded(void)
+{
+	rspamd::util::hs_known_files_cache::get().notice_loaded();
+}
+
 #endif// WITH_HYPERSCAN
\ No newline at end of file
diff --git a/src/libserver/hyperscan_tools.h b/src/libserver/hyperscan_tools.h
index c8dd490ab..624b7b069 100644
--- a/src/libserver/hyperscan_tools.h
+++ b/src/libserver/hyperscan_tools.h
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2022 Vsevolod Stakhov
+/*
+ * Copyright 2023 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -60,6 +60,11 @@ void rspamd_hyperscan_free(rspamd_hyperscan_t *db, bool invalid);
  */
 void rspamd_hyperscan_notice_known(const char *fname);
 
+/**
+ * Notice that hyperscan files are all loaded (e.g. in the main process), so we can cleanup old files on termination
+ */
+void rspamd_hyperscan_notice_loaded(void);
+
 /**
  * Cleans up old files. This method should be called on config free (in the main process)
  */
diff --git a/src/libserver/rspamd_control.c b/src/libserver/rspamd_control.c
index 9ed78a316..986a4a2ea 100644
--- a/src/libserver/rspamd_control.c
+++ b/src/libserver/rspamd_control.c
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2016 Vsevolod Stakhov
+/*
+ * Copyright 2023 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -838,7 +838,7 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 {
 	struct rspamd_worker *worker;
 	static struct rspamd_srv_command cmd;
-	struct rspamd_main *srv;
+	struct rspamd_main *rspamd_main;
 	struct rspamd_srv_reply_data *rdata;
 	struct msghdr msg;
 	struct cmsghdr *cmsg;
@@ -851,7 +851,7 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 
 	if (revents == EV_READ) {
 		worker = (struct rspamd_worker *) w->data;
-		srv = worker->srv;
+		rspamd_main = worker->srv;
 		iov.iov_base = &cmd;
 		iov.iov_len = sizeof(cmd);
 		memset(&msg, 0, sizeof(msg));
@@ -864,8 +864,8 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 
 		if (r == -1) {
 			if (errno != EAGAIN) {
-				msg_err("cannot read from worker's srv pipe: %s",
-						strerror(errno));
+				msg_err_main("cannot read from worker's srv pipe: %s",
+							 strerror(errno));
 			}
 			else {
 				return;
@@ -876,18 +876,18 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 			 * Usually this means that a worker is dead, so do not try to read
 			 * anything
 			 */
-			msg_err("cannot read from worker's srv pipe connection closed; command = %s",
-					rspamd_srv_command_to_string(cmd.type));
+			msg_err_main("cannot read from worker's srv pipe connection closed; command = %s",
+						 rspamd_srv_command_to_string(cmd.type));
 			ev_io_stop(EV_A_ w);
 		}
 		else if (r != sizeof(cmd)) {
-			msg_err("cannot read from worker's srv pipe incomplete command: %d != %d; command = %s",
-					(gint) r, (gint) sizeof(cmd), rspamd_srv_command_to_string(cmd.type));
+			msg_err_main("cannot read from worker's srv pipe incomplete command: %d != %d; command = %s",
+						 (gint) r, (gint) sizeof(cmd), rspamd_srv_command_to_string(cmd.type));
 		}
 		else {
 			rdata = g_malloc0(sizeof(*rdata));
 			rdata->worker = worker;
-			rdata->srv = srv;
+			rdata->srv = rspamd_main;
 			rdata->rep.id = cmd.id;
 			rdata->rep.type = cmd.type;
 			rdata->fd = -1;
@@ -899,19 +899,19 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 
 			switch (cmd.type) {
 			case RSPAMD_SRV_SOCKETPAIR:
-				spair = g_hash_table_lookup(srv->spairs, cmd.cmd.spair.pair_id);
+				spair = g_hash_table_lookup(rspamd_main->spairs, cmd.cmd.spair.pair_id);
 				if (spair == NULL) {
 					spair = g_malloc(sizeof(gint) * 2);
 
 					if (rspamd_socketpair(spair, cmd.cmd.spair.af) == -1) {
 						rdata->rep.reply.spair.code = errno;
-						msg_err("cannot create socket pair: %s", strerror(errno));
+						msg_err_main("cannot create socket pair: %s", strerror(errno));
 					}
 					else {
 						nid = g_malloc(sizeof(cmd.cmd.spair.pair_id));
 						memcpy(nid, cmd.cmd.spair.pair_id,
 							   sizeof(cmd.cmd.spair.pair_id));
-						g_hash_table_insert(srv->spairs, nid, spair);
+						g_hash_table_insert(rspamd_main->spairs, nid, spair);
 						rdata->rep.reply.spair.code = 0;
 						rdata->fd = cmd.cmd.spair.pair_num ? spair[1] : spair[0];
 					}
@@ -923,14 +923,19 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 				break;
 			case RSPAMD_SRV_HYPERSCAN_LOADED:
 				/* Load RE cache to provide it for new forks */
-				if (rspamd_re_cache_is_hs_loaded(srv->cfg->re_cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
+				if (rspamd_re_cache_is_hs_loaded(rspamd_main->cfg->re_cache) != RSPAMD_HYPERSCAN_LOADED_FULL ||
 					cmd.cmd.hs_loaded.forced) {
 					rspamd_re_cache_load_hyperscan(
-						srv->cfg->re_cache,
+						rspamd_main->cfg->re_cache,
 						cmd.cmd.hs_loaded.cache_dir,
 						false);
 				}
 
+				/* After getting this notice, we can clean up old hyperscan files */
+				rspamd_hyperscan_notice_loaded();
+				msg_info_main("received hyperscan cache loaded from %s",
+							  cmd.cmd.hs_loaded.cache_dir);
+
 				/* Broadcast command to all workers */
 				memset(&wcmd, 0, sizeof(wcmd));
 				wcmd.type = RSPAMD_CONTROL_HYPERSCAN_LOADED;
@@ -938,7 +943,7 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 							   cmd.cmd.hs_loaded.cache_dir,
 							   sizeof(wcmd.cmd.hs_loaded.cache_dir));
 				wcmd.cmd.hs_loaded.forced = cmd.cmd.hs_loaded.forced;
-				rspamd_control_broadcast_cmd(srv, &wcmd, rfd,
+				rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
 											 rspamd_control_ignore_io_handler, NULL, worker->pid);
 				break;
 			case RSPAMD_SRV_MONITORED_CHANGE:
@@ -950,26 +955,26 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 							   sizeof(wcmd.cmd.monitored_change.tag));
 				wcmd.cmd.monitored_change.alive = cmd.cmd.monitored_change.alive;
 				wcmd.cmd.monitored_change.sender = cmd.cmd.monitored_change.sender;
-				rspamd_control_broadcast_cmd(srv, &wcmd, rfd,
+				rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
 											 rspamd_control_ignore_io_handler, NULL, 0);
 				break;
 			case RSPAMD_SRV_LOG_PIPE:
 				memset(&wcmd, 0, sizeof(wcmd));
 				wcmd.type = RSPAMD_CONTROL_LOG_PIPE;
 				wcmd.cmd.log_pipe.type = cmd.cmd.log_pipe.type;
-				rspamd_control_broadcast_cmd(srv, &wcmd, rfd,
+				rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
 											 rspamd_control_log_pipe_io_handler, NULL, 0);
 				break;
 			case RSPAMD_SRV_ON_FORK:
 				rdata->rep.reply.on_fork.status = 0;
-				rspamd_control_handle_on_fork(&cmd, srv);
+				rspamd_control_handle_on_fork(&cmd, rspamd_main);
 				break;
 			case RSPAMD_SRV_HEARTBEAT:
 				worker->hb.last_event = ev_time();
 				rdata->rep.reply.heartbeat.status = 0;
 				break;
 			case RSPAMD_SRV_HEALTH:
-				rspamd_fill_health_reply(srv, &rdata->rep);
+				rspamd_fill_health_reply(rspamd_main, &rdata->rep);
 				break;
 			case RSPAMD_SRV_NOTICE_HYPERSCAN_CACHE:
 #ifdef WITH_HYPERSCAN
@@ -984,11 +989,11 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 				/* Ensure that memcpy is safe */
 				G_STATIC_ASSERT(sizeof(wcmd.cmd.fuzzy_blocked) == sizeof(cmd.cmd.fuzzy_blocked));
 				memcpy(&wcmd.cmd.fuzzy_blocked, &cmd.cmd.fuzzy_blocked, sizeof(wcmd.cmd.fuzzy_blocked));
-				rspamd_control_broadcast_cmd(srv, &wcmd, rfd,
+				rspamd_control_broadcast_cmd(rspamd_main, &wcmd, rfd,
 											 rspamd_control_ignore_io_handler, NULL, worker->pid);
 				break;
 			default:
-				msg_err("unknown command type: %d", cmd.type);
+				msg_err_main("unknown command type: %d", cmd.type);
 				break;
 			}
 
@@ -1008,7 +1013,7 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 		rdata = (struct rspamd_srv_reply_data *) w->data;
 		worker = rdata->worker;
 		worker->tmp_data = NULL; /* Avoid race */
-		srv = rdata->srv;
+		rspamd_main = rdata->srv;
 
 		memset(&msg, 0, sizeof(msg));
 
@@ -1032,13 +1037,13 @@ rspamd_srv_handler(EV_P_ ev_io *w, int revents)
 		r = sendmsg(w->fd, &msg, 0);
 
 		if (r == -1) {
-			msg_err("cannot write to worker's srv pipe when writing reply: %s; command = %s",
-					strerror(errno), rspamd_srv_command_to_string(rdata->rep.type));
+			msg_err_main("cannot write to worker's srv pipe when writing reply: %s; command = %s",
+						 strerror(errno), rspamd_srv_command_to_string(rdata->rep.type));
 		}
 		else if (r != sizeof(rdata->rep)) {
-			msg_err("cannot write to worker's srv pipe: %d != %d; command = %s",
-					(int) r, (int) sizeof(rdata->rep),
-					rspamd_srv_command_to_string(rdata->rep.type));
+			msg_err_main("cannot write to worker's srv pipe: %d != %d; command = %s",
+						 (int) r, (int) sizeof(rdata->rep),
+						 rspamd_srv_command_to_string(rdata->rep.type));
 		}
 
 		g_free(rdata);


More information about the Commits mailing list