commit 79044bf: [Fix] Further checks for the hs_scratch_alloc

Vsevolod Stakhov vsevolod at rspamd.com
Sat Feb 25 15:00:05 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-02-25 14:53:31 +0000
URL: https://github.com/rspamd/rspamd/commit/79044bfc0263cdb73dddbddb5d8cccdf138af4f9

[Fix] Further checks for the hs_scratch_alloc
Issue: #4409

---
 src/libserver/hyperscan_tools.cxx | 47 ++++++++++++++++++++++++++++++++++-----
 src/libserver/hyperscan_tools.h   |  4 ++--
 src/libserver/maps/map_helpers.c  | 15 ++++++++++---
 src/libserver/re_cache.c          |  6 ++---
 src/libutil/multipattern.c        | 36 ++++++++++++++++++++++++++----
 5 files changed, 90 insertions(+), 18 deletions(-)

diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx
index cb2e15c9a..615aa57aa 100644
--- a/src/libserver/hyperscan_tools.cxx
+++ b/src/libserver/hyperscan_tools.cxx
@@ -183,6 +183,35 @@ public:
 			mut_fname.c_str());
 	}
 
+	void delete_cached_file(const char *fname) {
+		auto mut_fname = std::string{fname};
+		std::size_t sz;
+
+		rspamd_normalize_path_inplace(mut_fname.data(), mut_fname.size(), &sz);
+		mut_fname.resize(sz);
+
+		if (mut_fname.empty()) {
+			msg_err_hyperscan("attempt to remove an empty hyperscan file!");
+			return;
+		}
+
+		if (access(mut_fname.c_str(), R_OK) != -1) {
+			if (unlink(mut_fname.c_str()) == -1) {
+				msg_err_hyperscan("cannot remove hyperscan file %s: %s",
+					mut_fname.c_str(), strerror(errno));
+			}
+			else {
+				msg_debug_hyperscan("removed hyperscan file %s", mut_fname.c_str());
+			}
+		}
+		else {
+			msg_err_hyperscan("attempt to remove non-existent hyperscan file %s: %s",
+				mut_fname.c_str(), strerror(errno));
+		}
+
+		known_cached_files.erase(mut_fname);
+	}
+
 	auto cleanup_maybe() -> void {
 		auto env_cleanup_disable = std::getenv("RSPAMD_NO_CLEANUP");
 		/* We clean dir merely if we are running from the main process */
@@ -248,6 +277,7 @@ public:
 struct hs_shared_database {
 	hs_database_t *db = nullptr; /**< internal database (might be in a shared memory) */
 	std::optional<raii_mmaped_file> maybe_map;
+	std::string cached_path;
 
 	~hs_shared_database() {
 		if (!maybe_map) {
@@ -256,8 +286,10 @@ struct hs_shared_database {
 		// Otherwise, handled by maybe_map dtor
 	}
 
-	explicit hs_shared_database(raii_mmaped_file &&map, hs_database_t *db) : db(db), maybe_map(std::move(map)) {}
-	explicit hs_shared_database(hs_database_t *db) : db(db), maybe_map(std::nullopt) {}
+	explicit hs_shared_database(raii_mmaped_file &&map, hs_database_t *db) : db(db), maybe_map(std::move(map)) {
+		cached_path = maybe_map.value().get_file().get_name();
+	}
+	explicit hs_shared_database(hs_database_t *db, const char *fname) : db(db), maybe_map(std::nullopt), cached_path{fname} {}
 	hs_shared_database(const hs_shared_database &other) = delete;
 	hs_shared_database() = default;
 	hs_shared_database(hs_shared_database &&other) noexcept {
@@ -287,7 +319,7 @@ hs_shared_from_serialized(raii_mmaped_file &&map, std::int64_t offset) -> tl::ex
 		return tl::make_unexpected(error {"cannot deserialize database", ret});
 	}
 
-	return tl::expected<hs_shared_database, error>{tl::in_place, target};
+	return tl::expected<hs_shared_database, error>{tl::in_place, target, map.get_file().get_name().data()};
 }
 
 auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expected<hs_shared_database, error>
@@ -464,18 +496,21 @@ rspamd_hyperscan_get_database(rspamd_hyperscan_t *db)
 }
 
 rspamd_hyperscan_t *
-rspamd_hyperscan_from_raw_db(hs_database_t *db)
+rspamd_hyperscan_from_raw_db(hs_database_t *db, const char *fname)
 {
-	auto *ndb = new rspamd::util::hs_shared_database{db};
+	auto *ndb = new rspamd::util::hs_shared_database{db, fname};
 
 	return C_DB_FROM_CXX(ndb);
 }
 
 void
-rspamd_hyperscan_free(rspamd_hyperscan_t *db)
+rspamd_hyperscan_free(rspamd_hyperscan_t *db, bool invalid)
 {
 	auto *real_db = CXX_DB_FROM_C(db);
 
+	if (invalid) {
+		rspamd::util::hs_known_files_cache::get().delete_cached_file(real_db->cached_path.c_str());
+	}
 	delete real_db;
 }
 
diff --git a/src/libserver/hyperscan_tools.h b/src/libserver/hyperscan_tools.h
index e66e2ec91..d1707f490 100644
--- a/src/libserver/hyperscan_tools.h
+++ b/src/libserver/hyperscan_tools.h
@@ -41,7 +41,7 @@ rspamd_hyperscan_t *rspamd_hyperscan_maybe_load(const char *filename, goffset of
  * @param filename
  * @return
  */
-rspamd_hyperscan_t *rspamd_hyperscan_from_raw_db(hs_database_t *db);
+rspamd_hyperscan_t *rspamd_hyperscan_from_raw_db(hs_database_t *db, const char *fname);
 /**
  * Get the internal database
  * @param db
@@ -52,7 +52,7 @@ hs_database_t* rspamd_hyperscan_get_database(rspamd_hyperscan_t *db);
  * Free the database
  * @param db
  */
-void rspamd_hyperscan_free(rspamd_hyperscan_t *db);
+void rspamd_hyperscan_free(rspamd_hyperscan_t *db, bool invalid);
 
 /**
  * Notice a known hyperscan file (e.g. externally serialized)
diff --git a/src/libserver/maps/map_helpers.c b/src/libserver/maps/map_helpers.c
index 9e649414a..2fa7743b4 100644
--- a/src/libserver/maps/map_helpers.c
+++ b/src/libserver/maps/map_helpers.c
@@ -884,7 +884,7 @@ rspamd_map_helper_destroy_regexp (struct rspamd_regexp_map_helper *re_map)
 		hs_free_scratch (re_map->hs_scratch);
 	}
 	if (re_map->hs_db) {
-		rspamd_hyperscan_free(re_map->hs_db);
+		rspamd_hyperscan_free(re_map->hs_db, false);
 	}
 	if (re_map->patterns) {
 		for (i = 0; i < re_map->regexps->len; i ++) {
@@ -1243,7 +1243,16 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
 				return;
 			}
 
-			re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db);
+			if (re_map->map->cfg->hs_cache_dir) {
+				char fpath[PATH_MAX];
+				rspamd_snprintf(fpath, sizeof(fpath), "%s/%*xs.hsmc",
+					re_map->map->cfg->hs_cache_dir,
+					(gint) rspamd_cryptobox_HASHBYTES / 2, re_map->re_digest);
+				re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db, fpath);
+			}
+			else {
+				re_map->hs_db = rspamd_hyperscan_from_raw_db(hs_db, NULL);
+			}
 
 			ts1 = (rspamd_get_ticks (FALSE) - ts1) * 1000.0;
 			msg_info_map ("hyperscan compiled %d regular expressions from %s in %.1f ms",
@@ -1257,7 +1266,7 @@ rspamd_re_map_finalize (struct rspamd_regexp_map_helper *re_map)
 
 		if (hs_alloc_scratch (rspamd_hyperscan_get_database(re_map->hs_db), &re_map->hs_scratch) != HS_SUCCESS) {
 			msg_err_map ("cannot allocate scratch space for hyperscan");
-			rspamd_hyperscan_free(re_map->hs_db);
+			rspamd_hyperscan_free(re_map->hs_db, true);
 			re_map->hs_db = NULL;
 		}
 	}
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 3f108fda5..f4d2496b1 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -196,7 +196,7 @@ rspamd_re_cache_destroy (struct rspamd_re_cache *cache)
 
 #ifdef WITH_HYPERSCAN
 		if (re_class->hs_db) {
-			rspamd_hyperscan_free(re_class->hs_db);
+			rspamd_hyperscan_free(re_class->hs_db, false);
 		}
 		if (re_class->hs_scratch) {
 			hs_free_scratch (re_class->hs_scratch);
@@ -2561,7 +2561,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 			}
 
 			if (re_class->hs_db != NULL) {
-				rspamd_hyperscan_free (re_class->hs_db);
+				rspamd_hyperscan_free (re_class->hs_db, false);
 			}
 
 			if (re_class->hs_ids) {
@@ -2594,7 +2594,6 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 
 			if ((ret = hs_alloc_scratch (rspamd_hyperscan_get_database(re_class->hs_db),
 					&re_class->hs_scratch)) != HS_SUCCESS) {
-				rspamd_hyperscan_free (re_class->hs_db);
 				if (!try_load) {
 					msg_err_re_cache ("bad hs database in %s; error code: %d", path, ret);
 				}
@@ -2604,6 +2603,7 @@ rspamd_re_cache_load_hyperscan (struct rspamd_re_cache *cache,
 				g_free (hs_ids);
 				g_free (hs_flags);
 
+				rspamd_hyperscan_free (re_class->hs_db, true);
 				re_class->hs_ids = NULL;
 				re_class->hs_scratch = NULL;
 				re_class->hs_db = NULL;
diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
index 489ac1beb..0d6e8b4f4 100644
--- a/src/libutil/multipattern.c
+++ b/src/libutil/multipattern.c
@@ -504,16 +504,44 @@ rspamd_multipattern_compile (struct rspamd_multipattern *mp, GError **err)
 
 					return FALSE;
 				}
-				mp->hs_db = rspamd_hyperscan_from_raw_db(db);
+
+				if (hs_cache_dir != NULL) {
+					char fpath[PATH_MAX];
+					rspamd_snprintf (fpath, sizeof (fpath), "%s/%*xs.hsmp", hs_cache_dir,
+						(gint)rspamd_cryptobox_HASHBYTES / 2, hash);
+					mp->hs_db = rspamd_hyperscan_from_raw_db(db, fpath);
+				}
+				else {
+					/* Should not happen in the real life */
+					mp->hs_db = rspamd_hyperscan_from_raw_db(db, NULL);
+				}
 			}
 
 			rspamd_multipattern_try_save_hs (mp, hash);
 
+			for (i = 0; i < MAX_SCRATCH; i ++) {
+				mp->scratch[i] = NULL;
+			}
+
 			for (i = 0; i < MAX_SCRATCH; i ++) {
 				int ret;
+
 				if ((ret = hs_alloc_scratch (rspamd_hyperscan_get_database(mp->hs_db), &mp->scratch[i])) != HS_SUCCESS) {
-					msg_err("fatal error: cannot allocate scratch space for hyperscan: %d", ret);
-					g_abort();
+					msg_err("cannot allocate scratch space for hyperscan: error code %d", ret);
+
+					/* Clean all scratches that are non-NULL */
+					for (int ii = 0; ii < MAX_SCRATCH; ii ++) {
+						if (mp->scratch[ii] != NULL) {
+							hs_free_scratch(mp->scratch[ii]);
+						}
+					}
+					g_set_error (err, rspamd_multipattern_quark (), EINVAL,
+						"cannot allocate scratch space for hyperscan: error code %d", ret);
+
+					rspamd_hyperscan_free(mp->hs_db, true);
+					mp->hs_db = NULL;
+
+					return FALSE;
 				}
 			}
 		}
@@ -728,7 +756,7 @@ rspamd_multipattern_destroy (struct rspamd_multipattern *mp)
 				}
 
 				if (mp->hs_db) {
-					rspamd_hyperscan_free(mp->hs_db);
+					rspamd_hyperscan_free(mp->hs_db, false);
 				}
 			}
 


More information about the Commits mailing list