commit 5604cdc: [Fix] Add a database check function unless we have anything from Hyperscan

Vsevolod Stakhov vsevolod at rspamd.com
Thu Mar 2 11:14:03 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-03-02 11:07:28 +0000
URL: https://github.com/rspamd/rspamd/commit/5604cdc3e0b2f2e94ce99a83655b7939cde01ebe (HEAD -> master)

[Fix] Add a database check function unless we have anything from Hyperscan
Related: https://github.com/intel/hyperscan/issues/389

---
 src/libserver/hyperscan_tools.cxx | 87 +++++++++++++++++++++++++++------------
 1 file changed, 60 insertions(+), 27 deletions(-)

diff --git a/src/libserver/hyperscan_tools.cxx b/src/libserver/hyperscan_tools.cxx
index 1953e9b32..574813841 100644
--- a/src/libserver/hyperscan_tools.cxx
+++ b/src/libserver/hyperscan_tools.cxx
@@ -36,6 +36,22 @@
 
 #define HYPERSCAN_LOG_TAG "hsxxxx"
 
+// Hyperscan does not provide any API to check validity of it's databases
+// However, it is required for us to perform migrations properly without
+// failing at `hs_alloc_scratch` phase or even `hs_scan` which is **way too late**
+// Hence, we have to check hyperscan internal guts to prevent that situation...
+
+#ifndef HS_VERSION_32BIT
+#define HS_VERSION_32BIT ((HS_MAJOR << 24) | (HS_MINOR << 16) | (HS_PATCH << 8) | 0)
+#endif
+#ifndef HS_DB_VERSION
+#define HS_DB_VERSION HS_VERSION_32BIT
+#endif
+
+#ifndef HS_DB_MAGIC
+#define HS_DB_MAGIC   (0xdbdbdbdbU)
+#endif
+
 #define msg_info_hyperscan(...)   rspamd_default_log_function (G_LOG_LEVEL_INFO, \
         "hyperscan", HYPERSCAN_LOG_TAG, \
         RSPAMD_LOG_FUNC, \
@@ -141,8 +157,8 @@ public:
 		}
 
 		auto is_known = known_cached_files.insert(file.get_name());
-		msg_debug_hyperscan("added %s known hyperscan file: %*s",
-			is_known.second ? "new" : "already",
+		msg_debug_hyperscan("added %s hyperscan file: %*s",
+			is_known.second ? "new" : "already known",
 			(int)file.get_name().size(),
 			file.get_name().data());
 	}
@@ -178,8 +194,8 @@ public:
 		}
 
 		auto is_known = known_cached_files.insert(mut_fname);
-		msg_debug_hyperscan("added %s known hyperscan file: %s",
-			is_known.second ? "new" : "already",
+		msg_debug_hyperscan("added %s hyperscan file: %s",
+			is_known.second ? "new" : "already known",
 			mut_fname.c_str());
 	}
 
@@ -314,36 +330,54 @@ struct hs_shared_database {
 	}
 };
 
+struct real_hs_db {
+	std::uint32_t magic;
+	std::uint32_t version;
+	std::uint32_t length;
+	std::uint64_t platform;
+	std::uint32_t crc32;
+};
 static auto
-hs_shared_from_unserialized(raii_mmaped_file &&map) -> tl::expected<hs_shared_database, error>
+hs_is_valid_database(void *raw, std::size_t len, std::string_view fname) -> tl::expected<bool, std::string>
 {
-	auto ptr = map.get_map();
-	auto db = (hs_database_t *)ptr;
+	if (len < sizeof(real_hs_db)) {
+		return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: too short", fname));
+	}
 
-	char *info = nullptr;
-	// Check HS database sanity (see #4409 for details)
-	auto ret = hs_database_info(db, &info);
+	static real_hs_db test;
 
-	if (ret != HS_SUCCESS) {
-		if (info) {
-			g_free (info);
-		}
-		return tl::make_unexpected(
-			error{fmt::format("cannot use database {}: error code: {}", map.get_file().get_name(), ret),
-										 ret, error_category::IMPORTANT});
+	memcpy(&test, raw, sizeof(test));
+
+	if (test.magic != HS_DB_MAGIC) {
+		return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: invalid magic: {} ({} expected)",
+			fname, test.magic, HS_DB_MAGIC));
+	}
+
+	if (test.version != HS_DB_VERSION) {
+		return tl::make_unexpected(fmt::format("cannot load hyperscan database from {}: invalid version: {} ({} expected)",
+			fname, test.version, HS_DB_VERSION));
 	}
 
-	msg_debug_hyperscan("database: %s, info: %s", map.get_file().get_name(), info);
+	return true;
+}
+
+static auto
+hs_shared_from_unserialized(hs_known_files_cache &hs_cache, raii_mmaped_file &&map) -> tl::expected<hs_shared_database, error>
+{
+	auto ptr = map.get_map();
+	auto db = (hs_database_t *)ptr;
 
-	if (info) {
-		g_free (info);
+	auto is_valid = hs_is_valid_database(map.get_map(), map.get_size(), map.get_file().get_name());
+	if (!is_valid) {
+		return tl::make_unexpected(error{is_valid.error(), -1, error_category::IMPORTANT});
 	}
 
+	hs_cache.add_cached_file(map.get_file());
 	return tl::expected<hs_shared_database, error>{tl::in_place, std::move(map), db};
 }
 
 static auto
-hs_shared_from_serialized(raii_mmaped_file &&map, std::int64_t offset) -> tl::expected<hs_shared_database, error>
+hs_shared_from_serialized(hs_known_files_cache &hs_cache, raii_mmaped_file &&map, std::int64_t offset) -> tl::expected<hs_shared_database, error>
 {
 	hs_database_t *target = nullptr;
 
@@ -352,6 +386,7 @@ hs_shared_from_serialized(raii_mmaped_file &&map, std::int64_t offset) -> tl::ex
 		return tl::make_unexpected(error {"cannot deserialize database", ret});
 	}
 
+	hs_cache.add_cached_file(map.get_file());
 	return tl::expected<hs_shared_database, error>{tl::in_place, target, map.get_file().get_name().data()};
 }
 
@@ -463,22 +498,20 @@ auto load_cached_hs_file(const char *fname, std::int64_t offset = 0) -> tl::expe
 					 * being created by another process.
 					 * We cannot use it!
 					 */
-					return hs_shared_from_serialized(std::forward<T>(cached_serialized), offset);
+					return hs_shared_from_serialized(hs_cache, std::forward<T>(cached_serialized), offset);
 				}
 				else {
-					hs_cache.add_cached_file(unserialized_checked);
 					return raii_mmaped_file::mmap_shared(std::move(unserialized_checked), PROT_READ)
 						.and_then([&]<class U>(U &&mmapped_unserialized) -> auto {
-							return hs_shared_from_unserialized(std::forward<U>(mmapped_unserialized));
+							return hs_shared_from_unserialized(hs_cache, std::forward<U>(mmapped_unserialized));
 						});
 				}
 			}
 			else {
-				return hs_shared_from_serialized(std::forward<T>(cached_serialized), offset);
+				return hs_shared_from_serialized(hs_cache, std::forward<T>(cached_serialized), offset);
 			}
 #else // defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4
-			hs_cache.add_cached_file(cached_serialized.get_file());
-			return hs_shared_from_serialized(std::forward<T>(cached_serialized), offset);
+			return hs_shared_from_serialized(hs_cache, std::forward<T>(cached_serialized), offset);
 #endif // defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4
 		});
 }


More information about the Commits mailing list