commit 90b0eda: [Rework] Start movement of the hyperscan related routines into a single unit

Vsevolod Stakhov vsevolod at rspamd.com
Sat Oct 8 14:49:07 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-10-08 15:38:49 +0100
URL: https://github.com/rspamd/rspamd/commit/90b0edae421d31c12cbc8c29fa294f7732bb4f21 (HEAD -> master)

[Rework] Start movement of the hyperscan related routines into a single unit

---
 src/libutil/CMakeLists.txt          |  3 +-
 src/libutil/cxx/hyperscan_tools.cxx | 97 +++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/src/libutil/CMakeLists.txt b/src/libutil/CMakeLists.txt
index 7b3103720..8602baf6e 100644
--- a/src/libutil/CMakeLists.txt
+++ b/src/libutil/CMakeLists.txt
@@ -18,6 +18,7 @@ SET(LIBRSPAMDUTILSRC
 				${CMAKE_CURRENT_SOURCE_DIR}/heap.c
 				${CMAKE_CURRENT_SOURCE_DIR}/multipattern.c
 				${CMAKE_CURRENT_SOURCE_DIR}/cxx/utf8_util.cxx
-				${CMAKE_CURRENT_SOURCE_DIR}/cxx/locked_file.cxx)
+				${CMAKE_CURRENT_SOURCE_DIR}/cxx/locked_file.cxx
+				${CMAKE_CURRENT_SOURCE_DIR}/cxx/hyperscan_tools.cxx)
 # Rspamdutil
 SET(RSPAMD_UTIL ${LIBRSPAMDUTILSRC} PARENT_SCOPE)
\ No newline at end of file
diff --git a/src/libutil/cxx/hyperscan_tools.cxx b/src/libutil/cxx/hyperscan_tools.cxx
new file mode 100644
index 000000000..82664d810
--- /dev/null
+++ b/src/libutil/cxx/hyperscan_tools.cxx
@@ -0,0 +1,97 @@
+/*-
+ * Copyright 2022 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "config.h"
+
+#ifdef WITH_HYPERSCAN
+#include <string>
+#include "contrib/ankerl/unordered_dense.h"
+#include "contrib/ankerl/svector.h"
+#include "fmt/core.h"
+
+#include <glob.h> /* for glob */
+#include <sys/stat.h> /* for stat */
+#include <unistd.h> /* for unlink */
+
+namespace rspamd::util {
+
+/** Tracks hyperscan cache dirs, extensions and known cache files; one instance, see get(). */
+class hs_known_files_cache {
+private:
+	// These fields are filled when we add new known cache files
+	ankerl::svector<std::string, 4> cache_dirs;
+	ankerl::svector<std::string, 8> cache_extensions;
+	ankerl::unordered_dense::set<std::string> known_cached_files;
+	bool need_cleanup = false; // if set, the destructor prunes unknown cache files
+private:
+	hs_known_files_cache() = default;
+	/**
+	 * For each known dir and extension, unlinks every regular file matching the
+	 * glob `*.<ext>` in that dir unless it is listed in `known_cached_files`.
+	 * NOTE(review): nothing in this unit deletes the singleton, so this destructor
+	 * does not appear to be reached yet - confirm.
+	 */
+	virtual ~hs_known_files_cache() {
+		if (!need_cleanup) {
+			return;
+		}
+		for (const auto &dir : cache_dirs) {
+			for (const auto &ext : cache_extensions) {
+				// Zero-initialise, as globfree() below runs even if glob() fails
+				glob_t globbuf{};
+				const auto glob_pattern = fmt::format("{}{}*.{}", dir, G_DIR_SEPARATOR_S, ext);
+				if (glob(glob_pattern.c_str(), 0, nullptr, &globbuf) == 0) {
+					// gl_pathc is size_t: an unsigned index avoids a signed/unsigned compare
+					for (size_t i = 0; i < globbuf.gl_pathc; i++) {
+						const auto *path = globbuf.gl_pathv[i];
+						struct stat st;
+						// Keep entries that cannot be stat'ed, non-regular files and known caches
+						if (stat(path, &st) == -1 || !S_ISREG(st.st_mode) ||
+							known_cached_files.contains(path)) {
+							continue;
+						}
+						unlink(path);
+					}
+				}
+
+				globfree(&globbuf);
+			}
+		}
+	}
+public:
+	hs_known_files_cache(const hs_known_files_cache &) = delete;
+	hs_known_files_cache(hs_known_files_cache &&) = delete;
+
+	/**
+	 * Returns the single instance, creating it on the first call.
+	 * @param need_cleanup stored only by the call that creates the instance; ignored afterwards
+	 */
+	static auto get(bool need_cleanup) -> hs_known_files_cache& {
+		static hs_known_files_cache *singleton = nullptr;
+
+		if (singleton == nullptr) {
+			singleton = new hs_known_files_cache;
+			singleton->need_cleanup = need_cleanup;
+		}
+
+		return *singleton;
+	}
+};
+
+
+} // namespace rspamd::util
+
+
+#endif
\ No newline at end of file


More information about the Commits mailing list