commit 3097aa4: [Feature] Share hyperscan database among processes

Vsevolod Stakhov vsevolod at rspamd.com
Sat Oct 8 14:49:04 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-10-08 13:45:34 +0100
URL: https://github.com/rspamd/rspamd/commit/3097aa4c87d81c4bc922b4fa14c8eef9bc53f5bc

[Feature] Share hyperscan database among processes

---
 src/libutil/multipattern.c | 120 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 116 insertions(+), 4 deletions(-)

diff --git a/src/libutil/multipattern.c b/src/libutil/multipattern.c
index 8f959f486..d795da3b4 100644
--- a/src/libutil/multipattern.c
+++ b/src/libutil/multipattern.c
@@ -49,6 +49,9 @@ struct RSPAMD_ALIGNED(64) rspamd_multipattern {
 	GArray *hs_ids;
 	GArray *hs_flags;
 	guint scratch_used;
+	/* If serialized into shared memory */
+	gboolean unser_fd;
+	gsize unser_size;
 #endif
 	ac_trie_t *t;
 	GArray *pats;
@@ -411,12 +414,112 @@ rspamd_multipattern_try_load_hs (struct rspamd_multipattern *mp,
 			(gint)rspamd_cryptobox_HASHBYTES / 2, hash);
 
 	if ((map = rspamd_file_xmap (fp, PROT_READ, &len, TRUE)) != NULL) {
-		if (hs_deserialize_database (map, len, &mp->db) == HS_SUCCESS) {
-			munmap (map, len);
-			return TRUE;
+
+		mp->unser_fd = -1;
+#if defined(HS_MAJOR) && defined(HS_MINOR) && HS_MAJOR >= 5 && HS_MINOR >= 4
+		/* Here is a logic to use a shared memory for hyperscan database */
+		rspamd_snprintf (fp, sizeof (fp), "%s/%*xs.hsmp.unser", hs_cache_dir,
+				(gint)rspamd_cryptobox_HASHBYTES / 2, hash);
+		/* Try to create a new file and lock it */
+		mp->unser_fd = rspamd_file_xopen (fp, O_CREAT|O_RDWR|O_EXCL, 00644, false);
+		if (mp->unser_fd == -1) {
+			/* A file can be already existing */
+			mp->unser_fd = rspamd_file_xopen (fp, O_RDONLY, 00644, false);
+		}
+		else {
+			/* Allocate new file, write database and reopen it in RO mode afterwards */
+			gchar tmpfp[PATH_MAX];
+			rspamd_snprintf (tmpfp, sizeof (tmpfp), "%s/hsmp-XXXXXXXXXXXXXXXXXX", hs_cache_dir);
+			int tmp_fd = g_mkstemp_full(tmpfp, O_CREAT|O_RDWR|O_EXCL, 00600);
+			g_assert(tmp_fd != -1);
+			hs_serialized_database_size (map, len, &mp->unser_size);
+			msg_debug("multipattern: create new database in %s; %Hz size", tmpfp, mp->unser_size);
+			void *buf;
+			posix_memalign(&buf, 16, mp->unser_size);
+			if (buf == NULL) {
+				g_abort();
+			}
+
+			int ret;
+
+			if ((ret = hs_deserialize_database_at (map, len, (hs_database_t *)buf)) != HS_SUCCESS) {
+				msg_err ("cannot deserialize hyperscan database: %d", ret);
+				(void)unlink(tmpfp);
+				close (tmp_fd);
+				mp->unser_fd = -1;
+				free (buf);
+			}
+			else {
+				if (write(tmp_fd, buf, mp->unser_size) == -1) {
+					msg_err ("cannot write to %s: %s", fp, strerror(errno));
+					close(tmp_fd);
+					(void)unlink(tmpfp);
+					mp->unser_fd = -1;
+					free(buf);
+				}
+				else {
+					free(buf);
+					if (rename(tmpfp, fp) == -1) {
+						if (errno != EEXIST) {
+							msg_err("cannot rename %s -> %s: %s", tmpfp, fp,
+									strerror(errno));
+						}
+						(void)unlink(tmpfp);
+						close(tmp_fd);
+					}
+					else {
+						(void) unlink(tmpfp);
+						close(tmp_fd);
+					}
+					/* Reopen in RO mode */
+					mp->unser_fd = rspamd_file_xopen (fp, O_RDONLY, 00644, false);
+				}
+			}
+
+		}
+#endif
+		if (mp->unser_fd != -1) {
+			/* We have a prepared database, so we can just use it */
+			struct stat st;
+
+			g_assert(fstat(mp->unser_fd, &st) != -1);
+			mp->unser_size = st.st_size;
+			mp->db = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, mp->unser_fd, 0);
+
+			if (mp->db == MAP_FAILED) {
+				mp->db = NULL;
+				msg_err ("cannot open cached hyperscan database: %s", strerror(errno));
+				close(mp->unser_fd);
+				mp->unser_fd = -1;
+				mp->unser_size = 0;
+				(void)unlink(fp);
+			}
+			else {
+				close(mp->unser_fd);
+				mp->unser_fd = -1;
+				msg_debug("multipattern: loaded hyperscan db from: %s, size = %Hz", fp, mp->unser_size);
+
+				return TRUE;
+			}
+			munmap(map, len);
+
+		}
+		else {
+			int ret;
+			if ((ret = hs_deserialize_database(map, len, &mp->db)) == HS_SUCCESS) {
+				munmap(map, len);
+				return TRUE;
+			}
+			else {
+				msg_err ("cannot deserialize hyperscan database: %d", ret);
+			}
 		}
 
 		munmap (map, len);
+		if (mp->unser_fd != -1) {
+			close (mp->unser_fd);
+			munmap (mp->db, mp->unser_size);
+		}
 		/* Remove stale file */
 		(void)unlink (fp);
 	}
@@ -728,7 +831,16 @@ rspamd_multipattern_destroy (struct rspamd_multipattern *mp)
 					hs_free_scratch (mp->scratch[i]);
 				}
 
-				hs_free_database (mp->db);
+				if (mp->db) {
+					if (mp->unser_size) {
+						/* Mmapped database */
+						munmap(mp->db, mp->unser_size);
+					}
+					else {
+						/* Allocated database */
+						hs_free_database (mp->db);
+					}
+				}
 			}
 
 			for (i = 0; i < mp->cnt; i ++) {


More information about the Commits mailing list