commit 0e21da4: [Project] Add `L` flag for regexps to save start of the match in Hyperscan

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jul 28 16:21:08 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-07-28 15:47:51 +0100
URL: https://github.com/rspamd/rspamd/commit/0e21da4e3833d702db12e4129926d7918a1ac527

[Project] Add `L` flag for regexps to save start of the match in Hyperscan

---
 src/libmime/mime_expressions.c | 14 ++++++++++----
 src/libserver/re_cache.c       |  4 ++++
 src/libutil/regexp.c           |  4 ++++
 src/libutil/regexp.h           |  1 +
 4 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index aaefd24b3..0caa324cc 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -372,6 +372,8 @@ rspamd_mime_expr_parse_regexp_atom (rspamd_mempool_t * pool, const gchar *line,
 		case 'u':
 		case 'O':
 		case 'r':
+		case 'L':
+			/* Handled by rspamd_regexp_t */
 			g_string_append_c (re_flags, *p);
 			p++;
 			break;
@@ -813,6 +815,10 @@ set:
 			goto err;
 		}
 		else {
+			const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj,
+					"re_conditions");
+			gint lua_cbref = -1;
+
 			/* Check regexp condition */
 			if (real_ud->conf_obj == NULL) {
 				g_set_error (err, rspamd_mime_expr_quark(), 300,
@@ -821,10 +827,6 @@ set:
 				goto err;
 			}
 
-			const ucl_object_t *re_conditions = ucl_object_lookup (real_ud->conf_obj,
-					"re_conditions");
-			gint lua_cbref = -1;
-
 			if (re_conditions != NULL) {
 				if (ucl_object_type (re_conditions) != UCL_OBJECT) {
 					g_set_error (err, rspamd_mime_expr_quark(), 320,
@@ -851,6 +853,10 @@ set:
 				}
 			}
 
+			if (lua_cbref != -1) {
+				msg_info_config ("added condition for regexp %s", mime_atom->str);
+			}
+
 			/* Register new item in the cache */
 			if (mime_atom->d.re->type == RSPAMD_RE_HEADER ||
 					mime_atom->d.re->type == RSPAMD_RE_RAWHEADER ||
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index bd207573b..73082bb2d 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -1979,6 +1979,10 @@ rspamd_re_cache_compile_timer_cb (EV_P_ ev_timer *w, int revents )
 			hs_flags[i] |= HS_FLAG_SINGLEMATCH;
 		}
 
+		if (re_flags & RSPAMD_REGEXP_FLAG_LEFTMOST) {
+			hs_flags[i] |= HS_FLAG_SOM_LEFTMOST;
+		}
+
 		gchar *pat = rspamd_re_cache_hs_pattern_from_pcre (re);
 
 		if (hs_compile (pat,
diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index 396da80d1..ce764a893 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -406,6 +406,10 @@ rspamd_regexp_new (const gchar *pattern, const gchar *flags,
 				/* We optimize all regexps by default */
 				rspamd_flags |= RSPAMD_REGEXP_FLAG_NOOPT;
 				break;
+			case 'L':
+				/* SOM_LEFTMOST hyperscan flag */
+				rspamd_flags |= RSPAMD_REGEXP_FLAG_LEFTMOST;
+				break;
 			case 'r':
 				rspamd_flags |= RSPAMD_REGEXP_FLAG_RAW;
 				rspamd_flags &= ~RSPAMD_REGEXP_FLAG_UTF;
diff --git a/src/libutil/regexp.h b/src/libutil/regexp.h
index 2e414892a..128edd761 100644
--- a/src/libutil/regexp.h
+++ b/src/libutil/regexp.h
@@ -34,6 +34,7 @@
 #define RSPAMD_REGEXP_FLAG_PCRE_ONLY (1 << 4)
 #define RSPAMD_REGEXP_FLAG_DISABLE_JIT (1 << 5)
 #define RSPAMD_REGEXP_FLAG_UTF (1 << 6)
+#define RSPAMD_REGEXP_FLAG_LEFTMOST (1 << 7)
 
 
 #ifdef  __cplusplus


More information about the Commits mailing list