commit fd222e4: [Fix] Fix processing captures from pcre2

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Dec 14 17:49:11 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-12-14 17:42:35 +0000
URL: https://github.com/rspamd/rspamd/commit/fd222e43f32a87106612c3b5197a9d491256bfe4 (HEAD -> master)

[Fix] Fix processing captures from pcre2

---
 src/libutil/regexp.c | 82 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 19 deletions(-)

diff --git a/src/libutil/regexp.c b/src/libutil/regexp.c
index 5b928bcb4..cd34a5998 100644
--- a/src/libutil/regexp.c
+++ b/src/libutil/regexp.c
@@ -547,6 +547,7 @@ rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
 	const gchar *mt;
 	gsize remain = 0;
 	gint rc, match_flags = 0, *ovec, ncaptures, i;
+	const int junk = 0xdeadbabe;
 
 	g_assert (re != NULL);
 	g_assert (text != NULL);
@@ -607,6 +608,11 @@ rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
 	ncaptures = (re->ncaptures + 1) * 3;
 	ovec = g_alloca (sizeof (gint) * ncaptures);
 
+
+	for (i = 0; i < ncaptures; i ++) {
+		ovec[i] = junk;
+	}
+
 	if (!(re->flags & RSPAMD_REGEXP_FLAG_NOOPT)) {
 #ifdef HAVE_PCRE_JIT
 # if defined(HAVE_PCRE_JIT_FAST) && !defined(DISABLE_JIT_FAST)
@@ -637,11 +643,21 @@ rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
 	}
 
 	if (rc >= 0) {
-		if (start) {
-			*start = mt + ovec[0];
+		if (rc > 0) {
+			if (start) {
+				*start = mt + ovec[0];
+			}
+			if (end) {
+				*end = mt + ovec[1];
+			}
 		}
-		if (end) {
-			*end = mt + ovec[1];
+		else {
+			if (start) {
+				*start = mt;
+			}
+			if (end) {
+				*end = mt + remain;
+			}
 		}
 
 		if (captures != NULL && rc >= 1) {
@@ -652,9 +668,16 @@ rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
 			g_array_set_size (captures, rc);
 
 			for (i = 0; i < rc; i ++) {
-				elt = &g_array_index (captures, struct rspamd_re_capture, i);
-				elt->p = mt + ovec[i * 2];
-				elt->len = (mt + ovec[i * 2 + 1]) - elt->p;
+				if (ovec[i * 2] != junk && ovec[i * 2] >= 0) {
+					elt = &g_array_index (captures, struct rspamd_re_capture, i);
+					elt->p = mt + ovec[i * 2];
+					elt->len = (mt + ovec[i * 2 + 1]) - elt->p;
+				}
+				else {
+					/* Runtime match returned fewer captures than expected */
+					g_array_set_size (captures, i);
+					break;
+				}
 
 			}
 		}
@@ -682,7 +705,8 @@ rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
 	pcre2_match_context *mcontext;
 	PCRE_T *r;
 	const gchar *mt;
-	gsize remain = 0, *ovec;
+	PCRE2_SIZE remain = 0, *ovec;
+	const PCRE2_SIZE junk = 0xdeadbabeeeeeeeeULL;
 	gint rc, match_flags, novec, i;
 	gboolean ret = FALSE;
 
@@ -731,6 +755,14 @@ rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
 	}
 
 	match_data = pcre2_match_data_create (re->ncaptures + 1, NULL);
+	novec = pcre2_get_ovector_count (match_data);
+	ovec = pcre2_get_ovector_pointer (match_data);
+
+	/* Fill ovec with crap, so we can stop if actual matches is less than announced */
+	for (i = 0; i < novec; i ++) {
+		ovec[i * 2] = junk;
+		ovec[i * 2 + 1] = junk;
+	}
 
 #ifdef HAVE_PCRE_JIT
 	if (!(re->flags & RSPAMD_REGEXP_FLAG_DISABLE_JIT) && can_jit) {
@@ -752,14 +784,21 @@ rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
 #endif
 
 	if (rc >= 0) {
-		novec = pcre2_get_ovector_count (match_data);
-		ovec = pcre2_get_ovector_pointer (match_data);
-
-		if (start) {
-			*start = mt + ovec[0];
+		if (novec > 0) {
+			if (start) {
+				*start = mt + ovec[0];
+			}
+			if (end) {
+				*end = mt + ovec[1];
+			}
 		}
-		if (end) {
-			*end = mt + ovec[1];
+		else {
+			if (start) {
+				*start = mt;
+			}
+			if (end) {
+				*end = mt + remain;
+			}
 		}
 
 		if (captures != NULL && novec >= 1) {
@@ -770,10 +809,15 @@ rspamd_regexp_search (const rspamd_regexp_t *re, const gchar *text, gsize len,
 			g_array_set_size (captures, novec);
 
 			for (i = 0; i < novec; i ++) {
-				elt = &g_array_index (captures, struct rspamd_re_capture, i);
-				elt->p = mt + ovec[i * 2];
-				elt->len = (mt + ovec[i * 2 + 1]) - elt->p;
-
+				if (ovec[i * 2] != junk && ovec[i * 2] != PCRE2_UNSET) {
+					elt = &g_array_index (captures, struct rspamd_re_capture, i);
+					elt->p = mt + ovec[i * 2];
+					elt->len = (mt + ovec[i * 2 + 1]) - elt->p;
+				}
+				else {
+					g_array_set_size (captures, i);
+					break;
+				}
 			}
 		}
 


More information about the Commits mailing list