commit 3e9f86b: [Minor] Split url and email regexps

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Feb 13 17:14:10 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-02-13 17:10:52 +0000
URL: https://github.com/rspamd/rspamd/commit/3e9f86bda632d9d39cd82541c07419bd920f5cb2 (HEAD -> master)

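[Minor] Split url and email regexps

Regexps of type `url` are now matched against the task's URLs only; the
emails table is handled by the new `email` regexp type (RSPAMD_RE_EMAIL),
which has the same expression priority as `url`. Also fix the string name
returned for RSPAMD_RE_SARAWBODY ("sa raw body" instead of "sa body").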

---
 src/libmime/mime_expressions.c |  5 +++++
 src/libserver/re_cache.c       | 45 ++++++++++++++++++++++++++++++++++++++----
 src/libserver/re_cache.h       |  1 +
 3 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index 19d15cb53..a0c499882 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -241,6 +241,10 @@ rspamd_parse_long_option (const gchar *start, gsize len,
 		ret = TRUE;
 		a->type = RSPAMD_RE_URL;
 	}
+	else if (TYPE_CHECK (start, "email", len)) {
+		ret = TRUE;
+		a->type = RSPAMD_RE_EMAIL;
+	}
 	else if (TYPE_CHECK (start, "sa_body", len)) {
 		ret = TRUE;
 		a->type = RSPAMD_RE_SABODY;
@@ -1022,6 +1026,7 @@ rspamd_mime_expr_priority (rspamd_expression_atom_t *atom)
 			ret = 100;
 			break;
 		case RSPAMD_RE_URL:
+		case RSPAMD_RE_EMAIL:
 			ret = 90;
 			break;
 		case RSPAMD_RE_MIME:
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index 5414f8baa..862ffc351 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -1164,8 +1164,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 		}
 		break;
 	case RSPAMD_RE_URL:
-		cnt = g_hash_table_size (MESSAGE_FIELD (task, urls)) +
-				g_hash_table_size (MESSAGE_FIELD (task, emails));
+		cnt = g_hash_table_size (MESSAGE_FIELD (task, urls));
 
 		if (cnt > 0) {
 			scvec = g_malloc (sizeof (*scvec) * cnt);
@@ -1185,6 +1184,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 				}
 			}
 
+#if 0
 			g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails));
 
 			while (g_hash_table_iter_next (&it, &k, &v)) {
@@ -1197,7 +1197,7 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 					lenvec[i++] = len;
 				}
 			}
-
+#endif
 			ret = rspamd_re_cache_process_regexp_data (rt, re,
 					task, scvec, lenvec, i, raw, &processed_hyperscan);
 			msg_debug_re_task ("checked url regexp: %s -> %d",
@@ -1206,6 +1206,37 @@ rspamd_re_cache_exec_re (struct rspamd_task *task,
 			g_free (lenvec);
 		}
 		break;
+	case RSPAMD_RE_EMAIL:
+		cnt = g_hash_table_size (MESSAGE_FIELD (task, emails));
+
+		if (cnt > 0) {
+			scvec = g_malloc (sizeof (*scvec) * cnt);
+			lenvec = g_malloc (sizeof (*lenvec) * cnt);
+			g_hash_table_iter_init (&it, MESSAGE_FIELD (task, emails));
+			i = 0;
+			raw = FALSE;
+
+			while (g_hash_table_iter_next (&it, &k, &v)) {
+				url = v;
+
+				if (url->userlen == 0 || url->hostlen == 0) {
+					continue;
+				}
+
+				in = url->user;
+				len = url->userlen + 1 + url->hostlen;
+				scvec[i] = (guchar *) in;
+				lenvec[i++] = len;
+			}
+
+			ret = rspamd_re_cache_process_regexp_data (rt, re,
+					task, scvec, lenvec, i, raw, &processed_hyperscan);
+			msg_debug_re_task ("checked email regexp: %s -> %d",
+					rspamd_regexp_get_pattern (re), ret);
+			g_free (scvec);
+			g_free (lenvec);
+		}
+		break;
 	case RSPAMD_RE_BODY:
 		raw = TRUE;
 		in = task->msg.begin;
@@ -1534,11 +1565,14 @@ rspamd_re_cache_type_to_string (enum rspamd_re_type type)
 	case RSPAMD_RE_URL:
 		ret = "url";
 		break;
+	case RSPAMD_RE_EMAIL:
+		ret = "email";
+		break;
 	case RSPAMD_RE_SABODY:
 		ret = "sa body";
 		break;
 	case RSPAMD_RE_SARAWBODY:
-		ret = "sa body";
+		ret = "sa raw body";
 		break;
 	case RSPAMD_RE_SELECTOR:
 		ret = "selector";
@@ -1597,6 +1631,9 @@ rspamd_re_cache_type_from_string (const char *str)
 		case G_GUINT64_CONSTANT(0x7D9ACDF6685661A1): /* uri */
 			ret = RSPAMD_RE_URL;
 			break;
+		case G_GUINT64_CONSTANT (0x7e232b0f60b571be): /* email */
+			ret = RSPAMD_RE_EMAIL;
+			break;
 		case G_GUINT64_CONSTANT(0x796d62205a8778c7): /* allheader */
 			ret = RSPAMD_RE_ALLHEADER;
 			break;
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
index 6b5aa84f6..75cee0235 100644
--- a/src/libserver/re_cache.h
+++ b/src/libserver/re_cache.h
@@ -36,6 +36,7 @@ enum rspamd_re_type {
 	RSPAMD_RE_MIME,
 	RSPAMD_RE_RAWMIME,
 	RSPAMD_RE_URL,
+	RSPAMD_RE_EMAIL,
 	RSPAMD_RE_BODY, /* full in SA */
 	RSPAMD_RE_SABODY, /* body in SA */
 	RSPAMD_RE_SARAWBODY, /* rawbody in SA */


More information about the Commits mailing list