commit 976a824: [Project] Preliminary support of lua conditions for regexps
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Jul 24 19:35:12 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-07-24 20:24:59 +0100
URL: https://github.com/rspamd/rspamd/commit/976a824a0ec586dd5bc82a86d14c3654b10fa4ef (HEAD -> master)
[Project] Preliminary support of lua conditions for regexps
---
src/libmime/mime_expressions.c | 10 ++--
src/libserver/re_cache.c | 120 +++++++++++++++++++++++++++++++----------
src/libserver/re_cache.h | 4 +-
src/lua/lua_config.c | 2 +-
4 files changed, 102 insertions(+), 34 deletions(-)
diff --git a/src/libmime/mime_expressions.c b/src/libmime/mime_expressions.c
index 6ff656cdb..8f2137d36 100644
--- a/src/libmime/mime_expressions.c
+++ b/src/libmime/mime_expressions.c
@@ -823,7 +823,7 @@ set:
mime_atom->d.re->regexp,
mime_atom->d.re->type,
mime_atom->d.re->extra.header,
- strlen (mime_atom->d.re->extra.header) + 1);
+ strlen (mime_atom->d.re->extra.header) + 1, -1);
/* Pass ownership to the cache */
rspamd_regexp_unref (own_re);
}
@@ -845,7 +845,7 @@ set:
mime_atom->d.re->regexp,
mime_atom->d.re->type,
mime_atom->d.re->extra.selector,
- strlen (mime_atom->d.re->extra.selector) + 1);
+ strlen (mime_atom->d.re->extra.selector) + 1, -1);
/* Pass ownership to the cache */
rspamd_regexp_unref (own_re);
}
@@ -865,7 +865,8 @@ set:
mime_atom->d.re->regexp,
mime_atom->d.re->type,
NULL,
- 0);
+ 0,
+ -1);
/* Pass ownership to the cache */
rspamd_regexp_unref (own_re);
}
@@ -940,7 +941,8 @@ set:
mime_atom->d.func = rspamd_mime_expr_parse_function_atom (pool,
mime_atom->str);
if (mime_atom->d.func == NULL) {
- g_set_error (err, rspamd_mime_expr_quark(), 200, "cannot parse function '%s'",
+ g_set_error (err, rspamd_mime_expr_quark(), 200,
+ "cannot parse function '%s'",
mime_atom->str);
goto err;
}
diff --git a/src/libserver/re_cache.c b/src/libserver/re_cache.c
index fe5e1476b..e7641a8b8 100644
--- a/src/libserver/re_cache.c
+++ b/src/libserver/re_cache.c
@@ -108,6 +108,7 @@ enum rspamd_re_cache_elt_match_type {
struct rspamd_re_cache_elt {
rspamd_regexp_t *re;
+ gint lua_cbref;
enum rspamd_re_cache_elt_match_type match_type;
};
@@ -212,6 +213,15 @@ rspamd_re_cache_destroy (struct rspamd_re_cache *cache)
luaL_unref (cache->L, LUA_REGISTRYINDEX, sref);
g_free (skey);
});
+
+ struct rspamd_re_cache_elt *elt;
+ guint i;
+
+ PTR_ARRAY_FOREACH (cache->re, i, elt) {
+ if (elt->lua_cbref != -1) {
+ luaL_unref (cache->L, LUA_REGISTRYINDEX, elt->lua_cbref);
+ }
+ }
}
kh_destroy (lua_selectors_hash, cache->selectors);
@@ -261,8 +271,11 @@ rspamd_re_cache_is_hs_loaded (struct rspamd_re_cache *cache)
}
rspamd_regexp_t *
-rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
- enum rspamd_re_type type, gconstpointer type_data, gsize datalen)
+rspamd_re_cache_add (struct rspamd_re_cache *cache,
+ rspamd_regexp_t *re,
+ enum rspamd_re_type type,
+ gconstpointer type_data, gsize datalen,
+ gint lua_cbref)
{
guint64 class_id;
struct rspamd_re_class *re_class;
@@ -304,6 +317,8 @@ rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
elt->re = rspamd_regexp_ref (re);
g_ptr_array_add (cache->re, elt);
rspamd_regexp_set_class (re, re_class);
+ elt->lua_cbref = lua_cbref;
+
g_hash_table_insert (re_class->re, rspamd_regexp_get_id (nre), nre);
}
@@ -529,11 +544,49 @@ rspamd_re_cache_get_stat (struct rspamd_re_runtime *rt)
return &rt->stat;
}
+static gboolean
+rspamd_re_cache_check_lua_condition (struct rspamd_task *task,
+ rspamd_regexp_t *re,
+ const guchar *in, gsize len,
+ goffset start, goffset end,
+ gint lua_cbref)
+{
+ lua_State *L = (lua_State *)task->cfg->lua_state;
+ GError *err = NULL;
+ struct rspamd_lua_text *t;
+ gint text_pos;
+
+ if (G_LIKELY (lua_cbref == -1)) {
+ return TRUE;
+ }
+
+ t = lua_new_text (L, in, len, FALSE);
+ text_pos = lua_gettop (L);
+
+ if (!rspamd_lua_universal_pcall (L, lua_cbref,
+ G_STRLOC, 1, "utii", &err,
+ "rspamd{task}", task,
+ text_pos, start, end)) {
+ msg_warn_task ("cannot call for re_cache_check_lua_condition for re %s: %e",
+ rspamd_regexp_get_pattern (re), err);
+ g_error_free (err);
+
+ return TRUE;
+ }
+
+ gboolean res = lua_toboolean (L, -1);
+
+ lua_settop (L, text_pos - 1);
+
+ return res;
+}
+
static guint
rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
rspamd_regexp_t *re, struct rspamd_task *task,
const guchar *in, gsize len,
- gboolean is_raw)
+ gboolean is_raw,
+ gint lua_cbref)
{
guint r = 0;
const gchar *start = NULL, *end = NULL;
@@ -570,12 +623,15 @@ rspamd_re_cache_process_pcre (struct rspamd_re_runtime *rt,
&end,
is_raw,
NULL)) {
- r++;
- msg_debug_re_task ("found regexp /%s/, total hits: %d",
- rspamd_regexp_get_pattern (re), r);
+ if (rspamd_re_cache_check_lua_condition (task, re, in, len,
+ start, end, lua_cbref)) {
+ r++;
+ msg_debug_re_task ("found regexp /%s/, total hits: %d",
+ rspamd_regexp_get_pattern (re), r);
- if (max_hits > 0 && r >= max_hits) {
- break;
+ if (max_hits > 0 && r >= max_hits) {
+ break;
+ }
}
}
@@ -621,25 +677,28 @@ rspamd_re_cache_hyperscan_cb (unsigned int id,
{
struct rspamd_re_hyperscan_cbdata *cbdata = ud;
struct rspamd_re_runtime *rt;
- struct rspamd_re_cache_elt *pcre_elt;
+ struct rspamd_re_cache_elt *cache_elt;
guint ret, maxhits, i, processed;
struct rspamd_task *task;
rt = cbdata->rt;
task = cbdata->task;
- pcre_elt = g_ptr_array_index (rt->cache->re, id);
- maxhits = rspamd_regexp_get_maxhits (pcre_elt->re);
-
- if (pcre_elt->match_type == RSPAMD_RE_CACHE_HYPERSCAN) {
- ret = 1;
- setbit (rt->checked, id);
-
- if (maxhits == 0 || rt->results[id] < maxhits) {
- rt->results[id] += ret;
- rt->stat.regexp_matched++;
+ cache_elt = g_ptr_array_index (rt->cache->re, id);
+ maxhits = rspamd_regexp_get_maxhits (cache_elt->re);
+
+ if (cache_elt->match_type == RSPAMD_RE_CACHE_HYPERSCAN) {
+ if (rspamd_re_cache_check_lua_condition (task, cache_elt->re,
+ cbdata->ins[0], cbdata->lens[0], from, to, cache_elt->lua_cbref)) {
+ ret = 1;
+ setbit (rt->checked, id);
+
+ if (maxhits == 0 || rt->results[id] < maxhits) {
+ rt->results[id] += ret;
+ rt->stat.regexp_matched++;
+ }
+ msg_debug_re_task ("found regexp /%s/ using hyperscan only, total hits: %d",
+ rspamd_regexp_get_pattern (cache_elt->re), rt->results[id]);
}
- msg_debug_re_task ("found regexp /%s/ using hyperscan only, total hits: %d",
- rspamd_regexp_get_pattern (pcre_elt->re), rt->results[id]);
}
else {
if (!isset (rt->checked, id)) {
@@ -648,11 +707,12 @@ rspamd_re_cache_hyperscan_cb (unsigned int id,
for (i = 0; i < cbdata->count; i ++) {
rspamd_re_cache_process_pcre (rt,
- pcre_elt->re,
+ cache_elt->re,
cbdata->task,
cbdata->ins[i],
cbdata->lens[i],
- FALSE);
+ FALSE,
+ cache_elt->lua_cbref);
setbit (rt->checked, id);
processed += cbdata->lens[i];
@@ -680,6 +740,7 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
guint64 re_id;
guint ret = 0;
guint i;
+ struct rspamd_re_cache_elt *cache_elt;
re_id = rspamd_regexp_get_cache_id (re);
@@ -690,6 +751,8 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
return ret;
}
+ cache_elt = (struct rspamd_re_cache_elt *)g_ptr_array_index (rt->cache->re, re_id);
+
#ifndef WITH_HYPERSCAN
for (i = 0; i < count; i++) {
ret = rspamd_re_cache_process_pcre (rt,
@@ -697,20 +760,20 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
task,
in[i],
lens[i],
- is_raw);
+ is_raw,
+ cache_elt->lua_cbref);
rt->results[re_id] = ret;
}
setbit (rt->checked, re_id);
#else
- struct rspamd_re_cache_elt *elt;
struct rspamd_re_class *re_class;
struct rspamd_re_hyperscan_cbdata cbdata;
- elt = g_ptr_array_index (rt->cache->re, re_id);
+ cache_elt = g_ptr_array_index (rt->cache->re, re_id);
re_class = rspamd_regexp_get_class (re);
- if (rt->cache->disable_hyperscan || elt->match_type == RSPAMD_RE_CACHE_PCRE ||
+ if (rt->cache->disable_hyperscan || cache_elt->match_type == RSPAMD_RE_CACHE_PCRE ||
!rt->has_hs || (is_raw && re_class->has_utf8)) {
for (i = 0; i < count; i++) {
ret = rspamd_re_cache_process_pcre (rt,
@@ -718,7 +781,8 @@ rspamd_re_cache_process_regexp_data (struct rspamd_re_runtime *rt,
task,
in[i],
lens[i],
- is_raw);
+ is_raw,
+ cache_elt->lua_cbref);
}
setbit (rt->checked, re_id);
diff --git a/src/libserver/re_cache.h b/src/libserver/re_cache.h
index 26ffa1603..c4517edda 100644
--- a/src/libserver/re_cache.h
+++ b/src/libserver/re_cache.h
@@ -68,11 +68,13 @@ struct rspamd_re_cache *rspamd_re_cache_new (void);
* @param type type of object
* @param type_data associated data with the type (e.g. header name)
* @param datalen associated data length
+ * @param lua_cbref optional lua callback reference for matching purposes
*/
rspamd_regexp_t *
rspamd_re_cache_add (struct rspamd_re_cache *cache, rspamd_regexp_t *re,
enum rspamd_re_type type,
- gconstpointer type_data, gsize datalen);
+ gconstpointer type_data, gsize datalen,
+ gint lua_cbref);
/**
* Replace regexp in the cache with another regexp
diff --git a/src/lua/lua_config.c b/src/lua/lua_config.c
index c880b235e..06a2f57b6 100644
--- a/src/lua/lua_config.c
+++ b/src/lua/lua_config.c
@@ -3024,7 +3024,7 @@ lua_config_register_regexp (lua_State *L)
}
cache_re = rspamd_re_cache_add (cfg->re_cache, re->re, type,
- (gpointer) header_str, header_len);
+ (gpointer) header_str, header_len, -1);
/*
* XXX: here are dragons!
More information about the Commits
mailing list