commit 3ccc19f: [Feature] Lua_task: Add get_urls_filtered method

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Mar 19 17:07:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-03-19 17:04:41 +0000
URL: https://github.com/rspamd/rspamd/commit/3ccc19f48ab648b22312c0a1169a57ba7e8d30bb (HEAD -> master)

[Feature] Lua_task: Add get_urls_filtered method

---
 src/lua/lua_task.c |  81 ++++++++++++++++++++++++++++++++++
 src/lua/lua_url.c  | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/lua/lua_url.h  |   9 +++-
 3 files changed, 210 insertions(+), 4 deletions(-)

diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index bce91b4fb..2de3fb5ed 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -256,6 +256,18 @@ local function phishing_cb(task)
 end
  */
 LUA_FUNCTION_DEF (task, get_urls);
+/***
+ * @method task:get_urls_filtered([{flags_include}, [{flags_exclude}]], [{protocols_mask}])
+ * Get urls managed by either exclude or include flags list
+ * - If flags include are nil then all but excluded urls are returned
+ * - If flags exclude are nil then only included explicitly urls are returned
+ * - If both parameters are nil then all urls are included
+ * @param {table|string} flags_include included flags
+ * @param {table|string} flags_exclude excluded flags
+ * @param {table|string} protocols_mask incude only specific protocols
+ * @return {table rspamd_url} list of urls matching conditions
+ */
+LUA_FUNCTION_DEF (task, get_urls_filtered);
 /***
  * @method task:has_urls([need_emails])
  * Returns 'true' if a task has urls listed
@@ -1212,6 +1224,7 @@ static const struct luaL_reg tasklib_m[] = {
 	LUA_INTERFACE_DEF (task, append_message),
 	LUA_INTERFACE_DEF (task, has_urls),
 	LUA_INTERFACE_DEF (task, get_urls),
+	LUA_INTERFACE_DEF (task, get_urls_filtered),
 	LUA_INTERFACE_DEF (task, inject_url),
 	LUA_INTERFACE_DEF (task, get_content),
 	LUA_INTERFACE_DEF (task, get_filename),
@@ -2463,6 +2476,74 @@ lua_task_get_urls (lua_State * L)
 	return 1;
 }
 
+static gint
+lua_task_get_urls_filtered (lua_State * L)
+{
+	LUA_TRACE_POINT;
+	struct rspamd_task *task = lua_check_task (L, 1);
+	struct lua_tree_cb_data cb;
+	struct rspamd_url *u;
+	static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
+											   PROTOCOL_FILE|PROTOCOL_FTP;
+	gsize sz, max_urls = 0;
+
+	if (task) {
+		if (task->cfg) {
+			max_urls = task->cfg->max_lua_urls;
+		}
+
+		if (task->message == NULL) {
+			lua_newtable (L);
+
+			return 1;
+		}
+
+		if (!lua_url_cbdata_fill_exclude_include (L, 2, &cb, default_protocols_mask, max_urls)) {
+			return luaL_error (L, "invalid arguments");
+		}
+
+		sz = kh_size (MESSAGE_FIELD (task, urls));
+		sz = lua_url_adjust_skip_prob (task->task_timestamp,
+				MESSAGE_FIELD (task, digest), &cb, sz);
+
+		lua_createtable (L, sz, 0);
+
+		if (cb.sort) {
+			struct rspamd_url **urls_sorted;
+			gint i = 0;
+
+			urls_sorted = g_new0 (struct rspamd_url *, sz);
+
+			kh_foreach_key (MESSAGE_FIELD(task, urls), u, {
+				if (i < sz) {
+					urls_sorted[i] = u;
+					i ++;
+				}
+			});
+
+			qsort (urls_sorted, i, sizeof (struct rspamd_url *), rspamd_url_cmp_qsort);
+
+			for (int j = 0; j < i; j ++) {
+				lua_tree_url_callback (urls_sorted[j], urls_sorted[j], &cb);
+			}
+
+			g_free (urls_sorted);
+		}
+		else {
+			kh_foreach_key (MESSAGE_FIELD(task, urls), u, {
+				lua_tree_url_callback(u, u, &cb);
+			});
+		}
+
+		lua_url_cbdata_dtor (&cb);
+	}
+	else {
+		return luaL_error (L, "invalid arguments, no task");
+	}
+
+	return 1;
+}
+
 static gint
 lua_task_has_urls (lua_State * L)
 {
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 69c7d79bf..b56f025c4 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -957,15 +957,26 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
 
 	if ((url->protocol & cb->protocols_mask) == url->protocol) {
 
-		if (cb->flags_mode == url_flags_mode_include_any) {
+		/* Handle different flags application logic */
+		switch (cb->flags_mode) {
+		case url_flags_mode_include_any:
 			if (url->flags != (url->flags & cb->flags_mask)) {
 				return;
 			}
-		}
-		else {
+			break;
+		case url_flags_mode_include_explicit:
 			if ((url->flags & cb->flags_mask) != cb->flags_mask) {
 				return;
 			}
+			break;
+		case url_flags_mode_exclude_include:
+			if (url->flags & cb->flags_exclude_mask) {
+				return;
+			}
+			if (url->flags != (url->flags & cb->flags_mask)) {
+				return;
+			}
+			break;
 		}
 
 		if (cb->skip_prob > 0) {
@@ -1207,6 +1218,113 @@ lua_url_cbdata_fill (lua_State *L,
 	return TRUE;
 }
 
+gboolean
+lua_url_cbdata_fill_exclude_include (lua_State *L,
+					 gint pos,
+					 struct lua_tree_cb_data *cbd,
+					 guint default_protocols,
+					 gsize max_urls)
+{
+	guint protocols_mask = default_protocols;
+	guint include_flags_mask, exclude_flags_mask;
+
+	gint pos_arg_type = lua_type (L, pos);
+
+	memset (cbd, 0, sizeof (*cbd));
+	cbd->flags_mode = url_flags_mode_exclude_include;
+
+	/* Include flags */
+	if (pos_arg_type == LUA_TTABLE) {
+		include_flags_mask = 0; /* Reset to no flags */
+
+		for (lua_pushnil(L); lua_next(L, pos); lua_pop (L, 1)) {
+			int nmask = 0;
+			const gchar *fname = lua_tostring (L, -1);
+
+			if (rspamd_url_flag_from_string(fname, &nmask)) {
+				include_flags_mask |= nmask;
+			}
+			else {
+				msg_info ("bad url include flag: %s", fname);
+				return FALSE;
+			}
+		}
+	}
+	else if (pos_arg_type == LUA_TNIL) {
+		/* Include all flags */
+		include_flags_mask = ~0U;
+	}
+	else {
+		msg_info ("bad arguments: wrong include mask");
+		return FALSE;
+	}
+
+	/* Exclude flags */
+	pos_arg_type = lua_type (L, pos + 1);
+	if (pos_arg_type == LUA_TTABLE) {
+		exclude_flags_mask = 0; /* Reset to no flags */
+
+		for (lua_pushnil(L); lua_next(L, pos); lua_pop (L, 1)) {
+			int nmask = 0;
+
+			const gchar *fname = lua_tostring (L, -1);
+
+			if (rspamd_url_flag_from_string(fname, &nmask)) {
+				exclude_flags_mask |= nmask;
+			}
+			else {
+				msg_info ("bad url exclude flag: %s", fname);
+				return FALSE;
+			}
+		}
+	}
+	else if (pos_arg_type == LUA_TNIL) {
+		/* Empty all exclude flags */
+		exclude_flags_mask = 0U;
+	}
+	else {
+		msg_info ("bad arguments: wrong exclude mask");
+		return FALSE;
+	}
+
+	if (lua_type (L, pos + 2) == LUA_TTABLE) {
+		protocols_mask = 0U; /* Reset all protocols */
+
+		for (lua_pushnil (L); lua_next (L, pos + 2); lua_pop (L, 1)) {
+			int nmask;
+			const gchar *pname = lua_tostring (L, -1);
+
+			nmask = rspamd_url_protocol_from_string (pname);
+
+			if (nmask != PROTOCOL_UNKNOWN) {
+				protocols_mask |= nmask;
+			}
+			else {
+				msg_info ("bad url protocol: %s", pname);
+				return FALSE;
+			}
+		}
+	}
+	else {
+		protocols_mask = default_protocols;
+	}
+
+	cbd->i = 1;
+	cbd->L = L;
+	cbd->max_urls = max_urls;
+	cbd->protocols_mask = protocols_mask;
+	cbd->flags_mask = include_flags_mask;
+	cbd->flags_exclude_mask = exclude_flags_mask;
+
+	/* This needs to be removed from the stack */
+	rspamd_lua_class_metatable (L, "rspamd{url}");
+	cbd->metatable_pos = lua_gettop (L);
+	(void)lua_checkstack (L, cbd->metatable_pos + 4);
+
+	return TRUE;
+}
+
+
 void
 lua_url_cbdata_dtor (struct lua_tree_cb_data *cbd)
 {
diff --git a/src/lua/lua_url.h b/src/lua/lua_url.h
index 705fe1615..904a56da7 100644
--- a/src/lua/lua_url.h
+++ b/src/lua/lua_url.h
@@ -27,15 +27,17 @@ struct lua_tree_cb_data {
 	int i;
 	int metatable_pos;
 	guint flags_mask;
+	guint flags_exclude_mask;
 	guint protocols_mask;
 	enum {
 		url_flags_mode_include_any,
 		url_flags_mode_include_explicit,
+		url_flags_mode_exclude_include,
 	} flags_mode;
+	gboolean sort;
 	gsize max_urls;
 	gdouble skip_prob;
 	guint64 xoroshiro_state[4];
-	gboolean sort;
 };
 
 void lua_tree_url_callback (gpointer key, gpointer value, gpointer ud);
@@ -53,6 +55,11 @@ gboolean lua_url_cbdata_fill (lua_State *L, gint pos,
 							  guint default_flags,
 							  gsize max_urls);
 
+gboolean lua_url_cbdata_fill_exclude_include (lua_State *L, gint pos,
+							  struct lua_tree_cb_data *cbd,
+							  guint default_protocols,
+							  gsize max_urls);
+
 /**
  * Cleanup url cbdata
  * @param cbd


More information about the Commits mailing list