commit 3ccc19f: [Feature] Lua_task: Add get_urls_filtered method
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Mar 19 17:07:06 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-03-19 17:04:41 +0000
URL: https://github.com/rspamd/rspamd/commit/3ccc19f48ab648b22312c0a1169a57ba7e8d30bb (HEAD -> master)
[Feature] Lua_task: Add get_urls_filtered method
---
src/lua/lua_task.c | 81 ++++++++++++++++++++++++++++++++++
src/lua/lua_url.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++--
src/lua/lua_url.h | 9 +++-
3 files changed, 210 insertions(+), 4 deletions(-)
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index bce91b4fb..2de3fb5ed 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -256,6 +256,18 @@ local function phishing_cb(task)
end
*/
LUA_FUNCTION_DEF (task, get_urls);
+/***
+ * @method task:get_urls_filtered([{flags_include}, [{flags_exclude}]], [{protocols_mask}])
+ * Get urls managed by either exclude or include flags list
+ * - If flags include are nil then all but excluded urls are returned
+ * - If flags exclude are nil then only included explicitly urls are returned
+ * - If both parameters are nil then all urls are included
+ * @param {table|string} flags_include included flags
+ * @param {table|string} flags_exclude excluded flags
+ * @param {table|string} protocols_mask incude only specific protocols
+ * @return {table rspamd_url} list of urls matching conditions
+ */
+LUA_FUNCTION_DEF (task, get_urls_filtered);
/***
* @method task:has_urls([need_emails])
* Returns 'true' if a task has urls listed
@@ -1212,6 +1224,7 @@ static const struct luaL_reg tasklib_m[] = {
LUA_INTERFACE_DEF (task, append_message),
LUA_INTERFACE_DEF (task, has_urls),
LUA_INTERFACE_DEF (task, get_urls),
+ LUA_INTERFACE_DEF (task, get_urls_filtered),
LUA_INTERFACE_DEF (task, inject_url),
LUA_INTERFACE_DEF (task, get_content),
LUA_INTERFACE_DEF (task, get_filename),
@@ -2463,6 +2476,74 @@ lua_task_get_urls (lua_State * L)
return 1;
}
+static gint
+lua_task_get_urls_filtered (lua_State * L)
+{
+ LUA_TRACE_POINT;
+ struct rspamd_task *task = lua_check_task (L, 1);
+ struct lua_tree_cb_data cb;
+ struct rspamd_url *u;
+ static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
+ PROTOCOL_FILE|PROTOCOL_FTP;
+ gsize sz, max_urls = 0;
+
+ if (task) {
+ if (task->cfg) {
+ max_urls = task->cfg->max_lua_urls;
+ }
+
+ if (task->message == NULL) {
+ lua_newtable (L);
+
+ return 1;
+ }
+
+ if (!lua_url_cbdata_fill_exclude_include (L, 2, &cb, default_protocols_mask, max_urls)) {
+ return luaL_error (L, "invalid arguments");
+ }
+
+ sz = kh_size (MESSAGE_FIELD (task, urls));
+ sz = lua_url_adjust_skip_prob (task->task_timestamp,
+ MESSAGE_FIELD (task, digest), &cb, sz);
+
+ lua_createtable (L, sz, 0);
+
+ if (cb.sort) {
+ struct rspamd_url **urls_sorted;
+ gint i = 0;
+
+ urls_sorted = g_new0 (struct rspamd_url *, sz);
+
+ kh_foreach_key (MESSAGE_FIELD(task, urls), u, {
+ if (i < sz) {
+ urls_sorted[i] = u;
+ i ++;
+ }
+ });
+
+ qsort (urls_sorted, i, sizeof (struct rspamd_url *), rspamd_url_cmp_qsort);
+
+ for (int j = 0; j < i; j ++) {
+ lua_tree_url_callback (urls_sorted[j], urls_sorted[j], &cb);
+ }
+
+ g_free (urls_sorted);
+ }
+ else {
+ kh_foreach_key (MESSAGE_FIELD(task, urls), u, {
+ lua_tree_url_callback(u, u, &cb);
+ });
+ }
+
+ lua_url_cbdata_dtor (&cb);
+ }
+ else {
+ return luaL_error (L, "invalid arguments, no task");
+ }
+
+ return 1;
+}
+
static gint
lua_task_has_urls (lua_State * L)
{
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 69c7d79bf..b56f025c4 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -957,15 +957,26 @@ lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
if ((url->protocol & cb->protocols_mask) == url->protocol) {
- if (cb->flags_mode == url_flags_mode_include_any) {
+ /* Handle different flags application logic */
+ switch (cb->flags_mode) {
+ case url_flags_mode_include_any:
if (url->flags != (url->flags & cb->flags_mask)) {
return;
}
- }
- else {
+ break;
+ case url_flags_mode_include_explicit:
if ((url->flags & cb->flags_mask) != cb->flags_mask) {
return;
}
+ break;
+ case url_flags_mode_exclude_include:
+ if (url->flags & cb->flags_exclude_mask) {
+ return;
+ }
+ if (url->flags != (url->flags & cb->flags_mask)) {
+ return;
+ }
+ break;
}
if (cb->skip_prob > 0) {
@@ -1207,6 +1218,113 @@ lua_url_cbdata_fill (lua_State *L,
return TRUE;
}
+gboolean
+lua_url_cbdata_fill_exclude_include (lua_State *L,
+ gint pos,
+ struct lua_tree_cb_data *cbd,
+ guint default_protocols,
+ gsize max_urls)
+{
+ guint protocols_mask = default_protocols;
+ guint include_flags_mask, exclude_flags_mask;
+
+ gint pos_arg_type = lua_type (L, pos);
+
+ memset (cbd, 0, sizeof (*cbd));
+ cbd->flags_mode = url_flags_mode_exclude_include;
+
+ /* Include flags */
+ if (pos_arg_type == LUA_TTABLE) {
+ include_flags_mask = 0; /* Reset to no flags */
+
+ for (lua_pushnil(L); lua_next(L, pos); lua_pop (L, 1)) {
+ int nmask = 0;
+ const gchar *fname = lua_tostring (L, -1);
+
+ if (rspamd_url_flag_from_string(fname, &nmask)) {
+ include_flags_mask |= nmask;
+ }
+ else {
+ msg_info ("bad url include flag: %s", fname);
+ return FALSE;
+ }
+ }
+ }
+ else if (pos_arg_type == LUA_TNIL) {
+ /* Include all flags */
+ include_flags_mask = ~0U;
+ }
+ else {
+ msg_info ("bad arguments: wrong include mask");
+ return FALSE;
+ }
+
+ /* Exclude flags */
+ pos_arg_type = lua_type (L, pos + 1);
+ if (pos_arg_type == LUA_TTABLE) {
+ exclude_flags_mask = 0; /* Reset to no flags */
+
+ for (lua_pushnil(L); lua_next(L, pos); lua_pop (L, 1)) {
+ int nmask = 0;
+
+ const gchar *fname = lua_tostring (L, -1);
+
+ if (rspamd_url_flag_from_string(fname, &nmask)) {
+ exclude_flags_mask |= nmask;
+ }
+ else {
+ msg_info ("bad url exclude flag: %s", fname);
+ return FALSE;
+ }
+ }
+ }
+ else if (pos_arg_type == LUA_TNIL) {
+ /* Empty all exclude flags */
+ exclude_flags_mask = 0U;
+ }
+ else {
+ msg_info ("bad arguments: wrong exclude mask");
+ return FALSE;
+ }
+
+ if (lua_type (L, pos + 2) == LUA_TTABLE) {
+ protocols_mask = 0U; /* Reset all protocols */
+
+ for (lua_pushnil (L); lua_next (L, pos + 2); lua_pop (L, 1)) {
+ int nmask;
+ const gchar *pname = lua_tostring (L, -1);
+
+ nmask = rspamd_url_protocol_from_string (pname);
+
+ if (nmask != PROTOCOL_UNKNOWN) {
+ protocols_mask |= nmask;
+ }
+ else {
+ msg_info ("bad url protocol: %s", pname);
+ return FALSE;
+ }
+ }
+ }
+ else {
+ protocols_mask = default_protocols;
+ }
+
+ cbd->i = 1;
+ cbd->L = L;
+ cbd->max_urls = max_urls;
+ cbd->protocols_mask = protocols_mask;
+ cbd->flags_mask = include_flags_mask;
+ cbd->flags_exclude_mask = exclude_flags_mask;
+
+ /* This needs to be removed from the stack */
+ rspamd_lua_class_metatable (L, "rspamd{url}");
+ cbd->metatable_pos = lua_gettop (L);
+ (void)lua_checkstack (L, cbd->metatable_pos + 4);
+
+ return TRUE;
+}
+
+
void
lua_url_cbdata_dtor (struct lua_tree_cb_data *cbd)
{
diff --git a/src/lua/lua_url.h b/src/lua/lua_url.h
index 705fe1615..904a56da7 100644
--- a/src/lua/lua_url.h
+++ b/src/lua/lua_url.h
@@ -27,15 +27,17 @@ struct lua_tree_cb_data {
int i;
int metatable_pos;
guint flags_mask;
+ guint flags_exclude_mask;
guint protocols_mask;
enum {
url_flags_mode_include_any,
url_flags_mode_include_explicit,
+ url_flags_mode_exclude_include,
} flags_mode;
+ gboolean sort;
gsize max_urls;
gdouble skip_prob;
guint64 xoroshiro_state[4];
- gboolean sort;
};
void lua_tree_url_callback (gpointer key, gpointer value, gpointer ud);
@@ -53,6 +55,11 @@ gboolean lua_url_cbdata_fill (lua_State *L, gint pos,
guint default_flags,
gsize max_urls);
+gboolean lua_url_cbdata_fill_exclude_include (lua_State *L, gint pos,
+ struct lua_tree_cb_data *cbd,
+ guint default_protocols,
+ gsize max_urls);
+
/**
* Cleanup url cbdata
* @param cbd
More information about the Commits
mailing list