commit df9d18c: [Feature] Lua_task: Add flexible method to get specific urls
Vsevolod Stakhov
vsevolod at highsecure.ru
Sat Feb 16 14:28:07 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-02-16 14:22:46 +0000
URL: https://github.com/rspamd/rspamd/commit/df9d18ccd37ef0d92db3fb720fa1d6b055812097
[Feature] Lua_task: Add flexible method to get specific urls
---
src/lua/lua_task.c | 111 ++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 89 insertions(+), 22 deletions(-)
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index b6c5c1fb7..197094ab9 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -174,13 +174,13 @@ end
*/
LUA_FUNCTION_DEF (task, append_message);
/***
- * @method task:get_urls([need_emails])
- * Get all URLs found in a message.
- * @param {boolean} need_emails if `true` then reutrn also email urls
+ * @method task:get_urls([need_emails|list_protos])
+ * Get all URLs found in a message. Telephone urls and emails are not included unless explicitly asked in `list_protos`
+ * @param {boolean} need_emails if `true` then also return email urls; this can also be a comma-separated string of the desired protocols or a table (e.g. `mailto` or `telephone`)
* @return {table rspamd_url} list of all urls found
@example
local function phishing_cb(task)
- local urls = task:get_urls();
+ local urls = task:get_urls({'https', 'http'});
if urls then
for _,url in ipairs(urls) do
@@ -1831,18 +1831,22 @@ lua_task_append_message (lua_State * L)
struct lua_tree_cb_data {
lua_State *L;
int i;
+ gint mask;
};
static void
lua_tree_url_callback (gpointer key, gpointer value, gpointer ud)
{
- struct rspamd_lua_url *url;
+ struct rspamd_lua_url *lua_url;
+ struct rspamd_url *url = (struct rspamd_url *)value;
struct lua_tree_cb_data *cb = ud;
- url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url));
- rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
- url->url = value;
- lua_rawseti (cb->L, -2, cb->i++);
+ if (url->protocol & cb->mask) {
+ lua_url = lua_newuserdata (cb->L, sizeof (struct rspamd_lua_url));
+ rspamd_lua_setclass (cb->L, "rspamd{url}", -1);
+ lua_url->url = url;
+ lua_rawseti (cb->L, -2, cb->i++);
+ }
}
static gint
@@ -1851,38 +1855,101 @@ lua_task_get_urls (lua_State * L)
LUA_TRACE_POINT;
struct rspamd_task *task = lua_check_task (L, 1);
struct lua_tree_cb_data cb;
- gboolean need_emails = FALSE;
+ gint protocols_mask = 0;
+ static const gint default_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
+ PROTOCOL_FILE|PROTOCOL_FTP;
gsize sz;
if (task) {
if (lua_gettop (L) >= 2) {
- need_emails = lua_toboolean (L, 2);
+ if (lua_type (L, 2) == LUA_TBOOLEAN) {
+ protocols_mask = default_mask;
+ if (lua_toboolean (L, 2)) {
+ protocols_mask |= PROTOCOL_MAILTO;
+ }
+ }
+ else if (lua_type (L, 2) == LUA_TTABLE) {
+ for (lua_pushnil (L); lua_next (L, 2); lua_pop (L, 1)) {
+ int nmask;
+ const gchar *pname = lua_tostring (L, -1);
+
+ nmask = rspamd_url_protocol_from_string (pname);
+
+ if (nmask != PROTOCOL_UNKNOWN) {
+ protocols_mask |= nmask;
+ }
+ else {
+ msg_info ("bad url protocol: %s", pname);
+ }
+ }
+ }
+ else if (lua_type (L, 2) == LUA_TSTRING) {
+ const gchar *plist = lua_tostring (L, 2);
+ gchar **strvec;
+ gchar * const *cvec;
+
+ strvec = g_strsplit_set (plist, ",;", -1);
+ cvec = strvec;
+
+ while (*cvec) {
+ int nmask;
+
+ nmask = rspamd_url_protocol_from_string (*cvec);
+
+ if (nmask != PROTOCOL_UNKNOWN) {
+ protocols_mask |= nmask;
+ }
+ else {
+ msg_info ("bad url protocol: %s", cvec);
+ }
+
+ cvec ++;
+ }
+
+ g_strfreev (strvec);
+ }
+ }
+ else {
+ protocols_mask = default_mask;
}
- if (need_emails) {
+ cb.i = 1;
+ cb.L = L;
+ cb.mask = protocols_mask;
+
+ if (protocols_mask & PROTOCOL_MAILTO) {
sz = g_hash_table_size (task->urls) + g_hash_table_size (task->emails);
- if (!lua_task_get_cached (L, task, "emails+urls", sz)) {
+ if (protocols_mask == (default_mask|PROTOCOL_MAILTO)) {
+ /* Can use cached version */
+ if (!lua_task_get_cached (L, task, "emails+urls", sz)) {
+ lua_createtable (L, sz, 0);
+ g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb);
+ g_hash_table_foreach (task->emails, lua_tree_url_callback, &cb);
+
+ lua_task_set_cached (L, task, "emails+urls", -1, sz);
+ }
+ }
+ else {
lua_createtable (L, sz, 0);
- cb.i = 1;
- cb.L = L;
g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb);
g_hash_table_foreach (task->emails, lua_tree_url_callback, &cb);
-
- lua_task_set_cached (L, task, "emails+urls", -1, sz);
}
}
else {
sz = g_hash_table_size (task->urls);
- if (!lua_task_get_cached (L, task, "urls", sz)) {
+ if (protocols_mask == (default_mask)) {
+ if (!lua_task_get_cached (L, task, "urls", sz)) {
+ lua_createtable (L, sz, 0);
+ g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb);
+ lua_task_set_cached (L, task, "urls", -1, sz);
+ }
+ }
+ else {
lua_createtable (L, sz, 0);
- cb.i = 1;
- cb.L = L;
g_hash_table_foreach (task->urls, lua_tree_url_callback, &cb);
-
- lua_task_set_cached (L, task, "urls", -1, sz);
}
}
}
More information about the Commits
mailing list