commit 9744716: [Project] Further content module work
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Nov 25 13:28:10 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-11-24 09:35:18 +0000
URL: https://github.com/rspamd/rspamd/commit/9744716ba7e00681a7dbe7091cdb6ecd421ad32b
[Project] Further content module work
---
lualib/lua_content/ical.lua | 9 ++--
lualib/lua_content/init.lua | 73 +++++++++++++++++++++++++++-
src/libmime/message.c | 113 ++++++++++++++++----------------------------
3 files changed, 118 insertions(+), 77 deletions(-)
diff --git a/lualib/lua_content/ical.lua b/lualib/lua_content/ical.lua
index 4f6b61919..c19723614 100644
--- a/lualib/lua_content/ical.lua
+++ b/lualib/lua_content/ical.lua
@@ -15,6 +15,7 @@ limitations under the License.
]]--
local l = require 'lpeg'
+local rspamd_text = require "rspamd_text"
local wsp = l.P" "
local crlf = l.P"\r"^-1 * l.P"\n"
@@ -25,7 +26,7 @@ local elt = name * ":" * wsp^0 * value * eol
local exports = {}
-local function ical_txt_values(input)
+local function process_ical(input, _, _)
local control={n='\n', r='\r'}
local escaper = l.Ct((elt / function(_,b) return (b:gsub("\\(.)", control)) end)^1)
@@ -35,13 +36,13 @@ local function ical_txt_values(input)
return nil
end
- return table.concat(values, "\n")
+ return rspamd_text.fromtable(values, "\n")
end
--[[[
--- @function lua_ical.ical_txt_values(input)
+-- @function lua_ical.process(input)
-- Returns all values from ical as a plain text. Names are completely ignored.
--]]
-exports.ical_txt_values = ical_txt_values
+exports.process = process_ical
return exports
\ No newline at end of file
diff --git a/lualib/lua_content/init.lua b/lualib/lua_content/init.lua
index a96852139..994d613f4 100644
--- a/lualib/lua_content/init.lua
+++ b/lualib/lua_content/init.lua
@@ -17,4 +17,75 @@ limitations under the License.
--[[[
-- @module lua_content
-- This module contains content processing logic
---]]
\ No newline at end of file
+--]]
+
+
+local exports = {}
+local N = "lua_content"
+local lua_util = require "lua_util"
+
+local content_modules = {
+ ical = {
+ mime_type = "text/calendar",
+ module = require "lua_content/ical",
+ extensions = {'ical'},
+ output = "text"
+ },
+}
+
+local modules_by_mime_type
+local modules_by_extension
+
+local function init()
+ modules_by_mime_type = {}
+ modules_by_extension = {}
+ for k,v in pairs(content_modules) do
+ if v.mime_type then
+ modules_by_mime_type[v.mime_type] = {k, v}
+ end
+ if v.extensions then
+ for _,ext in ipairs(v.extensions) do
+ modules_by_extension[ext] = {k, v}
+ end
+ end
+ end
+end
+
+exports.maybe_process_mime_part = function(part, log_obj)
+ if not modules_by_mime_type then
+ init()
+ end
+
+ local ctype, csubtype = part:get_type()
+ local mt = string.format("%s/%s", ctype or 'application',
+ csubtype or 'octet-stream')
+ local pair = modules_by_mime_type[mt]
+
+ if not pair then
+ local ext = part:get_detected_ext()
+
+ if ext then
+ pair = modules_by_extension[ext]
+ end
+ end
+
+ if pair then
+ lua_util.debugm(N, log_obj, "found known content of type %s: %s",
+ mt, pair[1])
+
+ local data = pair[2].module.process(part:get_content(), part, log_obj)
+
+ if data then
+ lua_util.debugm(N, log_obj, "extracted content from %s: %s type",
+ pair[1], type(data))
+ part:set_specific(data)
+ else
+ lua_util.debugm(N, log_obj, "failed to extract anything from %s",
+ pair[1])
+ end
+ end
+
+end
+
+
+return exports
\ No newline at end of file
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 7d2d81a7f..53c3cce27 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -694,71 +694,8 @@ rspamd_message_process_plain_text_part (struct rspamd_task *task,
rspamd_mime_text_part_maybe_convert (task, text_part);
if (text_part->utf_raw_content != NULL) {
- /* Check for ical */
- rspamd_ftok_t cal_ct;
-
- /*
- * TODO: If we want to process more than that, we need
- * to create some generic framework that accepts a part
- * and returns a processed data
- */
- RSPAMD_FTOK_ASSIGN (&cal_ct, "calendar");
-
- if (rspamd_ftok_casecmp (&cal_ct, &text_part->mime_part->ct->subtype) == 0) {
- lua_State *L = task->cfg->lua_state;
- gint err_idx;
-
- lua_pushcfunction (L, &rspamd_lua_traceback);
- err_idx = lua_gettop (L);
-
- /* Obtain function */
- if (!rspamd_lua_require_function (L, "lua_ical", "ical_txt_values")) {
- msg_err_task ("cannot require lua_ical.ical_txt_values");
- lua_settop (L, err_idx - 1);
-
- return FALSE;
- }
-
- lua_pushlstring (L, text_part->utf_raw_content->data,
- text_part->utf_raw_content->len);
-
- if (lua_pcall (L, 1, 1, err_idx) != 0) {
- msg_err_task ("cannot call lua lua_ical.ical_txt_values: %s",
- lua_tostring (L, -1));
- lua_settop (L, err_idx - 1);
-
- return FALSE;
- }
-
- if (lua_type (L, -1) == LUA_TSTRING) {
- const char *ndata;
- gsize nsize;
-
- ndata = lua_tolstring (L, -1, &nsize);
- text_part->utf_content = g_byte_array_sized_new (nsize);
- g_byte_array_append (text_part->utf_content, ndata, nsize);
- rspamd_mempool_add_destructor (task->task_pool,
- (rspamd_mempool_destruct_t) free_byte_array_callback,
- text_part->utf_content);
- }
- else if (lua_type (L, -1) == LUA_TNIL) {
- msg_info_task ("cannot convert text/calendar to plain text");
- text_part->utf_content = text_part->utf_raw_content;
- }
- else {
- msg_err_task ("invalid return type when calling lua_ical.ical_txt_values: %s",
- lua_typename (L, lua_type (L, -1)));
- lua_settop (L, err_idx - 1);
-
- return FALSE;
- }
-
- lua_settop (L, err_idx - 1);
- }
- else {
- /* Just have the same content */
- text_part->utf_content = text_part->utf_raw_content;
- }
+ /* Just have the same content */
+ text_part->utf_content = text_part->utf_raw_content;
}
else {
/*
@@ -1378,7 +1315,7 @@ rspamd_message_process (struct rspamd_task *task)
guint tw, *ptw, dw;
struct rspamd_mime_part *part;
lua_State *L = NULL;
- gint func_pos = -1;
+ gint magic_func_pos = -1, content_func_pos = -1, old_top = -1;
if (task->cfg) {
L = task->cfg->lua_state;
@@ -1386,20 +1323,32 @@ rspamd_message_process (struct rspamd_task *task)
rspamd_archives_process (task);
+ if (L) {
+ old_top = lua_gettop (L);
+ }
+
if (L && rspamd_lua_require_function (L,
"lua_magic", "detect_mime_part")) {
- func_pos = lua_gettop (L);
+ magic_func_pos = lua_gettop (L);
}
else {
msg_err_task ("cannot require lua_magic.detect_mime_part");
}
+ if (L && rspamd_lua_require_function (L,
+ "lua_content", "maybe_process_mime_part")) {
+ content_func_pos = lua_gettop (L);
+ }
+ else {
+ msg_err_task ("cannot require lua_content.maybe_process_mime_part");
+ }
+
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
- if (func_pos != -1 && part->parsed_data.len > 0) {
+ if (magic_func_pos != -1 && part->parsed_data.len > 0) {
struct rspamd_mime_part **pmime;
struct rspamd_task **ptask;
- lua_pushvalue (L, func_pos);
+ lua_pushvalue (L, magic_func_pos);
pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
*pmime = part;
@@ -1447,7 +1396,27 @@ rspamd_message_process (struct rspamd_task *task)
}
}
- lua_settop (L, func_pos);
+ lua_settop (L, magic_func_pos);
+ }
+
+ /* Now detect content */
+ if (content_func_pos != -1 && part->parsed_data.len > 0) {
+ struct rspamd_mime_part **pmime;
+ struct rspamd_task **ptask;
+
+ lua_pushvalue (L, content_func_pos);
+ pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
+ rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
+ *pmime = part;
+ ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
+ rspamd_lua_setclass (L, "rspamd{task}", -1);
+ *ptask = task;
+
+ if (lua_pcall (L, 2, 2, 0) != 0) {
+ msg_err_task ("cannot detect content: %s", lua_tostring (L, -1));
+ }
+
+ lua_settop (L, magic_func_pos);
}
if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
@@ -1455,8 +1424,8 @@ rspamd_message_process (struct rspamd_task *task)
}
}
- if (func_pos != -1) {
- lua_settop (L, func_pos - 1);
+ if (old_top != -1) {
+ lua_settop (L, old_top);
}
/* Calculate average words length and number of short words */
More information about the Commits
mailing list