commit 9744716: [Project] Further content module work

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Nov 25 13:28:10 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-11-24 09:35:18 +0000
URL: https://github.com/rspamd/rspamd/commit/9744716ba7e00681a7dbe7091cdb6ecd421ad32b

[Project] Further content module work

---
 lualib/lua_content/ical.lua |   9 ++--
 lualib/lua_content/init.lua |  73 +++++++++++++++++++++++++++-
 src/libmime/message.c       | 113 ++++++++++++++++----------------------------
 3 files changed, 118 insertions(+), 77 deletions(-)

diff --git a/lualib/lua_content/ical.lua b/lualib/lua_content/ical.lua
index 4f6b61919..c19723614 100644
--- a/lualib/lua_content/ical.lua
+++ b/lualib/lua_content/ical.lua
@@ -15,6 +15,7 @@ limitations under the License.
 ]]--
 
 local l = require 'lpeg'
+local rspamd_text = require "rspamd_text"
 
 local wsp = l.P" "
 local crlf = l.P"\r"^-1 * l.P"\n"
@@ -25,7 +26,7 @@ local elt = name * ":" * wsp^0 * value * eol
 
 local exports = {}
 
-local function ical_txt_values(input)
+local function process_ical(input, _, _)
   local control={n='\n', r='\r'}
   local escaper = l.Ct((elt / function(_,b) return (b:gsub("\\(.)", control)) end)^1)
 
@@ -35,13 +36,13 @@ local function ical_txt_values(input)
     return nil
   end
 
-  return table.concat(values, "\n")
+  return rspamd_text.fromtable(values, "\n")
 end
 
 --[[[
--- @function lua_ical.ical_txt_values(input)
+-- @function lua_ical.process(input)
 -- Returns all values from ical as a plain text. Names are completely ignored.
 --]]
-exports.ical_txt_values = ical_txt_values
+exports.process = process_ical
 
 return exports
\ No newline at end of file
diff --git a/lualib/lua_content/init.lua b/lualib/lua_content/init.lua
index a96852139..994d613f4 100644
--- a/lualib/lua_content/init.lua
+++ b/lualib/lua_content/init.lua
@@ -17,4 +17,75 @@ limitations under the License.
 --[[[
 -- @module lua_content
 -- This module contains content processing logic
---]]
\ No newline at end of file
+--]]
+
+
+local exports = {}
+local N = "lua_content"
+local lua_util = require "lua_util"
+
+local content_modules = {
+  ical = {
+    mime_type = "text/calendar",
+    module = require "lua_content/ical",
+    extensions = {'ical'},
+    output = "text"
+  },
+}
+
+local modules_by_mime_type
+local modules_by_extension
+
+local function init()
+  modules_by_mime_type = {}
+  modules_by_extension = {}
+  for k,v in pairs(content_modules) do
+    if v.mime_type then
+      modules_by_mime_type[v.mime_type] = {k, v}
+    end
+    if v.extensions then
+      for _,ext in ipairs(v.extensions) do
+        modules_by_extension[ext] = {k, v}
+      end
+    end
+  end
+end
+
+exports.maybe_process_mime_part = function(part, log_obj)
+  if not modules_by_mime_type then
+    init()
+  end
+
+  local ctype, csubtype = part:get_type()
+  local mt = string.format("%s/%s", ctype or 'application',
+      csubtype or 'octet-stream')
+  local pair = modules_by_mime_type[mt]
+
+  if not pair then
+    local ext = part:get_detected_ext()
+
+    if ext then
+      pair = modules_by_extension[ext]
+    end
+  end
+
+  if pair then
+    lua_util.debugm(N, log_obj, "found known content of type %s: %s",
+        mt, pair[1])
+
+    local data = pair[2].module.process(part:get_content(), part, log_obj)
+
+    if data then
+      lua_util.debugm(N, log_obj, "extracted content from %s: %s type",
+          pair[1], type(data))
+      part:set_specific(data)
+    else
+      lua_util.debugm(N, log_obj, "failed to extract anything from %s",
+          pair[1])
+    end
+  end
+
+end
+
+
+return exports
\ No newline at end of file
diff --git a/src/libmime/message.c b/src/libmime/message.c
index 7d2d81a7f..53c3cce27 100644
--- a/src/libmime/message.c
+++ b/src/libmime/message.c
@@ -694,71 +694,8 @@ rspamd_message_process_plain_text_part (struct rspamd_task *task,
 	rspamd_mime_text_part_maybe_convert (task, text_part);
 
 	if (text_part->utf_raw_content != NULL) {
-		/* Check for ical */
-		rspamd_ftok_t cal_ct;
-
-		/*
-		 * TODO: If we want to process more than that, we need
-		 * to create some generic framework that accepts a part
-		 * and returns a processed data
-		 */
-		RSPAMD_FTOK_ASSIGN (&cal_ct, "calendar");
-
-		if (rspamd_ftok_casecmp (&cal_ct, &text_part->mime_part->ct->subtype) == 0) {
-			lua_State *L = task->cfg->lua_state;
-			gint err_idx;
-
-			lua_pushcfunction (L, &rspamd_lua_traceback);
-			err_idx = lua_gettop (L);
-
-			/* Obtain function */
-			if (!rspamd_lua_require_function (L, "lua_ical", "ical_txt_values")) {
-				msg_err_task ("cannot require lua_ical.ical_txt_values");
-				lua_settop (L, err_idx - 1);
-
-				return FALSE;
-			}
-
-			lua_pushlstring (L, text_part->utf_raw_content->data,
-					text_part->utf_raw_content->len);
-
-			if (lua_pcall (L, 1, 1, err_idx) != 0) {
-				msg_err_task ("cannot call lua lua_ical.ical_txt_values: %s",
-						lua_tostring (L, -1));
-				lua_settop (L, err_idx - 1);
-
-				return FALSE;
-			}
-
-			if (lua_type (L, -1) == LUA_TSTRING) {
-				const char *ndata;
-				gsize nsize;
-
-				ndata = lua_tolstring (L, -1, &nsize);
-				text_part->utf_content = g_byte_array_sized_new (nsize);
-				g_byte_array_append (text_part->utf_content, ndata, nsize);
-				rspamd_mempool_add_destructor (task->task_pool,
-						(rspamd_mempool_destruct_t) free_byte_array_callback,
-						text_part->utf_content);
-			}
-			else if (lua_type (L, -1) == LUA_TNIL) {
-				msg_info_task ("cannot convert text/calendar to plain text");
-				text_part->utf_content = text_part->utf_raw_content;
-			}
-			else {
-				msg_err_task ("invalid return type when calling lua_ical.ical_txt_values: %s",
-						lua_typename (L, lua_type (L, -1)));
-				lua_settop (L, err_idx - 1);
-
-				return FALSE;
-			}
-
-			lua_settop (L, err_idx - 1);
-		}
-		else {
-			/* Just have the same content */
-			text_part->utf_content = text_part->utf_raw_content;
-		}
+		/* Just have the same content */
+		text_part->utf_content = text_part->utf_raw_content;
 	}
 	else {
 		/*
@@ -1378,7 +1315,7 @@ rspamd_message_process (struct rspamd_task *task)
 	guint tw, *ptw, dw;
 	struct rspamd_mime_part *part;
 	lua_State *L = NULL;
-	gint func_pos = -1;
+	gint magic_func_pos = -1, content_func_pos = -1, old_top = -1;
 
 	if (task->cfg) {
 		L = task->cfg->lua_state;
@@ -1386,20 +1323,32 @@ rspamd_message_process (struct rspamd_task *task)
 
 	rspamd_archives_process (task);
 
+	if (L) {
+		old_top = lua_gettop (L);
+	}
+
 	if (L && rspamd_lua_require_function (L,
 			"lua_magic", "detect_mime_part")) {
-		func_pos = lua_gettop (L);
+		magic_func_pos = lua_gettop (L);
 	}
 	else {
 		msg_err_task ("cannot require lua_magic.detect_mime_part");
 	}
 
+	if (L && rspamd_lua_require_function (L,
+			"lua_content", "maybe_process_mime_part")) {
+		content_func_pos = lua_gettop (L);
+	}
+	else {
+		msg_err_task ("cannot require lua_content.maybe_process_mime_part");
+	}
+
 	PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
-		if (func_pos != -1 && part->parsed_data.len > 0) {
+		if (magic_func_pos != -1 && part->parsed_data.len > 0) {
 			struct rspamd_mime_part **pmime;
 			struct rspamd_task **ptask;
 
-			lua_pushvalue (L, func_pos);
+			lua_pushvalue (L, magic_func_pos);
 			pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
 			rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
 			*pmime = part;
@@ -1447,7 +1396,27 @@ rspamd_message_process (struct rspamd_task *task)
 				}
 			}
 
-			lua_settop (L, func_pos);
+			lua_settop (L, magic_func_pos);
+		}
+
+		/* Now detect content */
+		if (content_func_pos != -1 && part->parsed_data.len > 0) {
+			struct rspamd_mime_part **pmime;
+			struct rspamd_task **ptask;
+
+			lua_pushvalue (L, content_func_pos);
+			pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
+			rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
+			*pmime = part;
+			ptask = lua_newuserdata (L, sizeof (struct rspamd_task *));
+			rspamd_lua_setclass (L, "rspamd{task}", -1);
+			*ptask = task;
+
+			if (lua_pcall (L, 2, 2, 0) != 0) {
+				msg_err_task ("cannot detect content: %s", lua_tostring (L, -1));
+			}
+
+			lua_settop (L, magic_func_pos);
 		}
 
 		if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
@@ -1455,8 +1424,8 @@ rspamd_message_process (struct rspamd_task *task)
 		}
 	}
 
-	if (func_pos != -1) {
-		lua_settop (L, func_pos - 1);
+	if (old_top != -1) {
+		lua_settop (L, old_top);
 	}
 
 	/* Calculate average words length and number of short words */


More information about the Commits mailing list