commit 4fd673f: [Minor] Attach pdf urls to mime parts

Vsevolod Stakhov vsevolod at highsecure.ru
Tue May 5 14:56:08 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-05-05 15:11:40 +0100
URL: https://github.com/rspamd/rspamd/commit/4fd673f08e5ca4b0a2f4df97043571f5618c4c94

[Minor] Attach pdf urls to mime parts

---
 lualib/lua_content/pdf.lua |  8 ++++----
 src/lua/lua_task.c         | 14 +++++++++++++-
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index fbb7482c1..92b170534 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -1052,7 +1052,7 @@ local function search_text(task, pdf)
 end
 
 -- This function searches objects for `/URI` key and parses it's content
-local function search_urls(task, pdf)
+local function search_urls(task, pdf, mpart)
   local function recursive_object_traverse(obj, dict, rec)
     if rec > 10 then
       lua_util.debugm(N, task, 'object %s:%s recurses too much',
@@ -1071,7 +1071,7 @@ local function search_urls(task, pdf)
           if url then
             lua_util.debugm(N, task, 'found url %s in object %s:%s',
                 v, obj.major, obj.minor)
-            task:inject_url(url)
+            task:inject_url(url, mpart)
           end
         end
       end
@@ -1085,7 +1085,7 @@ local function search_urls(task, pdf)
   end
 end
 
-local function process_pdf(input, _, task)
+local function process_pdf(input, mpart, task)
 
   if not config.enabled then
     -- Skip processing
@@ -1135,7 +1135,7 @@ local function process_pdf(input, _, task)
         search_text(task, pdf_output)
       end
       if config.url_extraction then
-        search_urls(task, pdf_output)
+        search_urls(task, pdf_output, mpart)
       end
 
       if config.js_fuzzy and pdf_output.scripts then
diff --git a/src/lua/lua_task.c b/src/lua/lua_task.c
index 51936a532..98c0b06ed 100644
--- a/src/lua/lua_task.c
+++ b/src/lua/lua_task.c
@@ -2333,9 +2333,21 @@ lua_task_inject_url (lua_State * L)
 	LUA_TRACE_POINT;
 	struct rspamd_task *task = lua_check_task (L, 1);
 	struct rspamd_lua_url *url = lua_check_url (L, 2);
+	struct rspamd_mime_part *mpart = NULL;
+
+	if (lua_isuserdata (L, 3)) {
+		/* We also have a mime part there */
+		mpart = *((struct rspamd_mime_part **)rspamd_lua_check_udata_maybe (L,
+				3, "rspamd{mimepart}"));
+	}
 
 	if (task && task->message && url && url->url) {
-		rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url->url);
+		if (rspamd_url_set_add_or_increase (MESSAGE_FIELD (task, urls), url->url)) {
+			if (mpart && mpart->urls) {
+				/* Also add url to the mime part */
+				g_ptr_array_add (mpart->urls, url->url);
+			}
+		}
 	}
 	else {
 		return luaL_error (L, "invalid arguments");


More information about the Commits mailing list