commit 78ce6c3: [Minor] Lua_content: Make text/urls extraction optional
Vsevolod Stakhov
vsevolod at highsecure.ru
Sun Jan 19 09:56:06 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-01-19 09:39:06 +0000
URL: https://github.com/rspamd/rspamd/commit/78ce6c313d9b8d1a104ba15d27363b5303cdc6c8
[Minor] Lua_content: Make text/urls extraction optional
---
lualib/lua_content/pdf.lua | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index 85f939869..b577677e8 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -122,6 +122,8 @@ local exports = {}
local config = {
max_extraction_size = 512 * 1024,
max_processing_size = 32 * 1024,
+ text_extraction = false, -- NYI feature
+ url_extraction = true,
enabled = true,
}
@@ -626,7 +628,11 @@ local function process_dict(task, pdf, obj, dict)
if obj.fonts[k] then
local font = obj.fonts[k]
- process_font(task, pdf, font, k)
+
+ if config.text_extraction then
+ process_font(task, pdf, font, k)
+ end
+
lua_util.debugm(N, task, 'found font "%s" for object %s:%s -> %s',
k, obj.major, obj.minor, font)
end
@@ -1047,8 +1053,12 @@ local function process_pdf(input, _, task)
if pdf_output.start_objects and pdf_output.end_objects then
-- Postprocess objects
postprocess_pdf_objects(task, input, pdf_output)
- search_text(task, pdf_output)
- search_urls(task, pdf_output)
+ if config.text_extraction then
+ search_text(task, pdf_output)
+ end
+ if config.url_extraction then
+ search_urls(task, pdf_output)
+ end
else
pdf_output.flags.no_objects = true
end
More information about the Commits mailing list