commit 5f4002b: [Project] Lua_content: Implement ObjStm decoding
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Jan 14 15:07:07 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-01-14 15:01:38 +0000
URL: https://github.com/rspamd/rspamd/commit/5f4002b2cf9386c9b8d72b56df8bf1d4083476dd (HEAD -> master)
[Project] Lua_content: Implement ObjStm decoding
---
lualib/lua_content/pdf.lua | 117 +++++++++++++++++++++++++++++----------------
1 file changed, 75 insertions(+), 42 deletions(-)
diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index cd0276e0c..2a4ac7706 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -358,6 +358,46 @@ local function dereference_object(elt, pdf)
return nil
end
+local function parse_object_grammar(obj, task, pdf)
+ -- Parse grammar
+ local obj_dict_span
+ if obj.stream then
+ obj_dict_span = obj.data:span(1, obj.stream.start - obj.start)
+ else
+ obj_dict_span = obj.data
+ end
+
+ if obj_dict_span:len() < config.max_processing_size then
+ local ret,obj_or_err = pcall(pdf_outer_grammar.match, pdf_outer_grammar, obj_dict_span)
+
+ if ret then
+ if obj.stream then
+ obj.dict = obj_or_err
+ lua_util.debugm(N, task, 'stream object %s:%s is parsed to: %s',
+ obj.major, obj.minor, obj_or_err)
+ else
+ -- Direct object
+ pdf.ref[obj_ref(obj.major, obj.minor)] = obj_or_err
+ if type(obj_or_err) == 'table' then
+ obj.dict = obj_or_err
+ obj.uncompressed = obj_or_err
+ lua_util.debugm(N, task, 'direct object %s:%s is parsed to: %s',
+ obj.major, obj.minor, obj_or_err)
+ else
+ lua_util.debugm(N, task, 'direct object %s:%s cannot be parsed: %s',
+ obj.major, obj.minor, obj_dict_span)
+ end
+ end
+ else
+ lua_util.debugm(N, task, 'object %s:%s cannot be parsed: %s',
+ obj.major, obj.minor, obj_or_err)
+ end
+ else
+ lua_util.debugm(N, task, 'object %s:%s cannot be parsed: too large %s',
+ obj.major, obj.minor, obj_dict_span:len())
+ end
+end
+
local function process_dict(task, pdf, obj, dict)
if not obj.type and type(dict) == 'table' then
if dict.Type and type(dict.Type) == 'string' then
@@ -535,15 +575,45 @@ local function compound_obj_grammar_gen()
compound_obj_grammar = gen.ws^0 * (gen.comment * gen.ws^1)^0 *
lpeg.Ct(lpeg.Ct(gen.number * gen.ws^1 * gen.number * gen.ws^0)^1)
end
+
+ return compound_obj_grammar
end
-local function pdf_compound_object_unpack(obj, uncompressed, pdf, task)
+local function pdf_compound_object_unpack(_, uncompressed, pdf, task, first)
-- First, we need to parse data line by line likely to find a line
-- that consists of pairs of numbers
compound_obj_grammar_gen()
local elts = compound_obj_grammar:match(uncompressed)
if elts and #elts > 0 then
- lua_util.debugm(N, task, 'compound elts: %s',
- elts)
+ lua_util.debugm(N, task, 'compound elts (chunk length %s): %s',
+ #uncompressed, elts)
+
+ for i,pair in ipairs(elts) do
+ local obj_number,offset = pair[1], pair[2]
+
+ offset = offset + first
+ if offset < #uncompressed then
+ local span_len
+ if i == #elts then
+ span_len = #uncompressed - offset
+ else
+ span_len = (elts[i + 1][2] + first) - offset
+ end
+
+ if span_len > 0 then
+ local obj = {
+ major = obj_number,
+ minor = 0, -- Implicit
+ data = uncompressed:span(offset + 1, span_len),
+ ref = obj_ref(obj_number, 0)
+ }
+ parse_object_grammar(obj, task, pdf)
+
+ if obj.dict then
+ pdf.objects[#pdf.objects + 1] = obj
+ end
+ end
+ end
+ end
end
end
@@ -558,14 +628,14 @@ local function extract_pdf_compound_objects(task, pdf)
local first = tonumber(maybe_dereference_object(obj.dict.First, pdf, task))
if nobjs and first then
- local extend = maybe_dereference_object(obj.dict.Extends, pdf, task)
+ --local extend = maybe_dereference_object(obj.dict.Extends, pdf, task)
lua_util.debugm(N, task, 'extract ObjStm with %s objects (%s first) %s extend',
nobjs, first, obj.dict.Extends)
local uncompressed = maybe_extract_object_stream(obj, pdf, task)
if uncompressed then
- pdf_compound_object_unpack(obj, uncompressed, pdf, task)
+ pdf_compound_object_unpack(obj, uncompressed, pdf, task, first)
end
else
lua_util.debugm(N, task, 'ObjStm object %s:%s has bad dict: %s',
@@ -629,43 +699,6 @@ local function extract_outer_objects(task, input, pdf)
end
end
-local function parse_object_grammar(obj, task, pdf)
- -- Parse grammar
- local obj_dict_span
- if obj.stream then
- obj_dict_span = obj.data:span(1, obj.stream.start - obj.start)
- else
- obj_dict_span = obj.data
- end
-
- if obj_dict_span:len() < config.max_processing_size then
- local ret,obj_or_err = pcall(pdf_outer_grammar.match, pdf_outer_grammar, obj_dict_span)
-
- if ret then
- if obj.stream then
- obj.dict = obj_or_err
- lua_util.debugm(N, task, 'stream object %s:%s is parsed to: %s',
- obj.major, obj.minor, obj_or_err)
- else
- -- Direct object
- pdf.ref[obj_ref(obj.major, obj.minor)] = obj_or_err
- if type(obj_or_err) == 'table' then
- obj.dict = obj_or_err
- end
- obj.uncompressed = obj_or_err
- lua_util.debugm(N, task, 'direct object %s:%s is parsed to: %s',
- obj.major, obj.minor, obj_or_err)
- end
- else
- lua_util.debugm(N, task, 'object %s:%s cannot be parsed: %s',
- obj.major, obj.minor, obj_or_err)
- end
- else
- lua_util.debugm(N, task, 'object %s:%s cannot be parsed: too large %s',
- obj.major, obj.minor, obj_dict_span:len())
- end
-end
-
-- This function attaches streams to objects and processes outer pdf grammar
local function attach_pdf_streams(task, input, pdf)
if pdf.start_streams and pdf.end_streams then
More information about the Commits mailing list