commit ae4fee7: [Project] Lua_content: Ignore fonts

Vsevolod Stakhov vsevolod at highsecure.ru
Sun Jan 5 18:42:06 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-01-05 09:54:18 +0000
URL: https://github.com/rspamd/rspamd/commit/ae4fee72512a84682ec7d54d17e68e36aac5f2de (HEAD -> master)

[Project] Lua_content: Ignore fonts

---
 lualib/lua_content/pdf.lua | 83 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 70 insertions(+), 13 deletions(-)

diff --git a/lualib/lua_content/pdf.lua b/lualib/lua_content/pdf.lua
index dd87a4b44..893b55e8e 100644
--- a/lualib/lua_content/pdf.lua
+++ b/lualib/lua_content/pdf.lua
@@ -201,6 +201,70 @@ local function obj_ref(major, minor)
   return major * 10.0 + 1.0 / (minor + 1.0)
 end
 
+-- Return indirect object reference (if needed)
+local function maybe_dereference_object(elt, pdf)
+  if type(elt) == 'table' and elt[1] == '%REF%' then
+    local ref = obj_ref(elt[2], elt[3])
+
+    if pdf.ref[ref] then
+      -- No recursion!
+      return pdf.ref[ref].dict
+    end
+  end
+
+  return elt
+end
+
+-- Enforced dereference
+local function dereference_object(elt, pdf)
+  if type(elt) == 'table' and elt[1] == '%REF%' then
+    local ref = obj_ref(elt[2], elt[3])
+
+    if pdf.ref[ref] then
+      -- Not a dict but the object!
+      return pdf.ref[ref]
+    end
+  end
+
+  return nil
+end
+
+local function process_dict(task, pdf, obj, dict)
+  if type(dict) == 'table' and dict.Type then
+    if dict.Type == 'FontDescriptor' then
+      obj.type = 'font'
+      obj.ignore = true
+
+      lua_util.debugm(N, task, "obj %s:%s is a font descriptor",
+         obj.major, obj.minor)
+
+      local stream_ref
+      if dict.FontFile then
+        stream_ref = dereference_object(dict.FontFile, pdf)
+      end
+      if dict.FontFile2 then
+        stream_ref = dereference_object(dict.FontFile2, pdf)
+      end
+      if dict.FontFile3 then
+        stream_ref = dereference_object(dict.FontFile3, pdf)
+      end
+
+      if stream_ref then
+        if not stream_ref.dict then
+          stream_ref.dict = {}
+        end
+        stream_ref.dict.type = 'font_data'
+        stream_ref.dict.ignore = true
+
+        lua_util.debugm(N, task, "obj %s:%s is a font data stream",
+            stream_ref.major, stream_ref.minor)
+      end
+    end
+  end
+end
+
+-- Processes PDF objects: extracts streams, object numbers, process outer grammar,
+-- augment object types
 local function postprocess_pdf_objects(task, input, pdf)
   local start_pos, end_pos = 1, 1
 
@@ -323,21 +387,14 @@ local function postprocess_pdf_objects(task, input, pdf)
 
   end
 
-  pdf.objects = objects
-end
-
--- Return indirect object reference (if needed)
-local function maybe_dereference_object(elt, pdf)
-  if type(elt) == 'table' and elt[1] == '%REF%' then
-    local ref = obj_ref(elt[2], elt[3])
-
-    if pdf.ref[ref] then
-      -- No recursion!
-      return pdf.ref[ref].dict
+  for _,obj in ipairs(objects) do
+    if obj.dict then
+      -- Types processing
+      process_dict(task, pdf, obj, obj.dict)
     end
   end
 
-  return elt
+  pdf.objects = objects
 end
 
 local function extract_pdf_objects(task, pdf)
@@ -367,7 +424,7 @@ local function extract_pdf_objects(task, pdf)
   end
 
   for _,obj in ipairs(pdf.objects or {}) do
-    if obj.stream and obj.dict and type(obj.dict) == 'table' then
+    if obj.stream and obj.dict and type(obj.dict) == 'table' and not obj.dict.ignore then
       maybe_extract_object(obj)
     end
   end


More information about the Commits mailing list