commit 8722472: [Minor] Lua_magic: Fix xml detection

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Aug 13 13:07:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-08-13 14:00:35 +0100
URL: https://github.com/rspamd/rspamd/commit/872247289f842303f7a989298423179715a6f1bb

[Minor] Lua_magic: Fix xml detection

---
 lualib/lua_magic/heuristics.lua | 21 ++++++++++++---------
 lualib/lua_magic/types.lua      |  4 ++++
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index d3469e2f9..2571346b4 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -63,15 +63,15 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    {[[(?i)\s*<html\b]], 30},
-    {[[(?i)\s*<script\b]], 20}, -- Commonly used by spammers
-    {[[(?i)\s*<\!DOCTYPE HTML\b]], 30},
-    {[[(?i)\s*<body\b]], 20},
-    {[[(?i)\s*<table\b]], 20},
-    {[[(?i)\s*<a\b]], 10},
-    {[[(?i)\s*<p\b]], 10},
-    {[[(?i)\s*<div\b]], 10},
-    {[[(?i)\s*<span\b]], 10},
+    {[[(?i)<html\b]], 32},
+    {[[(?i)<script\b]], 20}, -- Commonly used by spammers
+    {[[(?i)<\!DOCTYPE HTML\b]], 33},
+    {[[(?i)<body\b]], 20},
+    {[[(?i)<table\b]], 20},
+    {[[(?i)<a\b]], 10},
+    {[[(?i)<p\b]], 10},
+    {[[(?i)<div\b]], 10},
+    {[[(?i)<span\b]], 10},
   },
   csv = {
     {[[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]], 20}
@@ -82,6 +82,9 @@ local txt_patterns = {
   vcf = {
     {[[^BEGIN:VCARD\r?\n]], 40},
   },
+  xml = {
+    {[[(?i)\s*<\?xml\b.+\?>]], 31},
+  }
 }
 
 -- Used to match pattern index and extension
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index 9f64b12a2..f082a53e5 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -284,6 +284,10 @@ local types = {
     ct = 'image/vnd.dwg',
   },
   -- Text
+  xml = {
+    ct = 'application/xml',
+    type = 'text',
+  },
   txt = {
     type = 'text',
     ct = 'text/plain',


More information about the Commits mailing list