commit 8722472: [Minor] Lua_magic: Fix xml detection
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Aug 13 13:07:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-08-13 14:00:35 +0100
URL: https://github.com/rspamd/rspamd/commit/872247289f842303f7a989298423179715a6f1bb
[Minor] Lua_magic: Fix xml detection
---
lualib/lua_magic/heuristics.lua | 21 ++++++++++++---------
lualib/lua_magic/types.lua | 4 ++++
2 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index d3469e2f9..2571346b4 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -63,15 +63,15 @@ local zip_patterns = {
local txt_trie
local txt_patterns = {
html = {
- {[[(?i)\s*<html\b]], 30},
- {[[(?i)\s*<script\b]], 20}, -- Commonly used by spammers
- {[[(?i)\s*<\!DOCTYPE HTML\b]], 30},
- {[[(?i)\s*<body\b]], 20},
- {[[(?i)\s*<table\b]], 20},
- {[[(?i)\s*<a\b]], 10},
- {[[(?i)\s*<p\b]], 10},
- {[[(?i)\s*<div\b]], 10},
- {[[(?i)\s*<span\b]], 10},
+ {[[(?i)<html\b]], 32},
+ {[[(?i)<script\b]], 20}, -- Commonly used by spammers
+ {[[(?i)<\!DOCTYPE HTML\b]], 33},
+ {[[(?i)<body\b]], 20},
+ {[[(?i)<table\b]], 20},
+ {[[(?i)<a\b]], 10},
+ {[[(?i)<p\b]], 10},
+ {[[(?i)<div\b]], 10},
+ {[[(?i)<span\b]], 10},
},
csv = {
{[[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]], 20}
@@ -82,6 +82,9 @@ local txt_patterns = {
vcf = {
{[[^BEGIN:VCARD\r?\n]], 40},
},
+ xml = {
+ {[[(?i)\s*<\?xml\b.+\?>]], 31},
+ }
}
-- Used to match pattern index and extension
diff --git a/lualib/lua_magic/types.lua b/lualib/lua_magic/types.lua
index 9f64b12a2..f082a53e5 100644
--- a/lualib/lua_magic/types.lua
+++ b/lualib/lua_magic/types.lua
@@ -284,6 +284,10 @@ local types = {
ct = 'image/vnd.dwg',
},
-- Text
+ xml = {
+ ct = 'application/xml',
+ type = 'text',
+ },
txt = {
type = 'text',
ct = 'text/plain',
More information about the Commits mailing list