commit 42873dc: [Minor] Lua_magic: Try to reduce false-positive rate for HTML heuristic

Vsevolod Stakhov vsevolod at rspamd.com
Sun Jul 2 16:35:03 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-07-02 17:33:28 +0100
URL: https://github.com/rspamd/rspamd/commit/42873dc11090b0ce0c6f49333a73123fa5de05cb (HEAD -> master)

[Minor] Lua_magic: Try to reduce false-positive rate for HTML heuristic

---
 lualib/lua_magic/heuristics.lua | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index ea8f08e08..98cfb0eee 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -63,13 +63,13 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    {[[(?i)<html\b]], 32},
+    {[=[(?i)<html[\s>]]=], 32},
     {[[(?i)<script\b]], 20}, -- Commonly used by spammers
     {[[<script\s+type="text\/javascript">]], 31}, -- Another spammy pattern
     {[[(?i)<\!DOCTYPE HTML\b]], 33},
     {[[(?i)<body\b]], 20},
     {[[(?i)<table\b]], 20},
-    {[[(?i)<a\b]], 10},
+    {[[(?i)<a\s]], 10},
     {[[(?i)<p\b]], 10},
     {[[(?i)<div\b]], 10},
     {[[(?i)<span\b]], 10},


More information about the Commits mailing list