commit f08ffd7: [Minor] Lua_magic: Improve html patterns

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Sep 25 10:00:07 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-09-25 10:58:45 +0100
URL: https://github.com/rspamd/rspamd/commit/f08ffd73e83f152ebecaf695be575c8f59711a44 (HEAD -> master)

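[Minor] Lua_magic: Improve html patterns

Append a `\b` word boundary to every HTML tag pattern so that short tag
names such as `<a` and `<p` no longer match longer tag names that merely
share the prefix, and add a `<script` pattern (weight 20).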

---
 lualib/lua_magic/heuristics.lua | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/lualib/lua_magic/heuristics.lua b/lualib/lua_magic/heuristics.lua
index d36b5633f..e623c09c1 100644
--- a/lualib/lua_magic/heuristics.lua
+++ b/lualib/lua_magic/heuristics.lua
@@ -61,15 +61,16 @@ local zip_patterns = {
 local txt_trie
 local txt_patterns = {
   html = {
-    {[[(?i)\s*<html]], 30},
-    {[[(?i)\s*<\!DOCTYPE HTML]], 30},
-    {[[(?i)\s*<xml]], 20},
-    {[[(?i)\s*<body]], 20},
-    {[[(?i)\s*<table]], 20},
-    {[[(?i)\s*<a]], 10},
-    {[[(?i)\s*<p]], 10},
-    {[[(?i)\s*<div]], 10},
-    {[[(?i)\s*<span]], 10},
+    {[[(?i)\s*<html\b]], 30},
+    {[[(?i)\s*<script\b]], 20}, -- Commonly used by spammers
+    {[[(?i)\s*<\!DOCTYPE HTML\b]], 30},
+    {[[(?i)\s*<xml\b]], 20},
+    {[[(?i)\s*<body\b]], 20},
+    {[[(?i)\s*<table\b]], 20},
+    {[[(?i)\s*<a\b]], 10},
+    {[[(?i)\s*<p\b]], 10},
+    {[[(?i)\s*<div\b]], 10},
+    {[[(?i)\s*<span\b]], 10},
   },
   csv = {
     {[[(?:[-a-zA-Z0-9_]+\s*,){2,}(?:[-a-zA-Z0-9_]+,?[ ]*[\r\n])]], 20}


More information about the Commits mailing list