commit 784bd1c: [Minor] Fix some more tld issues and tld tests

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Feb 25 18:56:06 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-02-25 18:52:36 +0000
URL: https://github.com/rspamd/rspamd/commit/784bd1c3e3a30d6b89b832af2861b0efb11f2245 (HEAD -> master)

[Minor] Fix some more tld issues and tld tests

---
 src/libserver/url.c          | 51 ++++++++++++++++++++++----------------------
 test/functional/lua/tlds.lua | 14 ++++++------
 2 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/src/libserver/url.c b/src/libserver/url.c
index 0407490d8..3c3dc4546 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -129,7 +129,6 @@ struct url_matcher {
 			url_match_t *match);
 
 	gint flags;
-	gsize patlen;
 };
 
 static gboolean url_file_start (struct url_callback_data *cb,
@@ -175,44 +174,44 @@ static gboolean url_tel_end (struct url_callback_data *cb,
 struct url_matcher static_matchers[] = {
 		/* Common prefixes */
 		{"file://",   "",          url_file_start,  url_file_end,
-				0, 0},
+				0},
 		{"file:\\\\",   "",        url_file_start,  url_file_end,
-				0, 0},
+				0},
 		{"ftp://",    "",          url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"ftp:\\\\",    "",        url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"sftp://",   "",          url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"http:",   "",            url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"https:",   "",           url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"news://",   "",          url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"nntp://",   "",          url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"telnet://", "",          url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"tel:", "",               url_tel_start,   url_tel_end,
-				0, 0},
+				0},
 		{"webcal://", "",          url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"mailto:",   "",          url_email_start, url_email_end,
-				0, 0},
+				0},
 		{"callto:", "",            url_tel_start,   url_tel_end,
-				0, 0},
+				0},
 		{"h323:",     "",          url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"sip:",      "",          url_web_start,   url_web_end,
-				0, 0},
+				0},
 		{"www.",      "http://",   url_web_start,   url_web_end,
-				URL_FLAG_NOHTML, 0},
+				URL_FLAG_NOHTML},
 		{"ftp.",      "ftp://",    url_web_start,   url_web_end,
-				URL_FLAG_NOHTML, 0},
+				URL_FLAG_NOHTML},
 		/* Likely emails */
 		{"@",         "mailto://", url_email_start, url_email_end,
-				URL_FLAG_NOHTML, 0}
+				URL_FLAG_NOHTML}
 };
 
 struct url_callback_data {
@@ -461,7 +460,7 @@ rspamd_url_parse_tld_file (const gchar *fname,
 				RSPAMD_MULTIPATTERN_TLD|RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
 		m.pattern = rspamd_multipattern_get_pattern (url_scanner->search_trie,
 				rspamd_multipattern_get_npatterns (url_scanner->search_trie) - 1);
-		m.patlen = strlen (m.pattern);
+
 		g_array_append_val (url_scanner->matchers, m);
 	}
 
@@ -488,8 +487,6 @@ rspamd_url_add_static_matchers (struct url_match_scanner *sc)
 					static_matchers[i].pattern,
 					RSPAMD_MULTIPATTERN_ICASE|RSPAMD_MULTIPATTERN_UTF8);
 		}
-
-		static_matchers[i].patlen = strlen (static_matchers[i].pattern);
 	}
 
 	g_array_append_vals (sc->matchers, static_matchers, n);
@@ -1511,14 +1508,15 @@ rspamd_tld_trie_callback (struct rspamd_multipattern *mp,
 	struct url_matcher *matcher;
 	const gchar *start, *pos, *p;
 	struct rspamd_url *url = context;
-	gint ndots = 1;
+	gint ndots;
 
 	matcher = &g_array_index (url_scanner->matchers, struct url_matcher,
 			strnum);
+	ndots = 1;
 
 	if (matcher->flags & URL_FLAG_STAR_MATCH) {
 		/* Skip one more tld component */
-		ndots = 2;
+		ndots ++;
 	}
 
 	pos = text + match_start;
@@ -2277,6 +2275,9 @@ rspamd_tld_trie_find_callback (struct rspamd_multipattern *mp,
 			ndots--;
 			pos = p + 1;
 		}
+		else {
+			pos = p;
+		}
 
 		p--;
 	}
diff --git a/test/functional/lua/tlds.lua b/test/functional/lua/tlds.lua
index efaf01af5..0908001da 100644
--- a/test/functional/lua/tlds.lua
+++ b/test/functional/lua/tlds.lua
@@ -13,10 +13,10 @@ rspamd_config:register_symbol({
       'example.com',
       'example.co.za',
       'example.in.net',
-      'example.kawasaki.jp',
+      'example.star.kawasaki.jp',
       'example.net',
       'example.net.in',
-      'example.nom.br',
+      'example.star.nom.br',
       'example.org',
       'example.org.ac',
       'example.ru.com',
@@ -34,22 +34,24 @@ rspamd_config:register_symbol({
         for _, p in ipairs(prefixes) do
           local test = rspamd_util.get_tld(p .. d)
           if (test ~= d) then
-            table.insert(worry, 'util.get_tld:' .. p .. d .. ':' .. test)
+            local opt = string.format('util.get_tld:p=%s;d=%s;got=%s', p, d, test)
+            table.insert(worry, opt)
             return
           end
           local u = rspamd_url.create(pool, p .. d)
           test = u:get_tld()
           if (test ~= d) then
-            table.insert(worry, 'url.get_tld:' .. p .. d .. ':' .. test)
+            local opt = string.format('url.create:p=%s;d=%s;got=%s', p, d, test)
+            table.insert(worry, opt)
             return
           end
         end
       end)()
     end
     if (#worry == 0) then
-      return true, "no worry"
+      return true, 1.0, "no worry"
     else
-      return true, table.concat(worry, ",")
+      return true, 1.0, worry
     end
   end
 })


More information about the Commits mailing list