commit 6e16b0a: [Fix] Another fix to deal with bad URLs
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Oct 9 15:28:06 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-10-09 16:15:03 +0100
URL: https://github.com/rspamd/rspamd/commit/6e16b0a8007bac6af3bb692c7c85fc793c2fbb71
[Fix] Another fix to deal with bad URLs
---
src/libserver/url.c | 42 ++++++++++++++++++++++++++++++------------
1 file changed, 30 insertions(+), 12 deletions(-)
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 69932eab9..90398ad6b 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -2289,6 +2289,8 @@ url_tld_start (struct url_callback_data *cb,
url_match_t *match)
{
const gchar *p = pos;
+ guint processed = 0;
+ static const guint max_shift = 253 + sizeof ("https://");
/* Try to find the start of the url by finding any non-urlsafe character or whitespace/punctuation */
while (p >= cb->begin) {
@@ -2338,6 +2340,12 @@ url_tld_start (struct url_callback_data *cb,
}
p--;
+ processed ++;
+
+ if (processed > max_shift) {
+ /* Too long */
+ return FALSE;
+ }
}
return FALSE;
@@ -2398,15 +2406,24 @@ url_web_start (struct url_callback_data *cb,
url_match_t *match)
{
/* Check what we have found */
- if (pos > cb->begin &&
- (g_ascii_strncasecmp (pos, "www", 3) == 0 ||
- g_ascii_strncasecmp (pos, "ftp", 3) == 0)) {
-
- if (!(is_url_start (*(pos - 1)) ||
- g_ascii_isspace (*(pos - 1)) ||
- pos - 1 == match->prev_newline_pos ||
- (*(pos - 1) & 0x80))) { /* Chinese trick */
- return FALSE;
+ if (pos > cb->begin) {
+ if (g_ascii_strncasecmp (pos, "www", 3) == 0 ||
+ g_ascii_strncasecmp (pos, "ftp", 3) == 0) {
+
+ if (!(is_url_start (*(pos - 1)) ||
+ g_ascii_isspace (*(pos - 1)) ||
+ pos - 1 == match->prev_newline_pos ||
+ (*(pos - 1) & 0x80))) { /* Chinese trick */
+ return FALSE;
+ }
+ }
+ else {
+ guchar prev = *(pos - 1);
+
+ if (g_ascii_isalnum (prev)) {
+ /* Part of another url */
+ return FALSE;
+ }
}
}
@@ -2747,7 +2764,7 @@ rspamd_url_trie_callback (struct rspamd_multipattern *mp,
}
cb->start = m.m_begin;
- cb->fin = m.m_begin + m.m_len;
+ cb->fin = pos;
return 1;
}
@@ -2884,7 +2901,7 @@ rspamd_url_trie_generic_callback_common (struct rspamd_multipattern *mp,
}
cb->start = m.m_begin;
- cb->fin = m.m_begin + m.m_len;
+ cb->fin = pos;
url = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_url));
g_strstrip (cb->url_str);
rc = rspamd_url_parse (url, cb->url_str,
@@ -2898,7 +2915,8 @@ rspamd_url_trie_generic_callback_common (struct rspamd_multipattern *mp,
}
if (cb->func) {
- cb->func (url, cb->start - text, cb->fin - text, cb->funcd);
+ cb->func (url, cb->start - text, (m.m_begin + m.m_len) - text,
+ cb->funcd);
}
}
else if (rc != URI_ERRNO_OK) {
More information about the Commits
mailing list