commit 193879d: [Fix] Fix some corner cases in urls parsing, add limits

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Mar 19 15:28:09 UTC 2020


Author: Vsevolod Stakhov
Date: 2020-03-19 15:26:45 +0000
URL: https://github.com/rspamd/rspamd/commit/193879d7466b42f8b20fccc9b0cf403f41f82672 (HEAD -> master)

[Fix] Fix some corner cases in urls parsing, add limits

---
 src/libserver/url.c | 47 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/src/libserver/url.c b/src/libserver/url.c
index 73272799a..6b2ecdfde 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -623,6 +623,10 @@ is_domain_start (int p)
 	return FALSE;
 }
 
+static const guint max_domain_length = 253;
+static const guint max_dns_label = 63;
+static const guint max_email_user = 64;
+
 static gint
 rspamd_mailto_parse (struct http_parser_url *u,
 					 const gchar *str, gsize len,
@@ -654,6 +658,10 @@ rspamd_mailto_parse (struct http_parser_url *u,
 	while (p < last) {
 		t = *p;
 
+		if (p - str > max_email_user + max_domain_length + 1) {
+			goto out;
+		}
+
 		switch (st) {
 			case parse_mailto:
 				if (t == ':') {
@@ -725,6 +733,9 @@ rspamd_mailto_parse (struct http_parser_url *u,
 				else if (!is_mailsafe (t)) {
 					goto out;
 				}
+				else if (p - c > max_email_user) {
+					goto out;
+				}
 				p++;
 				break;
 			case parse_at:
@@ -739,6 +750,9 @@ rspamd_mailto_parse (struct http_parser_url *u,
 				else if (!is_domain (t) && t != '.' && t != '_') {
 					goto out;
 				}
+				else if (p - c > max_domain_length) {
+					goto out;
+				}
 				p++;
 				break;
 			case parse_suffix_question:
@@ -810,6 +824,10 @@ rspamd_telephone_parse (struct http_parser_url *u,
 	while (p < last) {
 		t = *p;
 
+		if (p - str > max_email_user) {
+			goto out;
+		}
+
 		switch (st) {
 		case parse_protocol:
 			if (t == ':') {
@@ -926,7 +944,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 {
 	const gchar *p = str, *c = str, *last = str + len, *slash = NULL,
 			*password_start = NULL, *user_start = NULL;
-	gchar t;
+	gchar t = 0;
 	UChar32 uc;
 	glong pt;
 	gint ret = 1;
@@ -1075,6 +1093,10 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 			else if (!g_ascii_isgraph (t)) {
 				goto out;
 			}
+			else if (p - c > max_email_user) {
+				goto out;
+			}
+
 			p++;
 			break;
 		case parse_multiple_at:
@@ -1130,6 +1152,9 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 			else if (!g_ascii_isgraph (t)) {
 				goto out;
 			}
+			else if (p - c > max_domain_length) {
+				goto out;
+			}
 			p++;
 			break;
 		case parse_at:
@@ -1157,6 +1182,10 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 			}
 			break;
 		case parse_domain:
+			if (p - c > max_domain_length) {
+				/* Too large domain */
+				goto out;
+			}
 			if (t == '/' || t == ':' || t == '?' || t == '#') {
 				if (p - c == 0) {
 					goto out;
@@ -1175,7 +1204,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 					st = parse_part;
 					c = p + 1;
 				}
-				else if (!user_seen) {
+				else if (t == ':' && !user_seen) {
 					/*
 					 * Here we can have both port and password, hence we need
 					 * to apply some heuristic here
@@ -1193,7 +1222,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 				p++;
 			}
 			else {
-				if (is_url_end (t)) {
+				if (is_url_end (t) || is_url_start (t)) {
 					goto set;
 				}
 				else if (*p == '@' && !user_seen) {
@@ -2615,6 +2644,7 @@ url_web_end (struct url_callback_data *cb,
 	}
 
 	match->m_len = (last - pos);
+	cb->fin = last + 1;
 
 	return TRUE;
 }
@@ -2909,7 +2939,10 @@ rspamd_url_trie_callback (struct rspamd_multipattern *mp,
 		}
 
 		cb->start = m.m_begin;
-		cb->fin = pos;
+
+		if (pos > cb->fin) {
+			cb->fin = pos;
+		}
 
 		return 1;
 	}
@@ -3047,7 +3080,11 @@ rspamd_url_trie_generic_callback_common (struct rspamd_multipattern *mp,
 		}
 
 		cb->start = m.m_begin;
-		cb->fin = pos;
+
+		if (pos > cb->fin) {
+			cb->fin = pos;
+		}
+
 		url = rspamd_mempool_alloc0 (pool, sizeof (struct rspamd_url));
 		g_strstrip (cb->url_str);
 		rc = rspamd_url_parse (url, cb->url_str,


More information about the Commits mailing list