commit f0462bf: [Minor] Ignore completely damaged urls

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Mar 4 20:21:05 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-03-04 20:18:57 +0000
URL: https://github.com/rspamd/rspamd/commit/f0462bf947bafb429f47162a84b4daaf7379463e (HEAD -> master)

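[Minor] Ignore completely damaged urls

Summary of the changes shown in the diff below:
- src/libserver/html.c: after rspamd_url_parse returns URI_ERRNO_OK,
  rspamd_html_process_url enters its success branch only when the URL
  has a non-empty host (url->hostlen > 0) and is not both flagged
  RSPAMD_URL_FLAG_OBSCURED and of PROTOCOL_UNKNOWN.
- src/libserver/url.c: when rspamd_web_parse finishes in the
  parse_domain state, it jumps to `out` without setting UF_HOST if the
  host span is empty or its first or last character fails is_domain().
- src/libserver/url.c: the fallback ("Hack, hack, hack") branch in
  rspamd_url_parse now assigns PROTOCOL_UNKNOWN instead of PROTOCOL_HTTP.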

---
 src/libserver/html.c | 4 +++-
 src/libserver/url.c  | 4 ++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index 7da18cdfe..63638d28b 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1441,7 +1441,9 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
 
 	rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
 
-	if (rc == URI_ERRNO_OK) {
+	/* Filter some completely damaged urls */
+	if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
+		!((url->flags & RSPAMD_URL_FLAG_OBSCURED) && (url->protocol & PROTOCOL_UNKNOWN))) {
 		url->flags |= saved_flags;
 
 		if (has_bad_chars) {
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 6b4a0d2d0..f860eec0c 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1388,7 +1388,7 @@ rspamd_web_parse (struct http_parser_url *u, const gchar *str, gsize len,
 	/* Parse remaining */
 	switch (st) {
 		case parse_domain:
-			if (p - c == 0) {
+			if (p - c == 0 || !is_domain (*(p - 1)) || !is_domain (*c)) {
 				goto out;
 			}
 			SET_U (u, UF_HOST);
@@ -2032,7 +2032,7 @@ rspamd_url_parse (struct rspamd_url *uri,
 		}
 		else {
 			/* Hack, hack, hack */
-			uri->protocol = PROTOCOL_HTTP;
+			uri->protocol = PROTOCOL_UNKNOWN;
 		}
 	}
 


More information about the Commits mailing list