commit 69795cd: [Feature] Core: Support telephone URLs

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jan 29 14:28:07 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-01-29 14:22:32 +0000
URL: https://github.com/rspamd/rspamd/commit/69795cdfc0f40e12024eff967690733ea9382864 (HEAD -> master)

[Feature] Core: Support telephone URLs

---
 src/libserver/url.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++-----
 src/libserver/url.h |   1 +
 2 files changed, 150 insertions(+), 16 deletions(-)

diff --git a/src/libserver/url.c b/src/libserver/url.c
index 018ab78c3..4599f3ce1 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -97,6 +97,11 @@ static const struct {
 				.name = "mailto",
 				.len = 6
 		},
+		{
+				.proto = PROTOCOL_TELEPHONE,
+				.name = "tel",
+				.len = 3
+		},
 		{
 				.proto = PROTOCOL_UNKNOWN,
 				.name = NULL,
@@ -120,36 +125,44 @@ struct url_matcher {
 };
 
 static gboolean url_file_start (struct url_callback_data *cb,
-		const gchar *pos,
-		url_match_t *match);
+								const gchar *pos,
+								url_match_t *match);
 
 static gboolean url_file_end (struct url_callback_data *cb,
-		const gchar *pos,
-		url_match_t *match);
+							  const gchar *pos,
+							  url_match_t *match);
 
 static gboolean url_web_start (struct url_callback_data *cb,
-		const gchar *pos,
-		url_match_t *match);
+							   const gchar *pos,
+							   url_match_t *match);
 
 static gboolean url_web_end (struct url_callback_data *cb,
-		const gchar *pos,
-		url_match_t *match);
+							 const gchar *pos,
+							 url_match_t *match);
 
 static gboolean url_tld_start (struct url_callback_data *cb,
-		const gchar *pos,
-		url_match_t *match);
+							   const gchar *pos,
+							   url_match_t *match);
 
 static gboolean url_tld_end (struct url_callback_data *cb,
-		const gchar *pos,
-		url_match_t *match);
+							 const gchar *pos,
+							 url_match_t *match);
 
 static gboolean url_email_start (struct url_callback_data *cb,
-		const gchar *pos,
-		url_match_t *match);
+								 const gchar *pos,
+								 url_match_t *match);
 
 static gboolean url_email_end (struct url_callback_data *cb,
-		const gchar *pos,
-		url_match_t *match);
+							   const gchar *pos,
+							   url_match_t *match);
+
+static gboolean url_tel_start (struct url_callback_data *cb,
+							   const gchar *pos,
+							   url_match_t *match);
+
+static gboolean url_tel_end (struct url_callback_data *cb,
+							 const gchar *pos,
+							 url_match_t *match);
 
 struct url_matcher static_matchers[] = {
 		/* Common prefixes */
@@ -173,6 +186,8 @@ struct url_matcher static_matchers[] = {
 				0, 0},
 		{"telnet://", "",          url_web_start,   url_web_end,
 				0, 0},
+		{"tel:", "",               url_tel_start,   url_tel_end,
+				0, 0},
 		{"webcal://", "",          url_web_start,   url_web_end,
 				0, 0},
 		{"mailto:",   "",          url_email_start, url_email_end,
@@ -1646,6 +1661,76 @@ rspamd_url_parse (struct rspamd_url *uri,
 			ret = rspamd_mailto_parse (&u, uristring, len, &end, parse_flags,
 					&flags);
 		}
+		else if (g_ascii_strncasecmp (p, "tel:", sizeof ("tel:") - 1) == 0) {
+			/* Telephone url */
+			gint nlen = 0;
+			gboolean has_plus = FALSE;
+			end = p + len;
+			gchar *t, *tend;
+			UChar32 uc;
+
+			uri->raw = p;
+			uri->rawlen = len;
+			uri->string = rspamd_mempool_alloc (pool, len + 1);
+			t = uri->string;
+			tend = t + len;
+			i = 4;
+
+			memcpy (t, "tel:", 4);
+			t += 4;
+			p += 4;
+			nlen = 4;
+
+			if (*p == '+') {
+				has_plus = TRUE;
+				*t++ = *p++;
+				nlen ++;
+				i ++;
+			}
+
+			while (t < tend && i < len) {
+				U8_NEXT (uristring, i, len, uc);
+
+				if (u_isdigit (uc)) {
+					if (g_ascii_isdigit (uc)) {
+						*t++ = uc;
+						nlen ++;
+					}
+					else {
+						/* Obfuscated number */
+						uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+					}
+				}
+				else if (IS_OBSCURED_CHAR (uc)) {
+					uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+				}
+			}
+
+			*t = '\0';
+
+			if (rspamd_normalise_unicode_inplace (pool, uri->string, &nlen)) {
+				uri->flags |= RSPAMD_URL_FLAG_UNNORMALISED;
+			}
+
+			uri->urllen = nlen;
+
+			uri->protocol = PROTOCOL_TELEPHONE;
+			uri->protocollen = 4;
+
+			uri->host = uri->string + 4;
+			uri->hostlen = nlen - 4;
+
+			if (has_plus) {
+				uri->tld = uri->string + 5;
+				uri->tldlen = nlen - 5;
+			}
+			else {
+				uri->tld = uri->string + 4;
+				uri->tldlen = nlen - 4;
+			}
+
+			return URI_ERRNO_OK;
+		}
 		else {
 			ret = rspamd_web_parse (&u, uristring, len, &end, parse_flags,
 					&flags);
@@ -2276,6 +2361,54 @@ url_email_end (struct url_callback_data *cb,
 	return FALSE;
 }
 
+/*
+ * Start callback used by the "tel:" entry of static_matchers.
+ *
+ * Accepts the candidate only when the character at `pos` is '+' or an
+ * ASCII digit; in that case `pos` is stored as the beginning of the match.
+ * `cb` is not used by this callback.
+ *
+ * Returns TRUE when the candidate is accepted, FALSE otherwise.
+ */
+static gboolean
+url_tel_start (struct url_callback_data *cb,
+			   const gchar *pos,
+			   url_match_t *match)
+{
+	const gchar first = *pos;
+
+	/* A telephone number has to begin with '+' or an ASCII digit */
+	if (first == '+' || g_ascii_isdigit (first)) {
+		match->m_begin = pos;
+
+		return TRUE;
+	}
+
+	return FALSE;
+}
+
+/*
+ * End callback used by the "tel:" entry of static_matchers: measures how
+ * many bytes, starting at `pos`, belong to the telephone number.
+ *
+ * The scan is bounded by cb->end and, when match->newline_pos is set and
+ * match->st is not '<', additionally by that newline position. Input is
+ * decoded as UTF-8; the scan stops at the first invalid sequence or at the
+ * first code point that is neither a Unicode digit, Unicode whitespace, an
+ * IS_OBSCURED_CHAR character, '+', '-' nor '.'.
+ *
+ * On return match->m_len holds the byte length of the accepted prefix; the
+ * code point that terminated the scan is NOT included.
+ *
+ * Always returns TRUE.
+ */
+static gboolean
+url_tel_end (struct url_callback_data *cb,
+			 const gchar *pos,
+			 url_match_t *match)
+{
+	UChar32 uc;
+	gint len = cb->end - pos, i = 0, next;
+
+	if (match->newline_pos && match->st != '<') {
+		/* We should also limit our match end to the newline */
+		len = MIN (len, match->newline_pos - pos);
+	}
+
+	while (i < len) {
+		/*
+		 * U8_NEXT advances its offset past the decoded code point, so decode
+		 * through a scratch offset and commit it only once the character is
+		 * accepted; otherwise the terminating character would be counted
+		 * as a part of the match.
+		 */
+		next = i;
+		U8_NEXT (pos, next, len, uc);
+
+		if (uc < 0) {
+			/* Invalid UTF-8 sequence: stop in front of it */
+			break;
+		}
+
+		if (!(u_isdigit (uc) || u_isspace (uc) ||
+			  IS_OBSCURED_CHAR (uc) || uc == '+' ||
+			  uc == '-' || uc == '.')) {
+			break;
+		}
+
+		i = next;
+	}
+
+	match->m_len = i;
+
+	return TRUE;
+}
+
+
 static gboolean
 rspamd_url_trie_is_match (struct url_matcher *matcher, const gchar *pos,
 		const gchar *end, const gchar *newline_pos)
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 12a649ec7..fa5c69f00 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -84,6 +84,7 @@ enum rspamd_url_protocol {
 	PROTOCOL_HTTP,
 	PROTOCOL_HTTPS,
 	PROTOCOL_MAILTO,
+	PROTOCOL_TELEPHONE,
 	PROTOCOL_UNKNOWN
 };
 


More information about the Commits mailing list