commit 69795cd: [Feature] Core: Support telephone URLs
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Jan 29 14:28:07 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-01-29 14:22:32 +0000
URL: https://github.com/rspamd/rspamd/commit/69795cdfc0f40e12024eff967690733ea9382864 (HEAD -> master)
[Feature] Core: Support telephone URLs
---
src/libserver/url.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++-----
src/libserver/url.h | 1 +
2 files changed, 150 insertions(+), 16 deletions(-)
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 018ab78c3..4599f3ce1 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -97,6 +97,11 @@ static const struct {
.name = "mailto",
.len = 6
},
+ {
+ .proto = PROTOCOL_TELEPHONE,
+ .name = "tel",
+ .len = 3
+ },
{
.proto = PROTOCOL_UNKNOWN,
.name = NULL,
@@ -120,36 +125,44 @@ struct url_matcher {
};
static gboolean url_file_start (struct url_callback_data *cb,
- const gchar *pos,
- url_match_t *match);
+ const gchar *pos,
+ url_match_t *match);
static gboolean url_file_end (struct url_callback_data *cb,
- const gchar *pos,
- url_match_t *match);
+ const gchar *pos,
+ url_match_t *match);
static gboolean url_web_start (struct url_callback_data *cb,
- const gchar *pos,
- url_match_t *match);
+ const gchar *pos,
+ url_match_t *match);
static gboolean url_web_end (struct url_callback_data *cb,
- const gchar *pos,
- url_match_t *match);
+ const gchar *pos,
+ url_match_t *match);
static gboolean url_tld_start (struct url_callback_data *cb,
- const gchar *pos,
- url_match_t *match);
+ const gchar *pos,
+ url_match_t *match);
static gboolean url_tld_end (struct url_callback_data *cb,
- const gchar *pos,
- url_match_t *match);
+ const gchar *pos,
+ url_match_t *match);
static gboolean url_email_start (struct url_callback_data *cb,
- const gchar *pos,
- url_match_t *match);
+ const gchar *pos,
+ url_match_t *match);
static gboolean url_email_end (struct url_callback_data *cb,
- const gchar *pos,
- url_match_t *match);
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_tel_start (struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
+
+static gboolean url_tel_end (struct url_callback_data *cb,
+ const gchar *pos,
+ url_match_t *match);
struct url_matcher static_matchers[] = {
/* Common prefixes */
@@ -173,6 +186,8 @@ struct url_matcher static_matchers[] = {
0, 0},
{"telnet://", "", url_web_start, url_web_end,
0, 0},
+ {"tel:", "", url_tel_start, url_tel_end,
+ 0, 0},
{"webcal://", "", url_web_start, url_web_end,
0, 0},
{"mailto:", "", url_email_start, url_email_end,
@@ -1646,6 +1661,76 @@ rspamd_url_parse (struct rspamd_url *uri,
ret = rspamd_mailto_parse (&u, uristring, len, &end, parse_flags,
&flags);
}
+ else if (g_ascii_strncasecmp (p, "tel:", sizeof ("tel:") - 1) == 0) {
+ /* Telephone url */
+ gint nlen = 0;
+ gboolean has_plus = FALSE;
+ end = p + len;
+ gchar *t, *tend;
+ UChar32 uc;
+
+ uri->raw = p;
+ uri->rawlen = len;
+ uri->string = rspamd_mempool_alloc (pool, len + 1);
+ t = uri->string;
+ tend = t + len;
+ i = 4;
+
+ memcpy (t, "tel:", 4);
+ t += 4;
+ p += 4;
+ nlen = 4;
+
+ if (*p == '+') {
+ has_plus = TRUE;
+ *t++ = *p++;
+ nlen ++;
+ i ++;
+ }
+
+ while (t < tend && i < len) {
+ U8_NEXT (uristring, i, len, uc);
+
+ if (u_isdigit (uc)) {
+ if (g_ascii_isdigit (uc)) {
+ *t++ = uc;
+ nlen ++;
+ }
+ else {
+ /* Obfuscated number */
+ uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ }
+ }
+ else if (IS_OBSCURED_CHAR (uc)) {
+ uri->flags |= RSPAMD_URL_FLAG_OBSCURED;
+ }
+ }
+
+ *t = '\0';
+
+ if (rspamd_normalise_unicode_inplace (pool, uri->string, &nlen)) {
+ uri->flags |= RSPAMD_URL_FLAG_UNNORMALISED;
+ }
+
+ uri->urllen = nlen;
+
+ uri->protocol = PROTOCOL_TELEPHONE;
+ uri->protocollen = 4;
+
+ uri->host = uri->string + 4;
+ uri->hostlen = nlen - 4;
+
+ if (has_plus) {
+ uri->tld = uri->string + 5;
+ uri->tldlen = nlen - 5;
+ }
+ else {
+ uri->tld = uri->string + 4;
+ uri->tldlen = nlen - 4;
+ }
+
+ return URI_ERRNO_OK;
+ }
else {
ret = rspamd_web_parse (&u, uristring, len, &end, parse_flags,
&flags);
@@ -2276,6 +2361,54 @@ url_email_end (struct url_callback_data *cb,
return FALSE;
}
+/*
+ * Start callback for the "tel:" matcher.
+ *
+ * Accepts the candidate only when the byte at `pos` is '+' or an ASCII
+ * digit; on success `pos` is stored as the beginning of the match.
+ *
+ * NOTE(review): match->st is not assigned here, although url_tel_end ()
+ * reads it - confirm that the caller initialises it.
+ */
+static gboolean
+url_tel_start (struct url_callback_data *cb,
+        const gchar *pos,
+        url_match_t *match)
+{
+    const gchar first = *pos;
+
+    /* A telephone number has to open with '+' or a digit */
+    if (first == '+' || g_ascii_isdigit (first)) {
+        match->m_begin = pos;
+
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*
+ * End callback for the "tel:" matcher: measures the telephone number that
+ * starts at `pos`.
+ *
+ * Scans UTF-8 code points from `pos` up to `cb->end` (additionally capped at
+ * `match->newline_pos` when one is set and `match->st` is not '<') and stops
+ * at the first invalid sequence or at the first code point that is not a
+ * Unicode digit, Unicode whitespace, an IS_OBSCURED_CHAR character, '+',
+ * '-' or '.'.
+ *
+ * On return `match->m_len` holds the byte length of the accepted prefix; the
+ * code point that stopped the scan is not counted. Always returns TRUE, even
+ * when nothing was accepted.
+ */
+static gboolean
+url_tel_end (struct url_callback_data *cb,
+        const gchar *pos,
+        url_match_t *match)
+{
+    UChar32 uc;
+    gint len = cb->end - pos, i = 0, accepted = 0;
+
+    if (match->newline_pos && match->st != '<') {
+        /* We should also limit our match end to the newline */
+        len = MIN (len, match->newline_pos - pos);
+    }
+
+    while (i < len) {
+        /* U8_NEXT moves `i` past the code point before we can inspect it */
+        U8_NEXT (pos, i, len, uc);
+
+        if (uc < 0) {
+            /* Invalid UTF-8: the number ends before the bad bytes */
+            break;
+        }
+
+        if (!(u_isdigit (uc) || u_isspace (uc) ||
+                IS_OBSCURED_CHAR (uc) || uc == '+' ||
+                uc == '-' || uc == '.')) {
+            break;
+        }
+
+        /* Only now is the code point known to belong to the number */
+        accepted = i;
+    }
+
+    match->m_len = accepted;
+
+    return TRUE;
+}
+
+
static gboolean
rspamd_url_trie_is_match (struct url_matcher *matcher, const gchar *pos,
const gchar *end, const gchar *newline_pos)
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 12a649ec7..fa5c69f00 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -84,6 +84,7 @@ enum rspamd_url_protocol {
PROTOCOL_HTTP,
PROTOCOL_HTTPS,
PROTOCOL_MAILTO,
+ PROTOCOL_TELEPHONE,
PROTOCOL_UNKNOWN
};
More information about the Commits mailing list