commit a8f11fa: [Rework] Rework URL structure: adjust tld part
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Mar 9 10:49:11 UTC 2020
Author: Vsevolod Stakhov
Date: 2020-03-06 14:03:20 +0000
URL: https://github.com/rspamd/rspamd/commit/a8f11faf7f584916078d6fadb36e0c2f1984e2b0
[Rework] Rework URL structure: adjust tld part
---
src/libserver/html.c | 12 ++++++------
src/libserver/protocol.c | 6 ++++--
src/libserver/url.c | 10 +++++-----
src/libserver/url.h | 19 +++++++++----------
src/libstat/tokenizers/tokenizers.c | 2 +-
src/lua/lua_url.c | 4 ++--
6 files changed, 27 insertions(+), 26 deletions(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index e1a211d2c..981141ad8 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -704,14 +704,14 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool,
/* Apply the same logic for TLD */
disp_tok.len = text_url->tldlen;
- disp_tok.begin = text_url->tld;
+ disp_tok.begin = rspamd_url_tld_unsafe (text_url);
#if U_ICU_VERSION_MAJOR_NUM >= 46
- if (rspamd_substring_search_caseless (text_url->tld,
+ if (rspamd_substring_search_caseless (rspamd_url_tld_unsafe (text_url),
text_url->tldlen, "xn--", 4) != -1) {
idn_hbuf = rspamd_mempool_alloc (pool, text_url->tldlen * 2 + 1);
/* We need to convert it to the normal value first */
disp_tok.len = uidna_nameToUnicodeUTF8 (udn,
- text_url->tld, text_url->tldlen,
+ rspamd_url_tld_unsafe (text_url), text_url->tldlen,
idn_hbuf, text_url->tldlen * 2 + 1, &uinfo, &uc_err);
if (uc_err != U_ZERO_ERROR) {
@@ -725,14 +725,14 @@ rspamd_html_url_is_phished (rspamd_mempool_t *pool,
}
#endif
href_tok.len = href_url->tldlen;
- href_tok.begin = href_url->tld;
+ href_tok.begin = rspamd_url_tld_unsafe (href_url);
#if U_ICU_VERSION_MAJOR_NUM >= 46
- if (rspamd_substring_search_caseless (href_url->tld,
+ if (rspamd_substring_search_caseless (rspamd_url_tld_unsafe (href_url),
href_url->tldlen, "xn--", 4) != -1) {
idn_hbuf = rspamd_mempool_alloc (pool, href_url->tldlen * 2 + 1);
/* We need to convert it to the normal value first */
href_tok.len = uidna_nameToUnicodeUTF8 (udn,
- href_url->tld, href_url->tldlen,
+ rspamd_url_tld_unsafe (href_url), href_url->tldlen,
idn_hbuf, href_url->tldlen * 2 + 1, &uinfo, &uc_err);
if (uc_err != U_ZERO_ERROR) {
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index 16dc05491..739d3b950 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -878,11 +878,13 @@ rspamd_protocol_extended_url (struct rspamd_task *task,
ucl_object_insert_key (obj, elt, "url", 0, false);
if (url->tldlen > 0) {
- elt = ucl_object_fromstring_common (url->tld, url->tldlen, 0);
+ elt = ucl_object_fromstring_common (rspamd_url_tld_unsafe (url),
+ url->tldlen, 0);
ucl_object_insert_key (obj, elt, "tld", 0, false);
}
if (url->hostlen > 0) {
- elt = ucl_object_fromstring_common (rspamd_url_host_unsafe (url), url->hostlen, 0);
+ elt = ucl_object_fromstring_common (rspamd_url_host_unsafe (url),
+ url->hostlen, 0);
ucl_object_insert_key (obj, elt, "host", 0, false);
}
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 7e85a460e..043f523f0 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1561,7 +1561,7 @@ rspamd_tld_trie_callback (struct rspamd_multipattern *mp,
if ((ndots == 0 || p == start - 1) &&
url->tldlen < rspamd_url_host_unsafe (url) + url->hostlen - pos) {
- url->tld = (gchar *) pos;
+ url->tldshift = (pos - url->string);
url->tldlen = rspamd_url_host_unsafe (url) + url->hostlen - pos;
}
@@ -1590,11 +1590,11 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a
(gint)(uri->hostshift),
uri->string);
uri->hostshift = r;
+ uri->tldshift = r;
start_offset = strbuf + r;
inet_ntop (af, addr, strbuf + r, slen - r + 1);
uri->hostlen = strlen (start_offset);
r += uri->hostlen;
- uri->tld = (const gchar *)start_offset;
uri->tldlen = uri->hostlen;
uri->flags |= RSPAMD_URL_FLAG_NUMERIC;
@@ -2214,7 +2214,7 @@ rspamd_url_parse (struct rspamd_url *uri,
} else {
if (!rspamd_url_is_ip (uri, pool)) {
/* Assume tld equal to host */
- uri->tld = rspamd_url_host_unsafe (uri);
+ uri->tldshift = uri->hostshift;
uri->tldlen = uri->hostlen;
}
}
@@ -2241,11 +2241,11 @@ rspamd_url_parse (struct rspamd_url *uri,
rspamd_telephone_normalise_inplace (uri);
if (rspamd_url_host_unsafe (uri)[0] == '+') {
- uri->tld = rspamd_url_host_unsafe (uri) + 1;
+ uri->tldshift = uri->hostshift + 1;
uri->tldlen = uri->hostlen - 1;
}
else {
- uri->tld = rspamd_url_host_unsafe (uri);
+ uri->tldshift = uri->hostshift;
uri->tldlen = uri->hostlen;
}
}
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 87766c4e6..00f09ac30 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -53,15 +53,7 @@ struct rspamd_url {
guint datashift;
guint queryshift;
guint fragmentshift;
-
- gchar *tld;
- gchar *visible_part;
-
- struct rspamd_url *phished_url;
-
- guint urllen;
- guint rawlen;
- guint32 flags;
+ guint tldshift;
guint16 protocollen;
guint16 userlen;
@@ -70,8 +62,14 @@ struct rspamd_url {
guint16 querylen;
guint16 fragmentlen;
guint16 tldlen;
-
guint16 count;
+
+ guint urllen;
+ guint rawlen;
+ guint32 flags;
+
+ gchar *visible_part;
+ struct rspamd_url *phished_url;
};
#define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL)
@@ -79,6 +77,7 @@ struct rspamd_url {
#define rspamd_url_host(u) ((u)->hostlen > 0 ? (u)->string + (u)->hostshift : NULL)
#define rspamd_url_host_unsafe(u) ((u)->string + (u)->hostshift)
+#define rspamd_url_tld_unsafe(u) ((u)->string + (u)->tldshift)
#define rspamd_url_data_unsafe(u) ((u)->string + (u)->datashift)
#define rspamd_url_query_unsafe(u) ((u)->string + (u)->queryshift)
diff --git a/src/libstat/tokenizers/tokenizers.c b/src/libstat/tokenizers/tokenizers.c
index 77a924f41..9f1b14daf 100644
--- a/src/libstat/tokenizers/tokenizers.c
+++ b/src/libstat/tokenizers/tokenizers.c
@@ -262,7 +262,7 @@ rspamd_tokenize_exception (struct rspamd_process_exception *ex, GArray *res)
uri = ex->ptr;
if (uri && uri->tldlen > 0) {
- token.original.begin = uri->tld;
+ token.original.begin = rspamd_url_tld_unsafe (uri);
token.original.len = uri->tldlen;
}
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index cb54a694c..efd34dc6c 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -560,7 +560,7 @@ lua_url_get_tld (lua_State *L)
struct rspamd_lua_url *url = lua_check_url (L, 1);
if (url != NULL && url->url->tldlen > 0) {
- lua_pushlstring (L, url->url->tld, url->url->tldlen);
+ lua_pushlstring (L, rspamd_url_tld_unsafe (url->url), url->url->tldlen);
}
else {
lua_pushnil (L);
@@ -672,7 +672,7 @@ lua_url_to_table (lua_State *L)
if (u->tldlen > 0) {
lua_pushstring (L, "tld");
- lua_pushlstring (L, u->tld, u->tldlen);
+ lua_pushlstring (L, rspamd_url_tld_unsafe (u), u->tldlen);
lua_settable (L, -3);
}
More information about the Commits mailing list