commit eef2f3c: [Feature] Reorganise struct rspamd_url to be 64 bytes size
Vsevolod Stakhov
vsevolod at rspamd.com
Sun Jul 23 19:42:03 UTC 2023
Author: Vsevolod Stakhov
Date: 2023-07-23 20:41:02 +0100
URL: https://github.com/rspamd/rspamd/commit/eef2f3cac7c975af050efaf4cf1acafcb9b501e3 (HEAD -> master)
[Feature] Reorganise struct rspamd_url to be 64 bytes size
---
src/libserver/html/html_url.cxx | 21 ++++++++----
src/libserver/protocol.c | 6 ++--
src/libserver/url.c | 5 ++-
src/libserver/url.h | 73 ++++++++++++++++++++++++++++-------------
src/lua/lua_url.c | 30 ++++++++++-------
5 files changed, 87 insertions(+), 48 deletions(-)
diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx
index 0068ea30f..ae2514ba1 100644
--- a/src/libserver/html/html_url.cxx
+++ b/src/libserver/html/html_url.cxx
@@ -183,8 +183,12 @@ html_url_is_phished(rspamd_mempool_t *pool,
if (!rspamd_url_is_subdomain(disp_tok, href_tok)) {
href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
- href_url->linked_url = text_url;
text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+
+ if (href_url->ext == nullptr) {
+ href_url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+ }
+ href_url->ext->linked_url = text_url;
}
}
}
@@ -241,18 +245,21 @@ html_check_displayed_url(rspamd_mempool_t *pool,
return;
}
- url->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
- rspamd_strlcpy(url->visible_part,
+ if (url->ext == nullptr) {
+ url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+ }
+ url->ext->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
+ rspamd_strlcpy(url->ext->visible_part,
visible_part.data(),
visible_part.size() + 1);
dlen = visible_part.size();
/* Strip unicode spaces from the start and the end */
- url->visible_part = const_cast<char *>(
- rspamd_string_unicode_trim_inplace(url->visible_part,
+ url->ext->visible_part = const_cast<char *>(
+ rspamd_string_unicode_trim_inplace(url->ext->visible_part,
&dlen));
auto maybe_url = html_url_is_phished(pool, url,
- {url->visible_part, dlen});
+ {url->ext->visible_part, dlen});
if (maybe_url) {
url->flags |= saved_flags;
@@ -300,7 +307,7 @@ html_check_displayed_url(rspamd_mempool_t *pool,
}
}
- rspamd_normalise_unicode_inplace(url->visible_part, &dlen);
+ rspamd_normalise_unicode_inplace(url->ext->visible_part, &dlen);
}
auto
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index 3708d823f..1d1214c27 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -909,9 +909,9 @@ rspamd_protocol_extended_url (struct rspamd_task *task,
ucl_object_insert_key (obj, flags, "flags", 0, false);
- if (url->linked_url) {
- encoded = rspamd_url_encode (url->linked_url, &enclen, task->task_pool);
- elt = rspamd_protocol_extended_url (task, url->linked_url, encoded,
+ if (url->ext && url->ext->linked_url) {
+ encoded = rspamd_url_encode (url->ext->linked_url, &enclen, task->task_pool);
+ elt = rspamd_protocol_extended_url (task, url->ext->linked_url, encoded,
enclen);
ucl_object_insert_key (obj, elt, "linked_url", 0, false);
}
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 824dc05cc..0deede068 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1797,11 +1797,11 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a
uri->flags |= RSPAMD_URL_FLAG_NUMERIC;
/* Reconstruct URL */
- if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT) {
+ if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT && uri->ext) {
p = strbuf + r;
start_offset = p + 1;
r += rspamd_snprintf (strbuf + r, slen - r, ":%ud",
- (unsigned int)uri->port);
+ (unsigned int)uri->ext->port);
}
if (uri->datalen > 0) {
p = strbuf + r;
@@ -2351,7 +2351,6 @@ rspamd_url_parse (struct rspamd_url *uri,
}
}
- uri->port = u.port;
uri->flags = flags;
if (!uri->hostlen) {
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 0b326869b..9c5b7be28 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -52,35 +52,46 @@ struct rspamd_url_tag {
struct rspamd_url_tag *prev, *next;
};
-
+struct rspamd_url_ext;
+/**
+ * URL structure
+ */
struct rspamd_url {
- gchar *string;
- gchar *raw;
+ char *string;
+ char *raw;
+ struct rspamd_url_ext *ext;
- gchar *visible_part;
- struct rspamd_url *linked_url;
+ uint32_t flags;
- guint32 flags;
+ uint8_t protocol;
+ uint8_t protocollen;
- guint8 protocol;
- guint8 protocollen;
-
- guint16 port;
+ uint16_t hostshift;
+ uint16_t datashift;
+ uint16_t queryshift;
+ uint16_t fragmentshift;
+ uint16_t tldshift;
guint16 usershift;
- guint16 hostshift;
- guint16 datashift;
- guint16 queryshift;
- guint16 fragmentshift;
- guint16 tldshift;
guint16 userlen;
- guint16 hostlen;
- guint16 datalen;
- guint16 querylen;
- guint16 fragmentlen;
- guint16 tldlen;
- guint16 count;
- guint16 urllen;
- guint16 rawlen;
+
+ uint16_t hostlen;
+ uint16_t datalen;
+ uint16_t querylen;
+ uint16_t fragmentlen;
+ uint16_t tldlen;
+ uint16_t count;
+ uint16_t urllen;
+ uint16_t rawlen;
+};
+
+/**
+ * Rarely used url fields
+ */
+struct rspamd_url_ext {
+ gchar *visible_part;
+ struct rspamd_url *linked_url;
+
+ guint16 port;
};
#define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL)
@@ -350,6 +361,22 @@ int rspamd_url_cmp(const struct rspamd_url *u1, const struct rspamd_url *u2);
*/
int rspamd_url_cmp_qsort(const void *u1, const void *u2);
+static inline uint16_t rspamd_url_get_port(struct rspamd_url *u)
+{
+ if (u->flags & RSPAMD_URL_FLAG_HAS_PORT && u->ext) {
+ return u->ext->port;
+ }
+ else {
+ /* Assume standard port */
+ if (u->protocol == PROTOCOL_HTTPS) {
+ return 443;
+ }
+ else {
+ return 80;
+ }
+ }
+}
+
/**
* Normalize unicode input and set out url flags as appropriate
* @param pool
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 39b0293aa..a46f4e276 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -186,7 +186,7 @@ lua_url_get_port (lua_State *L)
struct rspamd_lua_url *url = lua_check_url (L, 1);
if (url != NULL) {
- lua_pushinteger (L, url->url->port);
+ lua_pushinteger (L, rspamd_url_get_port(url->url));
}
else {
lua_pushnil (L);
@@ -475,12 +475,13 @@ lua_url_get_phished (lua_State *L)
struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);
if (url) {
- if (url->url->linked_url != NULL) {
+ if (url->url->ext && url->url->ext->linked_url != NULL) {
+ /* XXX: in fact, this is the only possible combination of flags, so this check is redundant */
if (url->url->flags &
(RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
rspamd_lua_setclass (L, "rspamd{url}", -1);
- purl->url = url->url->linked_url;
+ purl->url = url->url->ext->linked_url;
return 1;
}
@@ -535,7 +536,11 @@ lua_url_set_redirected (lua_State *L)
redir = lua_check_url (L, -1);
url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
- url->url->linked_url = redir->url;
+
+ if (url->url->ext == NULL) {
+ url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+ }
+ url->url->ext->linked_url = redir->url;
}
}
else {
@@ -546,7 +551,10 @@ lua_url_set_redirected (lua_State *L)
}
url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
- url->url->linked_url = redir->url;
+ if (url->url->ext == NULL) {
+ url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+ }
+ url->url->ext->linked_url = redir->url;
/* Push back on stack */
lua_pushvalue (L, 2);
@@ -629,8 +637,8 @@ lua_url_get_visible (lua_State *L)
LUA_TRACE_POINT;
struct rspamd_lua_url *url = lua_check_url (L, 1);
- if (url != NULL && url->url->visible_part) {
- lua_pushstring (L, url->url->visible_part);
+ if (url != NULL && url->url->ext && url->url->ext->visible_part) {
+ lua_pushstring (L, url->url->ext->visible_part);
}
else {
lua_pushnil (L);
@@ -671,11 +679,9 @@ lua_url_to_table (lua_State *L)
lua_settable (L, -3);
}
- if (u->port != 0) {
- lua_pushstring (L, "port");
- lua_pushinteger (L, u->port);
- lua_settable (L, -3);
- }
+ lua_pushstring (L, "port");
+ lua_pushinteger (L, rspamd_url_get_port(u));
+ lua_settable (L, -3);
if (u->tldlen > 0) {
lua_pushstring (L, "tld");
More information about the Commits mailing list