commit eef2f3c: [Feature] Reorganise struct rspamd_url to be 64 bytes size

Vsevolod Stakhov vsevolod at rspamd.com
Sun Jul 23 19:42:03 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-07-23 20:41:02 +0100
URL: https://github.com/rspamd/rspamd/commit/eef2f3cac7c975af050efaf4cf1acafcb9b501e3 (HEAD -> master)

[Feature] Reorganise struct rspamd_url to be 64 bytes size

---
 src/libserver/html/html_url.cxx | 21 ++++++++----
 src/libserver/protocol.c        |  6 ++--
 src/libserver/url.c             |  5 ++-
 src/libserver/url.h             | 73 ++++++++++++++++++++++++++++-------------
 src/lua/lua_url.c               | 30 ++++++++++-------
 5 files changed, 87 insertions(+), 48 deletions(-)

diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx
index 0068ea30f..ae2514ba1 100644
--- a/src/libserver/html/html_url.cxx
+++ b/src/libserver/html/html_url.cxx
@@ -183,8 +183,12 @@ html_url_is_phished(rspamd_mempool_t *pool,
 
 						if (!rspamd_url_is_subdomain(disp_tok, href_tok)) {
 							href_url->flags |= RSPAMD_URL_FLAG_PHISHED;
-							href_url->linked_url = text_url;
 							text_url->flags |= RSPAMD_URL_FLAG_HTML_DISPLAYED;
+
+							if (href_url->ext == nullptr) {
+								href_url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+							}
+							href_url->ext->linked_url = text_url;
 						}
 					}
 				}
@@ -241,18 +245,21 @@ html_check_displayed_url(rspamd_mempool_t *pool,
 		return;
 	}
 
-	url->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
-	rspamd_strlcpy(url->visible_part,
+	if (url->ext == nullptr) {
+		url->ext = rspamd_mempool_alloc0_type(pool, rspamd_url_ext);
+	}
+	url->ext->visible_part = rspamd_mempool_alloc_buffer(pool, visible_part.size() + 1);
+	rspamd_strlcpy(url->ext->visible_part,
 			visible_part.data(),
 			visible_part.size() + 1);
 	dlen = visible_part.size();
 
 	/* Strip unicode spaces from the start and the end */
-	url->visible_part = const_cast<char *>(
-			rspamd_string_unicode_trim_inplace(url->visible_part,
+	url->ext->visible_part = const_cast<char *>(
+			rspamd_string_unicode_trim_inplace(url->ext->visible_part,
 			&dlen));
 	auto maybe_url = html_url_is_phished(pool, url,
-			{url->visible_part, dlen});
+			{url->ext->visible_part, dlen});
 
 	if (maybe_url) {
 		url->flags |= saved_flags;
@@ -300,7 +307,7 @@ html_check_displayed_url(rspamd_mempool_t *pool,
 		}
 	}
 
-	rspamd_normalise_unicode_inplace(url->visible_part, &dlen);
+	rspamd_normalise_unicode_inplace(url->ext->visible_part, &dlen);
 }
 
 auto
diff --git a/src/libserver/protocol.c b/src/libserver/protocol.c
index 3708d823f..1d1214c27 100644
--- a/src/libserver/protocol.c
+++ b/src/libserver/protocol.c
@@ -909,9 +909,9 @@ rspamd_protocol_extended_url (struct rspamd_task *task,
 
 	ucl_object_insert_key (obj, flags, "flags", 0, false);
 
-	if (url->linked_url) {
-		encoded = rspamd_url_encode (url->linked_url, &enclen, task->task_pool);
-		elt = rspamd_protocol_extended_url (task, url->linked_url, encoded,
+	if (url->ext && url->ext->linked_url) {
+		encoded = rspamd_url_encode (url->ext->linked_url, &enclen, task->task_pool);
+		elt = rspamd_protocol_extended_url (task, url->ext->linked_url, encoded,
 				enclen);
 		ucl_object_insert_key (obj, elt, "linked_url", 0, false);
 	}
diff --git a/src/libserver/url.c b/src/libserver/url.c
index 824dc05cc..0deede068 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -1797,11 +1797,11 @@ rspamd_url_regen_from_inet_addr (struct rspamd_url *uri, const void *addr, int a
 	uri->flags |= RSPAMD_URL_FLAG_NUMERIC;
 
 	/* Reconstruct URL */
-	if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT) {
+	if (uri->flags & RSPAMD_URL_FLAG_HAS_PORT && uri->ext) {
 		p = strbuf + r;
 		start_offset = p + 1;
 		r += rspamd_snprintf (strbuf + r, slen - r, ":%ud",
-				(unsigned int)uri->port);
+				(unsigned int)uri->ext->port);
 	}
 	if (uri->datalen > 0) {
 		p = strbuf + r;
@@ -2351,7 +2351,6 @@ rspamd_url_parse (struct rspamd_url *uri,
 		}
 	}
 
-	uri->port = u.port;
 	uri->flags = flags;
 
 	if (!uri->hostlen) {
diff --git a/src/libserver/url.h b/src/libserver/url.h
index 0b326869b..9c5b7be28 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -52,35 +52,46 @@ struct rspamd_url_tag {
 	struct rspamd_url_tag *prev, *next;
 };
 
-
+struct rspamd_url_ext;
+/**
+ * URL structure: frequently used fields only; rarely used ones live in `ext`
+ */
 struct rspamd_url {
-	gchar *string;
-	gchar *raw;
+	char *string; /* base pointer for the *shift offsets (see rspamd_url_user) */
+	char *raw;
+	struct rspamd_url_ext *ext; /* rarely used fields; NULL until allocated */
 
-	gchar *visible_part;
-	struct rspamd_url *linked_url;
+	uint32_t flags; /* bitwise OR of RSPAMD_URL_FLAG_* values */
 
-	guint32 flags;
+	uint8_t protocol; /* compared against PROTOCOL_* constants */
+	uint8_t protocollen;
 
-	guint8 protocol;
-	guint8 protocollen;
-
-	guint16 port;
+	uint16_t hostshift; /* presumably an offset into `string`, like usershift */
+	uint16_t datashift;
+	uint16_t queryshift;
+	uint16_t fragmentshift;
+	uint16_t tldshift;
 	guint16 usershift;
-	guint16 hostshift;
-	guint16 datashift;
-	guint16 queryshift;
-	guint16 fragmentshift;
-	guint16 tldshift;
 	guint16 userlen;
-	guint16 hostlen;
-	guint16 datalen;
-	guint16 querylen;
-	guint16 fragmentlen;
-	guint16 tldlen;
-	guint16 count;
-	guint16 urllen;
-	guint16 rawlen;
+
+	uint16_t hostlen; /* a zero *len marks an absent part (cf. userlen in rspamd_url_user) */
+	uint16_t datalen;
+	uint16_t querylen;
+	uint16_t fragmentlen;
+	uint16_t tldlen;
+	uint16_t count;
+	uint16_t urllen;
+	uint16_t rawlen;
+};
+
+/**
+ * Rarely used url fields; allocated on demand, so rspamd_url::ext may be NULL
+ */
+struct rspamd_url_ext {
+	char *visible_part; /* text displayed for an HTML link */
+	struct rspamd_url *linked_url; /* displayed url (phishing) or redirect target */
+
+	uint16_t port; /* explicit port; read when RSPAMD_URL_FLAG_HAS_PORT is set */
 };
 
 #define rspamd_url_user(u) ((u)->userlen > 0 ? (u)->string + (u)->usershift : NULL)
@@ -350,6 +361,22 @@ int rspamd_url_cmp(const struct rspamd_url *u1, const struct rspamd_url *u2);
  */
 int rspamd_url_cmp_qsort(const void *u1, const void *u2);
 
+/**
+ * Get the port of an url: the explicitly specified one when present,
+ * otherwise the default (443 for https, 80 for any other protocol)
+ * @param u url object, must not be NULL
+ * @return port number
+ */
+static inline uint16_t rspamd_url_get_port(const struct rspamd_url *u)
+{
+	if ((u->flags & RSPAMD_URL_FLAG_HAS_PORT) && u->ext != NULL) {
+		return u->ext->port;
+	}
+
+	/* Assume standard port */
+	return (u->protocol == PROTOCOL_HTTPS) ? 443 : 80;
+}
+
 /**
  * Normalize unicode input and set out url flags as appropriate
  * @param pool
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index 39b0293aa..a46f4e276 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -186,7 +186,7 @@ lua_url_get_port (lua_State *L)
 	struct rspamd_lua_url *url = lua_check_url (L, 1);
 
 	if (url != NULL) {
-		lua_pushinteger (L, url->url->port);
+		lua_pushinteger (L, rspamd_url_get_port(url->url));
 	}
 	else {
 		lua_pushnil (L);
@@ -475,12 +475,13 @@ lua_url_get_phished (lua_State *L)
 	struct rspamd_lua_url *purl, *url = lua_check_url (L, 1);
 
 	if (url) {
-		if (url->url->linked_url != NULL) {
+		if (url->url->ext && url->url->ext->linked_url != NULL) {
+			/* XXX: in fact, this is the only possible combination of flags, so this check is redundant */
 			if (url->url->flags &
 					(RSPAMD_URL_FLAG_PHISHED|RSPAMD_URL_FLAG_REDIRECTED)) {
 				purl = lua_newuserdata (L, sizeof (struct rspamd_lua_url));
 				rspamd_lua_setclass (L, "rspamd{url}", -1);
-				purl->url = url->url->linked_url;
+				purl->url = url->url->ext->linked_url;
 
 				return 1;
 			}
@@ -535,7 +536,11 @@ lua_url_set_redirected (lua_State *L)
 			redir = lua_check_url (L, -1);
 
 			url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
-			url->url->linked_url = redir->url;
+
+			if (url->url->ext == NULL) {
+				url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+			}
+			url->url->ext->linked_url = redir->url;
 		}
 	}
 	else {
@@ -546,7 +551,10 @@ lua_url_set_redirected (lua_State *L)
 		}
 
 		url->url->flags |= RSPAMD_URL_FLAG_REDIRECTED;
-		url->url->linked_url = redir->url;
+		if (url->url->ext == NULL) {
+			url->url->ext = rspamd_mempool_alloc0_type(pool, struct rspamd_url_ext);
+		}
+		url->url->ext->linked_url = redir->url;
 
 		/* Push back on stack */
 		lua_pushvalue (L, 2);
@@ -629,8 +637,8 @@ lua_url_get_visible (lua_State *L)
 	LUA_TRACE_POINT;
 	struct rspamd_lua_url *url = lua_check_url (L, 1);
 
-	if (url != NULL && url->url->visible_part) {
-		lua_pushstring (L, url->url->visible_part);
+	if (url != NULL && url->url->ext && url->url->ext->visible_part) {
+		lua_pushstring (L, url->url->ext->visible_part);
 	}
 	else {
 		lua_pushnil (L);
@@ -671,11 +679,9 @@ lua_url_to_table (lua_State *L)
 			lua_settable (L, -3);
 		}
 
-		if (u->port != 0) {
-			lua_pushstring (L, "port");
-			lua_pushinteger (L, u->port);
-			lua_settable (L, -3);
-		}
+		lua_pushstring (L, "port");
+		lua_pushinteger (L, rspamd_url_get_port(u));
+		lua_settable (L, -3);
 
 		if (u->tldlen > 0) {
 			lua_pushstring (L, "tld");


More information about the Commits mailing list