commit b2a0709: [Minor] Use lexicographic sorting for urls

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Feb 22 16:14:08 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-02-22 16:00:59 +0000
URL: https://github.com/rspamd/rspamd/commit/b2a070946a1374ce07d05f1fb78b7fca0bd919ae

[Minor] Use lexicographic sorting for urls

---
 src/libserver/url.c | 60 +++++++++++++++++++++++++++++++++++++----------------
 src/libserver/url.h |  9 +++++++-
 2 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/src/libserver/url.c b/src/libserver/url.c
index f64fba135..094456d7b 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -4167,37 +4167,61 @@ rspamd_url_flag_to_string (int flag)
 	return NULL;
 }
 
-int
+inline int
 rspamd_url_cmp (const struct rspamd_url *u1, const struct rspamd_url *u2)
 {
-	if (u1->protocol != u2->protocol || u1->urllen != u2->urllen) {
-		if (u1->protocol != u2->protocol) {
-			return u1->protocol < u2->protocol;
-		}
+	int min_len = MIN (u1->urllen, u2->urllen);
+	int r;
 
-		return (int)u1->urllen - (int)u2->urllen;
+	if (u1->protocol != u2->protocol) {
+		return u1->protocol < u2->protocol;
 	}
-	else {
-		int r;
 
-		if (u1->protocol & PROTOCOL_MAILTO) {
-			if ((r = rspamd_lc_cmp (rspamd_url_host_unsafe (u1),
-					rspamd_url_host_unsafe (u2), u1->hostlen)) == 0) {
+	if (u1->protocol & PROTOCOL_MAILTO) {
+		/* Emails specialisation (hosts must be compared in a case insensitive manner) */
+		min_len = MIN (u1->hostlen, u2->hostlen);
+
+		if ((r = rspamd_lc_cmp (rspamd_url_host_unsafe (u1),
+				rspamd_url_host_unsafe (u2), min_len)) == 0) {
+			if (u1->hostlen == u2->hostlen) {
 				if (u1->userlen != u2->userlen || u1->userlen == 0) {
-					return (int)u1->userlen - (int)u2->userlen;
+					r = (int) u1->userlen - (int) u2->userlen;
 				}
 				else {
-					return rspamd_lc_cmp (rspamd_url_user_unsafe(u1),
+					r = memcmp (rspamd_url_user_unsafe(u1),
 							rspamd_url_user_unsafe(u2),
 							u1->userlen);
 				}
 			}
+			else {
+				r = u1->hostlen < u2->hostlen;
+			}
+		}
+	}
+	else {
+		if (u1->urllen != u2->urllen) {
+			/* Different length, compare common part and then compare length */
+			r = memcmp (u1->string, u2->string, min_len);
 
-			return r;
+			if (r == 0) {
+				r = u1->urllen < u2->urllen;
+			}
+		}
+		else {
+			/* Equal length */
+			r = memcmp (u1->string, u2->string, u1->urllen);
 		}
+	}
 
-		r = memcmp (u1->string, u2->string, u1->urllen);
+	return r;
+}
+
+int
+rspamd_url_cmp_qsort (const void *_u1, const void *_u2)
+{
+	const struct rspamd_url *u1 = *(struct rspamd_url **) _u1,
+			*u2 = *(struct rspamd_url **) _u2;
+
+	return rspamd_url_cmp (u1, u2);
+}
 
-		return r;
-	}
-}
\ No newline at end of file
diff --git a/src/libserver/url.h b/src/libserver/url.h
index ca111ecf1..567cdd137 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -325,12 +325,19 @@ bool rspamd_url_set_has (khash_t (rspamd_url_hash) *set, struct rspamd_url *u);
 bool rspamd_url_host_set_has (khash_t (rspamd_url_host_hash) *set, struct rspamd_url *u);
 
 /**
- * Compares two urls (similar to C comparison functions)
+ * Compares two urls (similar to C comparison functions) lexicographically
  * @param u1
  * @param u2
  * @return
  */
 int rspamd_url_cmp (const struct rspamd_url *u1, const struct rspamd_url *u2);
+/**
+ * Same as rspamd_url_cmp, but with a qsort-compatible signature to sort a `struct rspamd_url *[]` array
+ * @param u1
+ * @param u2
+ * @return
+ */
+int rspamd_url_cmp_qsort (const void *u1, const void *u2);
 
 #ifdef  __cplusplus
 }


More information about the Commits mailing list