commit 79339e5: [Minor] Allow to compare utf8 strings of different length

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Aug 2 20:35:05 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-08-02 21:24:14 +0100
URL: https://github.com/rspamd/rspamd/commit/79339e5d4f52643b702b207313a3230dc6a97bba

[Minor] Allow to compare utf8 strings of different length

---
 src/libutil/cxx/utf8_util.cxx | 19 +++++++++++++++----
 src/libutil/cxx/utf8_util.h   |  9 +++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/libutil/cxx/utf8_util.cxx b/src/libutil/cxx/utf8_util.cxx
index 8b99d1f35..cf6e70fe6 100644
--- a/src/libutil/cxx/utf8_util.cxx
+++ b/src/libutil/cxx/utf8_util.cxx
@@ -176,18 +176,23 @@ struct rspamd_icu_collate_storage {
 static rspamd_icu_collate_storage collate_storage;
 
 int
-rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
+rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2)
 {
-	if (n >= std::numeric_limits<int>::max()) {
+	if (n1 >= std::numeric_limits<int>::max() || n2 >= std::numeric_limits<int>::max()) {
 		/*
 		 * It's hard to say what to do here... But libicu wants int, so we fall
 		 * back to g_ascii_strcasecmp which can deal with size_t
 		 */
-		return g_ascii_strncasecmp(s1, s2, n);
+		if (n1 == n2) {
+			return g_ascii_strncasecmp(s1, s2, n1);
+		}
+		else {
+			return n1 - n2;
+		}
 	}
 
 	UErrorCode success = U_ZERO_ERROR;
-	auto res = collate_storage.collator->compareUTF8({s1, (int) n}, {s2, (int) n},
+	auto res = collate_storage.collator->compareUTF8({s1, (int) n1}, {s2, (int) n2},
 			success);
 
 	switch (res) {
@@ -201,6 +206,12 @@ rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
 	}
 }
 
+int
+rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
+{
+	return rspamd_utf8_strcmp_sizes(s1, n, s2, n);
+}
+
 TEST_SUITE("utf8 utils") {
 TEST_CASE("utf8 normalise") {
 	std::tuple<const char *, const char *, int> cases[] = {
diff --git a/src/libutil/cxx/utf8_util.h b/src/libutil/cxx/utf8_util.h
index 28bd6a144..a9476f78d 100644
--- a/src/libutil/cxx/utf8_util.h
+++ b/src/libutil/cxx/utf8_util.h
@@ -59,6 +59,15 @@ enum rspamd_normalise_result rspamd_normalise_unicode_inplace(gchar *start, gsiz
  * @return an integer greater than, equal to, or less than 0, according as the string s1 is greater than, equal to, or less than the string s2.
  */
 int rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n);
+/**
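+ * Similar to rspamd_utf8_strcmp but accepts a separate length for each string
+ * @param s1 first string
+ * @param n1 length of s1 in bytes
+ * @param s2 second string
+ * @param n2 length of s2 in bytes
+ * @return an integer greater than, equal to, or less than 0, as for rspamd_utf8_strcmp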
+ */
+int rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2);
 
 #ifdef  __cplusplus
 }


More information about the Commits mailing list