commit 79339e5: [Minor] Allow to compare utf8 strings of different length
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Aug 2 20:35:05 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-08-02 21:24:14 +0100
URL: https://github.com/rspamd/rspamd/commit/79339e5d4f52643b702b207313a3230dc6a97bba
[Minor] Allow to compare utf8 strings of different length
---
src/libutil/cxx/utf8_util.cxx | 19 +++++++++++++++----
src/libutil/cxx/utf8_util.h | 9 +++++++++
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/src/libutil/cxx/utf8_util.cxx b/src/libutil/cxx/utf8_util.cxx
index 8b99d1f35..cf6e70fe6 100644
--- a/src/libutil/cxx/utf8_util.cxx
+++ b/src/libutil/cxx/utf8_util.cxx
@@ -176,18 +176,23 @@ struct rspamd_icu_collate_storage {
static rspamd_icu_collate_storage collate_storage;
int
-rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
+rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2)
{
- if (n >= std::numeric_limits<int>::max()) {
+ if (n1 >= std::numeric_limits<int>::max() || n2 >= std::numeric_limits<int>::max()) {
/*
* It's hard to say what to do here... But libicu wants int, so we fall
* back to g_ascii_strcasecmp which can deal with size_t
*/
- return g_ascii_strncasecmp(s1, s2, n);
+ if (n1 == n2) {
+ return g_ascii_strncasecmp(s1, s2, n1);
+ }
+ else {
+ return n1 - n2;
+ }
}
UErrorCode success = U_ZERO_ERROR;
- auto res = collate_storage.collator->compareUTF8({s1, (int) n}, {s2, (int) n},
+ auto res = collate_storage.collator->compareUTF8({s1, (int) n1}, {s2, (int) n2},
success);
switch (res) {
@@ -201,6 +206,12 @@ rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
}
}
+int
+rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n)
+{
+ return rspamd_utf8_strcmp_sizes(s1, n, s2, n);
+}
+
TEST_SUITE("utf8 utils") {
TEST_CASE("utf8 normalise") {
std::tuple<const char *, const char *, int> cases[] = {
diff --git a/src/libutil/cxx/utf8_util.h b/src/libutil/cxx/utf8_util.h
index 28bd6a144..a9476f78d 100644
--- a/src/libutil/cxx/utf8_util.h
+++ b/src/libutil/cxx/utf8_util.h
@@ -59,6 +59,15 @@ enum rspamd_normalise_result rspamd_normalise_unicode_inplace(gchar *start, gsiz
* @return an integer greater than, equal to, or less than 0, according as the string s1 is greater than, equal to, or less than the string s2.
*/
int rspamd_utf8_strcmp(const char *s1, const char *s2, gsize n);
+/**
+ * Similar to rspamd_utf8_strcmp but accepts two sizes
+ * @param s1
+ * @param n1
+ * @param s2
+ * @param n2
+ * @return
+ */
+int rspamd_utf8_strcmp_sizes(const char *s1, gsize n1, const char *s2, gsize n2);
#ifdef __cplusplus
}
More information about the Commits mailing list