commit cc9369c: [Test] Add some more test cases

Vsevolod Stakhov vsevolod at rspamd.com
Tue Mar 21 22:14:03 UTC 2023


Author: Vsevolod Stakhov
Date: 2023-03-21 22:10:34 +0000
URL: https://github.com/rspamd/rspamd/commit/cc9369cdf97415e514c8f751e14788d0c90f073b (HEAD -> master)

[Test] Add some more test cases

---
 src/libutil/cxx/utf8_util.cxx | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/libutil/cxx/utf8_util.cxx b/src/libutil/cxx/utf8_util.cxx
index c69191e8b..dadd5fc6e 100644
--- a/src/libutil/cxx/utf8_util.cxx
+++ b/src/libutil/cxx/utf8_util.cxx
@@ -229,12 +229,24 @@ TEST_CASE("utf8 normalise") {
 			{"\xE2\x80\x8B""те""\xE2\x80\x8B""ст", "тест", RSPAMD_UNICODE_NORM_ZERO_SPACES},
 			/* Special case of diacritic */
 			{"13_\u0020\u0308\u0301\u038e\u03ab", "13_ ̈́ΎΫ", RSPAMD_UNICODE_NORM_UNNORMAL},
+			// String containing a non-joiner character
+			{ "س\u200Cت", "ست", RSPAMD_UNICODE_NORM_ZERO_SPACES },
+			// String containing a soft hyphen
+			{ "in\u00ADter\u00ADest\u00ADing", "interesting", RSPAMD_UNICODE_NORM_ZERO_SPACES },
+			// String with ligature
+			{ "fish", "fish", RSPAMD_UNICODE_NORM_UNNORMAL },
+			// String with accented characters and zero-width spaces
+			{ "café\u200Blatté\u200C", "cafélatté", RSPAMD_UNICODE_NORM_ZERO_SPACES },
 			/* Same with zw spaces */
 			{"13\u200C_\u0020\u0308\u0301\u038e\u03ab", "13_ ̈́ΎΫ",
 					RSPAMD_UNICODE_NORM_UNNORMAL|RSPAMD_UNICODE_NORM_ZERO_SPACES},
 			/* Buffer overflow case */
 			{"u\xC2\xC2\xC2\xC2\xC2\xC2""abcdef""abcdef", "u\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD",
 					RSPAMD_UNICODE_NORM_UNNORMAL|RSPAMD_UNICODE_NORM_ERROR},
+			// String with a mix of special characters, ligatures, and zero-width spaces
+			{ "fish\u200Bcafé\u200C\u200Dlatté\u200D\u00AD", "fishcafé\u200Dlatté\u200D", RSPAMD_UNICODE_NORM_UNNORMAL | RSPAMD_UNICODE_NORM_ZERO_SPACES },
+			// Empty string
+			{ "", "", RSPAMD_UNICODE_NORM_NORMAL},
 	};
 
 	for (const auto &c : cases) {


More information about the Commits mailing list