commit fba84f7: [Minor] Lua_util: Add lower_utf8 utility

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Sep 13 15:00:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-09-13 15:55:44 +0100
URL: https://github.com/rspamd/rspamd/commit/fba84f7f415307fdc3df3efd60ec8b910e888ef5 (HEAD -> master)

[Minor] Lua_util: Add lower_utf8 utility
Issue: #3036

---
 src/lua/lua_util.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 7 deletions(-)

diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index c25d20471..f86f975e7 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -233,6 +233,14 @@ LUA_FUNCTION_DEF (util, parse_mail_address);
  */
 LUA_FUNCTION_DEF (util, strlen_utf8);
 
+/***
+ * @function util.lower_utf8(str)
+ * Converts utf8 string to lower case
+ * @param {string} str utf8 encoded string
+ * @return {string} lowercased utf8 string
+ */
+LUA_FUNCTION_DEF (util, lower_utf8);
+
 
 /***
  * @function util.strcasecmp(str1, str2)
@@ -636,6 +644,7 @@ static const struct luaL_reg utillib_f[] = {
 	LUA_INTERFACE_DEF (util, glob),
 	LUA_INTERFACE_DEF (util, parse_mail_address),
 	LUA_INTERFACE_DEF (util, strlen_utf8),
+	LUA_INTERFACE_DEF (util, lower_utf8),
 	LUA_INTERFACE_DEF (util, strcasecmp_ascii),
 	LUA_INTERFACE_DEF (util, strequal_caseless),
 	LUA_INTERFACE_DEF (util, get_ticks),
@@ -1680,21 +1689,53 @@ static gint
 lua_util_strlen_utf8 (lua_State *L)
 {
 	LUA_TRACE_POINT;
-	const gchar *str, *end;
+	const gchar *str;
 	gsize len;
 
 	str = lua_tolstring (L, 1, &len);
 
 	if (str) {
-		if (g_utf8_validate (str, len, &end)) {
-			len = g_utf8_strlen (str, len);
+		gint32 i = 0, nchars = 0;
+		UChar32 uc;
+
+		while (i < len) {
+			U8_NEXT ((guint8 *) str, i, len, uc);
+			nchars ++;
 		}
-		else if (end != NULL && end > str) {
-			len = (g_utf8_strlen (str, end - str)) /* UTF part */
-					+ (len - (end - str)) /* raw part */;
+
+		lua_pushinteger (L, nchars);
+	}
+	else {
+		return luaL_error (L, "invalid arguments");
+	}
+
+	return 1;
+}
+
+static gint
+lua_util_lower_utf8 (lua_State *L)
+{
+	LUA_TRACE_POINT;
+	const gchar *str;
+	gchar *dst;
+	gsize len;
+	UChar32 uc;
+	UBool err = 0;
+	gint32 i = 0, j = 0;
+
+	str = lua_tolstring (L, 1, &len);
+
+	if (str) {
+		dst = g_malloc (len);
+
+		while (i < len && err == 0) {
+			U8_NEXT ((guint8 *) str, i, len, uc);
+			uc = u_tolower (uc);
+			U8_APPEND (dst, j, len, uc, err);
 		}
 
-		lua_pushinteger (L, len);
+		lua_pushlstring (L, dst, j);
+		g_free (dst);
 	}
 	else {
 		return luaL_error (L, "invalid arguments");


More information about the Commits mailing list