commit cd08c88: [Minor] Fix performance issue with is_utf_outside_range
Miecio Za
miecio at miecio.net
Tue Mar 19 10:42:04 UTC 2019
Author: Miecio Za
Date: 2019-03-18 14:06:56 +0100
URL: https://github.com/rspamd/rspamd/commit/cd08c8845f6ea0bac789ea8a49f7d8537f598b7d
[Minor] Fix performance issue with is_utf_outside_range
Fix performance issue, add some checking and add a few tests
---
src/lua/lua_util.c | 67 +++++++++++++++++++++++++++++++------------
test/lua/unit/rspamd_util.lua | 67 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 116 insertions(+), 18 deletions(-)
diff --git a/src/lua/lua_util.c b/src/lua/lua_util.c
index 71d61da62..7c98a0989 100644
--- a/src/lua/lua_util.c
+++ b/src/lua/lua_util.c
@@ -22,6 +22,7 @@
#include "libmime/email_addr.h"
#include "libmime/content_type.h"
#include "libmime/mime_headers.h"
+#include "libutil/hash.h"
#include "linenoise.h"
#include <math.h>
#include <glob.h>
@@ -2458,6 +2459,12 @@ lua_util_is_utf_spoofed (lua_State *L)
uspoof_setChecks (spc_sgl,
USPOOF_INVISIBLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE,
&uc_err);
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err ("Cannot set proper checks for uspoof: %s", u_errorName (uc_err));
+ lua_pushboolean (L, false);
+ uspoof_close(spc);
+ return 1;
+ }
}
ret = uspoof_checkUTF8 (spc_sgl, s1, l1, NULL, &uc_err);
@@ -2533,28 +2540,52 @@ lua_util_is_utf_outside_range(lua_State *L)
guint32 range_start = lua_tointeger (L, 2);
guint32 range_end = lua_tointeger (L, 3);
- USpoofChecker *spc_sgl;
- USet * allowed_chars;
- UErrorCode uc_err = U_ZERO_ERROR;
+ static rspamd_lru_hash_t *validators;
+
+ if (validators == NULL) {
+ validators = rspamd_lru_hash_new(16, g_free, (GDestroyNotify)uspoof_close);
+ }
if (string_to_check) {
- spc_sgl = uspoof_open (&uc_err);
- if (uc_err != U_ZERO_ERROR) {
- msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
- lua_pushboolean (L, false);
- uspoof_close(spc_sgl);
- return 1;
- }
+ guint64 hash_key = (guint64)range_end << 32 || range_start;
+
+ USpoofChecker *validator = rspamd_lru_hash_lookup(validators, &hash_key, time(NULL));
+
+ UErrorCode uc_err = U_ZERO_ERROR;
+
+ if (validator == NULL) {
+ USet * allowed_chars;
+ guint64 * creation_hash_key = g_malloc(sizeof(guint64));
+ *creation_hash_key = hash_key;
+
+ validator = uspoof_open (&uc_err);
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err ("cannot init spoof checker: %s", u_errorName (uc_err));
+ lua_pushboolean (L, false);
+ uspoof_close(validator);
+ return 1;
+ }
+
+ allowed_chars = uset_openEmpty();
+ uset_addRange(allowed_chars, range_start, range_end);
+ uspoof_setAllowedChars(validator, allowed_chars, &uc_err);
+
+ uspoof_setChecks (validator,
+ USPOOF_CHAR_LIMIT | USPOOF_ANY_CASE, &uc_err);
- allowed_chars = uset_openEmpty();
- uset_addRange(allowed_chars, range_start, range_end);
- uspoof_setAllowedChars(spc_sgl, allowed_chars, &uc_err);
+ uset_close(allowed_chars);
+
+ if (uc_err != U_ZERO_ERROR) {
+ msg_err ("Cannot configure uspoof: %s", u_errorName (uc_err));
+ lua_pushboolean (L, false);
+ uspoof_close(validator);
+ return 1;
+ }
+
+ rspamd_lru_hash_insert(validators, creation_hash_key, validator, time(NULL), 0);
+ }
- uspoof_setChecks (spc_sgl,
- USPOOF_CHAR_LIMIT | USPOOF_ANY_CASE, &uc_err);
- ret = uspoof_checkUTF8 (spc_sgl, string_to_check, len_of_string, NULL, &uc_err);
- uset_close(allowed_chars);
- uspoof_close(spc_sgl);
+ ret = uspoof_checkUTF8 (validator, string_to_check, len_of_string, NULL, &uc_err);
}
else {
return luaL_error (L, "invalid arguments");
diff --git a/test/lua/unit/rspamd_util.lua b/test/lua/unit/rspamd_util.lua
new file mode 100644
index 000000000..802b400d2
--- /dev/null
+++ b/test/lua/unit/rspamd_util.lua
@@ -0,0 +1,67 @@
+context("Rspamd util for lua - check generic functions", function()
+  local util = require 'rspamd_util'
+  -- Each case: input string, code point range to check against, expected return value.
+  local cases = {
+    {
+      input = "test1",
+      result = false,
+      range_start = 0x0000,
+      range_end = 0x017f
+    },
+    {
+      input = "test test xxx",
+      result = false,
+      range_start = 0x0000,
+      range_end = 0x017f
+    },
+    {
+      input = "АбЫрвАлг",
+      result = true,
+      range_start = 0x0000,
+      range_end = 0x017f
+    },
+    {
+      input = "АбЫрвАлг example",
+      result = true,
+      range_start = 0x0000,
+      range_end = 0x017f
+    },
+    {
+      input = "example ąłśćżłóę",
+      result = false,
+      range_start = 0x0000,
+      range_end = 0x017f
+    },
+    {
+      input = "ąłśćżłóę АбЫрвАлг",
+      result = true,
+      range_start = 0x0000,
+      range_end = 0x017f
+    },
+  }
+
+  for i, c in ipairs(cases) do
+    test("is_utf_outside_range, test case #" .. i, function()
+      local actual = util.is_utf_outside_range(c.input, c.range_start, c.range_end)
+
+      assert_equal(c.result, actual)
+    end)
+  end
+  -- Smoke test: request more distinct ranges than the 16-entry validator LRU cache holds.
+  test("is_utf_outside_range, check cache", function ()
+    local cache_size = 20 -- local on purpose: a bare assignment would leak a global
+    for i = 1, cache_size do
+      util.is_utf_outside_range("a", 0x0000, 0x0000 + i) -- result intentionally ignored
+    end
+  end)
+  -- Called with no arguments at all, so the binding must raise "invalid arguments".
+  test("is_utf_outside_range, check empty string", function ()
+    assert_error(util.is_utf_outside_range)
+  end)
+
+  test("get_string_stats, test case", function()
+    local res = util.get_string_stats("this is test 99")
+    assert_equal(10, res["letters"]) -- expected value first, as in the cases loop
+    assert_equal(2, res["digits"])
+  end)
+end)
More information about the Commits
mailing list