commit fffd9e6: [Test] Improve tests

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Nov 15 18:56:13 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-11-15 18:51:24 +0000
URL: https://github.com/rspamd/rspamd/commit/fffd9e6f1084f425a9fcda67cad90c2f8a04f264

[Test] Improve tests

---
 test/lua/unit/base64.lua |   2 +-
 test/lua/unit/utf.lua    | 130 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/test/lua/unit/base64.lua b/test/lua/unit/base64.lua
index dcf235791..43606e91e 100644
--- a/test/lua/unit/base64.lua
+++ b/test/lua/unit/base64.lua
@@ -12,7 +12,7 @@ context("Base64 encoding", function()
     void g_free(void *ptr);
     int memcmp(const void *a1, const void *a2, size_t len);
     double base64_test (bool generic, size_t niters, size_t len, size_t str_len);
-    double rspamd_get_ticks (void);
+    double rspamd_get_ticks (int);
   ]]
 
   ffi.C.rspamd_cryptobox_init()
diff --git a/test/lua/unit/utf.lua b/test/lua/unit/utf.lua
index 2d2c77f67..34217afa4 100644
--- a/test/lua/unit/utf.lua
+++ b/test/lua/unit/utf.lua
@@ -5,6 +5,13 @@ context("UTF8 check functions", function()
   ffi.cdef[[
     unsigned int rspamd_str_lc_utf8 (char *str, unsigned int size);
     unsigned int rspamd_str_lc (char *str, unsigned int size);
+    void rspamd_fast_utf8_library_init (unsigned flags);
+    void ottery_rand_bytes(void *buf, size_t n);
+    double rspamd_get_ticks(int allow);
+    size_t rspamd_fast_utf8_validate (const unsigned char *data, size_t len);
+    size_t rspamd_fast_utf8_validate_ref (const unsigned char *data, size_t len);
+    size_t rspamd_fast_utf8_validate_sse41 (const unsigned char *data, size_t len);
+    size_t rspamd_fast_utf8_validate_avx2 (const unsigned char *data, size_t len);
     char * rspamd_str_make_utf_valid (const char *src, size_t slen, size_t *dstlen, void *);
   ]]
 
@@ -69,4 +76,127 @@ context("UTF8 check functions", function()
       assert_equal(s, c[2])
     end)
   end
+
+  -- Switch on the sse and avx2 code paths (flags = 3)
+  ffi.C.rspamd_fast_utf8_library_init(3)
+  local valid_cases = {
+    "a", -- 1-byte (ASCII)
+    "\xc3\xb1", -- 2-byte sequence
+    "\xe2\x82\xa1", -- 3-byte sequence
+    "\xf0\x90\x8c\xbc", -- 4-byte sequence
+    "안녕하세요, 세상" -- Korean text: multi-byte Hangul syllables plus ASCII
+  }
+  for idx, sample in ipairs(valid_cases) do
+    test("Unicode validate success: " .. tostring(idx), function()
+      local nbytes = #sample
+      local cbuf = ffi.new("char[?]", nbytes + 1) -- +1: ffi.copy appends a NUL
+      ffi.copy(cbuf, sample)
+      local rc = ffi.C.rspamd_fast_utf8_validate(cbuf, nbytes)
+      assert_equal(rc, 0)
+    end)
+  end
+  local invalid_cases = {
+    "\xc3\x28", -- 2-byte lead, then a non-continuation byte
+    "\xa0\xa1", -- continuation bytes without a lead byte
+    "\xe2\x28\xa1", -- 3-byte lead, bad 2nd byte
+    "\xe2\x82\x28", -- 3-byte lead, bad 3rd byte
+    "\xf0\x28\x8c\xbc", -- 4-byte lead, bad 2nd byte
+    "\xf0\x90\x28\xbc", -- 4-byte lead, bad 3rd byte
+    "\xf0\x28\x8c\x28", -- 4-byte lead, bad 2nd and 4th bytes
+    "\xc0\x9f", -- overlong encoding (0xc0 lead)
+    "\xf5\xff\xff\xff", -- 0xf5 lead would exceed U+10FFFF; 0xff never occurs in UTF-8
+    "\xed\xa0\x81", -- encodes U+D801, a UTF-16 surrogate
+    "\xf8\x90\x80\x80\x80", -- 5-byte form, not allowed by RFC 3629
+    "123456789012345\xed", -- truncated 3-byte sequence at byte 16
+    "123456789012345\xf1", -- truncated 4-byte sequence at byte 16
+    "123456789012345\xc2", -- truncated 2-byte sequence at byte 16
+    "\xC2\x7F" -- 2-byte lead, then 0x7f instead of a continuation byte
+  }
+  for idx, sample in ipairs(invalid_cases) do
+    test("Unicode validate fail: " .. tostring(idx), function()
+      local nbytes = #sample
+      local cbuf = ffi.new("char[?]", nbytes + 1) -- +1: ffi.copy appends a NUL
+      ffi.copy(cbuf, sample)
+      local rc = ffi.C.rspamd_fast_utf8_validate(cbuf, nbytes)
+      assert_not_equal(rc, 0)
+    end)
+  end
+
+  local speed_iters = 10000
+  -- Times speed_iters runs of one validator over a buflen-byte buffer and logs
+  -- the average tick counts. impl is 'ref' or 'sse'; anything else selects avx2.
+  -- is_valid picks the sample: valid_cases only, or valid plus invalid_cases.
+  -- Always returns 0; the validators' results are not checked here.
+  local function test_size(buflen, is_valid, impl)
+    local logger = require "rspamd_logger"
+    local test_str = table.concat(valid_cases)
+    if not is_valid then
+      test_str = test_str .. table.concat(invalid_cases)
+    end
+
+    -- Fill with whole copies of the sample only, so a valid sample is never cut
+    -- mid-sequence; the tail stays zero because ffi.new zero-fills the array.
+    local buf = ffi.new("char[?]", buflen)
+    test_str = test_str:rep(math.max(1, math.floor(buflen / #test_str)))
+    -- Explicit length: ffi.copy(dst, str) also writes a trailing NUL, which ran
+    -- one byte past buf when the sample was at least buflen bytes long.
+    ffi.copy(buf, test_str, math.min(#test_str, buflen))
+
+    local tm = 0
+
+    -- The impl branch is taken before t1 is read, so only the validator call
+    -- itself falls inside each measured t1..t2 window.
+    for _=1,speed_iters do
+      if impl == 'ref' then
+        local t1 = ffi.C.rspamd_get_ticks(1)
+        ffi.C.rspamd_fast_utf8_validate_ref(buf, buflen)
+        local t2 = ffi.C.rspamd_get_ticks(1)
+        tm = tm + (t2 - t1)
+      elseif impl == 'sse' then
+        local t1 = ffi.C.rspamd_get_ticks(1)
+        ffi.C.rspamd_fast_utf8_validate_sse41(buf, buflen)
+        local t2 = ffi.C.rspamd_get_ticks(1)
+        tm = tm + (t2 - t1)
+      else
+        local t1 = ffi.C.rspamd_get_ticks(1)
+        ffi.C.rspamd_fast_utf8_validate_avx2(buf, buflen)
+        local t2 = ffi.C.rspamd_get_ticks(1)
+        tm = tm + (t2 - t1)
+      end
+    end
+
+    logger.messagex("%s utf8 %s check (valid = %s): %s ticks per iter, %s ticks per byte",
+        impl, buflen, is_valid,
+        tm / speed_iters, tm / speed_iters / buflen)
+
+    return 0
+  end
+
+  -- Benchmark every implementation on a small, a medium and a large buffer,
+  -- once with valid-only input and once with invalid sequences mixed in.
+  local title_fmt = "Utf8 test %s %d buffer, %s"
+  for _, nbytes in ipairs({78, 512, 65535}) do
+    -- reference implementation
+    test(title_fmt:format('ref', nbytes, 'valid'), function()
+      assert_equal(test_size(nbytes, true, 'ref'), 0)
+    end)
+    test(title_fmt:format('ref', nbytes, 'invalid'), function()
+      assert_equal(test_size(nbytes, false, 'ref'), 0)
+    end)
+    -- sse implementation
+    test(title_fmt:format('sse', nbytes, 'valid'), function()
+      assert_equal(test_size(nbytes, true, 'sse'), 0)
+    end)
+    test(title_fmt:format('sse', nbytes, 'invalid'), function()
+      assert_equal(test_size(nbytes, false, 'sse'), 0)
+    end)
+    -- avx2 implementation
+    test(title_fmt:format('avx2', nbytes, 'valid'), function()
+      assert_equal(test_size(nbytes, true, 'avx2'), 0)
+    end)
+    test(title_fmt:format('avx2', nbytes, 'invalid'), function()
+      assert_equal(test_size(nbytes, false, 'avx2'), 0)
+    end)
+  end
+
 end)
\ No newline at end of file


More information about the Commits mailing list