commit fb43ff3: [Test] Improve urls extraction tests
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Aug 29 12:56:06 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-08-29 13:47:44 +0100
URL: https://github.com/rspamd/rspamd/commit/fb43ff309cad5f01a99941d413ba1e91e60aa0ed
[Test] Improve urls extraction tests
---
test/lua/unit/lua_util.extract_specific_urls.lua | 245 +++++++++++++++++------
1 file changed, 189 insertions(+), 56 deletions(-)
diff --git a/test/lua/unit/lua_util.extract_specific_urls.lua b/test/lua/unit/lua_util.extract_specific_urls.lua
index c84a7ca8d..73491ecb3 100644
--- a/test/lua/unit/lua_util.extract_specific_urls.lua
+++ b/test/lua/unit/lua_util.extract_specific_urls.lua
@@ -1,14 +1,78 @@
-local msg
-
-context("Lua util - extract_specific_urls", function()
- local util = require 'lua_util'
- local mpool = require "rspamd_mempool"
- local fun = require "fun"
- local url = require "rspamd_url"
- local logger = require "rspamd_logger"
- local rspamd_util = require "rspamd_util"
- local rspamd_task = require "rspamd_task"
+local msg, msg_img
+local logger = require "rspamd_logger"
+local rspamd_util = require "rspamd_util"
+local rspamd_task = require "rspamd_task"
+local util = require 'lua_util'
+local mpool = require "rspamd_mempool"
+local fun = require "fun"
+local url = require "rspamd_url"
+
+--[=========[ ******************* message ******************* ]=========]
+msg = [[
+From: <>
+To: <nobody at example.com>
+Subject: test
+Content-Type: multipart/alternative;
+ boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"
+
+--_000_6be055295eab48a5af7ad4022f33e2d0_
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+Hello world
+
+
+--_000_6be055295eab48a5af7ad4022f33e2d0_
+Content-Type: text/html; charset="utf-8"
+
+<html><body>
+<a href="http://example.net">http://example.net</a>
+<a href="http://example1.net">http://example1.net</a>
+<a href="http://example2.net">http://example2.net</a>
+<a href="http://example3.net">http://example3.net</a>
+<a href="http://example4.net">http://example4.net</a>
+<a href="http://domain1.com">http://domain1.com</a>
+<a href="http://domain2.com">http://domain2.com</a>
+<a href="http://domain3.com">http://domain3.com</a>
+<a href="http://domain4.com">http://domain4.com</a>
+<a href="http://domain5.com">http://domain5.com</a>
+<a href="http://domain.com">http://example.net/</a>
+<img src="http://example5.org">hahaha</img>
+</html>
+]]
+msg_img = [[
+From: <>
+To: <nobody at example.com>
+Subject: test
+Content-Type: multipart/alternative;
+ boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"
+
+--_000_6be055295eab48a5af7ad4022f33e2d0_
+Content-Type: text/plain; charset="utf-8"
+Content-Transfer-Encoding: base64
+
+Hello world
+
+
+--_000_6be055295eab48a5af7ad4022f33e2d0_
+Content-Type: text/html; charset="utf-8"
+
+<html><body>
+<a href="http://example.net">http://example.net</a>
+<a href="http://domain.com">http://example.net</a>
+<img src="http://example5.org">hahaha</img>
+</html>
+]]
+
+local function prepare_actual_result(actual)
+ return fun.totable(fun.map(
+ function(u) return u:get_raw():gsub('^%w+://', '') end,
+ actual
+ ))
+end
+
+context("Lua util - extract_specific_urls plain", function()
local test_helper = require "rspamd_test_helper"
test_helper.init_url_parser()
@@ -74,13 +138,6 @@ context("Lua util - extract_specific_urls", function()
}
}
- local function prepare_actual_result(actual)
- return fun.totable(fun.map(
- function(u) return u:get_raw():gsub('^%w+://', '') end,
- actual
- ))
- end
-
local pool = mpool.create()
local function prepare_url_list(list)
@@ -148,7 +205,9 @@ context("Lua util - extract_specific_urls", function()
table.sort(expect)
assert_rspamd_table_eq({actual = actual_result, expect = expect})
end)
+end)
+context("Lua util - extract_specific_urls message", function()
--[[ ******************* kinda functional *************************************** ]]
local test_dir = string.gsub(debug.getinfo(1).source, "^@(.+/)[^/]+$", "%1")
@@ -175,25 +234,65 @@ context("Lua util - extract_specific_urls", function()
}
}
- test("extract_specific_urls - from email", function()
- local cfg = rspamd_util.config_from_ucl(config, "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS")
- assert_not_nil(cfg)
+ local cfg = rspamd_util.config_from_ucl(config, "INIT_URL,INIT_LIBS,INIT_SYMCACHE,INIT_VALIDATE,INIT_PRELOAD_MAPS")
+ local res,task = rspamd_task.load_from_string(msg, cfg)
+
+ if not res then
+ assert(false, "failed to load message")
+ end
+
+ if not task:process_message() then
+ assert(false, "failed to process message")
+ end
+
+ test("extract_specific_urls - from email 1 limit", function()
+ local actual = util.extract_specific_urls({
+ task = task,
+ limit = 1,
+ esld_limit = 1,
+ })
+
+ local actual_result = prepare_actual_result(actual)
+
+ --[[
+ local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result)
+ print(s) --]]
+
+ assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com"}})
+
+ end)
+ test("extract_specific_urls - from email 2 limit", function()
+ local actual = util.extract_specific_urls({
+ task = task,
+ limit = 2,
+ esld_limit = 1,
+ })
+
+ local actual_result = prepare_actual_result(actual)
- local expect = {"example.net", "domain.com"}
- local res,task = rspamd_task.load_from_string(msg, rspamd_config)
+ --[[
+ local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result)
+ print(s) --]]
+
+ assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com", "example.net"}})
+
+ end)
- if not res then
- assert_true(false, "failed to load message")
- end
+ res,task = rspamd_task.load_from_string(msg_img, rspamd_config)
- if not task:process_message() then
- assert_true(false, "failed to process message")
- end
+ if not res then
+ assert_true(false, "failed to load message")
+ end
+ if not task:process_message() then
+ assert_true(false, "failed to process message")
+ end
+ test("extract_specific_urls - from email image 1 limit", function()
local actual = util.extract_specific_urls({
task = task,
limit = 1,
esld_limit = 1,
+ need_images = false,
})
local actual_result = prepare_actual_result(actual)
@@ -205,37 +304,71 @@ context("Lua util - extract_specific_urls", function()
assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com"}})
end)
-end)
+ test("extract_specific_urls - from email image 2 limit", function()
+ local actual = util.extract_specific_urls({
+ task = task,
+ limit = 2,
+ esld_limit = 1,
+ need_images = false,
+ })
---[=========[ ******************* message ******************* ]=========]
-msg = [[
-From: <>
-To: <nobody at example.com>
-Subject: test
-Content-Type: multipart/alternative;
- boundary="_000_6be055295eab48a5af7ad4022f33e2d0_"
+ local actual_result = prepare_actual_result(actual)
---_000_6be055295eab48a5af7ad4022f33e2d0_
-Content-Type: text/plain; charset="utf-8"
-Content-Transfer-Encoding: base64
+ --[[
+ local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result)
+ print(s) --]]
-Hello world
+ assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com", "example.net"}})
+ end)
+ test("extract_specific_urls - from email image 3 limit, no images", function()
+ local actual = util.extract_specific_urls({
+ task = task,
+ limit = 3,
+ esld_limit = 1,
+ need_images = false,
+ })
---_000_6be055295eab48a5af7ad4022f33e2d0_
-Content-Type: text/html; charset="utf-8"
+ local actual_result = prepare_actual_result(actual)
-<html><body>
-<a href="http://example.net">http://example.net</a>
-<a href="http://example1.net">http://example1.net</a>
-<a href="http://example2.net">http://example2.net</a>
-<a href="http://example3.net">http://example3.net</a>
-<a href="http://example4.net">http://example4.net</a>
-<a href="http://domain1.com">http://domain1.com</a>
-<a href="http://domain2.com">http://domain2.com</a>
-<a href="http://domain3.com">http://domain3.com</a>
-<a href="http://domain4.com">http://domain4.com</a>
-<a href="http://domain5.com">http://domain5.com</a>
-<a href="http://domain.com">http://example.net/</a>
-</html>
-]]
+ --[[
+ local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result)
+ print(s) --]]
+
+ assert_rspamd_table_eq({actual = actual_result, expect = {"domain.com", "example.net"}})
+ end)
+ test("extract_specific_urls - from email image 3 limit, has images", function()
+ local actual = util.extract_specific_urls({
+ task = task,
+ limit = 3,
+ esld_limit = 1,
+ need_images = true,
+ })
+
+ local actual_result = prepare_actual_result(actual)
+
+ --[[
+ local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result)
+ print(s) --]]
+
+ assert_rspamd_table_eq({actual = actual_result,
+ expect = {"domain.com", "example.net", "example5.org"}})
+ end)
+ test("extract_specific_urls - from email image 2 limit, has images", function()
+ local actual = util.extract_specific_urls({
+ task = task,
+ limit = 2,
+ esld_limit = 1,
+ need_images = true,
+ })
+
+ local actual_result = prepare_actual_result(actual)
+
+ --[[
+ local s = logger.slog("case[%1] %2 =?= %3", i, expect, actual_result)
+ print(s) --]]
+
+ assert_rspamd_table_eq({actual = actual_result,
+ expect = {"domain.com", "example.net"}})
+ end)
+end)
More information about the Commits mailing list