commit ccf4d5d: [Test] Add tests for urls extraction
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Jul 13 13:14:05 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-07-13 13:57:13 +0100
URL: https://github.com/rspamd/rspamd/commit/ccf4d5d646a4cf1cd29ca9c6c5010c087e8ebe72
[Test] Add tests for urls extraction
---
src/libserver/html/html.cxx | 3 +++
src/libserver/html/html_tests.cxx | 32 ++++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index d061f7726..bd323b43f 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1331,6 +1331,9 @@ html_process_input(rspamd_mempool_t *pool,
url->count++;
}
}
+ if (part_urls) {
+ g_ptr_array_add(part_urls, url);
+ }
href_offset = hc->parsed.size();
}
diff --git a/src/libserver/html/html_tests.cxx b/src/libserver/html/html_tests.cxx
index 323858d71..07618b273 100644
--- a/src/libserver/html/html_tests.cxx
+++ b/src/libserver/html/html_tests.cxx
@@ -217,6 +217,38 @@ TEST_CASE("html text extraction")
rspamd_mempool_delete(pool);
}
+TEST_CASE("html urls extraction")
+{
+	/* Each case: HTML input and the URLs expected in part_urls, in order */
+	const std::vector<std::pair<std::string, std::vector<std::string>>> cases{
+		{"<a href=\"https://example.com\">test</a>", {"https://example.com"}}
+	};
+	rspamd_url_init(nullptr);
+	auto *pool = rspamd_mempool_new(rspamd_mempool_suggest_size(), "html", 0);
+	auto i = 1;
+	for (const auto &c : cases) {
+		/* Advance the counter per case so every subcase gets its own name */
+		const auto name = fmt::format("html url extraction case {}", i++);
+		SUBCASE(name.c_str()) {
+			GPtrArray *purls = g_ptr_array_new();
+			GByteArray *tmp = g_byte_array_sized_new(c.first.size());
+			g_byte_array_append(tmp, reinterpret_cast<const guint8 *>(c.first.data()), c.first.size());
+			auto *hc = html_process_input(pool, tmp, nullptr, nullptr, purls, true);
+			CHECK(hc != nullptr);
+			CHECK(c.second.size() == purls->len);
+			/* CHECK does not abort, so never index past the end of purls */
+			for (guint j = 0; j < c.second.size() && j < purls->len; ++j) {
+				auto *url = static_cast<rspamd_url *>(g_ptr_array_index(purls, j));
+				CHECK(c.second[j] == std::string{url->string, url->urllen});
+			}
+			g_byte_array_free(tmp, TRUE);
+			g_ptr_array_free(purls, TRUE);
+		}
+	}
+
+	rspamd_mempool_delete(pool);
+}
+
}
} /* namespace rspamd::html */
More information about the Commits
mailing list