commit 7085305: [Fix] Find suspicious url encodings that could break url extraction
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Nov 17 21:56:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-11-17 21:48:00 +0000
URL: https://github.com/rspamd/rspamd/commit/708530514c4656b84bdbcf700084de011b8aaa04
[Fix] Find suspicious url encodings that could break url extraction
---
src/libserver/html/html_url.cxx | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx
index d073a25d9..8f721b3e8 100644
--- a/src/libserver/html/html_url.cxx
+++ b/src/libserver/html/html_url.cxx
@@ -376,6 +376,8 @@ html_process_url(rspamd_mempool_t *pool, std::string_view &input)
/*
* We also need to remove all internal newlines, spaces
* and encode unsafe characters
+ * Another obfuscation found in the wild is the percent-encoding of SAFE url
+ * characters, including essential ones (e.g. '@', ':', '/', '?')
*/
for (auto i = 0; i < sz; i++) {
if (G_UNLIKELY (g_ascii_isspace(s[i]))) {
@@ -388,6 +390,43 @@ html_process_url(rspamd_mempool_t *pool, std::string_view &input)
*d++ = hexdigests[s[i] & 0xf];
has_bad_chars = TRUE;
}
+ else if (G_UNLIKELY (s[i] == '%')) {
+ if (i + 2 < sz) {
+ auto [c1, c2] = std::tuple(s[i + 1], s[i + 2]);
+
+ if (g_ascii_isxdigit(c1) && g_ascii_isxdigit(c2)) {
+ auto codepoint = 0;
+
+ if (c1 >= '0' && c1 <= '9') codepoint = c1 - '0';
+ else if (c1 >= 'A' && c1 <= 'F') codepoint = c1 - 'A' + 10;
+ else if (c1 >= 'a' && c1 <= 'f') codepoint = c1 - 'a' + 10;
+
+ codepoint <<= 4;
+
+ if (c2 >= '0' && c2 <= '9') codepoint += c2 - '0';
+ else if (c2 >= 'A' && c2 <= 'F') codepoint += c2 - 'A' + 10;
+ else if (c2 >= 'a' && c2 <= 'f') codepoint += c2 - 'a' + 10;
+
+ /* Now check for 'interesting' codepoints */
+ if (codepoint == '@' || codepoint == ':' || codepoint == '|' ||
+ codepoint == '?' || codepoint == '\\' || codepoint == '/') {
+ /* Replace it back */
+ *d++ = (char)(codepoint & 0xff);
+ i += 2;
+ has_bad_chars = TRUE;
+ }
+ else {
+ *d++ = s[i];
+ }
+ }
+ else {
+ *d++ = s[i];
+ }
+ }
+ else {
+ *d++ = s[i];
+ }
+ }
else {
*d++ = s[i];
}
More information about the Commits
mailing list