commit 9bd9290: [Feature] URL: Apply stringprep to hostnames to filter garbage
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon May 13 16:42:03 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-05-13 17:40:10 +0100
URL: https://github.com/rspamd/rspamd/commit/9bd929050d737c61f0af5ae4c35faa181aecf20c (HEAD -> master)
[Feature] URL: Apply stringprep to hostnames to filter garbage
---
src/libserver/url.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
diff --git a/src/libserver/url.c b/src/libserver/url.c
index b26bad6c6..36c9a157a 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -49,6 +49,8 @@
#include "contrib/http-parser/http_parser.h"
#include <unicode/utf8.h>
#include <unicode/uchar.h>
+#include <unicode/usprep.h>
+#include <unicode/ucnv.h>
typedef struct url_match_s {
const gchar *m_begin;
@@ -1985,6 +1987,53 @@ rspamd_url_parse (struct rspamd_url *uri,
rspamd_url_shift (uri, unquoted_len, UF_HOST);
+ /* Apply nameprep algorithm */
+ static UStringPrepProfile *nameprep = NULL;
+ UErrorCode uc_err = U_ZERO_ERROR;
+
+ if (nameprep == NULL) {
+ /* Open and cache profile */
+ nameprep = usprep_openByType (USPREP_RFC3491_NAMEPREP, &uc_err);
+
+ g_assert (U_SUCCESS (uc_err));
+ }
+
+ UChar *utf16_hostname, *norm_utf16;
+ gint32 utf16_len, norm_utf16_len, norm_utf8_len;
+
+ utf16_hostname = rspamd_mempool_alloc (pool, uri->hostlen * sizeof (UChar));
+ struct UConverter *utf8_conv = rspamd_get_utf8_converter ();
+
+ utf16_len = ucnv_toUChars (utf8_conv, utf16_hostname, uri->hostlen,
+ uri->host, uri->hostlen, &uc_err);
+
+ if (!U_SUCCESS (uc_err)) {
+
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
+ norm_utf16 = rspamd_mempool_alloc (pool, utf16_len * sizeof (UChar));
+ norm_utf16_len = usprep_prepare (nameprep, utf16_hostname, utf16_len,
+ norm_utf16, utf16_len, USPREP_DEFAULT, NULL, &uc_err);
+
+ if (!U_SUCCESS (uc_err)) {
+
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
+ /* Convert back to utf8, sigh... */
+ norm_utf8_len = ucnv_fromUChars (utf8_conv, uri->host, uri->hostlen,
+ norm_utf16, norm_utf16_len, &uc_err);
+
+ if (!U_SUCCESS (uc_err)) {
+
+ return URI_ERRNO_BAD_FORMAT;
+ }
+
+ /* Final shift of lengths */
+ rspamd_url_shift (uri, norm_utf8_len, UF_HOST);
+
+ /* Process data part */
if (uri->datalen) {
unquoted_len = rspamd_url_decode (uri->data, uri->data, uri->datalen);
if (rspamd_normalise_unicode_inplace (pool, uri->data, &unquoted_len)) {
More information about the Commits mailing list