commit a2eb042: [Minor] Strip visible parts of urls using utf rules
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri May 14 16:00:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-05-14 16:59:30 +0100
URL: https://github.com/rspamd/rspamd/commit/a2eb042dcd36228b9e0a6d1417c54032489d91ff (HEAD -> master)
[Minor] Strip visible parts of urls using utf rules
---
src/libserver/html.c | 39 +++++++++++++++++++++++++++++++++++++--
1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 326c8facc..30c2c022b 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -2617,8 +2617,43 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
rspamd_strlcpy (url->visible_part, dest->data + href_offset,
dest->len - href_offset + 1);
dlen = dest->len - href_offset;
- url->visible_part =
- (gchar *)rspamd_string_len_strip (url->visible_part, &dlen, " \t\v\r\n");
+
+ /* Strip unicode spaces from the start and the end */
+ gchar *p = url->visible_part, *end = url->visible_part + dlen;
+ gint i = 0;
+
+ while (i < dlen) {
+ UChar32 uc;
+ gint prev_i = i;
+
+ U8_NEXT(p, i, dlen, uc);
+
+ if (!u_isspace (uc)) {
+ i = prev_i;
+ break;
+ }
+ }
+
+ p += i;
+ dlen -= i;
+ url->visible_part = p;
+ i = end - url->visible_part - 1;
+
+ if (i > 0) {
+ gint32 dl = dlen;
+
+ while (i > 0) {
+ UChar32 uc;
+
+ U8_PREV(p, i, dl, uc);
+
+ if (!u_isspace (uc)) {
+ break;
+ }
+ }
+
+ dlen = i;
+ }
rspamd_html_url_is_phished (pool, url,
More information about the Commits mailing list