commit 10bb08d: [Minor] Various fixes for display link detection
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Mar 5 17:21:07 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-03-05 16:50:48 +0000
URL: https://github.com/rspamd/rspamd/commit/10bb08dd2d8484d0d1d2ae507b94aaa24f48b61b
[Minor] Various fixes for display link detection
---
src/controller.c | 4 ++--
src/libserver/html.c | 24 ++++++++++++++++++------
src/libserver/http/http_router.c | 2 +-
src/libserver/http/http_util.c | 2 +-
src/libserver/http/http_util.h | 2 +-
src/libserver/url.c | 2 +-
src/libutil/str_util.c | 2 +-
src/libutil/str_util.h | 2 +-
8 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/src/controller.c b/src/controller.c
index 174382879..0ecaf860d 100644
--- a/src/controller.c
+++ b/src/controller.c
@@ -2764,7 +2764,7 @@ rspamd_controller_handle_custom (struct rspamd_http_connection_entry *conn_ent,
http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
if (u.field_set & (1 << UF_PATH)) {
- guint unnorm_len;
+ gsize unnorm_len;
lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
lookup.len = u.field_data[UF_PATH].len;
@@ -2971,7 +2971,7 @@ rspamd_controller_handle_lua_plugin (struct rspamd_http_connection_entry *conn_e
http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
if (u.field_set & (1 << UF_PATH)) {
- guint unnorm_len;
+ gsize unnorm_len;
lookup.begin = msg->url->str + u.field_data[UF_PATH].off;
lookup.len = u.field_data[UF_PATH].len;
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 5b3aafca0..401c55f31 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1452,7 +1452,8 @@ rspamd_html_process_url (rspamd_mempool_t *pool, const gchar *start, guint len,
gsize decoded_len;
const gchar *p, *s, *prefix = "http://";
gchar *d;
- guint i, dlen;
+ guint i;
+ gsize dlen;
gboolean has_bad_chars = FALSE, no_prefix = FALSE;
static const gchar hexdigests[16] = "0123456789abcdef";
@@ -2588,8 +2589,11 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
struct rspamd_url *turl;
gboolean url_found = FALSE;
struct rspamd_process_exception *ex;
+ enum rspamd_normalise_result norm_res;
+ guint saved_flags = 0;
+ gsize dlen;
- if (href_offset <= 0) {
+ if (href_offset < 0) {
/* No displayed url, just some text within <a> tag */
return;
}
@@ -2597,15 +2601,23 @@ rspamd_html_check_displayed_url (rspamd_mempool_t *pool,
url->visible_part = rspamd_mempool_alloc (pool, dest->len - href_offset + 1);
rspamd_strlcpy (url->visible_part, dest->data + href_offset,
dest->len - href_offset + 1);
- g_strstrip (url->visible_part);
+ dlen = dest->len - href_offset;
+ url->visible_part =
+ (gchar *)rspamd_string_len_strip (url->visible_part, &dlen, " \t\v\r\n");
+
+ norm_res = rspamd_normalise_unicode_inplace (pool, url->visible_part, &dlen);
+
+ if (norm_res & RSPAMD_UNICODE_NORM_UNNORMAL) {
+ saved_flags |= RSPAMD_URL_FLAG_UNNORMALISED;
+ }
rspamd_html_url_is_phished (pool, url,
- dest->data + href_offset,
- dest->len - href_offset,
+ url->visible_part,
+ dlen,
&url_found, &displayed_url);
if (url_found) {
- url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
+ url->flags |= saved_flags|RSPAMD_URL_FLAG_DISPLAY_URL;
}
if (exceptions && url_found) {
diff --git a/src/libserver/http/http_router.c b/src/libserver/http/http_router.c
index a5b960e72..960df0ce3 100644
--- a/src/libserver/http/http_router.c
+++ b/src/libserver/http/http_router.c
@@ -291,7 +291,7 @@ rspamd_http_router_finish_handler (struct rspamd_http_connection *conn,
http_parser_parse_url (msg->url->str, msg->url->len, TRUE, &u);
if (u.field_set & (1 << UF_PATH)) {
- guint unnorm_len;
+ gsize unnorm_len;
pathbuf = g_malloc (u.field_data[UF_PATH].len);
memcpy (pathbuf, msg->url->str + u.field_data[UF_PATH].off,
diff --git a/src/libserver/http/http_util.c b/src/libserver/http/http_util.c
index ec9d9fa58..fd5adb3c1 100644
--- a/src/libserver/http/http_util.c
+++ b/src/libserver/http/http_util.c
@@ -302,7 +302,7 @@ rspamd_http_date_format (gchar *buf, gsize len, time_t time)
}
void
-rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen)
+rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen)
{
const gchar *p, *end, *slash = NULL, *dot = NULL;
gchar *o;
diff --git a/src/libserver/http/http_util.h b/src/libserver/http/http_util.h
index 7a22ffb16..19b497f30 100644
--- a/src/libserver/http/http_util.h
+++ b/src/libserver/http/http_util.h
@@ -47,7 +47,7 @@ glong rspamd_http_date_format (gchar *buf, gsize len, time_t time);
* @param len
* @param nlen
*/
-void rspamd_http_normalize_path_inplace (gchar *path, guint len, guint *nlen);
+void rspamd_http_normalize_path_inplace (gchar *path, guint len, gsize *nlen);
#ifdef __cplusplus
}
diff --git a/src/libserver/url.c b/src/libserver/url.c
index d83c1988f..a5de7ebdf 100644
--- a/src/libserver/url.c
+++ b/src/libserver/url.c
@@ -2166,7 +2166,7 @@ rspamd_url_parse (struct rspamd_url *uri,
gchar *p;
const gchar *end;
guint i, complen, ret, flags = 0;
- guint unquoted_len = 0;
+ gsize unquoted_len = 0;
memset (uri, 0, sizeof (*uri));
memset (&u, 0, sizeof (u));
diff --git a/src/libutil/str_util.c b/src/libutil/str_util.c
index 5a44ed311..00774d588 100644
--- a/src/libutil/str_util.c
+++ b/src/libutil/str_util.c
@@ -3023,7 +3023,7 @@ rspamd_get_unicode_normalizer (void)
enum rspamd_normalise_result
rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool, gchar *start,
- guint *len)
+ gsize *len)
{
#if U_ICU_VERSION_MAJOR_NUM >= 44
UErrorCode uc_err = U_ZERO_ERROR;
diff --git a/src/libutil/str_util.h b/src/libutil/str_util.h
index 0e66d0ed1..427d6b94e 100644
--- a/src/libutil/str_util.h
+++ b/src/libutil/str_util.h
@@ -491,7 +491,7 @@ enum rspamd_normalise_result {
* @return TRUE if a string has been normalised
*/
enum rspamd_normalise_result rspamd_normalise_unicode_inplace (rspamd_mempool_t *pool,
- gchar *start, guint *len);
+ gchar *start, gsize *len);
enum rspamd_regexp_escape_flags {
RSPAMD_REGEXP_ESCAPE_ASCII = 0,
More information about the Commits
mailing list