commit fff6f70: Merge pull request #2771 from miecio45/feat_url_visible_part
GitHub
noreply at github.com
Sat Mar 9 14:21:07 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-03-09 14:18:06 +0000
URL: https://github.com/rspamd/rspamd/commit/fff6f70c33bbf080301230cbecb6aa00bbb6c5b3 (HEAD -> master)
Merge pull request #2771 from miecio45/feat_url_visible_part
[Minor] Export url visible part to lua and add new url flag
src/libserver/html.c | 26 ++++++++++++++++++++++++++
src/libserver/url.h | 3 +++
src/lua/lua_url.c | 25 +++++++++++++++++++++++++
3 files changed, 54 insertions(+)
diff --combined src/libserver/html.c
index 63638d28b,fef769ec1..c831dc14e
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@@ -664,8 -664,7 +664,8 @@@ rspamd_html_url_is_phished (rspamd_memp
}
}
#endif
- if (rspamd_ftok_casecmp (&disp_tok, &href_tok) != 0) {
+ if (rspamd_ftok_casecmp (&disp_tok, &href_tok) != 0 &&
+ text_url->tldlen > 0 && href_url->tldlen > 0) {
/* Apply the same logic for TLD */
disp_tok.len = text_url->tldlen;
@@@ -1350,8 -1349,7 +1350,8 @@@ rspamd_html_process_url (rspamd_mempool
if (rspamd_substring_search (start, len, "://", 3) == -1) {
if (len >= sizeof ("mailto:") &&
(memcmp (start, "mailto:", sizeof ("mailto:") - 1) == 0 ||
- memcmp (start, "tel:", sizeof ("tel:") - 1) == 0)) {
+ memcmp (start, "tel:", sizeof ("tel:") - 1) == 0 ||
+ memcmp (start, "callto:", sizeof ("callto:") - 1) == 0)) {
/* Exclusion, has valid but 'strange' prefix */
}
else {
@@@ -1441,9 -1439,7 +1441,9 @@@
rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
- if (rc == URI_ERRNO_OK) {
+ /* Filter some completely damaged urls */
+ if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
+ !((url->flags & RSPAMD_URL_FLAG_OBSCURED) && (url->protocol & PROTOCOL_UNKNOWN))) {
url->flags |= saved_flags;
if (has_bad_chars) {
@@@ -2377,11 -2373,37 +2377,37 @@@ rspamd_html_check_displayed_url (rspamd
return;
}
+ url->visible_part = rspamd_mempool_alloc0(pool, dest->len - href_offset+1);
+ gchar *current_processed_char = dest->data + href_offset;
+ gchar *current_char_in_struct = url->visible_part;
+ gboolean previous_char_was_space = false;
+
+ while (current_processed_char < (gchar*) dest->data + dest->len) {
+ if (g_ascii_isspace(*current_processed_char)) {
+ if (previous_char_was_space) {
+ current_processed_char++;
+ continue;
+ }
+ previous_char_was_space = true;
+ *current_char_in_struct = ' ';
+ } else {
+ *current_char_in_struct = *current_processed_char;
+ previous_char_was_space = false;
+ }
+ current_char_in_struct++;
+ current_processed_char++;
+ }
+ *current_char_in_struct = '\0';
+ url->visible_partlen = current_char_in_struct - url->visible_part;
+
rspamd_html_url_is_phished (pool, url,
dest->data + href_offset,
dest->len - href_offset,
&url_found, &displayed_url);
+ if (url_found) {
+ url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
+ }
if (exceptions && url_found) {
ex = rspamd_mempool_alloc (pool,
sizeof (*ex));
diff --combined src/libserver/url.h
index 3deeb8cf5,1a117c450..ad09c33ec
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@@ -28,6 -28,7 +28,7 @@@ enum rspamd_url_flags
RSPAMD_URL_FLAG_SCHEMALESS = 1 << 15,
RSPAMD_URL_FLAG_UNNORMALISED = 1 << 16,
RSPAMD_URL_FLAG_ZW_SPACES = 1 << 17,
+ RSPAMD_URL_FLAG_DISPLAY_URL = 1 << 18,
};
struct rspamd_url_tag {
@@@ -48,6 -49,7 +49,7 @@@ struct rspamd_url
gchar *fragment;
gchar *surbl;
gchar *tld;
+ gchar *visible_part;
struct rspamd_url *phished_url;
@@@ -61,6 -63,7 +63,7 @@@
guint tldlen;
guint urllen;
guint rawlen;
+ guint visible_partlen;
enum rspamd_url_flags flags;
guint count;
@@@ -85,7 -88,7 +88,7 @@@ enum rspamd_url_protocol
PROTOCOL_HTTPS = 1u << 3,
PROTOCOL_MAILTO = 1u << 4,
PROTOCOL_TELEPHONE = 1u << 5,
- PROTOCOL_UNKNOWN = -1,
+ PROTOCOL_UNKNOWN = 1u << 31,
};
/**
More information about the Commits mailing list