commit fff6f70: Merge pull request #2771 from miecio45/feat_url_visible_part

GitHub noreply at github.com
Sat Mar 9 14:21:07 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-03-09 14:18:06 +0000
URL: https://github.com/rspamd/rspamd/commit/fff6f70c33bbf080301230cbecb6aa00bbb6c5b3 (HEAD -> master)

Merge pull request #2771 from miecio45/feat_url_visible_part
[Minor] Export url visible part to lua and add new url flag

 src/libserver/html.c | 26 ++++++++++++++++++++++++++
 src/libserver/url.h  |  3 +++
 src/lua/lua_url.c    | 25 +++++++++++++++++++++++++
 3 files changed, 54 insertions(+)

diff --combined src/libserver/html.c
index 63638d28b,fef769ec1..c831dc14e
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@@ -664,8 -664,7 +664,8 @@@ rspamd_html_url_is_phished (rspamd_memp
  				}
  			}
  #endif
 -			if (rspamd_ftok_casecmp (&disp_tok, &href_tok) != 0) {
 +			if (rspamd_ftok_casecmp (&disp_tok, &href_tok) != 0 &&
 +					text_url->tldlen > 0 && href_url->tldlen > 0) {
  
  				/* Apply the same logic for TLD */
  				disp_tok.len = text_url->tldlen;
@@@ -1350,8 -1349,7 +1350,8 @@@ rspamd_html_process_url (rspamd_mempool
  	if (rspamd_substring_search (start, len, "://", 3) == -1) {
  		if (len >= sizeof ("mailto:") &&
  				(memcmp (start, "mailto:", sizeof ("mailto:") - 1) == 0 ||
 -						memcmp (start, "tel:", sizeof ("tel:") - 1) == 0)) {
 +				 memcmp (start, "tel:", sizeof ("tel:") - 1) == 0 ||
 +				 memcmp (start, "callto:", sizeof ("callto:") - 1) == 0)) {
  			/* Exclusion, has valid but 'strange' prefix */
  		}
  		else {
@@@ -1441,9 -1439,7 +1441,9 @@@
  
  	rc = rspamd_url_parse (url, decoded, dlen, pool, RSPAMD_URL_PARSE_HREF);
  
 -	if (rc == URI_ERRNO_OK) {
 +	/* Filter some completely damaged urls */
 +	if (rc == URI_ERRNO_OK && url->hostlen > 0 &&
 +		!((url->flags & RSPAMD_URL_FLAG_OBSCURED) && (url->protocol & PROTOCOL_UNKNOWN))) {
  		url->flags |= saved_flags;
  
  		if (has_bad_chars) {
@@@ -2377,11 -2373,37 +2377,37 @@@ rspamd_html_check_displayed_url (rspamd
  		return;
  	}
  
+ 	url->visible_part = rspamd_mempool_alloc0(pool, dest->len - href_offset+1);
+ 	gchar *current_processed_char = dest->data + href_offset;
+ 	gchar *current_char_in_struct = url->visible_part;
+ 	gboolean previous_char_was_space = false;
+ 
+ 	while (current_processed_char < (gchar*) dest->data + dest->len) {
+ 		if (g_ascii_isspace(*current_processed_char)) {
+ 			if (previous_char_was_space) {
+ 				current_processed_char++;
+ 				continue;
+ 			}
+ 			previous_char_was_space = true;
+ 			*current_char_in_struct = ' ';
+ 		} else {
+ 			*current_char_in_struct = *current_processed_char;
+ 			previous_char_was_space = false;
+ 		}
+ 		current_char_in_struct++;
+ 		current_processed_char++;
+ 	}
+ 	*current_char_in_struct = '\0';
+ 	url->visible_partlen = current_char_in_struct - url->visible_part;
+ 
  	rspamd_html_url_is_phished (pool, url,
  			dest->data + href_offset,
  			dest->len - href_offset,
  			&url_found, &displayed_url);
  
+ 	if (url_found) {
+ 		url->flags |= RSPAMD_URL_FLAG_DISPLAY_URL;
+ 	}
  	if (exceptions && url_found) {
  		ex = rspamd_mempool_alloc (pool,
  				sizeof (*ex));
diff --combined src/libserver/url.h
index 3deeb8cf5,1a117c450..ad09c33ec
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@@ -28,6 -28,7 +28,7 @@@ enum rspamd_url_flags 
  	RSPAMD_URL_FLAG_SCHEMALESS = 1 << 15,
  	RSPAMD_URL_FLAG_UNNORMALISED = 1 << 16,
  	RSPAMD_URL_FLAG_ZW_SPACES = 1 << 17,
+ 	RSPAMD_URL_FLAG_DISPLAY_URL = 1 << 18,
  };
  
  struct rspamd_url_tag {
@@@ -48,6 -49,7 +49,7 @@@ struct rspamd_url 
  	gchar *fragment;
  	gchar *surbl;
  	gchar *tld;
+ 	gchar *visible_part;
  
  	struct rspamd_url *phished_url;
  
@@@ -61,6 -63,7 +63,7 @@@
  	guint tldlen;
  	guint urllen;
  	guint rawlen;
+ 	guint visible_partlen;
  
  	enum rspamd_url_flags flags;
  	guint count;
@@@ -85,7 -88,7 +88,7 @@@ enum rspamd_url_protocol 
  	PROTOCOL_HTTPS = 1u << 3,
  	PROTOCOL_MAILTO = 1u << 4,
  	PROTOCOL_TELEPHONE = 1u << 5,
 -	PROTOCOL_UNKNOWN = -1,
 +	PROTOCOL_UNKNOWN = 1u << 31,
  };
  
  /**


More information about the Commits mailing list