commit 30c0dca: [Rework] Rework image urls processing

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Aug 29 12:14:04 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-08-29 11:42:46 +0100
URL: https://github.com/rspamd/rspamd/commit/30c0dca001842d9dc009be0ff9ade49b69963bd2

[Rework] Rework image urls processing

---
 src/libserver/html.c | 19 +++++++++++++++++--
 src/libserver/url.h  |  1 +
 src/lua/lua_url.c    |  1 +
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index fa33ffdfb..014beff18 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1677,7 +1677,7 @@ rspamd_html_process_data_image (rspamd_mempool_t *pool,
 
 static void
 rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
-		struct html_content *hc)
+		struct html_content *hc, GHashTable *urls)
 {
 	struct html_tag_component *comp;
 	struct html_image *img;
@@ -1717,8 +1717,23 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
 				else {
 					img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
 					if (img->src) {
+
 						img->url = rspamd_html_process_url (pool,
 								img->src, fstr.len, NULL);
+
+						if (img->url) {
+							struct rspamd_url *turl = g_hash_table_lookup (urls,
+									img->url);
+
+							img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+
+							if (turl == NULL) {
+								g_hash_table_insert (urls, img->url, img->url);
+							}
+							else {
+								turl->count++;
+							}
+						}
 					}
 				}
 			}
@@ -3041,7 +3056,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
 				}
 
 				if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
-					rspamd_html_process_img_tag (pool, cur_tag, hc);
+					rspamd_html_process_img_tag (pool, cur_tag, hc, urls);
 				}
 				else if (cur_tag->flags & FL_BLOCK) {
 					struct html_block *bl;
diff --git a/src/libserver/url.h b/src/libserver/url.h
index ae21b6ab3..83a2a7f17 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -33,6 +33,7 @@ enum rspamd_url_flags {
 	RSPAMD_URL_FLAG_UNNORMALISED = 1u << 16u,
 	RSPAMD_URL_FLAG_ZW_SPACES = 1u << 17u,
 	RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
+	RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
 };
 
 struct rspamd_url_tag {
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index b30e560c9..8742a6027 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -881,6 +881,7 @@ lua_url_get_flags (lua_State *L)
 		PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
 		PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
 		PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
+		PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
 	}
 	else {
 		return luaL_error (L, "invalid arguments");


More information about the Commits mailing list