commit 30c0dca: [Rework] Rework image urls processing
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Aug 29 12:14:04 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-08-29 11:42:46 +0100
URL: https://github.com/rspamd/rspamd/commit/30c0dca001842d9dc009be0ff9ade49b69963bd2
[Rework] Rework image urls processing
---
src/libserver/html.c | 19 +++++++++++++++++--
src/libserver/url.h | 1 +
src/lua/lua_url.c | 1 +
3 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index fa33ffdfb..014beff18 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1677,7 +1677,7 @@ rspamd_html_process_data_image (rspamd_mempool_t *pool,
static void
rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
- struct html_content *hc)
+ struct html_content *hc, GHashTable *urls)
{
struct html_tag_component *comp;
struct html_image *img;
@@ -1717,8 +1717,23 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
else {
img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
if (img->src) {
+
img->url = rspamd_html_process_url (pool,
img->src, fstr.len, NULL);
+
+ if (img->url) {
+ struct rspamd_url *turl = g_hash_table_lookup (urls,
+ img->url);
+
+ img->url->flags |= RSPAMD_URL_FLAG_IMAGE;
+
+ if (turl == NULL) {
+ g_hash_table_insert (urls, img->url, img->url);
+ }
+ else {
+ turl->count++;
+ }
+ }
}
}
}
@@ -3041,7 +3056,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
}
if (cur_tag->id == Tag_IMG && !(cur_tag->flags & FL_CLOSING)) {
- rspamd_html_process_img_tag (pool, cur_tag, hc);
+ rspamd_html_process_img_tag (pool, cur_tag, hc, urls);
}
else if (cur_tag->flags & FL_BLOCK) {
struct html_block *bl;
diff --git a/src/libserver/url.h b/src/libserver/url.h
index ae21b6ab3..83a2a7f17 100644
--- a/src/libserver/url.h
+++ b/src/libserver/url.h
@@ -33,6 +33,7 @@ enum rspamd_url_flags {
RSPAMD_URL_FLAG_UNNORMALISED = 1u << 16u,
RSPAMD_URL_FLAG_ZW_SPACES = 1u << 17u,
RSPAMD_URL_FLAG_DISPLAY_URL = 1u << 18u,
+ RSPAMD_URL_FLAG_IMAGE = 1u << 19u,
};
struct rspamd_url_tag {
diff --git a/src/lua/lua_url.c b/src/lua/lua_url.c
index b30e560c9..8742a6027 100644
--- a/src/lua/lua_url.c
+++ b/src/lua/lua_url.c
@@ -881,6 +881,7 @@ lua_url_get_flags (lua_State *L)
PUSH_FLAG (RSPAMD_URL_FLAG_UNNORMALISED, "unnormalised");
PUSH_FLAG (RSPAMD_URL_FLAG_ZW_SPACES, "zw_spaces");
PUSH_FLAG (RSPAMD_URL_FLAG_DISPLAY_URL, "url_displayed");
+ PUSH_FLAG (RSPAMD_URL_FLAG_IMAGE, "image");
}
else {
return luaL_error (L, "invalid arguments");
More information about the Commits mailing list