commit 141617d: [Rework] Html: Add images processing logic
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Jun 2 19:56:07 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-06-01 20:18:47 +0100
URL: https://github.com/rspamd/rspamd/commit/141617dab47bf741af4578c656d4cda5f18742ed
[Rework] Html: Add images processing logic
---
src/libmime/images.c | 49 +++++++++++++++++----------------------------
src/libserver/html/html.cxx | 32 +++++++++++++++++++++++++++++
src/libserver/html/html.h | 10 +++++++++
3 files changed, 60 insertions(+), 31 deletions(-)
diff --git a/src/libmime/images.c b/src/libmime/images.c
index 960036d78..4e0872f38 100644
--- a/src/libmime/images.c
+++ b/src/libmime/images.c
@@ -658,8 +658,8 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa
struct rspamd_mime_header *rh;
struct rspamd_mime_text_part *tp;
struct html_image *himg;
- const gchar *cid, *html_cid;
- guint cid_len, i, j;
+ const gchar *cid;
+ guint cid_len, i;
struct rspamd_image *img;
img = (struct rspamd_image *)part->specific.img;
@@ -684,35 +684,22 @@ rspamd_image_process_part (struct rspamd_task *task, struct rspamd_mime_part *pa
}
PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, text_parts), i, tp) {
- if (IS_TEXT_PART_HTML (tp) && tp->html != NULL &&
- tp->html->images != NULL) {
- for (j = 0; j < tp->html->images->len; j ++) {
- himg = g_ptr_array_index (tp->html->images, j);
-
- if ((himg->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED) &&
- himg->src) {
- html_cid = himg->src;
-
- if (strncmp (html_cid, "cid:", 4) == 0) {
- html_cid += 4;
- }
-
- if (strlen (html_cid) == cid_len &&
- memcmp (html_cid, cid, cid_len) == 0) {
- img->html_image = himg;
- himg->embedded_image = img;
-
- msg_debug_images ("found linked image by cid: <%s>",
- cid);
-
- if (himg->height == 0) {
- himg->height = img->height;
- }
-
- if (himg->width == 0) {
- himg->width = img->width;
- }
- }
+ if (IS_TEXT_PART_HTML (tp) && tp->html != NULL) {
+ himg = rspamd_html_find_embedded_image(tp->html, cid, cid_len);
+
+ if (himg != NULL) {
+ img->html_image = himg;
+ himg->embedded_image = img;
+
+ msg_debug_images ("found linked image by cid: <%s>",
+ cid);
+
+ if (himg->height == 0) {
+ himg->height = img->height;
+ }
+
+ if (himg->width == 0) {
+ himg->width = img->width;
}
}
}
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 45a9afa18..00f1d331f 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -2277,6 +2277,23 @@ html_process_part_full (rspamd_mempool_t *pool,
return hc;
}
+/**
+ * Look up an embedded image in parsed HTML content by its content id.
+ *
+ * Only images flagged with RSPAMD_HTML_FLAG_IMAGE_EMBEDDED that have a
+ * non-null `src` are considered. A leading "cid:" scheme on `src` is skipped
+ * before the comparison, exactly as the C loop previously living in
+ * rspamd_image_process_part() did, so the lookup succeeds whether or not the
+ * stored source still carries the scheme.
+ *
+ * @param hc parsed HTML content whose `images` are scanned
+ * @param cid content id to look for
+ * @return the first matching image, or std::nullopt when nothing matches
+ */
+static auto
+html_find_image_by_cid(const html_content &hc, std::string_view cid)
+	-> std::optional<const html_image *>
+{
+	constexpr std::string_view cid_scheme{"cid:"};
+
+	for (const auto *img : hc.images) {
+		/* Filter embedded images */
+		if (!(img->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED) ||
+				img->src == nullptr) {
+			continue;
+		}
+
+		std::string_view src{img->src};
+
+		/* substr() clamps to the view length, so short sources are safe */
+		if (src.substr(0, cid_scheme.size()) == cid_scheme) {
+			src.remove_prefix(cid_scheme.size());
+		}
+
+		if (cid == src) {
+			return img;
+		}
+	}
+
+	return std::nullopt;
+}
+
}
void *
@@ -2355,4 +2372,19 @@ rspamd_html_tag_name(void *p, gsize *len)
}
return tag->name.data();
+}
+
+/*
+ * C-callable entry point: finds the embedded HTML image whose source matches
+ * the content id given as the (cid, cid_len) byte range.
+ *
+ * `html_content` is the opaque pointer to parsed HTML content; it is turned
+ * back into the C++ object through html_content::from_ptr().
+ * Returns the matching image, or nullptr when there is no HTML content or no
+ * image matches.
+ */
+struct html_image*
+rspamd_html_find_embedded_image(void *html_content,
+		const char *cid, gsize cid_len)
+{
+	if (html_content == nullptr) {
+		/* Nothing parsed, hence nothing to search */
+		return nullptr;
+	}
+
+	auto *hc = rspamd::html::html_content::from_ptr(html_content);
+
+	auto maybe_img = rspamd::html::html_find_image_by_cid(*hc, {cid, cid_len});
+
+	/*
+	 * The lookup yields a pointer-to-const, while this C API returns a
+	 * mutable image; drop constness explicitly instead of through a
+	 * C-style cast.
+	 */
+	return const_cast<html_image *>(maybe_img.value_or(nullptr));
}
\ No newline at end of file
diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h
index 94063b9be..1e71d0c2d 100644
--- a/src/libserver/html/html.h
+++ b/src/libserver/html/html.h
@@ -144,6 +144,16 @@ gint rspamd_html_tag_by_name(const gchar *name);
*/
const gchar *rspamd_html_tag_name(void *tag, gsize *len);
+/**
+ * Find an embedded HTML image by its content id.
+ *
+ * The implementation only considers images flagged as embedded that have a
+ * non-NULL source, and compares the content id against that source.
+ * @param html_content opaque pointer to the parsed HTML content to search
+ * @param cid content id bytes to look for
+ * @param cid_len length of `cid` in bytes
+ * @return the matching image, or NULL if no embedded image matches
+ */
+struct html_image* rspamd_html_find_embedded_image(void *html_content,
+ const char *cid, gsize cid_len);
+
#ifdef __cplusplus
}
More information about the Commits
mailing list