commit f5b4030: [Rework] Further work to make html content private

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Jun 2 19:56:05 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-05-31 20:54:41 +0100
URL: https://github.com/rspamd/rspamd/commit/f5b403091ac5e6e48f13d5a6d31743f2b097b709

[Rework] Further work to make html content private

---
 src/libserver/html/html.cxx | 102 +++++++++++++++++---------------------------
 src/libserver/html/html.h   |   3 +-
 2 files changed, 41 insertions(+), 64 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 973649791..45a9afa18 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -955,30 +955,19 @@ html_process_img_tag(rspamd_mempool_t *pool,
 
 	auto found_alt_it = tag->parameters.find(html_component_type::RSPAMD_HTML_COMPONENT_ALT);
 
-	if (found_alt_it != tag->parameters.end() && dest != NULL) {
-		if (dest->len > 0 && !g_ascii_isspace (dest->data[dest->len - 1])) {
+	if (found_alt_it != tag->parameters.end()) {
+		if (!hc->parsed.empty() && !g_ascii_isspace (hc->parsed.back())) {
 			/* Add a space */
-			g_byte_array_append(dest, reinterpret_cast<const guint8 *>(" "), 1);
+			hc->parsed += ' ';
 		}
+		hc->parsed.append(found_alt_it->second);
 
-		g_byte_array_append(dest,
-				reinterpret_cast<const guint8 *>(found_alt_it->second.data()),
-				found_alt_it->second.size());
-
-		if (!g_ascii_isspace (dest->data[dest->len - 1])) {
+		if (!g_ascii_isspace (hc->parsed.back())) {
 			/* Add a space */
-			g_byte_array_append(dest, reinterpret_cast<const guint8 *>(" "), 1);
+			hc->parsed += ' ';
 		}
 	}
 
-
-	if (hc->images == nullptr) {
-		hc->images = g_ptr_array_sized_new(4);
-		rspamd_mempool_notify_alloc(pool, 4 * sizeof(gpointer) + sizeof(GPtrArray));
-		rspamd_mempool_add_destructor(pool, rspamd_ptr_array_free_hard,
-				hc->images);
-	}
-
 	if (img->embedded_image) {
 		if (img->height == 0) {
 			img->height = img->embedded_image->height;
@@ -988,7 +977,7 @@ html_process_img_tag(rspamd_mempool_t *pool,
 		}
 	}
 
-	g_ptr_array_add(hc->images, img);
+	hc->images.push_back(img);
 	tag->extra = img;
 }
 
@@ -1002,7 +991,7 @@ html_process_link_tag(rspamd_mempool_t *pool, struct html_tag *tag,
 
 	if (found_rel_it != tag->parameters.end()) {
 		if (found_rel_it->second == "icon") {
-			html_process_img_tag(pool, tag, hc, url_set, part_urls, nullptr);
+			html_process_img_tag(pool, tag, hc, url_set, part_urls);
 		}
 	}
 }
@@ -1489,10 +1478,7 @@ static auto
 html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
 					   struct html_content *hc) -> void
 {
-	struct html_block *bl;
-	rspamd_ftok_t fstr;
-
-	bl = rspamd_mempool_alloc0_type (pool, struct html_block);
+	auto *bl = rspamd_mempool_alloc0_type (pool, struct html_block);
 	bl->tag = tag;
 	bl->visible = TRUE;
 	bl->font_size = (guint) -1;
@@ -1531,6 +1517,7 @@ html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
 
 	auto found_class_it = tag->parameters.find(html_component_type::RSPAMD_HTML_COMPONENT_CLASS);
 	if (found_class_it != tag->parameters.end()) {
+		rspamd_ftok_t fstr;
 		fstr.begin = found_class_it->second.data();
 		fstr.len = found_class_it->second.size();
 		bl->html_class = rspamd_mempool_ftokdup (pool, &fstr);
@@ -1538,14 +1525,7 @@ html_process_block_tag(rspamd_mempool_t *pool, struct html_tag *tag,
 				(int)tag->name.size(), tag->name.data(), bl->html_class);
 	}
 
-	if (hc->blocks == NULL) {
-		hc->blocks = g_ptr_array_sized_new(64);
-		rspamd_mempool_notify_alloc (pool, 64 * sizeof(gpointer) + sizeof(GPtrArray));
-		rspamd_mempool_add_destructor (pool, rspamd_ptr_array_free_hard,
-				hc->blocks);
-	}
-
-	g_ptr_array_add(hc->blocks, bl);
+	hc->blocks.push_back(bl);
 	tag->block = bl;
 }
 
@@ -1575,7 +1555,6 @@ html_propagate_style(struct html_content *hc,
 							struct html_block *bl,
 							std::vector<struct html_block *> &blocks) -> void
 {
-	struct html_block *bl_parent;
 	gboolean push_block = FALSE;
 
 	if (blocks.empty()) {
@@ -1583,41 +1562,39 @@ html_propagate_style(struct html_content *hc,
 		return;
 	}
 	/* Propagate from the parent if needed */
-	bl_parent = blocks.back();
+	auto *bl_parent = blocks.back();
 
-	if (bl_parent) {
-		if (!bl->background_color.valid) {
-			/* Try to propagate background color from parent nodes */
-			if (bl_parent->background_color.valid) {
-				memcpy(&bl->background_color, &bl_parent->background_color,
-						sizeof(bl->background_color));
-			}
-		}
-		else {
-			push_block = TRUE;
+	if (!bl->background_color.valid) {
+		/* Try to propagate background color from parent nodes */
+		if (bl_parent->background_color.valid) {
+			memcpy(&bl->background_color, &bl_parent->background_color,
+					sizeof(bl->background_color));
 		}
+	}
+	else {
+		push_block = TRUE;
+	}
 
-		if (!bl->font_color.valid) {
-			/* Try to propagate background color from parent nodes */
-			if (bl_parent->font_color.valid) {
-				memcpy(&bl->font_color, &bl_parent->font_color,
-						sizeof(bl->font_color));
-			}
-		}
-		else {
-			push_block = TRUE;
+	if (!bl->font_color.valid) {
+		/* Try to propagate background color from parent nodes */
+		if (bl_parent->font_color.valid) {
+			memcpy(&bl->font_color, &bl_parent->font_color,
+					sizeof(bl->font_color));
 		}
+	}
+	else {
+		push_block = TRUE;
+	}
 
-		/* Propagate font size */
-		if (bl->font_size == (guint) -1) {
-			if (bl_parent->font_size != (guint) -1) {
-				bl->font_size = bl_parent->font_size;
-			}
-		}
-		else {
-			push_block = TRUE;
+	/* Propagate font size */
+	if (bl->font_size == (guint) -1) {
+		if (bl_parent->font_size != (guint) -1) {
+			bl->font_size = bl_parent->font_size;
 		}
 	}
+	else {
+		push_block = TRUE;
+	}
 
 	/* Set bgcolor to the html bgcolor and font color to black as a last resort */
 	if (!bl->font_color.valid) {
@@ -2340,16 +2317,17 @@ rspamd_html_tag_by_name(const gchar *name)
 }
 
 gboolean
-rspamd_html_tag_seen(struct html_content *hc, const gchar *tagname)
+rspamd_html_tag_seen(void *ptr, const gchar *tagname)
 {
 	gint id;
+	auto *hc = rspamd::html::html_content::from_ptr(ptr);
 
 	g_assert (hc != NULL);
 
 	id = rspamd_html_tag_by_name(tagname);
 
 	if (id != -1) {
-		return isset(hc->tags_seen, id);
+		return hc->tags_seen[id];
 	}
 
 	return FALSE;
diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h
index 23faa47d3..94063b9be 100644
--- a/src/libserver/html/html.h
+++ b/src/libserver/html/html.h
@@ -102,7 +102,6 @@ struct html_block {
 
 /* Forwarded declaration */
 struct rspamd_task;
-struct html_content;
 
 /*
  * Decode HTML entitles in text. Text is modified in place.
@@ -121,7 +120,7 @@ void *rspamd_html_process_part_full(rspamd_mempool_t *pool,
 /*
  * Returns true if a specified tag has been seen in a part
  */
-gboolean rspamd_html_tag_seen(struct html_content *hc, const gchar *tagname);
+gboolean rspamd_html_tag_seen(void *ptr, const gchar *tagname);
 
 /**
  * Returns name for the specified tag id


More information about the Commits mailing list