commit b1dca3c: [Fix] HTML: Fix entities in HTML attributes

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jan 24 14:42:03 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-01-24 14:09:07 +0000
URL: https://github.com/rspamd/rspamd/commit/b1dca3c8bdba396ed5dc692cbcd22186f7dcc525

[Fix] HTML: Fix entities in HTML attributes

---
 src/libserver/html.c | 45 ++++++++++++++++++++++++++++++++++++---------
 src/libserver/html.h |  2 +-
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index ee276d813..63d913762 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -337,9 +337,9 @@ rspamd_html_tag_by_id (gint id)
 
 /* Decode HTML entitles in text */
 guint
-rspamd_html_decode_entitles_inplace (gchar *s, guint len)
+rspamd_html_decode_entitles_inplace (gchar *s, gsize len)
 {
-	guint l, rep_len;
+	goffset l, rep_len;
 	gchar *t = s, *h = s, *e = s, *end_ptr;
 	const gchar *end;
 	const gchar *entity;
@@ -355,7 +355,7 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
 
 	end = s + l;
 
-	while (h - s < (gint)l) {
+	while (h - s < l) {
 		switch (state) {
 		/* Out of entity */
 		case 0:
@@ -448,7 +448,11 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
 								t += g_unichar_to_utf8 (val, t);
 							}
 							else {
-								/* Remove unknown entities */
+								/* Leave unknown entities as is */
+								if (end - t >= h - e) {
+									memmove (t, e, h - e);
+									t += h - e;
+								}
 							}
 						}
 					}
@@ -463,6 +467,15 @@ rspamd_html_decode_entitles_inplace (gchar *s, guint len)
 		}
 	}
 
+	/* Leftover */
+	if (state == 1 && h > e) {
+		/* Unfinished entity, copy as is */
+		if (end - t >= h - e) {
+			memmove (t, e, h - e);
+			t += h - e;
+		}
+	}
+
 	return (t - s);
 }
 
@@ -898,7 +911,7 @@ rspamd_html_parse_tag_component (rspamd_mempool_t *pool,
 	return ret;
 }
 
-static void
+static inline void
 rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 		struct html_content *hc, struct html_tag *tag, const guchar *in,
 		gint *statep, guchar const **savep)
@@ -1151,12 +1164,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
 		if (store) {
 			if (*savep != NULL) {
+				gchar *s;
+
 				g_assert (tag->params != NULL);
 				comp = g_queue_peek_tail (tag->params);
 				g_assert (comp != NULL);
 				comp->len = in - *savep;
-				comp->start = *savep;
-				/* We cannot use entities inside tag values ! */
+				s = rspamd_mempool_alloc (pool, comp->len);
+				memcpy (s, *savep, comp->len);
+				comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+				comp->start = s;
 				*savep = NULL;
 			}
 		}
@@ -1169,11 +1186,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 		}
 		if (store) {
 			if (*savep != NULL) {
+				gchar *s;
+
 				g_assert (tag->params != NULL);
 				comp = g_queue_peek_tail (tag->params);
 				g_assert (comp != NULL);
 				comp->len = in - *savep;
-				comp->start = *savep;
+				s = rspamd_mempool_alloc (pool, comp->len);
+				memcpy (s, *savep, comp->len);
+				comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+				comp->start = s;
 				*savep = NULL;
 			}
 		}
@@ -1191,11 +1213,16 @@ rspamd_html_parse_tag_content (rspamd_mempool_t *pool,
 
 		if (store) {
 			if (*savep != NULL) {
+				gchar *s;
+
 				g_assert (tag->params != NULL);
 				comp = g_queue_peek_tail (tag->params);
 				g_assert (comp != NULL);
 				comp->len = in - *savep;
-				comp->start = *savep;
+				s = rspamd_mempool_alloc (pool, comp->len);
+				memcpy (s, *savep, comp->len);
+				comp->len = rspamd_html_decode_entitles_inplace (s, comp->len);
+				comp->start = s;
 				*savep = NULL;
 			}
 		}
diff --git a/src/libserver/html.h b/src/libserver/html.h
index a2f3a0b1d..f816567bd 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -127,7 +127,7 @@ struct html_content {
 /*
  * Decode HTML entitles in text. Text is modified in place.
  */
-guint rspamd_html_decode_entitles_inplace (gchar *s, guint len);
+guint rspamd_html_decode_entitles_inplace (gchar *s, gsize len);
 
 GByteArray* rspamd_html_process_part (rspamd_mempool_t *pool,
 		struct html_content *hc,


More information about the Commits mailing list