commit 12e8ccd: [Feature] HTML: Specially treat data urls in HTML

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jan 15 12:56:07 UTC 2019


Author: Vsevolod Stakhov
Date: 2019-01-15 12:32:34 +0000
URL: https://github.com/rspamd/rspamd/commit/12e8ccdae81889a10ac64e22c92203c5a2c32810

[Feature] HTML: Specially treat data urls in HTML

---
 src/libserver/html.c |  1 +
 src/libserver/html.h |  1 +
 src/lua/lua_html.c   | 25 ++++++++++++++++++++++++-
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/libserver/html.c b/src/libserver/html.c
index e8856db35..cbc0fe7da 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1571,6 +1571,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
 				img->flags |=
 						(RSPAMD_HTML_FLAG_IMAGE_EMBEDDED|RSPAMD_HTML_FLAG_IMAGE_DATA);
 				rspamd_html_process_data_image (pool, img, comp);
+				hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
 			}
 			else {
 				img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 0400672ac..a2f3a0b1d 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -18,6 +18,7 @@
 #define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
 #define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
 #define RSPAMD_HTML_FLAG_TOO_MANY_TAGS (1 << 6)
+#define RSPAMD_HTML_FLAG_HAS_DATA_URLS (1 << 7)
 
 /*
  * Image flags
diff --git a/src/lua/lua_html.c b/src/lua/lua_html.c
index 47b8c7dfd..71578e1a4 100644
--- a/src/lua/lua_html.c
+++ b/src/lua/lua_html.c
@@ -229,6 +229,7 @@ lua_html_has_property (lua_State *L)
 		 * - `unknown_element`
 		 * - `duplicate_element`
 		 * - `unbalanced`
+		 * - `data_urls`
 		 */
 		if (strcmp (propname, "no_html") == 0) {
 			ret = hc->flags & RSPAMD_HTML_FLAG_BAD_START;
@@ -248,6 +249,12 @@ lua_html_has_property (lua_State *L)
 		else if (strcmp (propname, "unbalanced") == 0) {
 			ret = hc->flags & RSPAMD_HTML_FLAG_UNBALANCED;
 		}
+		else if (strcmp (propname, "unbalanced") == 0) {
+			ret = hc->flags & RSPAMD_HTML_FLAG_UNBALANCED;
+		}
+		else if (strcmp (propname, "data_urls") == 0) {
+			ret = hc->flags & RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+		}
 	}
 
 	lua_pushboolean (L, ret);
@@ -266,7 +273,21 @@ lua_html_push_image (lua_State *L, struct html_image *img)
 
 	if (img->src) {
 		lua_pushstring (L, "src");
-		lua_pushstring (L, img->src);
+
+		if (img->flags & RSPAMD_HTML_FLAG_IMAGE_DATA) {
+			struct rspamd_lua_text *t;
+
+			t = lua_newuserdata (L, sizeof (*t));
+			t->start = img->src;
+			t->len = strlen (img->src);
+			t->flags = 0;
+
+			rspamd_lua_setclass (L, "rspamd{text}", -1);
+		}
+		else {
+			lua_pushstring (L, img->src);
+		}
+
 		lua_settable (L, -3);
 	}
 
@@ -294,6 +315,8 @@ lua_html_push_image (lua_State *L, struct html_image *img)
 	lua_settable (L, -3);
 	lua_pushstring (L, "embedded");
 	lua_pushboolean (L, img->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED);
+	lua_pushstring (L, "data");
+	lua_pushboolean (L, img->flags & RSPAMD_HTML_FLAG_IMAGE_DATA);
 	lua_settable (L, -3);
 }
 


More information about the Commits mailing list