commit 12e8ccd: [Feature] HTML: Specially treat data urls in HTML
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Jan 15 12:56:07 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-01-15 12:32:34 +0000
URL: https://github.com/rspamd/rspamd/commit/12e8ccdae81889a10ac64e22c92203c5a2c32810
[Feature] HTML: Specially treat data urls in HTML
---
src/libserver/html.c | 1 +
src/libserver/html.h | 1 +
src/lua/lua_html.c | 25 ++++++++++++++++++++++++-
3 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index e8856db35..cbc0fe7da 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -1571,6 +1571,7 @@ rspamd_html_process_img_tag (rspamd_mempool_t *pool, struct html_tag *tag,
img->flags |=
(RSPAMD_HTML_FLAG_IMAGE_EMBEDDED|RSPAMD_HTML_FLAG_IMAGE_DATA);
rspamd_html_process_data_image (pool, img, comp);
+ hc->flags |= RSPAMD_HTML_FLAG_HAS_DATA_URLS;
}
else {
img->flags |= RSPAMD_HTML_FLAG_IMAGE_EXTERNAL;
diff --git a/src/libserver/html.h b/src/libserver/html.h
index 0400672ac..a2f3a0b1d 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -18,6 +18,7 @@
#define RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS (1 << 4)
#define RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS (1 << 5)
#define RSPAMD_HTML_FLAG_TOO_MANY_TAGS (1 << 6)
+#define RSPAMD_HTML_FLAG_HAS_DATA_URLS (1 << 7)
/*
* Image flags
diff --git a/src/lua/lua_html.c b/src/lua/lua_html.c
index 47b8c7dfd..71578e1a4 100644
--- a/src/lua/lua_html.c
+++ b/src/lua/lua_html.c
@@ -229,6 +229,7 @@ lua_html_has_property (lua_State *L)
* - `unknown_element`
* - `duplicate_element`
* - `unbalanced`
+ * - `data_urls`
*/
if (strcmp (propname, "no_html") == 0) {
ret = hc->flags & RSPAMD_HTML_FLAG_BAD_START;
@@ -248,6 +249,12 @@ lua_html_has_property (lua_State *L)
else if (strcmp (propname, "unbalanced") == 0) {
ret = hc->flags & RSPAMD_HTML_FLAG_UNBALANCED;
}
+ else if (strcmp (propname, "unbalanced") == 0) {
+ ret = hc->flags & RSPAMD_HTML_FLAG_UNBALANCED;
+ }
+ else if (strcmp (propname, "data_urls") == 0) {
+ ret = hc->flags & RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+ }
}
lua_pushboolean (L, ret);
@@ -266,7 +273,21 @@ lua_html_push_image (lua_State *L, struct html_image *img)
if (img->src) {
lua_pushstring (L, "src");
- lua_pushstring (L, img->src);
+
+ if (img->flags & RSPAMD_HTML_FLAG_IMAGE_DATA) {
+ struct rspamd_lua_text *t;
+
+ t = lua_newuserdata (L, sizeof (*t));
+ t->start = img->src;
+ t->len = strlen (img->src);
+ t->flags = 0;
+
+ rspamd_lua_setclass (L, "rspamd{text}", -1);
+ }
+ else {
+ lua_pushstring (L, img->src);
+ }
+
lua_settable (L, -3);
}
@@ -294,6 +315,8 @@ lua_html_push_image (lua_State *L, struct html_image *img)
lua_settable (L, -3);
lua_pushstring (L, "embedded");
lua_pushboolean (L, img->flags & RSPAMD_HTML_FLAG_IMAGE_EMBEDDED);
+ lua_pushstring (L, "data");
+ lua_pushboolean (L, img->flags & RSPAMD_HTML_FLAG_IMAGE_DATA);
lua_settable (L, -3);
}
More information about the Commits mailing list