commit 198e242: [Feature] Treat all tags with HREF as potential hyperlinks
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Mar 20 14:49:04 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-03-20 14:44:06 +0000
URL: https://github.com/rspamd/rspamd/commit/198e242157ed81b871671f6a77e3d525a57350a5 (HEAD -> master)
[Feature] Treat all tags with HREF as potential hyperlinks
---
src/libserver/html.c | 17 +++++++----------
src/libserver/html.h | 1 +
src/lua/lua_html.c | 6 +++++-
3 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/src/libserver/html.c b/src/libserver/html.c
index 1f11f0cb2..6df545f00 100644
--- a/src/libserver/html.c
+++ b/src/libserver/html.c
@@ -53,12 +53,12 @@ INIT_LOG_MODULE(html)
static struct html_tag_def tag_defs[] = {
/* W3C defined elements */
- TAG_DEF(Tag_A, "a", 0),
+ TAG_DEF(Tag_A, "a", FL_HREF),
TAG_DEF(Tag_ABBR, "abbr", (CM_INLINE)),
TAG_DEF(Tag_ACRONYM, "acronym", (CM_INLINE)),
TAG_DEF(Tag_ADDRESS, "address", (CM_BLOCK)),
TAG_DEF(Tag_APPLET, "applet", (CM_OBJECT | CM_IMG | CM_INLINE | CM_PARAM)),
- TAG_DEF(Tag_AREA, "area", (CM_BLOCK | CM_EMPTY)),
+ TAG_DEF(Tag_AREA, "area", (CM_BLOCK | CM_EMPTY | FL_HREF)),
TAG_DEF(Tag_B, "b", (CM_INLINE|FL_BLOCK)),
TAG_DEF(Tag_BASE, "base", (CM_HEAD | CM_EMPTY)),
TAG_DEF(Tag_BASEFONT, "basefont", (CM_INLINE | CM_EMPTY)),
@@ -85,7 +85,7 @@ static struct html_tag_def tag_defs[] = {
TAG_DEF(Tag_FIELDSET, "fieldset", (CM_BLOCK)),
TAG_DEF(Tag_FONT, "font", (FL_BLOCK)),
TAG_DEF(Tag_FORM, "form", (CM_BLOCK)),
- TAG_DEF(Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY)),
+ TAG_DEF(Tag_FRAME, "frame", (CM_FRAMES | CM_EMPTY | FL_HREF)),
TAG_DEF(Tag_FRAMESET, "frameset", (CM_HTML | CM_FRAMES)),
TAG_DEF(Tag_H1, "h1", (CM_BLOCK | CM_HEADING)),
TAG_DEF(Tag_H2, "h2", (CM_BLOCK | CM_HEADING)),
@@ -97,7 +97,7 @@ static struct html_tag_def tag_defs[] = {
TAG_DEF(Tag_HR, "hr", (CM_BLOCK | CM_EMPTY)),
TAG_DEF(Tag_HTML, "html", (CM_HTML | CM_OPT | CM_OMITST | CM_UNIQUE)),
TAG_DEF(Tag_I, "i", (CM_INLINE)),
- TAG_DEF(Tag_IFRAME, "iframe", (0)),
+ TAG_DEF(Tag_IFRAME, "iframe", (FL_HREF)),
TAG_DEF(Tag_IMG, "img", (CM_INLINE | CM_IMG | CM_EMPTY)),
TAG_DEF(Tag_INPUT, "input", (CM_INLINE | CM_IMG | CM_EMPTY)),
TAG_DEF(Tag_INS, "ins", (CM_INLINE | CM_BLOCK | CM_MIXED)),
@@ -106,9 +106,9 @@ static struct html_tag_def tag_defs[] = {
TAG_DEF(Tag_LABEL, "label", (CM_INLINE)),
TAG_DEF(Tag_LEGEND, "legend", (CM_INLINE)),
TAG_DEF(Tag_LI, "li", (CM_LIST | CM_OPT | CM_NO_INDENT | FL_BLOCK)),
- TAG_DEF(Tag_LINK, "link", (CM_HEAD | CM_EMPTY)),
+ TAG_DEF(Tag_LINK, "link", (CM_HEAD | CM_EMPTY|FL_HREF)),
TAG_DEF(Tag_LISTING, "listing", (CM_BLOCK | CM_OBSOLETE)),
- TAG_DEF(Tag_MAP, "map", (CM_INLINE)),
+ TAG_DEF(Tag_MAP, "map", (CM_INLINE|FL_HREF)),
TAG_DEF(Tag_MENU, "menu", (CM_BLOCK | CM_OBSOLETE)),
TAG_DEF(Tag_META, "meta", (CM_HEAD | CM_INLINE | CM_EMPTY)),
TAG_DEF(Tag_NOFRAMES, "noframes", (CM_BLOCK | CM_FRAMES)),
@@ -2942,7 +2942,7 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
save_space = FALSE;
}
- if (cur_tag->id == Tag_A || cur_tag->id == Tag_IFRAME) {
+ if (cur_tag->flags & FL_HREF) {
if (!(cur_tag->flags & (FL_CLOSING))) {
url = rspamd_html_process_url_tag (pool, cur_tag, hc);
@@ -3012,9 +3012,6 @@ rspamd_html_process_part_full (rspamd_mempool_t *pool, struct html_content *hc,
}
}
}
- else if (cur_tag->id == Tag_LINK) {
- url = rspamd_html_process_url_tag (pool, cur_tag, hc);
- }
else if (cur_tag->id == Tag_BASE && !(cur_tag->flags & (FL_CLOSING))) {
struct html_tag *prev_tag = NULL;
diff --git a/src/libserver/html.h b/src/libserver/html.h
index f816567bd..f3515d627 100644
--- a/src/libserver/html.h
+++ b/src/libserver/html.h
@@ -98,6 +98,7 @@ struct html_block {
#define FL_BROKEN (1 << 26)
#define FL_IGNORE (1 << 27)
#define FL_BLOCK (1 << 28)
+#define FL_HREF (1 << 29)
struct html_tag {
gint id;
diff --git a/src/lua/lua_html.c b/src/lua/lua_html.c
index 1b5828564..39a4a77a0 100644
--- a/src/lua/lua_html.c
+++ b/src/lua/lua_html.c
@@ -616,6 +616,10 @@ lua_html_tag_get_flags (lua_State *L)
lua_pushstring (L, "closing");
lua_rawseti (L, -2, i++);
}
+ if (tag->flags & FL_HREF) {
+ lua_pushstring (L, "href");
+ lua_rawseti (L, -2, i++);
+ }
if (tag->flags & FL_CLOSED) {
lua_pushstring (L, "closed");
lua_rawseti (L, -2, i++);
@@ -692,7 +696,7 @@ lua_html_tag_get_extra (lua_State *L)
if (tag) {
if (tag->extra) {
- if (tag->id == Tag_A || tag->id == Tag_IFRAME || tag->id == Tag_LINK) {
+ if (tag->flags & FL_HREF) {
/* For A that's URL */
purl = lua_newuserdata (L, sizeof (gpointer));
*purl = tag->extra;
More information about the Commits
mailing list