commit 7d3e0fc: [Minor] Some fixes in content extraction for html tags
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Jul 29 09:35:05 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-07-29 10:32:58 +0100
URL: https://github.com/rspamd/rspamd/commit/7d3e0fc85e29b504e2fcfa83140708d7fea80eca (HEAD -> master)
[Minor] Some fixes in content extraction for html tags
---
src/libserver/html/html_tag.hxx | 11 +++++++++++
src/lua/lua_html.cxx | 14 ++++++++------
2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index d7e80f41b..5cd22d777 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -126,6 +126,17 @@ struct html_tag {
children.clear();
closing.clear();
}
+
+ constexpr auto get_content_length() const -> std::size_t { /* Length of this tag's content: closing.start - content_offset, or 0 when there is none. */
+ if (flags & (FL_IGNORE|CM_HEAD)) { /* tags carrying FL_IGNORE or CM_HEAD always report zero content length */
+ return 0;
+ }
+ if (closing.start > content_offset) { /* subtract only when strictly greater, so the difference is never negative or wrapped */
+ return closing.start - content_offset;
+ }
+
+ return 0; /* closing.start is not past content_offset: report empty content */
+ }
};
static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 250203d6e..9a562d2fa 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -441,13 +441,15 @@ lua_html_foreach_tag (lua_State *L)
if (hc && (any || !tags.empty()) && lua_isfunction (L, 3)) {
hc->traverse_all_tags([&](const rspamd::html::html_tag *tag) -> bool {
if (tag && (any || tags.contains(tag->id))) {
+ lua_pushcfunction (L, &rspamd_lua_traceback);
+ auto err_idx = lua_gettop(L);
lua_pushvalue(L, 3);
auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
ltag->tag = tag;
ltag->html = hc;
rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
- lua_pushinteger (L, tag->closing.start - tag->content_offset);
+ lua_pushinteger (L, tag->get_content_length());
/* Leaf flag */
if (tag->children.empty()) {
@@ -457,18 +459,18 @@ lua_html_foreach_tag (lua_State *L)
lua_pushboolean (L, false);
}
- if (lua_pcall (L, 3, 1, 0) != 0) {
+ if (lua_pcall (L, 3, 1, err_idx) != 0) {
msg_err ("error in foreach_tag callback: %s", lua_tostring (L, -1));
- lua_pop (L, 1);
+ lua_settop(L, err_idx - 1);
return false;
}
if (lua_toboolean (L, -1)) {
- lua_pop(L, 1);
+ lua_settop(L, err_idx - 1);
return false;
}
- lua_pop(L, 1);
+ lua_settop(L, err_idx - 1);
}
return true;
@@ -577,7 +579,7 @@ lua_html_tag_get_content (lua_State *L)
struct rspamd_lua_text *t;
if (ltag) {
- auto clen = ltag->tag->closing.start - ltag->tag->content_offset;
+ auto clen = ltag->tag->get_content_length();
if (ltag->html && clen &&
ltag->html->parsed.size() >= ltag->tag->content_offset + clen) {
t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
More information about the Commits
mailing list