commit 7d3e0fc: [Minor] Some fixes in content extraction for html tags

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jul 29 09:35:05 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-29 10:32:58 +0100
URL: https://github.com/rspamd/rspamd/commit/7d3e0fc85e29b504e2fcfa83140708d7fea80eca (HEAD -> master)

[Minor] Some fixes in content extraction for html tags

---
 src/libserver/html/html_tag.hxx | 11 +++++++++++
 src/lua/lua_html.cxx            | 14 ++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index d7e80f41b..5cd22d777 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -126,6 +126,17 @@ struct html_tag {
 		children.clear();
 		closing.clear();
 	}
+
+	constexpr auto get_content_length() const -> std::size_t { // Length of this tag's content; 0 when there is nothing to report
+		if (flags & (FL_IGNORE|CM_HEAD)) { // tags with FL_IGNORE or CM_HEAD set always report zero
+			return 0;
+		}
+		if (closing.start > content_offset) { // subtract only when the closing tag starts past the content offset
+			return closing.start - content_offset;
+		}
+
+		return 0; // closing.start is at or before content_offset: treat as no content
+	}
 };
 
 static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 250203d6e..9a562d2fa 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -441,13 +441,15 @@ lua_html_foreach_tag (lua_State *L)
 	if (hc && (any || !tags.empty()) && lua_isfunction (L, 3)) {
 		hc->traverse_all_tags([&](const rspamd::html::html_tag *tag) -> bool {
 			if (tag && (any || tags.contains(tag->id))) {
+				lua_pushcfunction (L, &rspamd_lua_traceback);
+				auto err_idx = lua_gettop(L);
 				lua_pushvalue(L, 3);
 
 				auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
 				ltag->tag = tag;
 				ltag->html = hc;
 				rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
-				lua_pushinteger (L, tag->closing.start - tag->content_offset);
+				lua_pushinteger (L, tag->get_content_length());
 
 				/* Leaf flag */
 				if (tag->children.empty()) {
@@ -457,18 +459,18 @@ lua_html_foreach_tag (lua_State *L)
 					lua_pushboolean (L, false);
 				}
 
-				if (lua_pcall (L, 3, 1, 0) != 0) {
+				if (lua_pcall (L, 3, 1, err_idx) != 0) {
 					msg_err ("error in foreach_tag callback: %s", lua_tostring (L, -1));
-					lua_pop (L, 1);
+					lua_settop(L, err_idx - 1);
 					return false;
 				}
 
 				if (lua_toboolean (L, -1)) {
-					lua_pop(L, 1);
+					lua_settop(L, err_idx - 1);
 					return false;
 				}
 
-				lua_pop(L, 1);
+				lua_settop(L, err_idx - 1);
 			}
 
 			return true;
@@ -577,7 +579,7 @@ lua_html_tag_get_content (lua_State *L)
 	struct rspamd_lua_text *t;
 
 	if (ltag) {
-		auto clen = ltag->tag->closing.start - ltag->tag->content_offset;
+		auto clen = ltag->tag->get_content_length();
 		if (ltag->html && clen &&
 				ltag->html->parsed.size() >= ltag->tag->content_offset + clen) {
 			t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));


More information about the Commits mailing list