commit b369727: [Minor] Further fixes to the html tags content methods

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Jul 29 16:56:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-29 17:43:49 +0100
URL: https://github.com/rspamd/rspamd/commit/b369727b44bf851981ca88b764e9429428154b72

[Minor] Further fixes to the html tags content methods

---
 src/libserver/html/html.cxx     |  3 ++-
 src/libserver/html/html_tag.hxx | 14 ++++++++++++++
 src/lua/lua_html.cxx            | 30 ++++++++++++++++++------------
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 53fe815dd..fd0bfa495 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1976,7 +1976,8 @@ html_process_input(rspamd_mempool_t *pool,
 		}
 		break;
 	case tags_limit_overflow:
-		html_append_parsed(hc, {c, (std::size_t) (end - c)}, false, end - start);
+		html_append_parsed(hc, {c, (std::size_t) (end - c)},
+				false, end - start);
 		break;
 	default:
 		/* Do nothing */
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index 5cd22d777..357e11bfb 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -137,6 +137,20 @@ struct html_tag {
 
 		return 0;
 	}
+	/* Returns the part of `parsed` that holds this tag's content, clamped to the end of `parsed`; an empty view if content_offset does not point inside `parsed`. */
+	constexpr auto get_content(std::string_view parsed) const -> std::string_view {
+		const auto clen = get_content_length();
+		if (content_offset < parsed.size()) {
+			if (parsed.size() - content_offset >= clen) { /* the whole content fits inside `parsed` */
+				return parsed.substr(content_offset, clen);
+			}
+			else { /* content would run past the end: return only what `parsed` holds */
+				return parsed.substr(content_offset, parsed.size() - content_offset);
+			}
+		}
+
+		return std::string_view{}; /* content_offset is at or beyond the end of `parsed` */
+	}
 };
 
 static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 2b0f63c02..d20de03c0 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -448,8 +448,9 @@ lua_html_foreach_tag (lua_State *L)
 				auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
 				ltag->tag = tag;
 				ltag->html = hc;
+				auto ct = ltag->tag->get_content(hc->parsed);
 				rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
-				lua_pushinteger (L, tag->get_content_length());
+				lua_pushinteger (L, ct.size());
 
 				/* Leaf flag */
 				if (tag->children.empty()) {
@@ -579,17 +580,16 @@ lua_html_tag_get_content (lua_State *L)
 	struct rspamd_lua_text *t;
 
 	if (ltag) {
-		auto clen = ltag->tag->get_content_length();
-		if (ltag->html && clen && ltag->html->parsed.size() > ltag->tag->content_offset) {
-			if (ltag->html->parsed.size() - ltag->tag->content_offset < clen) {
-				clen = ltag->html->parsed.size() - ltag->tag->content_offset;
+
+		if (ltag->html) {
+			auto ct = ltag->tag->get_content(ltag->html->parsed);
+			if (ct.size() > 0) {
+				t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
+				rspamd_lua_setclass(L, "rspamd{text}", -1);
+				t->start = ct.data();
+				t->len = ct.size();
+				t->flags = 0;
 			}
-			t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
-			rspamd_lua_setclass (L, "rspamd{text}", -1);
-			t->start = reinterpret_cast<const char *>(ltag->html->parsed.data()) +
-					ltag->tag->content_offset;
-			t->len = clen;
-			t->flags = 0;
 		}
 		else {
 			lua_pushnil (L);
@@ -609,7 +609,13 @@ lua_html_tag_get_content_length (lua_State *L)
 	struct lua_html_tag *ltag = lua_check_html_tag (L, 1);
 
 	if (ltag) {
-		lua_pushinteger (L, ltag->tag->closing.start - ltag->tag->content_offset);
+		if (ltag->html) {
+			auto ct = ltag->tag->get_content(ltag->html->parsed);
+			lua_pushinteger (L, ct.size());
+		}
+		else {
+			lua_pushinteger (L, ltag->tag->get_content_length());
+		}
 	}
 	else {
 		return luaL_error (L, "invalid arguments");


More information about the Commits mailing list