commit b369727: [Minor] Further fixes to the html tags content methods
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Jul 29 16:56:06 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-07-29 17:43:49 +0100
URL: https://github.com/rspamd/rspamd/commit/b369727b44bf851981ca88b764e9429428154b72
[Minor] Further fixes to the html tags content methods
---
src/libserver/html/html.cxx | 3 ++-
src/libserver/html/html_tag.hxx | 14 ++++++++++++++
src/lua/lua_html.cxx | 30 ++++++++++++++++++------------
3 files changed, 34 insertions(+), 13 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 53fe815dd..fd0bfa495 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1976,7 +1976,8 @@ html_process_input(rspamd_mempool_t *pool,
}
break;
case tags_limit_overflow:
- html_append_parsed(hc, {c, (std::size_t) (end - c)}, false, end - start);
+ html_append_parsed(hc, {c, (std::size_t) (end - c)},
+ false, end - start);
break;
default:
/* Do nothing */
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index 5cd22d777..357e11bfb 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -137,6 +137,20 @@ struct html_tag {
return 0;
}
+
+ constexpr auto get_content(std::string_view parsed) const -> std::string_view { /* Returns the slice of `parsed` holding this tag's content, never reading past the end of `parsed` */
+ const auto clen = get_content_length(); /* Content length as reported by get_content_length() */
+ if (content_offset < parsed.size()) { /* Content must start inside the buffer */
+ if (parsed.size() - content_offset >= clen) { /* Enough bytes remain after the offset: return the full content */
+ return parsed.substr(content_offset, clen);
+ }
+ else { /* Fewer than clen bytes remain: return only what is left after the offset */
+ return parsed.substr(content_offset, parsed.size() - content_offset);
+ }
+ }
+
+ return std::string_view{}; /* Offset is at or beyond the end of the buffer: empty view */
+ }
};
static_assert(CM_USER_SHIFT + 7 < sizeof(html_tag::flags) * NBBY);
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 2b0f63c02..d20de03c0 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -448,8 +448,9 @@ lua_html_foreach_tag (lua_State *L)
auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
ltag->tag = tag;
ltag->html = hc;
+ auto ct = ltag->tag->get_content(hc->parsed);
rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
- lua_pushinteger (L, tag->get_content_length());
+ lua_pushinteger (L, ct.size());
/* Leaf flag */
if (tag->children.empty()) {
@@ -579,17 +580,16 @@ lua_html_tag_get_content (lua_State *L)
struct rspamd_lua_text *t;
if (ltag) {
- auto clen = ltag->tag->get_content_length();
- if (ltag->html && clen && ltag->html->parsed.size() > ltag->tag->content_offset) {
- if (ltag->html->parsed.size() - ltag->tag->content_offset < clen) {
- clen = ltag->html->parsed.size() - ltag->tag->content_offset;
+
+ if (ltag->html) {
+ auto ct = ltag->tag->get_content(ltag->html->parsed);
+ if (ct.size() > 0) {
+ t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
+ rspamd_lua_setclass(L, "rspamd{text}", -1);
+ t->start = ct.data();
+ t->len = ct.size();
+ t->flags = 0;
}
- t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
- rspamd_lua_setclass (L, "rspamd{text}", -1);
- t->start = reinterpret_cast<const char *>(ltag->html->parsed.data()) +
- ltag->tag->content_offset;
- t->len = clen;
- t->flags = 0;
}
else {
lua_pushnil (L);
@@ -609,7 +609,13 @@ lua_html_tag_get_content_length (lua_State *L)
struct lua_html_tag *ltag = lua_check_html_tag (L, 1);
if (ltag) {
- lua_pushinteger (L, ltag->tag->closing.start - ltag->tag->content_offset);
+ if (ltag->html) {
+ auto ct = ltag->tag->get_content(ltag->html->parsed);
+ lua_pushinteger (L, ct.size());
+ }
+ else {
+ lua_pushinteger (L, ltag->tag->get_content_length());
+ }
}
else {
return luaL_error (L, "invalid arguments");
More information about the Commits mailing list