commit 5b703a4: [Rework] Html: Add traverse function
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Jun 7 16:35:06 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-06-07 15:13:55 +0100
URL: https://github.com/rspamd/rspamd/commit/5b703a46b4b06dcdc2d1910940cd3ea105ab7e39
[Rework] Html: Add traverse function
---
src/libserver/html/html.cxx | 2 +-
src/libserver/html/html.hxx | 46 ++++++++++++++++--
src/lua/lua_html.cxx | 111 +++++++++++++++++---------------------------
3 files changed, 86 insertions(+), 73 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 40ef240d5..a459ee0c6 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -108,7 +108,7 @@ html_check_balance(struct html_tag *tag,
}
/* Misuse */
- return false;
+ RSPAMD_UNREACHABLE;
}
static auto
diff --git a/src/libserver/html/html.hxx b/src/libserver/html/html.hxx
index c3b65a06f..5624f5d2e 100644
--- a/src/libserver/html/html.hxx
+++ b/src/libserver/html/html.hxx
@@ -26,6 +26,7 @@
#include <vector>
#include <memory>
+#include "function2/function2.hpp"
namespace rspamd::html {
@@ -64,12 +65,51 @@ struct html_content {
return static_cast<html_content* >(ptr);
}
-private:
- ~html_content() {
- g_node_destroy(html_tags);
+ enum class traverse_type {
+ PRE_ORDER,
+ POST_ORDER
+ };
+ auto traverse_tags(fu2::function<bool(const html_tag *)> &&func,
+ traverse_type how = traverse_type::PRE_ORDER) const -> bool {
+
+ auto rec_functor_pre_order = [&](const html_tag *root, auto &&rec) -> bool {
+ if (func(root)) {
+
+ for (const auto *c : root->children) {
+ if (!rec(c, rec)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+ return false;
+ };
+ auto rec_functor_post_order = [&](const html_tag *root, auto &&rec) -> bool {
+ for (const auto *c : root->children) {
+ if (!rec(c, rec)) {
+ return false;
+ }
+ }
+
+ return func(root);
+ };
+
+ switch(how) {
+ case traverse_type::PRE_ORDER:
+ return rec_functor_pre_order(root_tag, rec_functor_pre_order);
+ case traverse_type::POST_ORDER:
+ return rec_functor_post_order(root_tag, rec_functor_post_order);
+ default:
+ RSPAMD_UNREACHABLE;
+ }
}
+
+private:
+ ~html_content() = default;
};
+
}
#endif //RSPAMD_HTML_HXX
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 91df192a9..76ca56de5 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -191,7 +191,7 @@ lua_check_html (lua_State * L, gint pos)
struct lua_html_tag {
rspamd::html::html_content *html;
- rspamd::html::html_tag *tag;
+ const rspamd::html::html_tag *tag;
};
static struct lua_html_tag *
@@ -427,72 +427,23 @@ lua_html_get_blocks (lua_State *L)
return 1;
}
-struct lua_html_traverse_ud {
- lua_State *L;
- rspamd::html::html_content *html;
- gint cbref;
- robin_hood::unordered_flat_set<int> tags;
- gboolean any;
-};
-
-static gboolean
-lua_html_node_foreach_cb (GNode *n, gpointer d)
-{
- struct lua_html_traverse_ud *ud = (struct lua_html_traverse_ud *)d;
- auto *tag = (rspamd::html::html_tag *)n->data;
- struct lua_html_tag *ltag;
-
- if (tag && (ud->any || ud->tags.contains(tag->id))) {
-
- lua_rawgeti (ud->L, LUA_REGISTRYINDEX, ud->cbref);
-
- ltag = static_cast<lua_html_tag *>(lua_newuserdata(ud->L, sizeof(*ltag)));
- ltag->tag = tag;
- ltag->html = ud->html;
- rspamd_lua_setclass (ud->L, "rspamd{html_tag}", -1);
- lua_pushinteger (ud->L, tag->content_length);
-
- /* Leaf flag */
- if (g_node_first_child (n)) {
- lua_pushboolean (ud->L, false);
- }
- else {
- lua_pushboolean (ud->L, true);
- }
-
- if (lua_pcall (ud->L, 3, 1, 0) != 0) {
- msg_err ("error in foreach_tag callback: %s", lua_tostring (ud->L, -1));
- lua_pop (ud->L, 1);
- return TRUE;
- }
- if (lua_toboolean (ud->L, -1)) {
- lua_pop (ud->L, 1);
- return TRUE;
- }
-
- lua_pop (ud->L, 1);
- }
-
- return FALSE;
-}
static gint
lua_html_foreach_tag (lua_State *L)
{
LUA_TRACE_POINT;
auto *hc = lua_check_html (L, 1);
- struct lua_html_traverse_ud ud;
const gchar *tagname;
gint id;
+ auto any = false;
+ robin_hood::unordered_flat_set<int> tags;
- ud.any = FALSE;
- ud.html = hc;
if (lua_type (L, 2) == LUA_TSTRING) {
tagname = luaL_checkstring (L, 2);
if (strcmp (tagname, "any") == 0) {
- ud.any = TRUE;
+ any = true;
}
else {
id = rspamd_html_tag_by_name(tagname);
@@ -502,7 +453,7 @@ lua_html_foreach_tag (lua_State *L)
}
- ud.tags.insert(id);
+ tags.insert(id);
}
}
else if (lua_type (L, 2) == LUA_TTABLE) {
@@ -511,7 +462,7 @@ lua_html_foreach_tag (lua_State *L)
for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
tagname = luaL_checkstring (L, -1);
if (strcmp (tagname, "any") == 0) {
- ud.any = TRUE;
+ any = TRUE;
}
else {
id = rspamd_html_tag_by_name (tagname);
@@ -519,25 +470,48 @@ lua_html_foreach_tag (lua_State *L)
if (id == -1) {
return luaL_error (L, "invalid tagname: %s", tagname);
}
- ud.tags.insert(id);
+ tags.insert(id);
}
}
lua_pop (L, 1);
}
- if (hc && (ud.any || !ud.tags.empty()) && lua_isfunction (L, 3)) {
- if (hc->html_tags) {
+ if (hc && (any || !tags.empty()) && lua_isfunction (L, 3)) {
+ hc->traverse_tags([&](const rspamd::html::html_tag *tag) -> bool {
+ if (tag && (any || tags.contains(tag->id))) {
+ lua_pushvalue(L, 3);
- lua_pushvalue (L, 3);
- ud.cbref = luaL_ref (L, LUA_REGISTRYINDEX);
- ud.L = L;
+ auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
+ ltag->tag = tag;
+ ltag->html = hc;
+ rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
+ lua_pushinteger (L, tag->content_length);
- g_node_traverse (hc->html_tags, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
- lua_html_node_foreach_cb, &ud);
+ /* Leaf flag */
+ if (tag->children.empty()) {
+ lua_pushboolean (L, true);
+ }
+ else {
+ lua_pushboolean (L, false);
+ }
- luaL_unref (L, LUA_REGISTRYINDEX, ud.cbref);
- }
+ if (lua_pcall (L, 3, 1, 0) != 0) {
+ msg_err ("error in foreach_tag callback: %s", lua_tostring (L, -1));
+ lua_pop (L, 1);
+ return false;
+ }
+
+ if (lua_toboolean (L, -1)) {
+ lua_pop(L, 1);
+ return false;
+ }
+
+ lua_pop(L, 1);
+ }
+
+ return true;
+ });
}
else {
return luaL_error (L, "invalid arguments");
@@ -575,14 +549,13 @@ lua_html_tag_get_parent (lua_State *L)
{
LUA_TRACE_POINT;
struct lua_html_tag *ltag = lua_check_html_tag (L, 1), *ptag;
- GNode *node;
if (ltag != NULL) {
- node = ltag->tag->parent;
+ auto *parent = ltag->tag->parent;
- if (node && node->data) {
+ if (parent) {
ptag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(*ptag)));
- ptag->tag = static_cast<rspamd::html::html_tag *>(node->data);
+ ptag->tag = static_cast<rspamd::html::html_tag *>(parent);
ptag->html = ltag->html;
rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
}
More information about the Commits mailing list