commit 5b703a4: [Rework] Html: Add traverse function

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Jun 7 16:35:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-07 15:13:55 +0100
URL: https://github.com/rspamd/rspamd/commit/5b703a46b4b06dcdc2d1910940cd3ea105ab7e39

[Rework] Html: Add traverse function

---
 src/libserver/html/html.cxx |   2 +-
 src/libserver/html/html.hxx |  46 ++++++++++++++++--
 src/lua/lua_html.cxx        | 111 +++++++++++++++++---------------------------
 3 files changed, 86 insertions(+), 73 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 40ef240d5..a459ee0c6 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -108,7 +108,7 @@ html_check_balance(struct html_tag *tag,
 	}
 
 	/* Misuse */
-	return false;
+	RSPAMD_UNREACHABLE;
 }
 
 static auto
diff --git a/src/libserver/html/html.hxx b/src/libserver/html/html.hxx
index c3b65a06f..5624f5d2e 100644
--- a/src/libserver/html/html.hxx
+++ b/src/libserver/html/html.hxx
@@ -26,6 +26,7 @@
 
 #include <vector>
 #include <memory>
+#include "function2/function2.hpp"
 
 namespace rspamd::html {
 
@@ -64,12 +65,51 @@ struct html_content {
 		return static_cast<html_content* >(ptr);
 	}
 
-private:
-	~html_content() {
-		g_node_destroy(html_tags);
+	enum class traverse_type {
+		PRE_ORDER,
+		POST_ORDER
+	};
+	auto traverse_tags(fu2::function<bool(const html_tag *)> &&func,
+					traverse_type how = traverse_type::PRE_ORDER) const -> bool {
+
+		auto rec_functor_pre_order = [&](const html_tag *root, auto &&rec) -> bool {
+			if (func(root)) {
+
+				for (const auto *c : root->children) {
+					if (!rec(c, rec)) {
+						return false;
+					}
+				}
+
+				return true;
+			}
+			return false;
+		};
+		auto rec_functor_post_order = [&](const html_tag *root, auto &&rec) -> bool {
+			for (const auto *c : root->children) {
+				if (!rec(c, rec)) {
+					return false;
+				}
+			}
+
+			return func(root);
+		};
+
+		switch(how) {
+		case traverse_type::PRE_ORDER:
+			return rec_functor_pre_order(root_tag, rec_functor_pre_order);
+		case traverse_type::POST_ORDER:
+			return rec_functor_post_order(root_tag, rec_functor_post_order);
+		default:
+			RSPAMD_UNREACHABLE;
+		}
 	}
+
+private:
+	~html_content() = default;
 };
 
+
 }
 
 #endif //RSPAMD_HTML_HXX
diff --git a/src/lua/lua_html.cxx b/src/lua/lua_html.cxx
index 91df192a9..76ca56de5 100644
--- a/src/lua/lua_html.cxx
+++ b/src/lua/lua_html.cxx
@@ -191,7 +191,7 @@ lua_check_html (lua_State * L, gint pos)
 
 struct lua_html_tag {
 	rspamd::html::html_content *html;
-	rspamd::html::html_tag *tag;
+	const rspamd::html::html_tag *tag;
 };
 
 static struct lua_html_tag *
@@ -427,72 +427,23 @@ lua_html_get_blocks (lua_State *L)
 	return 1;
 }
 
-struct lua_html_traverse_ud {
-	lua_State *L;
-	rspamd::html::html_content *html;
-	gint cbref;
-	robin_hood::unordered_flat_set<int> tags;
-	gboolean any;
-};
-
-static gboolean
-lua_html_node_foreach_cb (GNode *n, gpointer d)
-{
-	struct lua_html_traverse_ud *ud = (struct lua_html_traverse_ud *)d;
-	auto *tag = (rspamd::html::html_tag *)n->data;
-	struct lua_html_tag *ltag;
-
-	if (tag && (ud->any || ud->tags.contains(tag->id))) {
-
-		lua_rawgeti (ud->L, LUA_REGISTRYINDEX, ud->cbref);
-
-		ltag = static_cast<lua_html_tag *>(lua_newuserdata(ud->L, sizeof(*ltag)));
-		ltag->tag = tag;
-		ltag->html = ud->html;
-		rspamd_lua_setclass (ud->L, "rspamd{html_tag}", -1);
-		lua_pushinteger (ud->L, tag->content_length);
-
-		/* Leaf flag */
-		if (g_node_first_child (n)) {
-			lua_pushboolean (ud->L, false);
-		}
-		else {
-			lua_pushboolean (ud->L, true);
-		}
-
-		if (lua_pcall (ud->L, 3, 1, 0) != 0) {
-			msg_err ("error in foreach_tag callback: %s", lua_tostring (ud->L, -1));
-			lua_pop (ud->L, 1);
-			return TRUE;
-		}
 
-		if (lua_toboolean (ud->L, -1)) {
-			lua_pop (ud->L, 1);
-			return TRUE;
-		}
-
-		lua_pop (ud->L, 1);
-	}
-
-	return FALSE;
-}
 
 static gint
 lua_html_foreach_tag (lua_State *L)
 {
 	LUA_TRACE_POINT;
 	auto *hc = lua_check_html (L, 1);
-	struct lua_html_traverse_ud ud;
 	const gchar *tagname;
 	gint id;
+	auto any = false;
+	robin_hood::unordered_flat_set<int> tags;
 
-	ud.any = FALSE;
-	ud.html = hc;
 
 	if (lua_type (L, 2) == LUA_TSTRING) {
 		tagname = luaL_checkstring (L, 2);
 		if (strcmp (tagname, "any") == 0) {
-			ud.any = TRUE;
+			any = true;
 		}
 		else {
 			id = rspamd_html_tag_by_name(tagname);
@@ -502,7 +453,7 @@ lua_html_foreach_tag (lua_State *L)
 			}
 
 
-			ud.tags.insert(id);
+			tags.insert(id);
 		}
 	}
 	else if (lua_type (L, 2) == LUA_TTABLE) {
@@ -511,7 +462,7 @@ lua_html_foreach_tag (lua_State *L)
 		for (lua_pushnil (L); lua_next (L, -2); lua_pop (L, 1)) {
 			tagname = luaL_checkstring (L, -1);
 			if (strcmp (tagname, "any") == 0) {
-				ud.any = TRUE;
+				any = TRUE;
 			}
 			else {
 				id = rspamd_html_tag_by_name (tagname);
@@ -519,25 +470,48 @@ lua_html_foreach_tag (lua_State *L)
 				if (id == -1) {
 					return luaL_error (L, "invalid tagname: %s", tagname);
 				}
-				ud.tags.insert(id);
+				tags.insert(id);
 			}
 		}
 
 		lua_pop (L, 1);
 	}
 
-	if (hc && (ud.any || !ud.tags.empty()) && lua_isfunction (L, 3)) {
-		if (hc->html_tags) {
+	if (hc && (any || !tags.empty()) && lua_isfunction (L, 3)) {
+		hc->traverse_tags([&](const rspamd::html::html_tag *tag) -> bool {
+			if (tag && (any || tags.contains(tag->id))) {
+				lua_pushvalue(L, 3);
 
-			lua_pushvalue (L, 3);
-			ud.cbref = luaL_ref (L, LUA_REGISTRYINDEX);
-			ud.L = L;
+				auto *ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(lua_html_tag)));
+				ltag->tag = tag;
+				ltag->html = hc;
+				rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
+				lua_pushinteger (L, tag->content_length);
 
-			g_node_traverse (hc->html_tags, G_PRE_ORDER, G_TRAVERSE_ALL, -1,
-					lua_html_node_foreach_cb, &ud);
+				/* Leaf flag */
+				if (tag->children.empty()) {
+					lua_pushboolean (L, true);
+				}
+				else {
+					lua_pushboolean (L, false);
+				}
 
-			luaL_unref (L, LUA_REGISTRYINDEX, ud.cbref);
-		}
+				if (lua_pcall (L, 3, 1, 0) != 0) {
+					msg_err ("error in foreach_tag callback: %s", lua_tostring (L, -1));
+					lua_pop (L, 1);
+					return false;
+				}
+
+				if (lua_toboolean (L, -1)) {
+					lua_pop(L, 1);
+					return false;
+				}
+
+				lua_pop(L, 1);
+			}
+
+			return true;
+		});
 	}
 	else {
 		return luaL_error (L, "invalid arguments");
@@ -575,14 +549,13 @@ lua_html_tag_get_parent (lua_State *L)
 {
 	LUA_TRACE_POINT;
 	struct lua_html_tag *ltag = lua_check_html_tag (L, 1), *ptag;
-	GNode *node;
 
 	if (ltag != NULL) {
-		node = ltag->tag->parent;
+		auto *parent = ltag->tag->parent;
 
-		if (node && node->data) {
+		if (parent) {
 			ptag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(*ptag)));
-			ptag->tag = static_cast<rspamd::html::html_tag *>(node->data);
+			ptag->tag = static_cast<rspamd::html::html_tag *>(parent);
 			ptag->html = ltag->html;
 			rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
 		}


More information about the Commits mailing list