commit 6c18226: [Rework] Html: Rework lua bindings
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu May 27 14:07:13 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-05-27 15:05:01 +0100
URL: https://github.com/rspamd/rspamd/commit/6c1822676cb7b41bed1c76713bb53d187c28a27a (HEAD -> master)
[Rework] Html: Rework lua bindings
---
doc/Makefile | 4 +-
src/libserver/html/html.cxx | 12 ++++
src/libserver/html/html.h | 8 +--
src/lua/CMakeLists.txt | 2 +-
src/lua/{lua_html.c => lua_html.cxx} | 125 ++++++++++++++++-------------------
5 files changed, 76 insertions(+), 75 deletions(-)
diff --git a/doc/Makefile b/doc/Makefile
index e93481bb7..548c6b292 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -73,8 +73,8 @@ rspamd_tcp: ../src/lua/lua_tcp.c
$(LUADOC) < ../src/lua/lua_tcp.c > markdown/lua/rspamd_tcp.md
rspamd_mempool: ../src/lua/lua_mempool.c
$(LUADOC) < ../src/lua/lua_mempool.c > markdown/lua/rspamd_mempool.md
-rspamd_html: ../src/lua/lua_html.c
- $(LUADOC) < ../src/lua/lua_html.c > markdown/lua/rspamd_html.md
+rspamd_html: ../src/lua/lua_html.cxx
+ $(LUADOC) < ../src/lua/lua_html.cxx > markdown/lua/rspamd_html.md
rspamd_util: ../src/lua/lua_util.c
$(LUADOC) < ../src/lua/lua_util.c > markdown/lua/rspamd_util.md
rspamd_sqlite3: ../src/lua/lua_sqlite3.c
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 2ebf46305..cfafbf736 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -2386,4 +2386,16 @@ rspamd_html_tag_by_id(gint id)
}
return nullptr;
+}
+
+const gchar *
+rspamd_html_tag_name(void *p, gsize *len)
+{
+ auto *tag = reinterpret_cast<rspamd::html::html_tag *>(p);
+
+ if (len) {
+ *len = tag->name.size();
+ }
+
+ return tag->name.data();
}
\ No newline at end of file
diff --git a/src/libserver/html/html.h b/src/libserver/html/html.h
index 46ae99d48..6106688f3 100644
--- a/src/libserver/html/html.h
+++ b/src/libserver/html/html.h
@@ -152,13 +152,13 @@ const gchar *rspamd_html_tag_by_id(gint id);
gint rspamd_html_tag_by_name(const gchar *name);
/**
- * Extract URL from HTML tag component and sets component elements if needed
- * @param pool
- * @param start
+ * Gets a name for a tag
+ * @param tag
* @param len
- * @param comp
* @return
*/
+const gchar *rspamd_html_tag_name(void *tag, gsize *len);
+
#ifdef __cplusplus
}
diff --git a/src/lua/CMakeLists.txt b/src/lua/CMakeLists.txt
index 4782d3f8e..a504f99f8 100644
--- a/src/lua/CMakeLists.txt
+++ b/src/lua/CMakeLists.txt
@@ -21,7 +21,7 @@ SET(LUASRC ${CMAKE_CURRENT_SOURCE_DIR}/lua_common.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_url.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_util.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_tcp.c
- ${CMAKE_CURRENT_SOURCE_DIR}/lua_html.c
+ ${CMAKE_CURRENT_SOURCE_DIR}/lua_html.cxx
${CMAKE_CURRENT_SOURCE_DIR}/lua_sqlite3.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_cryptobox.c
${CMAKE_CURRENT_SOURCE_DIR}/lua_map.c
diff --git a/src/lua/lua_html.c b/src/lua/lua_html.cxx
similarity index 84%
rename from src/lua/lua_html.c
rename to src/lua/lua_html.cxx
index 5b1c9c452..629b4f66f 100644
--- a/src/lua/lua_html.c
+++ b/src/lua/lua_html.cxx
@@ -16,8 +16,12 @@
#include "lua_common.h"
#include "message.h"
#include "libserver/html/html.h"
+#include "libserver/html/html_tag.hxx"
#include "images.h"
-#include "contrib/mumhash/mum.h"
+
+#include <contrib/robin-hood/robin_hood.h>
+#include <frozen/string.h>
+#include <frozen/unordered_map.h>
/***
* @module rspamd_html
@@ -186,7 +190,7 @@ lua_check_html (lua_State * L, gint pos)
struct lua_html_tag {
struct html_content *html;
- struct html_tag *tag;
+ rspamd::html::html_tag *tag;
};
static struct lua_html_tag *
@@ -216,6 +220,20 @@ lua_html_has_tag (lua_State *L)
return 1;
}
+constexpr const auto prop_map = frozen::make_unordered_map<frozen::string, int>({
+ {"no_html", RSPAMD_HTML_FLAG_BAD_START},
+ {"bad_start", RSPAMD_HTML_FLAG_BAD_START},
+ {"bad_element", RSPAMD_HTML_FLAG_BAD_ELEMENTS},
+ {"bad_elements", RSPAMD_HTML_FLAG_BAD_ELEMENTS},
+ {"xml", RSPAMD_HTML_FLAG_XML},
+ {"unknown_element", RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS},
+ {"unknown_elements", RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS},
+ {"duplicate_element", RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS},
+ {"duplicate_elements", RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS},
+ {"unbalanced", RSPAMD_HTML_FLAG_UNBALANCED},
+ {"data_urls", RSPAMD_HTML_FLAG_HAS_DATA_URLS},
+});
+
static gint
lua_html_has_property (lua_State *L)
{
@@ -225,35 +243,10 @@ lua_html_has_property (lua_State *L)
gboolean ret = FALSE;
if (hc && propname) {
- /*
- * - `no_html`
- * - `bad_element`
- * - `xml`
- * - `unknown_element`
- * - `duplicate_element`
- * - `unbalanced`
- * - `data_urls`
- */
- if (strcmp (propname, "no_html") == 0) {
- ret = hc->flags & RSPAMD_HTML_FLAG_BAD_START;
- }
- else if (strcmp (propname, "bad_element") == 0) {
- ret = hc->flags & RSPAMD_HTML_FLAG_BAD_ELEMENTS;
- }
- else if (strcmp (propname, "xml") == 0) {
- ret = hc->flags & RSPAMD_HTML_FLAG_XML;
- }
- else if (strcmp (propname, "unknown_element") == 0) {
- ret = hc->flags & RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS;
- }
- else if (strcmp (propname, "duplicate_element") == 0) {
- ret = hc->flags & RSPAMD_HTML_FLAG_DUPLICATE_ELEMENTS;
- }
- else if (strcmp (propname, "unbalanced") == 0) {
- ret = hc->flags & RSPAMD_HTML_FLAG_UNBALANCED;
- }
- else if (strcmp (propname, "data_urls") == 0) {
- ret = hc->flags & RSPAMD_HTML_FLAG_HAS_DATA_URLS;
+ auto found_prop = prop_map.find(frozen::string(propname));
+
+ if (found_prop != prop_map.end()) {
+ ret = hc->flags & found_prop->second;
}
}
@@ -277,7 +270,7 @@ lua_html_push_image (lua_State *L, struct html_image *img)
if (img->flags & RSPAMD_HTML_FLAG_IMAGE_DATA) {
struct rspamd_lua_text *t;
- t = lua_newuserdata (L, sizeof (*t));
+ t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
t->start = img->src;
t->len = strlen (img->src);
t->flags = 0;
@@ -293,7 +286,7 @@ lua_html_push_image (lua_State *L, struct html_image *img)
if (img->url) {
lua_pushstring (L, "url");
- purl = lua_newuserdata (L, sizeof (gpointer));
+ purl = static_cast<rspamd_url **>(lua_newuserdata(L, sizeof(gpointer)));
*purl = img->url;
rspamd_lua_setclass (L, "rspamd{url}", -1);
lua_settable (L, -3);
@@ -301,8 +294,8 @@ lua_html_push_image (lua_State *L, struct html_image *img)
if (img->tag) {
lua_pushstring (L, "tag");
- ltag = lua_newuserdata (L, sizeof (struct lua_html_tag));
- ltag->tag = img->tag;
+ ltag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(struct lua_html_tag)));
+ ltag->tag = static_cast<rspamd::html::html_tag *>(img->tag);
ltag->html = NULL;
rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
lua_settable (L, -3);
@@ -360,8 +353,10 @@ lua_html_push_block (lua_State *L, struct html_block *bl)
lua_createtable (L, 0, 6);
if (bl->tag) {
+ gsize name_len;
+ const gchar *name = rspamd_html_tag_name(bl->tag, &name_len);
lua_pushstring (L, "tag");
- lua_pushlstring (L, bl->tag->name.start, bl->tag->name.len);
+ lua_pushlstring (L, name, name_len);
lua_settable (L, -3);
}
@@ -394,9 +389,9 @@ lua_html_push_block (lua_State *L, struct html_block *bl)
if (bl->style.len > 0) {
lua_pushstring (L, "style");
- t = lua_newuserdata (L, sizeof (*t));
+ t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
rspamd_lua_setclass (L, "rspamd{text}", -1);
- t->start = bl->style.start;
+ t->start = bl->style.begin;
t->len = bl->style.len;
t->flags = 0;
lua_settable (L, -3);
@@ -425,7 +420,7 @@ lua_html_get_blocks (lua_State *L)
lua_createtable (L, hc->blocks->len, 0);
for (i = 0; i < hc->blocks->len; i ++) {
- bl = g_ptr_array_index (hc->blocks, i);
+ bl = static_cast<decltype(bl)>(g_ptr_array_index (hc->blocks, i));
lua_html_push_block (L, bl);
lua_rawseti (L, -2, i + 1);
}
@@ -445,23 +440,22 @@ struct lua_html_traverse_ud {
lua_State *L;
struct html_content *html;
gint cbref;
- GHashTable *tags;
+ robin_hood::unordered_flat_set<int> tags;
gboolean any;
};
static gboolean
lua_html_node_foreach_cb (GNode *n, gpointer d)
{
- struct lua_html_traverse_ud *ud = d;
- struct html_tag *tag = n->data;
+ struct lua_html_traverse_ud *ud = (struct lua_html_traverse_ud *)d;
+ auto *tag = (rspamd::html::html_tag *)n->data;
struct lua_html_tag *ltag;
- if (tag && (ud->any || g_hash_table_lookup (ud->tags,
- GSIZE_TO_POINTER (mum_hash64 (tag->id, 0))))) {
+ if (tag && (ud->any || ud->tags.contains(tag->id))) {
lua_rawgeti (ud->L, LUA_REGISTRYINDEX, ud->cbref);
- ltag = lua_newuserdata (ud->L, sizeof (*ltag));
+ ltag = static_cast<lua_html_tag *>(lua_newuserdata(ud->L, sizeof(*ltag)));
ltag->tag = tag;
ltag->html = ud->html;
rspamd_lua_setclass (ud->L, "rspamd{html_tag}", -1);
@@ -501,7 +495,6 @@ lua_html_foreach_tag (lua_State *L)
const gchar *tagname;
gint id;
- ud.tags = g_hash_table_new (g_direct_hash, g_direct_equal);
ud.any = FALSE;
ud.html = hc;
@@ -511,14 +504,14 @@ lua_html_foreach_tag (lua_State *L)
ud.any = TRUE;
}
else {
- id = rspamd_html_tag_by_name (tagname);
+ id = rspamd_html_tag_by_name(tagname);
if (id == -1) {
- g_hash_table_unref (ud.tags);
return luaL_error (L, "invalid tagname: %s", tagname);
}
- g_hash_table_insert (ud.tags, GSIZE_TO_POINTER (mum_hash64 (id, 0)),
- "1");
+
+
+ ud.tags.insert(id);
}
}
else if (lua_type (L, 2) == LUA_TTABLE) {
@@ -533,18 +526,16 @@ lua_html_foreach_tag (lua_State *L)
id = rspamd_html_tag_by_name (tagname);
if (id == -1) {
- g_hash_table_unref (ud.tags);
return luaL_error (L, "invalid tagname: %s", tagname);
}
- g_hash_table_insert (ud.tags,
- GSIZE_TO_POINTER (mum_hash64 (id, 0)), "1");
+ ud.tags.insert(id);
}
}
lua_pop (L, 1);
}
- if (hc && (ud.any || g_hash_table_size (ud.tags) > 0) && lua_isfunction (L, 3)) {
+ if (hc && (ud.any || !ud.tags.empty()) && lua_isfunction (L, 3)) {
if (hc->html_tags) {
lua_pushvalue (L, 3);
@@ -558,12 +549,9 @@ lua_html_foreach_tag (lua_State *L)
}
}
else {
- g_hash_table_unref (ud.tags);
return luaL_error (L, "invalid arguments");
}
- g_hash_table_unref (ud.tags);
-
return 0;
}
@@ -602,8 +590,8 @@ lua_html_tag_get_parent (lua_State *L)
node = ltag->tag->parent;
if (node && node->data) {
- ptag = lua_newuserdata (L, sizeof (*ptag));
- ptag->tag = node->data;
+ ptag = static_cast<lua_html_tag *>(lua_newuserdata(L, sizeof(*ptag)));
+ ptag->tag = static_cast<rspamd::html::html_tag *>(node->data);
ptag->html = ltag->html;
rspamd_lua_setclass (L, "rspamd{html_tag}", -1);
}
@@ -670,9 +658,10 @@ lua_html_tag_get_content (lua_State *L)
if (ltag) {
if (ltag->html && ltag->tag->content_length &&
ltag->html->parsed->len >= ltag->tag->content_offset + ltag->tag->content_length) {
- t = lua_newuserdata (L, sizeof (*t));
+ t = static_cast<rspamd_lua_text *>(lua_newuserdata(L, sizeof(*t)));
rspamd_lua_setclass (L, "rspamd{text}", -1);
- t->start = ltag->html->parsed->data + ltag->tag->content_offset;
+ t->start = reinterpret_cast<const char *>(ltag->html->parsed->data) +
+ ltag->tag->content_offset;
t->len = ltag->tag->content_length;
t->flags = 0;
}
@@ -712,19 +701,19 @@ lua_html_tag_get_extra (lua_State *L)
struct rspamd_url **purl;
if (ltag) {
- if (ltag->tag->extra) {
- if (ltag->tag->flags & FL_IMAGE) {
- img = ltag->tag->extra;
+ if (!std::holds_alternative<std::monostate>(ltag->tag->extra)) {
+ if (std::holds_alternative<struct html_image *>(ltag->tag->extra)) {
+ img = std::get<struct html_image *>(ltag->tag->extra);
lua_html_push_image (L, img);
}
- else if (ltag->tag->flags & FL_HREF) {
+ else if (std::holds_alternative<struct rspamd_url *>(ltag->tag->extra)) {
/* For A that's URL */
- purl = lua_newuserdata (L, sizeof (gpointer));
- *purl = ltag->tag->extra;
+ purl = static_cast<rspamd_url **>(lua_newuserdata(L, sizeof(gpointer)));
+ *purl = std::get<struct rspamd_url *>(ltag->tag->extra);
rspamd_lua_setclass (L, "rspamd{url}", -1);
}
else if (ltag->tag->flags & FL_BLOCK) {
- lua_html_push_block (L, ltag->tag->extra);
+ lua_html_push_block (L, ltag->tag->block);
}
else {
/* Unknown extra ? */
More information about the Commits
mailing list