commit ad9f3e2: [Rework] Remove tag name string
Vsevolod Stakhov
vsevolod at highsecure.ru
Tue Jun 29 17:35:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-06-29 13:21:47 +0100
URL: https://github.com/rspamd/rspamd/commit/ad9f3e26b430e9a1d39fc2109b22e580e3e91a25
[Rework] Remove tag name string
---
src/libserver/html/html.cxx | 29 ++++++++++++++++-------------
src/libserver/html/html_tag.hxx | 1 -
src/libserver/html/html_tag_defs.hxx | 9 +++++++++
3 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index c5d35105c..a55266b19 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -369,12 +369,14 @@ find_tag_component_name(rspamd_mempool_t *pool,
struct tag_content_parser_state {
int cur_state = 0;
const char *saved_p = nullptr;
+ const char *tag_name_start = nullptr;
std::optional<html_component_type> cur_component;
void reset()
{
cur_state = 0;
saved_p = nullptr;
+ tag_name_start = nullptr;
cur_component = std::nullopt;
}
};
@@ -441,22 +443,22 @@ html_parse_tag_content(rspamd_mempool_t *pool,
}
else if (g_ascii_isalpha (*in)) {
state = parse_name;
- tag->name = std::string_view{in, 0};
+ parser_env.tag_name_start = in;
}
break;
case parse_name:
- if (g_ascii_isspace (*in) || *in == '>' || *in == '/') {
- const auto *start = tag->name.begin();
+ if ((g_ascii_isspace (*in) || *in == '>' || *in == '/') && parser_env.tag_name_start) {
+ const auto *start = parser_env.tag_name_start;
g_assert (in >= start);
if (*in == '/') {
tag->flags |= FL_CLOSED;
}
- tag->name = std::string_view{start, (std::size_t)(in - start)};
+ const auto tag_name_len = in - start;
- if (tag->name.empty()) {
+ if (tag_name_len== 0) {
hc->flags |= RSPAMD_HTML_FLAG_BAD_ELEMENTS;
tag->id = -1;
tag->flags |= FL_BROKEN;
@@ -466,14 +468,13 @@ html_parse_tag_content(rspamd_mempool_t *pool,
/*
* Copy tag name to the temporary buffer for modifications
*/
- auto *s = rspamd_mempool_alloc_buffer(pool, tag->name.size() + 1);
- rspamd_strlcpy(s, tag->name.data(), tag->name.size() + 1);
+ auto *s = rspamd_mempool_alloc_buffer(pool, tag_name_len + 1);
+ rspamd_strlcpy(s, parser_env.tag_name_start, tag_name_len + 1);
auto nsize = rspamd_html_decode_entitles_inplace(s,
- tag->name.size());
+ tag_name_len);
nsize = rspamd_str_lc_utf8(s, nsize);
- tag->name = std::string_view{s, nsize};
- const auto *tag_def = rspamd::html::html_tags_defs.by_name(tag->name);
+ const auto *tag_def = rspamd::html::html_tags_defs.by_name({s, nsize});
if (tag_def == nullptr) {
hc->flags |= RSPAMD_HTML_FLAG_UNKNOWN_ELEMENTS;
@@ -1847,7 +1848,8 @@ html_debug_structure(const html_content &hc) -> std::string
if (hc.root_tag) {
auto rec_functor = [&](const html_tag *t, int level, auto rec_functor) -> void {
std::string pluses(level, '+');
- output += fmt::format("{}{};", pluses, t->name);
+ output += fmt::format("{}{};", pluses,
+ html_tags_defs.name_by_id_safe(t->id));
for (const auto *cld : t->children) {
rec_functor(cld, level + 1, rec_functor);
}
@@ -2066,12 +2068,13 @@ const gchar *
rspamd_html_tag_name(void *p, gsize *len)
{
auto *tag = reinterpret_cast<rspamd::html::html_tag *>(p);
+ auto tname = rspamd::html::html_tags_defs.name_by_id_safe(tag->id);
if (len) {
- *len = tag->name.size();
+ *len = tname.size();
}
- return tag->name.data();
+ return tname.data();
}
struct html_image*
diff --git a/src/libserver/html/html_tag.hxx b/src/libserver/html/html_tag.hxx
index 36110c8c7..a79195a5f 100644
--- a/src/libserver/html/html_tag.hxx
+++ b/src/libserver/html/html_tag.hxx
@@ -78,7 +78,6 @@ struct html_tag {
std::uint32_t flags = 0;
std::int32_t id = -1;
- std::string_view name;
std::vector<html_tag_component> parameters;
html_tag_extra_t extra;
diff --git a/src/libserver/html/html_tag_defs.hxx b/src/libserver/html/html_tag_defs.hxx
index 36d3ba4ed..fe08d081b 100644
--- a/src/libserver/html/html_tag_defs.hxx
+++ b/src/libserver/html/html_tag_defs.hxx
@@ -190,6 +190,15 @@ public:
return nullptr;
}
+
+ auto name_by_id_safe(int id) const -> std::string_view {
+ auto it = tag_by_id.find(static_cast<tag_id_t>(id));
+ if (it != tag_by_id.end()) {
+ return it->second.name;
+ }
+
+ return "unknown";
+ }
};
}
More information about the Commits
mailing list