commit 4658a09: [Project] Css: Declarations parsing logic skeleton
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Feb 18 16:56:27 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-02-18 16:18:08 +0000
URL: https://github.com/rspamd/rspamd/commit/4658a093bf5cfc6c1961e6974772bdead4dda2f4
[Project] Css: Declarations parsing logic skeleton
---
CMakeLists.txt | 1 +
src/libserver/css/css_parser.cxx | 254 ++++++++++++------------------------
src/libserver/css/css_parser.hxx | 118 +++++++++++++++++
src/libserver/css/css_property.cxx | 36 ++++-
src/libserver/css/css_property.hxx | 8 +-
src/libserver/css/css_rule.cxx | 93 ++++++++++++-
src/libserver/css/css_rule.hxx | 6 +-
src/libserver/css/css_selector.cxx | 2 +-
src/libserver/css/css_selector.hxx | 5 +-
src/libserver/css/css_tokeniser.cxx | 3 +
src/libserver/css/css_tokeniser.hxx | 9 +-
src/libserver/css/css_value.cxx | 4 +-
src/libserver/css/css_value.hxx | 4 +-
13 files changed, 356 insertions(+), 187 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 698550f07..a218e28fd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -116,6 +116,7 @@ INCLUDE_DIRECTORIES("${CMAKE_SOURCE_DIR}/"
"${CMAKE_SOURCE_DIR}/contrib/aho-corasick"
"${CMAKE_SOURCE_DIR}/contrib/lc-btrie"
"${CMAKE_SOURCE_DIR}/contrib/lua-lpeg"
+ "${CMAKE_SOURCE_DIR}/contrib/frozen/include"
"${CMAKE_BINARY_DIR}/src" #Stored in the binary dir
"${CMAKE_BINARY_DIR}/src/libcryptobox")
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 1a9231700..e4a8159f1 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -24,181 +24,97 @@
namespace rspamd::css {
-struct css_consumed_block;
-/*
- * Represents a consumed token by a parser
- */
-struct css_consumed_block {
- enum class parser_tag_type : std::uint8_t {
- css_top_block,
- css_qualified_rule,
- css_at_rule,
- css_simple_block,
- css_function,
- css_function_arg,
- css_component,
- css_selector,
- };
-
- using consumed_block_ptr = std::unique_ptr<css_consumed_block>;
-
- parser_tag_type tag;
- std::variant<std::monostate,
- std::vector<consumed_block_ptr>,
- css_parser_token> content;
-
- css_consumed_block() = delete;
-
- css_consumed_block(parser_tag_type tag) : tag(tag) {
- if (tag == parser_tag_type::css_top_block ||
- tag == parser_tag_type::css_qualified_rule ||
- tag == parser_tag_type::css_simple_block) {
- /* Pre-allocate content for known vector blocks */
- std::vector<consumed_block_ptr> vec;
- vec.reserve(4);
- content = std::move(vec);
- }
- }
- /* Construct a block from a single lexer token (for trivial blocks) */
- explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) :
- tag(tag), content(std::move(tok)) {}
-
- /* Attach a new block to the compound block, consuming block inside */
- auto attach_block(consumed_block_ptr &&block) -> bool {
- if (content.index() == 0) {
- /* Switch from monostate */
- content = std::vector<consumed_block_ptr>();
- }
- else if (content.index() == 2) {
- /* A single component, cannot attach a block ! */
- return false;
- }
-
- auto &value_vec = std::get<std::vector<consumed_block_ptr>>(content);
- value_vec.push_back(std::move(block));
+const css_consumed_block css_parser_eof_block{};
- return true;
+auto css_consumed_block::attach_block(consumed_block_ptr &&block) -> bool {
+ if (content.index() == 0) {
+ /* Switch from monostate */
+ content = std::vector<consumed_block_ptr>();
}
-
- auto assign_token(css_parser_token &&tok) -> void
- {
- content = std::move(tok);
- }
-
- /* Empty blocks used to avoid type checks in loops */
- const inline static std::vector<consumed_block_ptr> empty_block_vec{};
-
- auto get_blocks_or_empty() const -> const std::vector<consumed_block_ptr>& {
- if (content.index() == 1) {
- return std::get<std::vector<consumed_block_ptr>>(content);
- }
-
- return empty_block_vec;
- }
-
- auto get_token_or_empty() const -> const css_parser_token& {
- if (content.index() == 2) {
- return std::get<css_parser_token>(content);
- }
-
- return css_parser_eof_token();
+ else if (content.index() == 2) {
+ /* A single component, cannot attach a block ! */
+ return false;
}
- auto token_type_str(void) const -> const char *
- {
- const auto *ret = "";
+ auto &value_vec = std::get<std::vector<consumed_block_ptr>>(content);
+ value_vec.push_back(std::move(block));
- switch(tag) {
- case parser_tag_type::css_top_block:
- ret = "top";
- break;
- case parser_tag_type::css_qualified_rule:
- ret = "qualified rule";
- break;
- case parser_tag_type::css_at_rule:
- ret = "at rule";
- break;
- case parser_tag_type::css_simple_block:
- ret = "simple block";
- break;
- case parser_tag_type::css_function:
- ret = "function";
- break;
- case parser_tag_type::css_function_arg:
- ret = "function args";
- break;
- case parser_tag_type::css_component:
- ret = "component";
- break;
- case parser_tag_type::css_selector:
- ret = "selector";
- break;
- }
+ return true;
+}
- return ret;
+auto css_consumed_block::token_type_str(void) const -> const char *
+{
+ const auto *ret = "";
+
+ switch(tag) {
+ case parser_tag_type::css_top_block:
+ ret = "top";
+ break;
+ case parser_tag_type::css_qualified_rule:
+ ret = "qualified rule";
+ break;
+ case parser_tag_type::css_at_rule:
+ ret = "at rule";
+ break;
+ case parser_tag_type::css_simple_block:
+ ret = "simple block";
+ break;
+ case parser_tag_type::css_function:
+ ret = "function";
+ break;
+ case parser_tag_type::css_function_arg:
+ ret = "function args";
+ break;
+ case parser_tag_type::css_component:
+ ret = "component";
+ break;
+ case parser_tag_type::css_selector:
+ ret = "selector";
+ break;
+ case parser_tag_type::css_eof_block:
+ ret = "eof";
+ break;
}
- auto size() const -> std::size_t {
- auto ret = 0;
-
- std::visit([&](auto& arg) {
- using T = std::decay_t<decltype(arg)>;
-
- if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) {
- /* Array of blocks */
- ret = arg.size();
- }
- else if constexpr (std::is_same_v<T, std::monostate>) {
- /* Empty block */
- ret = 0;
- }
- else {
- /* Single element block */
- ret = 1;
- }
- },
- content);
+ return ret;
+}
- return ret;
- }
+auto css_consumed_block::debug_str(void) -> std::string {
+ std::string ret = std::string(R"("type": ")") + token_type_str() + "\"";
- auto debug_str(void) -> std::string {
- std::string ret = std::string("\"type\": \"") + token_type_str() + "\"";
+ ret += ", \"value\": ";
- ret += ", \"value\": ";
+ std::visit([&](auto& arg) {
+ using T = std::decay_t<decltype(arg)>;
- std::visit([&](auto& arg) {
- using T = std::decay_t<decltype(arg)>;
+ if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) {
+ /* Array of blocks */
+ ret += "[";
+ for (const auto &block : arg) {
+ ret += "{";
+ ret += block->debug_str();
+ ret += "}, ";
+ }
- if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) {
- /* Array of blocks */
- ret += "[";
- for (const auto &block : arg) {
- ret += "{";
- ret += block->debug_str();
- ret += "}, ";
+ if (*(--ret.end()) == ' ') {
+ ret.pop_back();
+ ret.pop_back(); /* Last ',' */
+ }
+ ret += "]";
}
-
- if (*(--ret.end()) == ' ') {
- ret.pop_back();
- ret.pop_back(); /* Last ',' */
+ else if constexpr (std::is_same_v<T, std::monostate>) {
+ /* Empty block */
+ ret += R"("empty")";
}
- ret += "]";
- }
- else if constexpr (std::is_same_v<T, std::monostate>) {
- /* Empty block */
- ret += "\"empty\"";
- }
- else {
- /* Single element block */
- ret += "\"" + arg.debug_token_str() + "\"";
- }
- },
- content);
+ else {
+ /* Single element block */
+ ret += "\"" + arg.debug_token_str() + "\"";
+ }
+ },
+ content);
- return ret;
- }
-};
+ return ret;
+}
class css_parser {
public:
@@ -622,18 +538,18 @@ bool css_parser::consume_input(const std::string_view &sv)
auto selector_it = children.cbegin();
auto selector_token_functor = [&selector_it,&simple_block](void)
- -> const css_parser_token & {
+ -> const css_consumed_block & {
for (;;) {
if (selector_it == simple_block) {
- return css_parser_eof_token();
+ return css_parser_eof_block;
}
- const auto &ret = (*selector_it)->get_token_or_empty();
+ const auto &ret = (*selector_it);
++selector_it;
- if (ret.type != css_parser_token::token_type::eof_token) {
- return ret;
+ if (ret->get_token_or_empty().type != css_parser_token::token_type::eof_token) {
+ return *ret;
}
}
};
@@ -643,18 +559,18 @@ bool css_parser::consume_input(const std::string_view &sv)
auto decls_it = (*simple_block)->get_blocks_or_empty().cbegin();
auto decls_end = (*simple_block)->get_blocks_or_empty().cend();
auto declaration_token_functor = [&decls_it,&decls_end](void)
- -> const css_parser_token & {
+ -> const css_consumed_block & {
for (;;) {
if (decls_it == decls_end) {
- return css_parser_eof_token();
+ return css_parser_eof_block;
}
- const auto &ret = (*decls_it)->get_token_or_empty();
+ const auto &ret = (*decls_it);
++decls_it;
- if (ret.type != css_parser_token::token_type::eof_token) {
- return ret;
+ if (ret->get_token_or_empty().type != css_parser_token::token_type::eof_token) {
+ return *ret;
}
}
};
diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx
index 2f10f994e..de982525a 100644
--- a/src/libserver/css/css_parser.hxx
+++ b/src/libserver/css/css_parser.hxx
@@ -19,13 +19,131 @@
#ifndef RSPAMD_CSS_PARSER_HXX
#define RSPAMD_CSS_PARSER_HXX
+#include <variant>
+#include <vector>
+#include <memory>
+#include <string>
+
+#include "css_tokeniser.hxx"
#include "css.hxx"
#include "parse_error.hxx"
#include "contrib/expected/expected.hpp"
#include "logger.h"
+
namespace rspamd::css {
+/*
+ * Represents a consumed token by a parser
+ */
+class css_consumed_block {
+public:
+ enum class parser_tag_type : std::uint8_t {
+ css_top_block,
+ css_qualified_rule,
+ css_at_rule,
+ css_simple_block,
+ css_function,
+ css_function_arg,
+ css_component,
+ css_selector,
+ css_eof_block,
+ };
+ using consumed_block_ptr = std::unique_ptr<css_consumed_block>;
+
+ css_consumed_block() : tag(parser_tag_type::css_eof_block) {}
+ css_consumed_block(parser_tag_type tag) : tag(tag) {
+ if (tag == parser_tag_type::css_top_block ||
+ tag == parser_tag_type::css_qualified_rule ||
+ tag == parser_tag_type::css_simple_block) {
+ /* Pre-allocate content for known vector blocks */
+ std::vector<consumed_block_ptr> vec;
+ vec.reserve(4);
+ content = std::move(vec);
+ }
+ }
+ /* Construct a block from a single lexer token (for trivial blocks) */
+ explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) :
+ tag(tag), content(std::move(tok)) {}
+
+ /* Attach a new block to the compound block, consuming block inside */
+ auto attach_block(consumed_block_ptr &&block) -> bool;
+
+ auto assign_token(css_parser_token &&tok) -> void {
+ content = std::move(tok);
+ }
+
+ /* Empty blocks used to avoid type checks in loops */
+ const inline static std::vector<consumed_block_ptr> empty_block_vec{};
+
+ auto is_blocks_vec() const -> bool {
+ return (content.index() == 1);
+ }
+
+ auto get_blocks_or_empty() const -> const std::vector<consumed_block_ptr>& {
+ if (is_blocks_vec()) {
+ return std::get<std::vector<consumed_block_ptr>>(content);
+ }
+
+ return empty_block_vec;
+ }
+
+ auto is_token() const -> bool {
+ return (content.index() == 2);
+ }
+
+ auto get_token_or_empty() const -> const css_parser_token& {
+ if (is_token()) {
+ return std::get<css_parser_token>(content);
+ }
+
+ return css_parser_eof_token();
+ }
+
+ auto size() const -> std::size_t {
+ auto ret = 0;
+
+ std::visit([&](auto& arg) {
+ using T = std::decay_t<decltype(arg)>;
+
+ if constexpr (std::is_same_v<T, std::vector<consumed_block_ptr>>) {
+ /* Array of blocks */
+ ret = arg.size();
+ }
+ else if constexpr (std::is_same_v<T, std::monostate>) {
+ /* Empty block */
+ ret = 0;
+ }
+ else {
+ /* Single element block */
+ ret = 1;
+ }
+ },
+ content);
+
+ return ret;
+ }
+
+ auto is_eof() -> bool {
+ return tag == parser_tag_type::css_eof_block;
+ }
+
+ /* Debug methods */
+ auto token_type_str(void) const -> const char *;
+ auto debug_str(void) -> std::string;
+
+public:
+ parser_tag_type tag;
+private:
+ std::variant<std::monostate,
+ std::vector<consumed_block_ptr>,
+ css_parser_token> content;
+};
+
+extern const css_consumed_block css_parser_eof_block;
+
+using blocks_gen_functor = std::function<const css_consumed_block &(void)>;
+
auto parse_css (rspamd_mempool_t *pool, const std::string_view &st) ->
tl::expected<std::unique_ptr<css_style_sheet>,css_parse_error>;
diff --git a/src/libserver/css/css_property.cxx b/src/libserver/css/css_property.cxx
index 98543f75a..77927d724 100644
--- a/src/libserver/css/css_property.cxx
+++ b/src/libserver/css/css_property.cxx
@@ -15,12 +15,44 @@
*/
#include "css_property.hxx"
-
+#include "frozen/unordered_map.h"
+#include "frozen/string.h"
namespace rspamd::css {
-auto css_property::from_bytes (const char *input, size_t inlen) -> tl::expected<css_property,css_parse_error>
+constexpr const auto max_type = static_cast<int>(css_property_type::PROPERTY_NYI);
+constexpr frozen::unordered_map<frozen::string, css_property_type, max_type> type_map{
+ {"font", css_property_type::PROPERTY_FONT},
+ {"color", css_property_type::PROPERTY_COLOR},
+ {"bgcolor", css_property_type::PROPERTY_BGCOLOR},
+ {"background", css_property_type::PROPERTY_BACKGROUND},
+ {"height", css_property_type::PROPERTY_HEIGHT},
+ {"width", css_property_type::PROPERTY_WIDTH},
+ {"display", css_property_type::PROPERTY_DISPLAY},
+ {"visibility", css_property_type::PROPERTY_VISIBILITY},
+};
+
+auto token_string_to_property(const std::string_view &inp) -> css_property_type {
+
+ css_property_type ret = css_property_type::PROPERTY_NYI;
+
+ auto known_type = type_map.find(inp);
+
+ if (known_type != type_map.end()) {
+ ret = known_type->second;
+ }
+
+ return ret;
+}
+
+auto css_property::from_token(const css_parser_token &tok) -> tl::expected<css_property,css_parse_error>
{
+ if (tok.type == css_parser_token::token_type::ident_token) {
+ auto sv = tok.get_string_or_default("");
+
+ return css_property{token_string_to_property(sv)};
+ }
+
return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)};
}
diff --git a/src/libserver/css/css_property.hxx b/src/libserver/css/css_property.hxx
index 2e668c640..562e54894 100644
--- a/src/libserver/css/css_property.hxx
+++ b/src/libserver/css/css_property.hxx
@@ -19,6 +19,7 @@
#define RSPAMD_CSS_PROPERTY_HXX
#include <string>
+#include "css_tokeniser.hxx"
#include "parse_error.hxx"
#include "contrib/expected/expected.hpp"
@@ -29,7 +30,7 @@ namespace rspamd::css {
* point of view
*/
enum class css_property_type {
- PROPERTY_FONT,
+ PROPERTY_FONT = 0,
PROPERTY_COLOR,
PROPERTY_BGCOLOR,
PROPERTY_BACKGROUND,
@@ -37,12 +38,13 @@ enum class css_property_type {
PROPERTY_WIDTH,
PROPERTY_DISPLAY,
PROPERTY_VISIBILITY,
+ PROPERTY_NYI,
};
struct css_property {
css_property_type type;
- static tl::expected<css_property,css_parse_error> from_bytes (const char *input,
- size_t inlen);
+ static tl::expected<css_property,css_parse_error> from_token(
+ const css_parser_token &tok);
};
diff --git a/src/libserver/css/css_rule.cxx b/src/libserver/css/css_rule.cxx
index 44148b01a..cb0d4abad 100644
--- a/src/libserver/css/css_rule.cxx
+++ b/src/libserver/css/css_rule.cxx
@@ -19,10 +19,101 @@
namespace rspamd::css {
auto process_declaration_tokens(rspamd_mempool_t *pool,
- const tokeniser_gen_functor &next_token_functor)
+ const blocks_gen_functor &next_block_functor)
-> declarations_vec
{
declarations_vec ret;
+ bool can_continue = true;
+ css_property cur_property{css_property_type::PROPERTY_NYI};
+ static const css_property bad_property{css_property_type::PROPERTY_NYI};
+ std::unique_ptr<css_rule> cur_rule;
+
+ enum {
+ parse_property,
+ parse_value,
+ ignore_value, /* For unknown properties */
+ } state = parse_property;
+
+ while (can_continue) {
+ const auto &next_tok = next_block_functor();
+
+ switch (next_tok.tag) {
+ case css_consumed_block::parser_tag_type::css_component:
+ if (state == parse_property) {
+ cur_property = css_property::from_token(next_tok.get_token_or_empty())
+ .value_or(bad_property);
+
+ if (cur_property.type == css_property_type::PROPERTY_NYI) {
+ state = ignore_value;
+ /* Ignore everything till ; */
+ continue;
+ }
+
+ /* We now expect colon block */
+ const auto &expect_colon_block = next_block_functor();
+
+ if (expect_colon_block.tag != css_consumed_block::parser_tag_type::css_component) {
+
+ state = ignore_value; /* Ignore up to the next rule */
+ }
+ else {
+ const auto &expect_colon_tok = expect_colon_block.get_token_or_empty();
+
+ if (expect_colon_tok.type != css_parser_token::token_type::colon_token) {
+ msg_debug_css("invalid rule, no colon after property");
+ state = ignore_value; /* Ignore up to the next rule */
+ }
+ else {
+ state = parse_value;
+ cur_rule = std::make_unique<css_rule>(cur_property);
+ }
+ }
+ }
+ else if (state == parse_value) {
+ /* Check semicolon */
+ if (next_tok.is_token()) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+ ret.push_back(std::move(cur_rule));
+ state = parse_property;
+ continue;
+ }
+ }
+
+ auto maybe_value = css_value::from_css_block(next_tok);
+
+ if (maybe_value) {
+ cur_rule->add_value(maybe_value.value());
+ }
+ }
+ else {
+ /* Ignore all till ; */
+ if (next_tok.is_token()) {
+ const auto &parser_tok = next_tok.get_token_or_empty();
+
+ if (parser_tok.type == css_parser_token::token_type::semicolon_token) {
+ state = parse_property;
+ }
+ }
+ }
+ break;
+ case css_consumed_block::parser_tag_type::css_function:
+ case css_consumed_block::parser_tag_type::css_function_arg:
+ if (state == parse_value) {
+ auto maybe_value = css_value::from_css_block(next_tok);
+
+ if (maybe_value) {
+ cur_rule->add_value(maybe_value.value());
+ }
+ }
+ break;
+ case css_consumed_block::parser_tag_type::css_eof_block:
+ default:
+ can_continue = false;
+ break;
+ }
+ }
return ret; /* copy elision */
}
diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx
index 725b6448b..929c5b263 100644
--- a/src/libserver/css/css_rule.hxx
+++ b/src/libserver/css/css_rule.hxx
@@ -20,7 +20,7 @@
#include "css_value.hxx"
#include "css_property.hxx"
-#include "css_tokeniser.hxx"
+#include "css_parser.hxx"
#include <vector>
#include <memory>
@@ -38,7 +38,7 @@ public:
css_rule(css_rule &&other) = default;
explicit css_rule(css_property &&prop, css_values_vec &&values) :
prop(prop), values(std::forward<css_values_vec>(values)) {}
- explicit css_rule(css_property &&prop) : prop(prop), values{} {}
+ explicit css_rule(const css_property &prop) : prop(prop), values{} {}
/* Methods */
void add_value(std::unique_ptr<css_value> &&value) {
values.emplace_back(std::forward<std::unique_ptr<css_value>>(value));
@@ -53,7 +53,7 @@ public:
using declarations_vec = std::vector<std::unique_ptr<css_rule>>;
auto process_declaration_tokens(rspamd_mempool_t *pool,
*** OUTPUT TRUNCATED, 120 LINES SKIPPED ***
More information about the Commits mailing list