commit ab34f88: [Project] Css: Projected a parser
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Feb 3 16:14:06 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-02-02 18:18:45 +0000
URL: https://github.com/rspamd/rspamd/commit/ab34f8889570acb0eb7e687c48586925cb6f8616
[Project] Css: Projected a parser
---
src/libserver/css/css_parser.cxx | 144 +++++++++++++++++++++++++++++++++---
src/libserver/css/css_tokeniser.hxx | 1 +
src/libserver/css/parse_error.hxx | 1 +
3 files changed, 137 insertions(+), 9 deletions(-)
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 54ccccd23..2133a7b36 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -16,11 +16,64 @@
#include "css_parser.hxx"
#include "css_tokeniser.hxx"
+#include <vector>
#include <unicode/utf8.h>
namespace rspamd::css {
+/*
+ * A block consumed by the parser: one node of the tree built from the input.
+ *
+ * `tag` tells which kind of block the node is, and `content` holds one of:
+ *  - std::monostate: nothing has been attached yet;
+ *  - a vector of owned child blocks (compound block);
+ *  - a single lexer token (trivial block).
+ */
+struct css_consumed_block {
+    /* Kind of a consumed block */
+    enum class parser_tag_type : std::uint8_t {
+        css_top_block,
+        css_qualified_rule,
+        css_at_rule,
+        css_simple_block,
+        css_function,
+        css_component
+    };
+
+    using consumed_block_ptr = std::unique_ptr<css_consumed_block>;
+
+    parser_tag_type tag;
+    std::variant<std::monostate,
+            std::vector<consumed_block_ptr>,
+            css_parser_token> content;
+
+    css_consumed_block() = delete;
+
+    /*
+     * Create a block with the given tag. Top blocks, qualified rules and
+     * simple blocks start with an empty vector of children; any other tag
+     * leaves `content` as std::monostate until a child is attached.
+     */
+    css_consumed_block(parser_tag_type tag) : tag(tag) {
+        if (tag == parser_tag_type::css_top_block ||
+            tag == parser_tag_type::css_qualified_rule ||
+            tag == parser_tag_type::css_simple_block) {
+            /*
+             * Reserve room for a few children without creating elements:
+             * constructing the vector with a size of 4 would place four
+             * null pointers ahead of the first attached block.
+             */
+            content.emplace<std::vector<consumed_block_ptr>>().reserve(4);
+        }
+    }
+    /* Construct a block from a single lexer token (for trivial blocks) */
+    explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) :
+            tag(tag), content(std::move(tok)) {}
+
+    /*
+     * Attach a new child block to this compound block, taking ownership of it.
+     *
+     * Returns true when the child has been appended. Returns false when this
+     * block holds a single token and thus cannot have children; in that case
+     * `block` is not moved from and stays owned by the caller.
+     */
+    auto attach_block(consumed_block_ptr &&block) -> bool {
+        if (std::holds_alternative<std::monostate>(content)) {
+            /* Switch from monostate to an empty vector of children */
+            content.emplace<std::vector<consumed_block_ptr>>();
+        }
+        else if (std::holds_alternative<css_parser_token>(content)) {
+            /* A single component, cannot attach a block ! */
+            return false;
+        }
+
+        std::get<std::vector<consumed_block_ptr>>(content)
+                .push_back(std::move(block));
+
+        return true;
+    }
+};
+
class css_parser {
public:
css_parser(void) = delete; /* Require mempool to be set for logging */
@@ -31,10 +84,10 @@ public:
auto get_object_maybe(void) -> tl::expected<std::unique_ptr<css_style_sheet>, css_parse_error> {
if (state == parser_state::parse_done) {
state = parser_state::initial_state;
- return std::move (style_object);
+ return std::move(style_object);
}
- return tl::make_unexpected (error);
+ return tl::make_unexpected(error);
}
private:
@@ -93,17 +146,90 @@ bool css_parser::consume_input(const std::string_view &sv)
bool eof = false;
css_tokeniser css_tokeniser(pool, sv);
- while (!eof) {
+ auto consumed_blocks = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_top_block);
+ auto rec_level = 0;
+ const auto max_rec = 20;
+
+ auto component_value_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
+
+ if (++rec_level > max_rec) {
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+ return false;
+ }
+
auto next_token = css_tokeniser.next_token();
- /* Top level parser */
switch (next_token.type) {
- case css_parser_token::token_type::eof_token:
- eof = true;
+
+ }
+
+ --rec_level;
+
+ return true;
+ };
+
+ auto qualified_rule_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+ return false;
+ }
+
+ auto ret = true;
+ auto block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_qualified_rule);
+
+ while (ret && !eof) {
+ auto &&next_token = css_tokeniser.next_token();
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::ident_token:
+ case css_parser_token::token_type::hash_token:
+ /* Consume allowed complex tokens as a rule preamble */
+ ret = component_value_consumer(block);
+ break;
+ case css_parser_token::token_type::cdo_token:
+ case css_parser_token::token_type::cdc_token:
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ /* Ignore */
+ ret = true;
+ }
+ else {
+
+ }
+ break;
+ };
+ }
+
+ if (ret) {
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ top->attach_block(std::move(block));
+ }
+ }
+
+ --rec_level;
+
+ return ret;
+ };
+
+ auto get_parser_consumer = [&]() -> auto {
+ switch (state) {
+ case parser_state::initial_state:
+ /* Top level qualified parser */
+ return qualified_rule_consumer;
break;
- default:
- /* Ignore tokens */
- msg_debug_css("got token: %s", next_token.debug_token_str().c_str());
+ }
+ };
+
+ while (!eof) {
+ /* Get a token and a consumer lambda for the current parser state */
+
+ auto consumer = get_parser_consumer();
+
+ if (!consumer(consumed_blocks)) {
break;
}
}
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
index b2da88500..7ef5f4643 100644
--- a/src/libserver/css/css_tokeniser.hxx
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -90,6 +90,7 @@ struct css_parser_token {
css_parser_token() = delete;
explicit css_parser_token(token_type type, const value_type &value) :
value(value), type(type) {}
+ css_parser_token(css_parser_token &&other) = default;
auto adjust_dim(const css_parser_token &dim_token) -> bool;
/* Debugging routines */
diff --git a/src/libserver/css/parse_error.hxx b/src/libserver/css/parse_error.hxx
index 0a2cbc750..458469afc 100644
--- a/src/libserver/css/parse_error.hxx
+++ b/src/libserver/css/parse_error.hxx
@@ -30,6 +30,7 @@ namespace rspamd::css {
enum class css_parse_error_type {
PARSE_ERROR_UNKNOWN_OPTION,
PARSE_ERROR_INVALID_SYNTAX,
+ PARSE_ERROR_BAD_NESTING, /* set by the parser once its recursion counter exceeds the nesting limit */
PARSE_ERROR_NYI, /* presumably "not yet implemented" -- TODO confirm */
PARSE_ERROR_UNKNOWN_ERROR,
};
More information about the Commits mailing list