commit ab34f88: [Project] Css: Projected a parser

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Feb 3 16:14:06 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-02-02 18:18:45 +0000
URL: https://github.com/rspamd/rspamd/commit/ab34f8889570acb0eb7e687c48586925cb6f8616

[Project] Css: Projected a parser

---
 src/libserver/css/css_parser.cxx    | 144 +++++++++++++++++++++++++++++++++---
 src/libserver/css/css_tokeniser.hxx |   1 +
 src/libserver/css/parse_error.hxx   |   1 +
 3 files changed, 137 insertions(+), 9 deletions(-)

diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 54ccccd23..2133a7b36 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -16,11 +16,64 @@
 
 #include "css_parser.hxx"
 #include "css_tokeniser.hxx"
+#include <vector>
 #include <unicode/utf8.h>
 
 
 namespace rspamd::css {
 
+/*
+ * A node of the parser's block tree: holds nothing, child blocks or one token
+ */
+struct css_consumed_block {
+	enum class parser_tag_type : std::uint8_t {
+		css_top_block,
+		css_qualified_rule,
+		css_at_rule,
+		css_simple_block,
+		css_function,
+		css_component
+	};
+
+	using consumed_block_ptr = std::unique_ptr<css_consumed_block>;
+
+	parser_tag_type tag; /* Kind of this block */
+	std::variant<std::monostate,
+		std::vector<consumed_block_ptr>,
+		css_parser_token> content; /* Empty, child blocks or a single token */
+
+	css_consumed_block() = delete;
+
+	/* Construct by tag; top, qualified rule and simple blocks start as empty vectors */
+	css_consumed_block(parser_tag_type tag) : tag(tag) {
+		if (tag == parser_tag_type::css_top_block ||
+			tag == parser_tag_type::css_qualified_rule ||
+			tag == parser_tag_type::css_simple_block) {
+			/* Reserve only: a sized vector would start with 4 null children */
+			content.emplace<std::vector<consumed_block_ptr>>().reserve(4);
+		}
+	}
+	/* Construct a block from a single lexer token (for trivial blocks) */
+	explicit css_consumed_block(parser_tag_type tag, css_parser_token &&tok) :
+			tag(tag), content(std::move(tok)) {}
+
+	/*
+	 * Attach a child block, taking ownership of it; returns false (leaving
+	 * `block` untouched) when this block holds a single token
+	 */
+	auto attach_block(consumed_block_ptr &&block) -> bool {
+		if (std::holds_alternative<css_parser_token>(content)) {
+			return false;
+		}
+		if (std::holds_alternative<std::monostate>(content)) {
+			/* Switch from monostate to an empty vector, no null placeholder */
+			content.emplace<std::vector<consumed_block_ptr>>();
+		}
+		std::get<std::vector<consumed_block_ptr>>(content).push_back(std::move(block));
+		return true;
+	}
+};
+
 class css_parser {
 public:
 	css_parser(void) = delete; /* Require mempool to be set for logging */
@@ -31,10 +84,10 @@ public:
 	auto get_object_maybe(void) -> tl::expected<std::unique_ptr<css_style_sheet>, css_parse_error> {
 		if (state == parser_state::parse_done) {
 			state = parser_state::initial_state;
-			return std::move (style_object);
+			return std::move(style_object);
 		}
 
-		return tl::make_unexpected (error);
+		return tl::make_unexpected(error);
 	}
 
 private:
@@ -93,17 +146,90 @@ bool css_parser::consume_input(const std::string_view &sv)
 	bool eof = false;
 	css_tokeniser css_tokeniser(pool, sv);
 
-	while (!eof) {
+	auto consumed_blocks = std::make_unique<css_consumed_block>(
+			css_consumed_block::parser_tag_type::css_top_block);
+	auto rec_level = 0;
+	const auto max_rec = 20;
+
+	auto component_value_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
+
+		if (++rec_level > max_rec) {
+			error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+			return false;
+		}
+
 		auto next_token = css_tokeniser.next_token();
 
-		/* Top level parser */
 		switch (next_token.type) {
-		case css_parser_token::token_type::eof_token:
-			eof = true;
+
+		}
+
+		--rec_level;
+
+		return true;
+	};
+	/* Reads tokens until EOF or a nested failure into a new qualified rule block; attaches it to `top` if that is the top block */
+	auto qualified_rule_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
+		if (++rec_level > max_rec) {
+			msg_err_css("max nesting reached, ignore style");
+			error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+			return false; /* NB: rec_level is not decremented on this path */
+		}
+		/* `ret` turns false once the nested consumer fails, which stops the loop */
+		auto ret = true;
+		auto block = std::make_unique<css_consumed_block>(
+				css_consumed_block::parser_tag_type::css_qualified_rule);
+
+		while (ret && !eof) {
+			auto &&next_token = css_tokeniser.next_token();
+			switch (next_token.type) {
+			case css_parser_token::token_type::eof_token:
+				eof = true; /* Also ends the caller's outer loop, which checks the same flag */
+				break;
+			case css_parser_token::token_type::ident_token:
+			case css_parser_token::token_type::hash_token:
+				/* Rule preamble: delegate to the component consumer, which reads its own next token */
+				ret = component_value_consumer(block);
+				break;
+			case css_parser_token::token_type::cdo_token:
+			case css_parser_token::token_type::cdc_token:
+				if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+					/* CDO/CDC directly under the top block are ignored */
+					ret = true;
+				}
+				else {
+					/* Empty branch: no handling for CDO/CDC outside the top block */
+				}
+				break;
+			}; /* Token types without a case fall through the switch and are skipped */
+		}
+		/* Only a top-block parent receives the new block; attach_block's result is unused */
+		if (ret) {
+			if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+				top->attach_block(std::move(block));
+			}
+		}
+
+		--rec_level;
+
+		return ret;
+	};
+
+	auto get_parser_consumer = [&]() -> auto {
+		switch (state) {
+		case parser_state::initial_state:
+			/* Top level qualified parser */
+			return qualified_rule_consumer;
 			break;
-		default:
-			/* Ignore tokens */
-			msg_debug_css("got token: %s", next_token.debug_token_str().c_str());
+		}
+	};
+
+	while (!eof) {
+		/* Get a token and a consumer lambda for the current parser state */
+
+		auto consumer = get_parser_consumer();
+
+		if (!consumer(consumed_blocks)) {
 			break;
 		}
 	}
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
index b2da88500..7ef5f4643 100644
--- a/src/libserver/css/css_tokeniser.hxx
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -90,6 +90,7 @@ struct css_parser_token {
 	css_parser_token() = delete;
 	explicit css_parser_token(token_type type, const value_type &value) :
 			value(value), type(type) {}
+	css_parser_token(css_parser_token &&other) = default;
 	auto adjust_dim(const css_parser_token &dim_token) -> bool;
 
 	/* Debugging routines */
diff --git a/src/libserver/css/parse_error.hxx b/src/libserver/css/parse_error.hxx
index 0a2cbc750..458469afc 100644
--- a/src/libserver/css/parse_error.hxx
+++ b/src/libserver/css/parse_error.hxx
@@ -30,6 +30,7 @@ namespace rspamd::css {
 enum class css_parse_error_type {
 	PARSE_ERROR_UNKNOWN_OPTION,
 	PARSE_ERROR_INVALID_SYNTAX,
+	PARSE_ERROR_BAD_NESTING, /* Set by the parser when its nesting counter exceeds the limit */
 	PARSE_ERROR_NYI,
 	PARSE_ERROR_UNKNOWN_ERROR,
 };


More information about the Commits mailing list