commit 8260c0c: [Project] Css: Further work on parser's methods

Vsevolod Stakhov vsevolod at highsecure.ru
Thu Feb 4 20:35:08 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-02-04 20:30:58 +0000
URL: https://github.com/rspamd/rspamd/commit/8260c0c2204f03186997b03f9854efbadbb62ca8 (HEAD -> master)

[Project] Css: Further work on parser's methods

---
 src/libserver/css/css_parser.cxx | 264 +++++++++++++++++++++++++++++----------
 1 file changed, 200 insertions(+), 64 deletions(-)

diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 2133a7b36..c2df00b35 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -32,6 +32,7 @@ struct css_consumed_block {
 		css_at_rule,
 		css_simple_block,
 		css_function,
+		css_function_arg,
 		css_component
 	};
 
@@ -72,6 +73,11 @@ struct css_consumed_block {
 
 		return true;
 	}
+
+	auto assign_token(css_parser_token &&tok) -> void
+	{
+		content = std::move(tok);
+	}
 };
 
 class css_parser {
@@ -100,12 +106,24 @@ private:
 	};
 	parser_state state = parser_state::initial_state;
 	std::unique_ptr<css_style_sheet> style_object;
+	std::unique_ptr<css_tokeniser> tokeniser;
 
 	css_parse_error error;
 	rspamd_mempool_t *pool;
 
+	int rec_level = 0;
+	const int max_rec = 20;
+	bool eof = false;
+
 	/* Helper parser methods */
 	bool need_unescape(const std::string_view &sv);
+
+	/* Consumers */
+	auto component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+	auto function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+	auto simple_block_consumer(std::unique_ptr<css_consumed_block> &top,
+							   css_parser_token::token_type expected_end) -> bool;
+	auto qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
 };
 
 /*
@@ -140,101 +158,219 @@ css_parser::need_unescape(const std::string_view &sv)
 	return false;
 }
 
-
-bool css_parser::consume_input(const std::string_view &sv)
+auto css_parser::function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
 {
-	bool eof = false;
-	css_tokeniser css_tokeniser(pool, sv);
+	auto ret = true, want_more = true;
 
-	auto consumed_blocks = std::make_unique<css_consumed_block>(
-			css_consumed_block::parser_tag_type::css_top_block);
-	auto rec_level = 0;
-	const auto max_rec = 20;
+	if (++rec_level > max_rec) {
+		msg_err_css("max nesting reached, ignore style");
+		error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+		return false;
+	}
 
-	auto component_value_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
+	while (ret && want_more && !eof) {
+		auto next_token = tokeniser->next_token();
 
-		if (++rec_level > max_rec) {
-			error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
-			return false;
+		switch (next_token.type) {
+		case css_parser_token::token_type::eof_token:
+			eof = true;
+			break;
+		case css_parser_token::token_type::whitespace_token:
+			/* Ignore whitespaces */
+			break;
+		case css_parser_token::token_type::ebrace_token:
+			ret = true;
+			want_more = false;
+			break;
+		default:
+			/* Attach everything to the function block */
+			top->attach_block(std::make_unique<css_consumed_block>(
+					css::css_consumed_block::parser_tag_type::css_function_arg,
+					std::move(next_token)));
+			break;
 		}
+	}
 
-		auto next_token = css_tokeniser.next_token();
+	--rec_level;
 
-		switch (next_token.type) {
+	return ret;
+}
 
-		}
+auto css_parser::simple_block_consumer(std::unique_ptr<css_consumed_block> &top,
+									   css_parser_token::token_type expected_end) -> bool
+{
+	auto ret = true;
 
-		--rec_level;
+	if (++rec_level > max_rec) {
+		msg_err_css("max nesting reached, ignore style");
+		error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+		return false;
+	}
 
-		return true;
-	};
+	auto block = std::make_unique<css_consumed_block>(
+			css_consumed_block::parser_tag_type::css_simple_block);
 
-	auto qualified_rule_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
-		if (++rec_level > max_rec) {
-			msg_err_css("max nesting reached, ignore style");
-			error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
-			return false;
-		}
+	while (ret && !eof) {
+		auto next_token = tokeniser->next_token();
 
-		auto ret = true;
-		auto block = std::make_unique<css_consumed_block>(
-				css_consumed_block::parser_tag_type::css_qualified_rule);
-
-		while (ret && !eof) {
-			auto &&next_token = css_tokeniser.next_token();
-			switch (next_token.type) {
-			case css_parser_token::token_type::eof_token:
-				eof = true;
-				break;
-			case css_parser_token::token_type::ident_token:
-			case css_parser_token::token_type::hash_token:
-				/* Consume allowed complex tokens as a rule preamble */
-				ret = component_value_consumer(block);
-				break;
-			case css_parser_token::token_type::cdo_token:
-			case css_parser_token::token_type::cdc_token:
-				if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
-					/* Ignore */
-					ret = true;
-				}
-				else {
+		if (next_token.type == expected_end) {
+			break;
+		}
 
-				}
-				break;
-			};
+		switch (next_token.type) {
+		case css_parser_token::token_type::eof_token:
+			eof = true;
+			break;
+		case css_parser_token::token_type::whitespace_token:
+			/* Ignore whitespaces */
+			break;
+		default:
+			tokeniser->pushback_token(std::move(next_token));
+			ret = component_value_consumer(block);
+			break;
 		}
+	}
+
+	--rec_level;
+
+	return ret;
+}
+
+auto css_parser::qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+	if (++rec_level > max_rec) {
+		msg_err_css("max nesting reached, ignore style");
+		error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+		return false;
+	}
+
+	auto ret = true;
+	auto block = std::make_unique<css_consumed_block>(
+			css_consumed_block::parser_tag_type::css_qualified_rule);
 
-		if (ret) {
+	while (ret && !eof) {
+		auto next_token = tokeniser->next_token();
+		switch (next_token.type) {
+		case css_parser_token::token_type::eof_token:
+			eof = true;
+			break;
+		case css_parser_token::token_type::ident_token:
+		case css_parser_token::token_type::hash_token:
+			/* Consume allowed complex tokens as a rule preamble */
+			ret = component_value_consumer(block);
+			break;
+		case css_parser_token::token_type::cdo_token:
+		case css_parser_token::token_type::cdc_token:
 			if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
-				top->attach_block(std::move(block));
+				/* Ignore */
+				ret = true;
+			}
+			else {
+
 			}
+			break;
+		};
+	}
+
+	if (ret) {
+		if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+			top->attach_block(std::move(block));
 		}
+	}
 
-		--rec_level;
+	--rec_level;
 
-		return ret;
-	};
+	return ret;
+}
 
-	auto get_parser_consumer = [&]() -> auto {
-		switch (state) {
-		case parser_state::initial_state:
-			/* Top level qualified parser */
-			return qualified_rule_consumer;
+auto css_parser::component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+	auto ret = true, need_more = true;
+
+	if (++rec_level > max_rec) {
+		error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+		return false;
+	}
+
+	auto block = std::make_unique<css_consumed_block>(
+			css_consumed_block::parser_tag_type::css_component);
+
+	while (ret && need_more && !eof) {
+		auto next_token = tokeniser->next_token();
+
+		switch (next_token.type) {
+		case css_parser_token::token_type::eof_token:
+			eof = true;
+			break;
+		case css_parser_token::token_type::ocurlbrace_token:
+			ret = simple_block_consumer(block,
+					css_parser_token::token_type::ecurlbrace_token);
+			need_more = false;
+			break;
+		case css_parser_token::token_type::obrace_token:
+			ret = simple_block_consumer(block,
+					css_parser_token::token_type::ebrace_token);
+			need_more = false;
+			break;
+		case css_parser_token::token_type::osqbrace_token:
+			ret = simple_block_consumer(block,
+					css_parser_token::token_type::esqbrace_token);
+			need_more = false;
+			break;
+		case css_parser_token::token_type::whitespace_token:
+			/* Ignore whitespaces */
+			break;
+		case css_parser_token::token_type::function_token: {
+			need_more = false;
+			auto fblock = std::make_unique<css_consumed_block>(
+					css_consumed_block::parser_tag_type::css_function,
+					std::move(next_token));
+
+			/* Consume the rest */
+			ret = function_consumer(fblock);
+
+			if (ret) {
+				block->attach_block(std::move(fblock));
+			}
 			break;
 		}
-	};
+		default:
+			block->assign_token(std::move(next_token));
+			need_more = false;
+			break;
+		}
+	}
+
+	if (ret) {
+		top->attach_block(std::move(block));
+	}
+
+	--rec_level;
+
+	return ret;
+}
+
+bool css_parser::consume_input(const std::string_view &sv)
+{
+	tokeniser = std::make_unique<css_tokeniser>(pool, sv);
+	auto ret = true;
+
+	auto consumed_blocks =
+			std::make_unique<css_consumed_block>(css_consumed_block::parser_tag_type::css_top_block);
 
 	while (!eof) {
 		/* Get a token and a consumer lambda for the current parser state */
 
-		auto consumer = get_parser_consumer();
-
-		if (!consumer(consumed_blocks)) {
+		switch (state) {
+		case parser_state::initial_state:
+			ret = qualified_rule_consumer(consumed_blocks);
 			break;
 		}
 	}
 
-	return true;
+	tokeniser.reset(nullptr); /* No longer needed */
+
+	return ret;
 }
 
 /*


More information about the Commits mailing list