commit 8260c0c: [Project] Css: Further work on parser's methods
Vsevolod Stakhov
vsevolod at highsecure.ru
Thu Feb 4 20:35:08 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-02-04 20:30:58 +0000
URL: https://github.com/rspamd/rspamd/commit/8260c0c2204f03186997b03f9854efbadbb62ca8 (HEAD -> master)
[Project] Css: Further work on parser's methods
---
src/libserver/css/css_parser.cxx | 264 +++++++++++++++++++++++++++++----------
1 file changed, 200 insertions(+), 64 deletions(-)
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 2133a7b36..c2df00b35 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -32,6 +32,7 @@ struct css_consumed_block {
css_at_rule,
css_simple_block,
css_function,
+ css_function_arg,
css_component
};
@@ -72,6 +73,11 @@ struct css_consumed_block {
return true;
}
+
+ auto assign_token(css_parser_token &&tok) -> void
+ {
+ content = std::move(tok);
+ }
};
class css_parser {
@@ -100,12 +106,24 @@ private:
};
parser_state state = parser_state::initial_state;
std::unique_ptr<css_style_sheet> style_object;
+ std::unique_ptr<css_tokeniser> tokeniser;
css_parse_error error;
rspamd_mempool_t *pool;
+ int rec_level = 0;
+ const int max_rec = 20;
+ bool eof = false;
+
/* Helper parser methods */
bool need_unescape(const std::string_view &sv);
+
+ /* Consumers */
+ auto component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+ auto function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
+ auto simple_block_consumer(std::unique_ptr<css_consumed_block> &top,
+ css_parser_token::token_type expected_end) -> bool;
+ auto qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool;
};
/*
@@ -140,101 +158,219 @@ css_parser::need_unescape(const std::string_view &sv)
return false;
}
-
-bool css_parser::consume_input(const std::string_view &sv)
+auto css_parser::function_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
{
- bool eof = false;
- css_tokeniser css_tokeniser(pool, sv);
+ auto ret = true, want_more = true;
- auto consumed_blocks = std::make_unique<css_consumed_block>(
- css_consumed_block::parser_tag_type::css_top_block);
- auto rec_level = 0;
- const auto max_rec = 20;
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+ return false;
+ }
- auto component_value_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
+ while (ret && want_more && !eof) {
+ auto next_token = tokeniser->next_token();
- if (++rec_level > max_rec) {
- error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
- return false;
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::ebrace_token:
+ ret = true;
+ want_more = false;
+ break;
+ default:
+ /* Attach everything to the function block */
+ top->attach_block(std::make_unique<css_consumed_block>(
+ css::css_consumed_block::parser_tag_type::css_function_arg,
+ std::move(next_token)));
+ break;
}
+ }
- auto next_token = css_tokeniser.next_token();
+ --rec_level;
- switch (next_token.type) {
+ return ret;
+}
- }
+auto css_parser::simple_block_consumer(std::unique_ptr<css_consumed_block> &top,
+ css_parser_token::token_type expected_end) -> bool
+{
+ auto ret = true;
- --rec_level;
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+ return false;
+ }
- return true;
- };
+ auto block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_simple_block);
- auto qualified_rule_consumer = [&](std::unique_ptr<css_consumed_block> &top) -> bool {
- if (++rec_level > max_rec) {
- msg_err_css("max nesting reached, ignore style");
- error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
- return false;
- }
+ while (ret && !eof) {
+ auto next_token = tokeniser->next_token();
- auto ret = true;
- auto block = std::make_unique<css_consumed_block>(
- css_consumed_block::parser_tag_type::css_qualified_rule);
-
- while (ret && !eof) {
- auto &&next_token = css_tokeniser.next_token();
- switch (next_token.type) {
- case css_parser_token::token_type::eof_token:
- eof = true;
- break;
- case css_parser_token::token_type::ident_token:
- case css_parser_token::token_type::hash_token:
- /* Consume allowed complex tokens as a rule preamble */
- ret = component_value_consumer(block);
- break;
- case css_parser_token::token_type::cdo_token:
- case css_parser_token::token_type::cdc_token:
- if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
- /* Ignore */
- ret = true;
- }
- else {
+ if (next_token.type == expected_end) {
+ break;
+ }
- }
- break;
- };
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ default:
+ tokeniser->pushback_token(std::move(next_token));
+ ret = component_value_consumer(block);
+ break;
}
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+auto css_parser::qualified_rule_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ if (++rec_level > max_rec) {
+ msg_err_css("max nesting reached, ignore style");
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+ return false;
+ }
+
+ auto ret = true;
+ auto block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_qualified_rule);
- if (ret) {
+ while (ret && !eof) {
+ auto next_token = tokeniser->next_token();
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::ident_token:
+ case css_parser_token::token_type::hash_token:
+ /* Consume allowed complex tokens as a rule preamble */
+ ret = component_value_consumer(block);
+ break;
+ case css_parser_token::token_type::cdo_token:
+ case css_parser_token::token_type::cdc_token:
if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
- top->attach_block(std::move(block));
+ /* Ignore */
+ ret = true;
+ }
+ else {
+
}
+ break;
+ };
+ }
+
+ if (ret) {
+ if (top->tag == css_consumed_block::parser_tag_type::css_top_block) {
+ top->attach_block(std::move(block));
}
+ }
- --rec_level;
+ --rec_level;
- return ret;
- };
+ return ret;
+}
- auto get_parser_consumer = [&]() -> auto {
- switch (state) {
- case parser_state::initial_state:
- /* Top level qualified parser */
- return qualified_rule_consumer;
+auto css_parser::component_value_consumer(std::unique_ptr<css_consumed_block> &top) -> bool
+{
+ auto ret = true, need_more = true;
+
+ if (++rec_level > max_rec) {
+ error = css_parse_error(css_parse_error_type::PARSE_ERROR_BAD_NESTING);
+ return false;
+ }
+
+ auto block = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_component);
+
+ while (ret && need_more && !eof) {
+ auto next_token = tokeniser->next_token();
+
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
+ eof = true;
+ break;
+ case css_parser_token::token_type::ocurlbrace_token:
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ecurlbrace_token);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::obrace_token:
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::ebrace_token);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::osqbrace_token:
+ ret = simple_block_consumer(block,
+ css_parser_token::token_type::esqbrace_token);
+ need_more = false;
+ break;
+ case css_parser_token::token_type::whitespace_token:
+ /* Ignore whitespaces */
+ break;
+ case css_parser_token::token_type::function_token: {
+ need_more = false;
+ auto fblock = std::make_unique<css_consumed_block>(
+ css_consumed_block::parser_tag_type::css_function,
+ std::move(next_token));
+
+ /* Consume the rest */
+ ret = function_consumer(fblock);
+
+ if (ret) {
+ block->attach_block(std::move(fblock));
+ }
break;
}
- };
+ default:
+ block->assign_token(std::move(next_token));
+ need_more = false;
+ break;
+ }
+ }
+
+ if (ret) {
+ top->attach_block(std::move(block));
+ }
+
+ --rec_level;
+
+ return ret;
+}
+
+bool css_parser::consume_input(const std::string_view &sv)
+{
+ tokeniser = std::make_unique<css_tokeniser>(pool, sv);
+ auto ret = true;
+
+ auto consumed_blocks =
+ std::make_unique<css_consumed_block>(css_consumed_block::parser_tag_type::css_top_block);
while (!eof) {
/* Get a token and a consumer lambda for the current parser state */
- auto consumer = get_parser_consumer();
-
- if (!consumer(consumed_blocks)) {
+ switch (state) {
+ case parser_state::initial_state:
+ ret = qualified_rule_consumer(consumed_blocks);
break;
}
}
- return true;
+ tokeniser.reset(nullptr); /* No longer needed */
+
+ return ret;
}
/*
More information about the Commits
mailing list