commit 668b6c3: [Project] Css: Add some logical skeleton for declarations parser
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Feb 17 21:21:07 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-02-17 21:16:45 +0000
URL: https://github.com/rspamd/rspamd/commit/668b6c3f8fab7c38dc29a501808024556fe09e34 (HEAD -> master)
[Project] Css: Add some logical skeleton for declarations parser
---
src/libserver/css/CMakeLists.txt | 1 +
src/libserver/css/css_parser.cxx | 58 +++++++++++++++++++++--
src/libserver/css/css_parser.hxx | 2 -
src/libserver/css/{css_value.cxx => css_rule.cxx} | 12 +++--
src/libserver/css/css_rule.hxx | 7 +++
src/libserver/css/css_selector.cxx | 9 ++--
src/libserver/css/css_selector.hxx | 15 ++++--
src/libserver/css/css_tokeniser.hxx | 12 +++++
8 files changed, 97 insertions(+), 19 deletions(-)
diff --git a/src/libserver/css/CMakeLists.txt b/src/libserver/css/CMakeLists.txt
index 84ed2cf8b..e0eab20d7 100644
--- a/src/libserver/css/CMakeLists.txt
+++ b/src/libserver/css/CMakeLists.txt
@@ -16,6 +16,7 @@ SET(LIBCSSSRC "${CMAKE_CURRENT_SOURCE_DIR}/css.cxx"
"${CMAKE_CURRENT_SOURCE_DIR}/css_selector.cxx"
"${CMAKE_CURRENT_SOURCE_DIR}/css_tokeniser.cxx"
"${CMAKE_CURRENT_SOURCE_DIR}/css_util.cxx"
+ "${CMAKE_CURRENT_SOURCE_DIR}/css_rule.cxx"
"${CMAKE_CURRENT_SOURCE_DIR}/css_parser.cxx"
"${RAGEL_ragel_css_selector_parser_OUTPUTS}"
"${RAGEL_ragel_css_rule_parser_OUTPUTS}"
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index a0a9d8847..1a9231700 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -16,6 +16,8 @@
#include "css_parser.hxx"
#include "css_tokeniser.hxx"
+#include "css_selector.hxx"
+#include "css_rule.hxx"
#include <vector>
#include <unicode/utf8.h>
@@ -94,6 +96,14 @@ struct css_consumed_block {
return empty_block_vec;
}
+ auto get_token_or_empty() const -> const css_parser_token& {
+ if (content.index() == 2) {
+ return std::get<css_parser_token>(content);
+ }
+
+ return css_parser_eof_token();
+ }
+
auto token_type_str(void) const -> const char *
{
const auto *ret = "";
@@ -604,13 +614,53 @@ bool css_parser::consume_input(const std::string_view &sv)
if (simple_block != children.end()) {
/*
* We have a component and a simple block,
- * so we can parse a declaration
+ * so we can parse a selector and then extract
+ * declarations from a simple block
*/
/* First, tag all components as preamble */
- for (auto it = children.begin(); it != simple_block; ++it) {
- (*it)->tag = css_consumed_block::parser_tag_type::css_selector;
- }
+ auto selector_it = children.cbegin();
+
+ auto selector_token_functor = [&selector_it,&simple_block](void)
+ -> const css_parser_token & {
+ for (;;) {
+ if (selector_it == simple_block) {
+ return css_parser_eof_token();
+ }
+
+ const auto &ret = (*selector_it)->get_token_or_empty();
+
+ ++selector_it;
+
+ if (ret.type != css_parser_token::token_type::eof_token) {
+ return ret;
+ }
+ }
+ };
+
+ auto selectors_vec = process_selector_tokens(pool, selector_token_functor);
+
+ auto decls_it = (*simple_block)->get_blocks_or_empty().cbegin();
+ auto decls_end = (*simple_block)->get_blocks_or_empty().cend();
+ auto declaration_token_functor = [&decls_it,&decls_end](void)
+ -> const css_parser_token & {
+ for (;;) {
+ if (decls_it == decls_end) {
+ return css_parser_eof_token();
+ }
+
+ const auto &ret = (*decls_it)->get_token_or_empty();
+
+ ++decls_it;
+
+ if (ret.type != css_parser_token::token_type::eof_token) {
+ return ret;
+ }
+ }
+ };
+
+ auto declarations_vec = process_declaration_tokens(pool,
+ declaration_token_functor);
}
}
}
diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx
index e009fef70..2f10f994e 100644
--- a/src/libserver/css/css_parser.hxx
+++ b/src/libserver/css/css_parser.hxx
@@ -26,8 +26,6 @@
namespace rspamd::css {
-INIT_LOG_MODULE(chartable)
-
auto parse_css (rspamd_mempool_t *pool, const std::string_view &st) ->
tl::expected<std::unique_ptr<css_style_sheet>,css_parse_error>;
diff --git a/src/libserver/css/css_value.cxx b/src/libserver/css/css_rule.cxx
similarity index 74%
copy from src/libserver/css/css_value.cxx
copy to src/libserver/css/css_rule.cxx
index af4691daf..44148b01a 100644
--- a/src/libserver/css/css_value.cxx
+++ b/src/libserver/css/css_rule.cxx
@@ -14,14 +14,16 @@
* limitations under the License.
*/
-#include "css_value.hxx"
+#include "css_rule.hxx"
namespace rspamd::css {
-tl::expected<css_value,css_parse_error> css_value::from_bytes (const char *input,
- size_t inlen)
+auto process_declaration_tokens(rspamd_mempool_t *pool,
+ const tokeniser_gen_functor &next_token_functor)
+ -> declarations_vec
{
- return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)};
-}
+ declarations_vec ret;
+ return ret; /* copy elision */
}
+}
\ No newline at end of file
diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx
index 6afaa8bc6..725b6448b 100644
--- a/src/libserver/css/css_rule.hxx
+++ b/src/libserver/css/css_rule.hxx
@@ -20,6 +20,7 @@
#include "css_value.hxx"
#include "css_property.hxx"
+#include "css_tokeniser.hxx"
#include <vector>
#include <memory>
@@ -49,6 +50,12 @@ public:
constexpr const css_property& get_prop(void) const { return prop; }
};
+using declarations_vec = std::vector<std::unique_ptr<css_rule>>;
+
+auto process_declaration_tokens(rspamd_mempool_t *pool,
+ const tokeniser_gen_functor &next_token_functor)
+ -> declarations_vec;
+
}
/* Make rules hashable by property */
diff --git a/src/libserver/css/css_selector.cxx b/src/libserver/css/css_selector.cxx
index 2aeaaa8c9..1d6f727ea 100644
--- a/src/libserver/css/css_selector.cxx
+++ b/src/libserver/css/css_selector.cxx
@@ -18,10 +18,13 @@
namespace rspamd::css {
-tl::expected<css_selector,css_parse_error> css_selector::from_bytes (const char *input,
- size_t inlen)
+auto process_selector_tokens(rspamd_mempool_t *pool,
+ const tokeniser_gen_functor &next_token_functor)
+ -> selectors_vec
{
- return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)};
+ selectors_vec ret;
+
+ return ret; /* copy elision */
}
}
diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx
index 8611630fd..1e5f5bb13 100644
--- a/src/libserver/css/css_selector.hxx
+++ b/src/libserver/css/css_selector.hxx
@@ -22,7 +22,8 @@
#include <variant>
#include <string>
#include <optional>
-#include "contrib/expected/expected.hpp"
+#include <vector>
+#include <functional>
#include "parse_error.hxx"
#include "css_tokeniser.hxx"
#include "html_tags.h"
@@ -54,13 +55,17 @@ struct css_selector {
return std::string_view(std::get<std::string>(value));
}
return std::nullopt;
- }
-
- static tl::expected<css_selector,css_parse_error> from_bytes (const char *input,
- size_t inlen);
+ };
};
+using selectors_vec = std::vector<std::unique_ptr<css_selector>>;
+/*
+ * Consume selector tokens and split them into a list of selectors
+ */
+auto process_selector_tokens(rspamd_mempool_t *pool,
+ const tokeniser_gen_functor &next_token_functor)
+ -> selectors_vec;
}
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
index f1e9d05fc..e3ba47437 100644
--- a/src/libserver/css/css_tokeniser.hxx
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -23,6 +23,7 @@
#include <utility>
#include <variant>
#include <list>
+#include <functional>
#include "mem_pool.h"
namespace rspamd::css {
@@ -101,6 +102,15 @@ struct css_parser_token {
auto debug_token_str() -> std::string;
};
+static auto css_parser_eof_token(void) -> const css_parser_token & {
+ static css_parser_token eof_tok {
+ css_parser_token::token_type::eof_token,
+ css_parser_token_placeholder()
+ };
+
+ return eof_tok;
+}
+
/* Ensure that parser tokens are simple enough */
/*
* compiler must implement P0602 "variant and optional should propagate copy/move triviality"
@@ -129,6 +139,8 @@ private:
auto consume_ident() -> struct css_parser_token;
};
+using tokeniser_gen_functor = std::function<const css_parser_token &(void)>;
+
}
More information about the Commits mailing list