commit 668b6c3: [Project] Css: Add some logical skeleton for declarations parser

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Feb 17 21:21:07 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-02-17 21:16:45 +0000
URL: https://github.com/rspamd/rspamd/commit/668b6c3f8fab7c38dc29a501808024556fe09e34 (HEAD -> master)

[Project] Css: Add some logical skeleton for declarations parser

---
 src/libserver/css/CMakeLists.txt                  |  1 +
 src/libserver/css/css_parser.cxx                  | 58 +++++++++++++++++++++--
 src/libserver/css/css_parser.hxx                  |  2 -
 src/libserver/css/{css_value.cxx => css_rule.cxx} | 12 +++--
 src/libserver/css/css_rule.hxx                    |  7 +++
 src/libserver/css/css_selector.cxx                |  9 ++--
 src/libserver/css/css_selector.hxx                | 15 ++++--
 src/libserver/css/css_tokeniser.hxx               | 12 +++++
 8 files changed, 97 insertions(+), 19 deletions(-)

diff --git a/src/libserver/css/CMakeLists.txt b/src/libserver/css/CMakeLists.txt
index 84ed2cf8b..e0eab20d7 100644
--- a/src/libserver/css/CMakeLists.txt
+++ b/src/libserver/css/CMakeLists.txt
@@ -16,6 +16,7 @@ SET(LIBCSSSRC    "${CMAKE_CURRENT_SOURCE_DIR}/css.cxx"
                  "${CMAKE_CURRENT_SOURCE_DIR}/css_selector.cxx"
                  "${CMAKE_CURRENT_SOURCE_DIR}/css_tokeniser.cxx"
                  "${CMAKE_CURRENT_SOURCE_DIR}/css_util.cxx"
+                 "${CMAKE_CURRENT_SOURCE_DIR}/css_rule.cxx"
                  "${CMAKE_CURRENT_SOURCE_DIR}/css_parser.cxx"
                  "${RAGEL_ragel_css_selector_parser_OUTPUTS}"
                  "${RAGEL_ragel_css_rule_parser_OUTPUTS}"
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index a0a9d8847..1a9231700 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -16,6 +16,8 @@
 
 #include "css_parser.hxx"
 #include "css_tokeniser.hxx"
+#include "css_selector.hxx"
+#include "css_rule.hxx"
 #include <vector>
 #include <unicode/utf8.h>
 
@@ -94,6 +96,14 @@ struct css_consumed_block {
 		return empty_block_vec;
 	}
 
+	auto get_token_or_empty() const -> const css_parser_token& {
+		if (content.index() == 2) {
+			return std::get<css_parser_token>(content);
+		}
+
+		return css_parser_eof_token();
+	}
+
 	auto token_type_str(void) const -> const char *
 	{
 		const auto *ret = "";
@@ -604,13 +614,53 @@ bool css_parser::consume_input(const std::string_view &sv)
 			if (simple_block != children.end()) {
 				/*
 				 * We have a component and a simple block,
-				 * so we can parse a declaration
+				 * so we can parse a selector and then extract
+				 * declarations from a simple block
 				 */
 
 				/* First, tag all components as preamble */
-				for (auto it = children.begin(); it != simple_block; ++it) {
-					(*it)->tag = css_consumed_block::parser_tag_type::css_selector;
-				}
+				auto selector_it = children.cbegin();
+
+				auto selector_token_functor = [&selector_it,&simple_block](void)
+						-> const css_parser_token & {
+					for (;;) {
+						if (selector_it == simple_block) {
+							return css_parser_eof_token();
+						}
+
+						const auto &ret = (*selector_it)->get_token_or_empty();
+
+						++selector_it;
+
+						if (ret.type != css_parser_token::token_type::eof_token) {
+							return ret;
+						}
+					}
+				};
+
+				auto selectors_vec = process_selector_tokens(pool, selector_token_functor);
+
+				auto decls_it = (*simple_block)->get_blocks_or_empty().cbegin();
+				auto decls_end = (*simple_block)->get_blocks_or_empty().cend();
+				auto declaration_token_functor = [&decls_it,&decls_end](void)
+						-> const css_parser_token & {
+					for (;;) {
+						if (decls_it == decls_end) {
+							return css_parser_eof_token();
+						}
+
+						const auto &ret = (*decls_it)->get_token_or_empty();
+
+						++decls_it;
+
+						if (ret.type != css_parser_token::token_type::eof_token) {
+							return ret;
+						}
+					}
+				};
+
+				auto declarations_vec = process_declaration_tokens(pool,
+						declaration_token_functor);
 			}
 		}
 	}
diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx
index e009fef70..2f10f994e 100644
--- a/src/libserver/css/css_parser.hxx
+++ b/src/libserver/css/css_parser.hxx
@@ -26,8 +26,6 @@
 
 namespace rspamd::css {
 
-INIT_LOG_MODULE(chartable)
-
 auto parse_css (rspamd_mempool_t *pool, const std::string_view &st) ->
 		tl::expected<std::unique_ptr<css_style_sheet>,css_parse_error>;
 
diff --git a/src/libserver/css/css_value.cxx b/src/libserver/css/css_rule.cxx
similarity index 74%
copy from src/libserver/css/css_value.cxx
copy to src/libserver/css/css_rule.cxx
index af4691daf..44148b01a 100644
--- a/src/libserver/css/css_value.cxx
+++ b/src/libserver/css/css_rule.cxx
@@ -14,14 +14,16 @@
  * limitations under the License.
  */
 
-#include "css_value.hxx"
+#include "css_rule.hxx"
 
 namespace rspamd::css {
 
-tl::expected<css_value,css_parse_error> css_value::from_bytes (const char *input,
-													size_t inlen)
+auto process_declaration_tokens(rspamd_mempool_t *pool,
+								const tokeniser_gen_functor &next_token_functor)
+	-> declarations_vec
 {
-	return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)};
-}
+	declarations_vec ret;
 
+	return ret; /* copy elision */
 }
+}
\ No newline at end of file
diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx
index 6afaa8bc6..725b6448b 100644
--- a/src/libserver/css/css_rule.hxx
+++ b/src/libserver/css/css_rule.hxx
@@ -20,6 +20,7 @@
 
 #include "css_value.hxx"
 #include "css_property.hxx"
+#include "css_tokeniser.hxx"
 #include <vector>
 #include <memory>
 
@@ -49,6 +50,12 @@ public:
 	constexpr const css_property& get_prop(void) const { return prop; }
 };
 
+using declarations_vec = std::vector<std::unique_ptr<css_rule>>;
+
+auto process_declaration_tokens(rspamd_mempool_t *pool,
+							 const tokeniser_gen_functor &next_token_functor)
+	-> declarations_vec;
+
 }
 
 /* Make rules hashable by property */
diff --git a/src/libserver/css/css_selector.cxx b/src/libserver/css/css_selector.cxx
index 2aeaaa8c9..1d6f727ea 100644
--- a/src/libserver/css/css_selector.cxx
+++ b/src/libserver/css/css_selector.cxx
@@ -18,10 +18,13 @@
 
 namespace rspamd::css {
 
-tl::expected<css_selector,css_parse_error> css_selector::from_bytes (const char *input,
-															   size_t inlen)
+auto process_selector_tokens(rspamd_mempool_t *pool,
+							 const tokeniser_gen_functor &next_token_functor)
+	-> selectors_vec
 {
-	return tl::unexpected{css_parse_error(css_parse_error_type::PARSE_ERROR_NYI)};
+	selectors_vec ret;
+
+	return ret; /* copy elision */
 }
 
 }
diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx
index 8611630fd..1e5f5bb13 100644
--- a/src/libserver/css/css_selector.hxx
+++ b/src/libserver/css/css_selector.hxx
@@ -22,7 +22,8 @@
 #include <variant>
 #include <string>
 #include <optional>
-#include "contrib/expected/expected.hpp"
+#include <vector>
+#include <functional>
 #include "parse_error.hxx"
 #include "css_tokeniser.hxx"
 #include "html_tags.h"
@@ -54,13 +55,17 @@ struct css_selector {
 			return std::string_view(std::get<std::string>(value));
 		}
 		return std::nullopt;
-	}
-
-	static tl::expected<css_selector,css_parse_error> from_bytes (const char *input,
-															   size_t inlen);
+	};
 };
 
+using selectors_vec = std::vector<std::unique_ptr<css_selector>>;
 
+/*
+ * Consume selector tokens and split them into a list of selectors
+ */
+auto process_selector_tokens(rspamd_mempool_t *pool,
+							 const tokeniser_gen_functor &next_token_functor)
+	-> selectors_vec;
 
 }
 
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
index f1e9d05fc..e3ba47437 100644
--- a/src/libserver/css/css_tokeniser.hxx
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -23,6 +23,7 @@
 #include <utility>
 #include <variant>
 #include <list>
+#include <functional>
 #include "mem_pool.h"
 
 namespace rspamd::css {
@@ -101,6 +102,15 @@ struct css_parser_token {
 	auto debug_token_str() -> std::string;
 };
 
+static auto css_parser_eof_token(void) -> const css_parser_token & {
+	static css_parser_token eof_tok {
+		css_parser_token::token_type::eof_token,
+				css_parser_token_placeholder()
+	};
+
+	return eof_tok;
+}
+
 /* Ensure that parser tokens are simple enough */
 /*
  * compiler must implement P0602 "variant and optional should propagate copy/move triviality"
@@ -129,6 +139,8 @@ private:
 	auto consume_ident() -> struct css_parser_token;
 };
 
+using tokeniser_gen_functor = std::function<const css_parser_token &(void)>;
+
 }
 
 


More information about the Commits mailing list