commit 61d518b: [Project] Css: Add preliminary stylesheet support

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Mar 23 15:21:05 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-03-23 15:14:07 +0000
URL: https://github.com/rspamd/rspamd/commit/61d518bf1d96f079f2eed66dd3101cac64d83e00 (HEAD -> master)

[Project] Css: Add preliminary stylesheet support

---
 src/libserver/css/css.cxx        | 76 +++++++++++++++++++++++++++++++++++-----
 src/libserver/css/css.hxx        |  5 ++-
 src/libserver/css/css_parser.cxx | 13 ++++---
 src/libserver/css/css_parser.hxx |  3 +-
 src/libserver/css/css_rule.cxx   | 32 +++++++++++++++++
 src/libserver/css/css_rule.hxx   |  8 +++++
 6 files changed, 122 insertions(+), 15 deletions(-)

diff --git a/src/libserver/css/css.cxx b/src/libserver/css/css.cxx
index 4587085a8..bd26cee1e 100644
--- a/src/libserver/css/css.cxx
+++ b/src/libserver/css/css.cxx
@@ -16,7 +16,7 @@
 
 #include "css.h"
 #include "css.hxx"
-#include "css_style.hxx"
+#include "contrib/robin-hood/robin_hood.h"
 #include "css_parser.hxx"
 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
 #define DOCTEST_CONFIG_IMPLEMENT
@@ -28,8 +28,6 @@ rspamd_css_parse_style (rspamd_mempool_t *pool, const guchar *begin, gsize len,
 {
 	auto parse_res = rspamd::css::parse_css(pool, {(const char* )begin, len});
 
-#if 0
-	/* Return once semantical parsing is done */
 	if (parse_res.has_value()) {
 		return reinterpret_cast<rspamd_css>(parse_res.value().release());
 	}
@@ -39,9 +37,6 @@ rspamd_css_parse_style (rspamd_mempool_t *pool, const guchar *begin, gsize len,
 				"parse error");
 		return nullptr;
 	}
-#else
-	return nullptr;
-#endif
 }
 
 namespace rspamd::css {
@@ -49,10 +44,73 @@ namespace rspamd::css {
 INIT_LOG_MODULE_PUBLIC(css);
 
 class css_style_sheet::impl {
-
+public: /* Storage behind css_style_sheet: one table per selector type */
+	using selector_ptr = std::unique_ptr<css_selector>;
+	using selectors_hash = robin_hood::unordered_flat_map<selector_ptr, css_declarations_block_ptr>; /* selector -> declarations */
+	using universal_selector_t = std::pair<selector_ptr, css_declarations_block_ptr>;
+	selectors_hash tags_selector; /* filled for SELECTOR_ELEMENT selectors */
+	selectors_hash class_selectors; /* filled for SELECTOR_CLASS selectors */
+	selectors_hash id_selectors; /* filled for SELECTOR_ID selectors */
+	std::optional<universal_selector_t> universal_selector; /* SELECTOR_ALL rule; empty until the first one is added */
 };
 
-css_style_sheet::css_style_sheet () : pimpl(new impl) {}
-css_style_sheet::~css_style_sheet () {}
+css_style_sheet::css_style_sheet(rspamd_mempool_t *pool) /* remembers pool, starts with an empty impl */
+		: pool(pool), pimpl(std::make_unique<impl>()) {}
+css_style_sheet::~css_style_sheet() {} /* defined out of line, where impl is a complete type */
+
+auto
+css_style_sheet::add_selector_rule(std::unique_ptr<css_selector> &&selector,
+									css_declarations_block_ptr decls) -> void
+{ /* Files decls under selector in the table for selector->type, merging on a repeat */
+	impl::selectors_hash *target_hash = nullptr; /* stays null for SELECTOR_ALL */
+
+	switch(selector->type) {
+	case css_selector::selector_type::SELECTOR_ALL:
+		if (pimpl->universal_selector) {
+			/* A universal selector is already stored: fold the new rules into it */
+			msg_debug_css("redefined universal selector, merging rules");
+			pimpl->universal_selector->second->merge_block(*decls);
+		}
+		else {
+			msg_debug_css("added universal selector");
+			pimpl->universal_selector = std::make_pair(std::move(selector),
+					decls);
+		}
+		break;
+	case css_selector::selector_type::SELECTOR_CLASS:
+		target_hash = &pimpl->class_selectors;
+		break;
+	case css_selector::selector_type::SELECTOR_ID:
+		target_hash = &pimpl->id_selectors;
+		break;
+	case css_selector::selector_type::SELECTOR_ELEMENT:
+		target_hash = &pimpl->tags_selector;
+		break;
+	}
+
+	if (target_hash) {
+		auto found_it = target_hash->find(selector);
+
+		if (found_it == target_hash->end()) {
+			/* Easy case: no such key yet, the selector is moved into the table */
+			target_hash->insert({std::move(selector), decls});
+		}
+		else {
+			/* The hard part of merging is selector chains. Take two selectors:
+			 * 1. class id tag -> match the class, then the id, and finally the tag
+			 * 2. tag class id -> match the tag first, then the class, then the id
+			 * Both describe a roughly equal path, in xpath terms.
+			 * The working assumption for now: only the parent is checked here and
+			 * duplicated chains get merged later, when paths are finally resolved.
+			 * NOTE(review): selectors_hash sets no custom hash/equality, so find()
+			 * presumably compares unique_ptr addresses, not contents - confirm.
+			 */
+			auto sel_str = selector->to_string().value_or("unknown");
+			msg_debug_css("found duplicate selector: %*s", (int)sel_str.size(),
+					sel_str.data());
+			found_it->second->merge_block(*decls);
+		}
+	}
+}
 
 }
\ No newline at end of file
diff --git a/src/libserver/css/css.hxx b/src/libserver/css/css.hxx
index 9ed323ec3..739ad3251 100644
--- a/src/libserver/css/css.hxx
+++ b/src/libserver/css/css.hxx
@@ -40,10 +40,13 @@ extern unsigned int rspamd_css_log_id;
 
 class css_style_sheet {
 public:
-	css_style_sheet();
+	css_style_sheet(rspamd_mempool_t *pool); /* NOTE(review): single-argument constructor; consider marking it explicit */
 	~css_style_sheet(); /* must be declared separately due to pimpl */
+	auto add_selector_rule(std::unique_ptr<css_selector> &&selector,
+						   css_declarations_block_ptr decls) -> void; /* stores decls for selector, or merges them into an existing entry */
 private:
 	class impl;
+	rspamd_mempool_t *pool; /* raw pointer kept from the constructor; the destructor does not release it */
 	std::unique_ptr<impl> pimpl;
 };
 
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 415039e19..f80386fc2 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -559,7 +559,7 @@ bool css_parser::consume_input(const std::string_view &sv)
 		return false;
 	}
 
-	style_object = std::make_unique<css_style_sheet>();
+	style_object = std::make_unique<css_style_sheet>(pool);
 
 	for (auto &&rule : rules) {
 		/*
@@ -627,6 +627,10 @@ bool css_parser::consume_input(const std::string_view &sv)
 						msg_debug_css("processed %d rules",
 								(int)declarations_vec->get_rules().size());
 
+						for (auto &&selector : selectors_vec) {
+							style_object->add_selector_rule(std::move(selector),
+									   declarations_vec);
+						}
 					}
 				}
 			}
@@ -677,15 +681,16 @@ get_selectors_parser_functor(rspamd_mempool_t *pool,
  * Wrapper for the parser
  */
 auto parse_css(rspamd_mempool_t *pool, const std::string_view &st) ->
-	bool
+		tl::expected<std::unique_ptr<css_style_sheet>, css_parse_error> /* an owned style sheet, or a css_parse_error */
 {
 	css_parser parser(pool);
 
 	if (parser.consume_input(st)) {
-		return true;
+		return parser.get_object_maybe(); /* presumably the style sheet built by consume_input() - confirm */
 	}
 
-	return false;
+	return tl::make_unexpected(css_parse_error{css_parse_error_type::PARSE_ERROR_INVALID_SYNTAX,
+											"cannot parse input"}); /* consume_input() returned false */
 }
 
 TEST_SUITE("css parser") {
diff --git a/src/libserver/css/css_parser.hxx b/src/libserver/css/css_parser.hxx
index be788ea81..845031618 100644
--- a/src/libserver/css/css_parser.hxx
+++ b/src/libserver/css/css_parser.hxx
@@ -183,8 +183,9 @@ extern const css_consumed_block css_parser_eof_block;
 
 using blocks_gen_functor = std::function<const css_consumed_block &(void)>;
 
+class css_style_sheet;
 auto parse_css(rspamd_mempool_t *pool, const std::string_view &st) ->
-		bool;
+	tl::expected<std::unique_ptr<css_style_sheet>, css_parse_error>;
 
 auto get_selectors_parser_functor(rspamd_mempool_t *pool,
 								  const std::string_view &st) -> blocks_gen_functor;
diff --git a/src/libserver/css/css_rule.cxx b/src/libserver/css/css_rule.cxx
index bd589da95..cf04eb689 100644
--- a/src/libserver/css/css_rule.cxx
+++ b/src/libserver/css/css_rule.cxx
@@ -313,6 +313,38 @@ auto process_declaration_tokens(rspamd_mempool_t *pool,
 	return ret; /* copy elision */
 }
 
+auto
+css_declarations_block::merge_block(const css_declarations_block &other, merge_type how)
+	-> void
+{ /* Folds every rule of `other` into this block; `how` decides what happens on a clash */
+	const auto &other_rules = other.get_rules();
+
+	for (const auto &rule : other_rules) {
+		auto &&found_it = rules.find(rule);
+
+		if (found_it != rules.end()) {
+			/* An equal rule is already stored here, resolve the clash */
+			switch(how) {
+			case merge_type::merge_override:
+				/* insert() keeps an already stored equal rule, so drop that one first */
+				if (*found_it != rule) { rules.erase(found_it); rules.insert(rule); }
+				break;
+			case merge_type::merge_duplicate:
+				/* Merge values */
+				(*found_it)->merge_values(*rule);
+				break;
+			case merge_type::merge_parent:
+				/* Do not merge parent rule if more specific local one is presented */
+				break;
+			}
+		}
+		else {
+			/* New property, just insert */
+			rules.insert(rule);
+		}
+	}
+}
+
 void css_rule::add_value(const css_value &value)
 {
 	values.push_back(value);
diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx
index 05c3fd82d..c7cbae45d 100644
--- a/src/libserver/css/css_rule.hxx
+++ b/src/libserver/css/css_rule.hxx
@@ -81,8 +81,16 @@ public:
 	using rule_shared_ptr = std::shared_ptr<css_rule>;
 	using rule_shared_hash = shared_ptr_hash<css_rule>;
 	using rule_shared_eq = shared_ptr_equal<css_rule>;
+	enum class merge_type { /* how merge_block() treats a rule that is already present */
+		merge_duplicate, /* merge the incoming rule's values into the stored rule */
+		merge_parent, /* leave the stored rule untouched */
+		merge_override /* the incoming rule is meant to take over */
+	};
+
 	css_declarations_block() = default;
 	auto add_rule(rule_shared_ptr &&rule) -> bool;
+	auto merge_block(const css_declarations_block &other,
+				  merge_type how = merge_type::merge_duplicate) -> void;
 	auto get_rules(void) const -> const auto & {
 		return rules;
 	}


More information about the Commits mailing list