commit ef252b1: [Project] Css: Implement simple css selectors lookup

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jun 15 14:28:08 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-15 14:55:02 +0100
URL: https://github.com/rspamd/rspamd/commit/ef252b1d2cb9fe410392f2dae52f4202392ff12d

[Project] Css: Implement simple css selectors lookup

---
 src/libserver/css/css.cxx          | 117 ++++++++++++++++++++++++++++++++++++-
 src/libserver/css/css.hxx          |   9 +++
 src/libserver/css/css_rule.hxx     |   4 +-
 src/libserver/css/css_selector.hxx |  31 +++++++++-
 4 files changed, 157 insertions(+), 4 deletions(-)

diff --git a/src/libserver/css/css.cxx b/src/libserver/css/css.cxx
index 12f7753c7..c68148341 100644
--- a/src/libserver/css/css.cxx
+++ b/src/libserver/css/css.cxx
@@ -17,6 +17,9 @@
 #include "css.hxx"
 #include "contrib/robin-hood/robin_hood.h"
 #include "css_parser.hxx"
+#include "libserver/html/html_tag.hxx"
+#include "libserver/html/html_block.hxx"
+
 /* Keep unit tests implementation here (it'll possibly be moved outside one day) */
 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
 #define DOCTEST_CONFIG_IMPLEMENT
@@ -28,8 +31,11 @@ INIT_LOG_MODULE_PUBLIC(css);
 
 class css_style_sheet::impl {
 public:
+	using sel_shared_hash = smart_ptr_hash<css_selector>;
+	using sel_shared_eq = smart_ptr_equal<css_selector>;
 	using selector_ptr = std::unique_ptr<css_selector>;
-	using selectors_hash = robin_hood::unordered_flat_map<selector_ptr, css_declarations_block_ptr>;
+	using selectors_hash = robin_hood::unordered_flat_map<selector_ptr, css_declarations_block_ptr,
+			sel_shared_hash, sel_shared_eq>;
 	using universal_selector_t = std::pair<selector_ptr, css_declarations_block_ptr>;
 	selectors_hash tags_selector;
 	selectors_hash class_selectors;
@@ -96,6 +102,115 @@ css_style_sheet::add_selector_rule(std::unique_ptr<css_selector> &&selector,
 	}
 }
 
+/**
+ * Looks up the declarations that apply to an HTML tag and merges them into a
+ * single html block.
+ *
+ * Selectors are consulted in this order: the `id` attribute, every
+ * space-separated name of the `class` attribute, the tag type and finally the
+ * universal selector. The first match becomes the result; the block compiled
+ * from each later match is merged into it with `propagate_block`.
+ *
+ * @param tag tag to match, may be nullptr
+ * @return the merged block, or nullptr if `tag` is nullptr or no selector
+ * matches
+ */
+auto
+css_style_sheet::check_tag_block(const rspamd::html::html_tag *tag) ->
+		rspamd::html::html_block *
+{
+	std::optional<std::string_view> id_comp, class_comp;
+	rspamd::html::html_block *res = nullptr;
+
+	if (!tag) {
+		return nullptr;
+	}
+
+	/* Compiles one matched declarations block and folds it into the result */
+	auto merge_decls = [&](const auto &decl) -> void {
+		auto *tmp = decl.compile_to_block(pool);
+
+		if (res == nullptr) {
+			res = tmp;
+		}
+		else if (tmp != nullptr) {
+			/* Guard: never dereference a missing block */
+			res->propagate_block(*tmp);
+		}
+	};
+
+	/* First, find id in a tag and a class (the last occurrence of each wins) */
+	for (const auto &param : tag->parameters) {
+		if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_ID) {
+			id_comp = param.value;
+		}
+		else if (param.type == html::html_component_type::RSPAMD_HTML_COMPONENT_CLASS) {
+			class_comp = param.value;
+		}
+	}
+
+	/* ID part */
+	if (id_comp && !pimpl->id_selectors.empty()) {
+		auto found_id_sel = pimpl->id_selectors.find(css_selector{id_comp.value()});
+
+		if (found_id_sel != pimpl->id_selectors.end()) {
+			merge_decls(*(found_id_sel->second));
+		}
+	}
+
+	/* Class part */
+	if (class_comp && !pimpl->class_selectors.empty()) {
+		/* Splits a string view into its non-empty tokens, each returned once */
+		auto sv_split = [](std::string_view strv, std::string_view delims = " ") -> std::vector<std::string_view> {
+			std::vector<std::string_view> ret;
+			std::size_t start = 0;
+
+			while (start < strv.size()) {
+				const auto last = strv.find_first_of(delims, start);
+
+				if (last != start) {
+					/*
+					 * When no delimiter is left (`last == npos`) the count is
+					 * clamped by substr, which yields the final token.
+					 */
+					ret.emplace_back(strv.substr(start, last - start));
+				}
+
+				if (last == std::string_view::npos) {
+					break;
+				}
+
+				start = last + 1;
+			}
+
+			return ret;
+		};
+
+		for (const auto &e : sv_split(class_comp.value())) {
+			auto found_class_sel = pimpl->class_selectors.find(
+					css_selector{e, css_selector::selector_type::SELECTOR_CLASS});
+
+			/* An iterator may only be compared with end() of its own container */
+			if (found_class_sel != pimpl->class_selectors.end()) {
+				merge_decls(*(found_class_sel->second));
+			}
+		}
+	}
+
+	/* Tags part */
+	if (!pimpl->tags_selector.empty()) {
+		auto found_tag_sel = pimpl->tags_selector.find(
+				css_selector{static_cast<tag_id_t>(tag->id)});
+
+		if (found_tag_sel != pimpl->tags_selector.end()) {
+			merge_decls(*(found_tag_sel->second));
+		}
+	}
+
+	/* Finally, universal selector */
+	if (pimpl->universal_selector) {
+		merge_decls(*(pimpl->universal_selector->second));
+	}
+
+	return res;
+}
+
 auto
 css_parse_style(rspamd_mempool_t *pool,
 					 std::string_view input,
diff --git a/src/libserver/css/css.hxx b/src/libserver/css/css.hxx
index a169a1052..21114cc86 100644
--- a/src/libserver/css/css.hxx
+++ b/src/libserver/css/css.hxx
@@ -24,6 +24,12 @@
 #include "css_rule.hxx"
 #include "css_selector.hxx"
 
+namespace rspamd::html {
+/*
+ * Forward declarations: css_style_sheet::check_tag_block only uses these
+ * types through pointers in its declaration, so the html headers do not
+ * have to be included here.
+ */
+struct html_tag;
+struct html_block;
+}
+
 namespace rspamd::css {
 
 extern unsigned int rspamd_css_log_id;
@@ -43,6 +49,9 @@ public:
 	~css_style_sheet(); /* must be declared separately due to pimpl */
 	auto add_selector_rule(std::unique_ptr<css_selector> &&selector,
 						   css_declarations_block_ptr decls) -> void;
+	/**
+	 * Builds an html block from the declarations found for `tag` by looking
+	 * up its id, its classes, its tag type and the universal selector.
+	 * Yields nullptr when `tag` is nullptr.
+	 */
+	auto check_tag_block(const rspamd::html::html_tag *tag) ->
+		rspamd::html::html_block *;
 private:
 	class impl;
 	rspamd_mempool_t *pool;
diff --git a/src/libserver/css/css_rule.hxx b/src/libserver/css/css_rule.hxx
index b29bf298f..8de6c7891 100644
--- a/src/libserver/css/css_rule.hxx
+++ b/src/libserver/css/css_rule.hxx
@@ -87,8 +87,8 @@ namespace rspamd::css {
 class css_declarations_block {
 public:
 	using rule_shared_ptr = std::shared_ptr<css_rule>;
-	using rule_shared_hash = shared_ptr_hash<css_rule>;
-	using rule_shared_eq = shared_ptr_equal<css_rule>;
+	using rule_shared_hash = smart_ptr_hash<css_rule>;
+	using rule_shared_eq = smart_ptr_equal<css_rule>;
 	enum class merge_type {
 		merge_duplicate,
 		merge_parent,
diff --git a/src/libserver/css/css_selector.hxx b/src/libserver/css/css_selector.hxx
index 633b12c70..1e8145732 100644
--- a/src/libserver/css/css_selector.hxx
+++ b/src/libserver/css/css_selector.hxx
@@ -29,6 +29,7 @@
 #include "parse_error.hxx"
 #include "css_parser.hxx"
 #include "libserver/html/html_tags.h"
+#include "libcryptobox/cryptobox.h"
 
 namespace rspamd::css {
 
@@ -67,13 +68,23 @@ struct css_selector {
 	}
 
 	auto to_string(void) const -> std::optional<const std::string_view> {
-		if (type == selector_type::SELECTOR_ELEMENT) {
+		if (type != selector_type::SELECTOR_ELEMENT) {
 			return std::string_view(std::get<std::string_view>(value));
 		}
 		return std::nullopt;
 	};
 
 	explicit css_selector(selector_type t) : type(t) {}
+	/* Creates an element selector that stores the tag id `t` as its value */
+	explicit css_selector(tag_id_t t) :
+			type(selector_type::SELECTOR_ELEMENT), value(t) {}
+	/* Creates a selector of kind `t` (an id selector by default) that stores the string `st` */
+	explicit css_selector(const std::string_view &st, selector_type t = selector_type::SELECTOR_ID) :
+			type(t), value(st) {}
+
+	/* Two selectors are equal when both their kind and their stored value match */
+	auto operator ==(const css_selector &other) const -> bool {
+		if (type != other.type) {
+			return false;
+		}
+
+		return value == other.value;
+	}
 
 	auto debug_str(void) const -> std::string;
 };
@@ -90,4 +101,22 @@ auto process_selector_tokens(rspamd_mempool_t *pool,
 
 }
 
+/* Selectors hashing */
+namespace std {
+/**
+ * std::hash specialization for css_selector.
+ * Element selectors hash to their numeric tag id; every other selector hashes
+ * the bytes of its string value.
+ */
+template<>
+class hash<rspamd::css::css_selector> {
+public:
+	/*
+	 * The return type is spelled out on purpose: with a deduced return type
+	 * both `return` statements must yield exactly the same type, which would
+	 * depend on the typedef behind rspamd_cryptobox_fast_hash's result.
+	 */
+	auto operator() (const rspamd::css::css_selector &sel) const -> std::uint64_t {
+		if (sel.type == rspamd::css::css_selector::selector_type::SELECTOR_ELEMENT) {
+			return static_cast<std::uint64_t>(std::get<tag_id_t>(sel.value));
+		}
+
+		/* NOTE(review): assumes every non-element selector holds a string value - confirm */
+		const auto &sv = std::get<std::string_view>(sel.value);
+
+		return static_cast<std::uint64_t>(
+				rspamd_cryptobox_fast_hash(sv.data(), sv.size(), 0xdeadbabe));
+	}
+};
+}
+
 #endif //RSPAMD_CSS_SELECTOR_HXX


More information about the Commits mailing list