commit fcfaab4: [Project] Css: Rework tokens structure
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Jan 25 16:42:13 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-01-25 16:35:23 +0000
URL: https://github.com/rspamd/rspamd/commit/fcfaab40b8ea772ce9d72773930c329a6277da6d (HEAD -> master)
[Project] Css: Rework tokens structure
---
src/libserver/css/css_parser.cxx | 12 +++----
src/libserver/css/css_tokeniser.cxx | 72 ++++++++++++++++++++++++++-----------
src/libserver/css/css_tokeniser.hxx | 72 +++++++++++++++++++++++++------------
3 files changed, 106 insertions(+), 50 deletions(-)
diff --git a/src/libserver/css/css_parser.cxx b/src/libserver/css/css_parser.cxx
index 207cfcb9d..68f03cdfa 100644
--- a/src/libserver/css/css_parser.cxx
+++ b/src/libserver/css/css_parser.cxx
@@ -94,16 +94,16 @@ bool css_parser::consume_input(const std::string_view &sv)
css_tokeniser css_tokeniser(pool, sv);
while (!eof) {
- auto token_pair = css_tokeniser.next_token();
+ auto next_token = css_tokeniser.next_token();
/* Top level parser */
- switch (token_pair.first) {
- case css_parser_token::eof_token:
+ switch (next_token.type) {
+ case css_parser_token::token_type::eof_token:
eof = true;
break;
- case css_parser_token::whitespace_token:
- case css_parser_token::cdc_token:
- case css_parser_token::cdo_token:
+ case css_parser_token::token_type::whitespace_token:
+ case css_parser_token::token_type::cdc_token:
+ case css_parser_token::token_type::cdo_token:
/* Ignore tokens */
break;
}
diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx
index 40f202b01..058f7504e 100644
--- a/src/libserver/css/css_tokeniser.cxx
+++ b/src/libserver/css/css_tokeniser.cxx
@@ -19,8 +19,46 @@
namespace rspamd::css {
+/* Helpers to create tokens */
-auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string_view>
+/*
+ * This helper is intended to create tokens either with a tag and value
+ * or with just a tag.
+ */
+template<css_parser_token::token_type T, typename ...Args>
+auto make_token(const Args&... args) -> css_parser_token;
+
+template<>
+auto make_token<css_parser_token::token_type::string_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::string_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::whitespace_token, std::string_view>(const std::string_view &s)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::whitespace_token, s};
+}
+
+template<>
+auto make_token<css_parser_token::token_type::delim_token, char>(const char &c)
+ -> css_parser_token
+{
+ return css_parser_token{css_parser_token::token_type::delim_token, c};
+}
+
+/*
+ * Generic tokens with no value (non-terminals)
+ */
+template<css_parser_token::token_type T>
+auto make_token(void) -> css_parser_token
+{
+ return css_parser_token{T, css_parser_token_placeholder()};
+}
+
+auto css_tokeniser::next_token(void) -> struct css_parser_token
{
/* Helpers */
@@ -29,7 +67,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string
* offset is set to the next character after a comment (or eof)
* Nothing is returned
*/
- auto consume_comment = [this] () {
+ auto consume_comment = [this]() {
auto i = offset;
auto nested = 0;
@@ -64,7 +102,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string
* is set one character after the string. Css unescaping is done automatically
* Accepts a quote char to find end of string
*/
- auto consume_string = [this] (auto quote_char) -> auto {
+ auto consume_string = [this](auto quote_char) -> auto {
auto i = offset;
bool need_unescape = false;
@@ -122,8 +160,7 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string
}
else {
offset = i + 1;
- return std::make_pair (css_parser_token::delim_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::delim_token>(c);
}
break;
case ' ':
@@ -136,48 +173,41 @@ auto css_tokeniser::next_token (void) -> std::pair<css_parser_token, std::string
c = input[++i];
} while (i < input.size () && g_ascii_isspace (c));
- auto ret = std::make_pair (css_parser_token::whitespace_token,
- std::string_view (&input[offset], i - offset));
+ auto ret = make_token<css_parser_token::token_type::whitespace_token>(
+ std::string_view(&input[offset], i - offset));
offset = i;
return ret;
}
case '"':
case '\'':
offset = i + 1;
- return std::make_pair (css_parser_token::string_token,
- consume_string (c));
+ return make_token<css_parser_token::token_type::string_token>(consume_string(c));
case '(':
offset = i + 1;
- return std::make_pair (css_parser_token::obrace_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::obrace_token>();
case ')':
offset = i + 1;
- return std::make_pair (css_parser_token::ebrace_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::ebrace_token>();
case ',':
- offset = i + 1;
- return std::make_pair (css_parser_token::comma_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::comma_token>();
case '<':
/* Maybe an xml like comment */
if (i + 3 < input.size () && input[i + 1] == '!'
&& input[i + 2] == '-' && input[i + 3] == '-') {
offset += 3;
- return std::make_pair (css_parser_token::cdo_token,
- std::string_view (&input[offset - 3], 3));
+ return make_token<css_parser_token::token_type::cdo_token>();
}
else {
offset = i + 1;
- return std::make_pair (css_parser_token::delim_token,
- std::string_view (&input[offset - 1], 1));
+ return make_token<css_parser_token::token_type::delim_token>(c);
}
break;
}
}
- return std::make_pair (css_parser_token::eof_token, std::string_view ());
+ return make_token<css_parser_token::token_type::eof_token>();
}
}
\ No newline at end of file
diff --git a/src/libserver/css/css_tokeniser.hxx b/src/libserver/css/css_tokeniser.hxx
index 4c6824389..cff5877c2 100644
--- a/src/libserver/css/css_tokeniser.hxx
+++ b/src/libserver/css/css_tokeniser.hxx
@@ -21,41 +21,67 @@
#include <string_view>
#include <utility>
+#include <variant>
#include "mem_pool.h"
namespace rspamd::css {
-enum class css_parser_token {
- whitespace_token,
- ident_token,
- function_token,
- at_keyword_token,
- hash_token,
- string_token,
- number_token,
- url_token,
- dimension_token,
- percentage_token,
- cdo_token, /* xml open comment */
- cdc_token, /* xml close comment */
- delim_token,
- obrace_token, /* ( */
- ebrace_token, /* ) */
- osqbrace_token, /* [ */
- esqbrace_token, /* ] */
- comma_token,
- colon_token,
- semicolon_token,
- eof_token,
+struct css_parser_token_placeholder {}; /* For empty tokens */
+
+struct css_parser_token {
+ enum class token_type : std::uint8_t {
+ whitespace_token,
+ ident_token,
+ function_token,
+ at_keyword_token,
+ hash_token,
+ string_token,
+ number_token,
+ url_token,
+ dimension_token,
+ percentage_token,
+ cdo_token, /* xml open comment */
+ cdc_token, /* xml close comment */
+ delim_token,
+ obrace_token, /* ( */
+ ebrace_token, /* ) */
+ osqbrace_token, /* [ */
+ esqbrace_token, /* ] */
+ comma_token,
+ colon_token,
+ semicolon_token,
+ eof_token,
+ };
+
+ static const std::uint8_t default_flags = 0;
+ static const std::uint8_t flag_bad_string = (1u << 0u);
+ using value_type = std::variant<std::string_view, /* For strings and string like tokens */
+ char, /* For delimiters (might need to move to unicode point) */
+ double, /* For numeric stuff */
+ css_parser_token_placeholder /* For general no token stuff */
+ >;
+
+ /* Typed storage */
+ value_type value;
+ token_type type;
+ std::uint8_t flags = default_flags;
+
+ css_parser_token() = delete;
+ explicit css_parser_token(token_type type, const value_type &value) :
+ value(value), type(type) {}
};
+/* Ensure that parser tokens are simple enough */
+static_assert(std::is_trivially_copyable_v<css_parser_token>);
+
class css_tokeniser {
public:
css_tokeniser() = delete;
css_tokeniser(rspamd_mempool_t *pool, const std::string_view &sv) :
input(sv), offset(0), pool(pool) {}
- auto next_token(void) -> std::pair<css_parser_token, std::string_view>;
+ auto next_token(void) -> struct css_parser_token;
+ auto get_offset(void) const { return offset; }
private:
std::string_view input;
std::size_t offset;
More information about the Commits mailing list