commit 0883ad6: [Project] Css: Further fixes to lexer
Vsevolod Stakhov
vsevolod at highsecure.ru
Fri Jan 29 20:21:07 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-01-29 20:20:11 +0000
URL: https://github.com/rspamd/rspamd/commit/0883ad698f2164131836e8d8f738c1263a6b0a52 (HEAD -> master)
[Project] Css: Further fixes to lexer
---
src/libserver/css/css_tokeniser.cxx | 37 +++++++++++++++++++++++--------------
1 file changed, 23 insertions(+), 14 deletions(-)
diff --git a/src/libserver/css/css_tokeniser.cxx b/src/libserver/css/css_tokeniser.cxx
index d43920817..b8f4f5ebc 100644
--- a/src/libserver/css/css_tokeniser.cxx
+++ b/src/libserver/css/css_tokeniser.cxx
@@ -89,9 +89,18 @@ auto make_token(void) -> css_parser_token
return css_parser_token{T, css_parser_token_placeholder()};
}
+static constexpr inline auto is_plain_ident_start(char c) -> bool
+{
+ if ((c & 0x80) || g_ascii_isalpha(c) || c == '_') {
+ return true;
+ }
+
+ return false;
+};
+
static constexpr inline auto is_plain_ident(char c) -> bool
{
- if ((c & 0x80) || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
+ if (is_plain_ident_start(c) || c == '-' || g_ascii_isdigit(c)) {
return true;
}
@@ -200,6 +209,7 @@ css_parser_token::adjust_dim(const css_parser_token &dim_token) -> bool
}
else {
flags |= css_parser_token::flag_bad_dimension;
+ msg_err("hui: %*s", (int)sv.size(), sv.begin());
return false;
}
@@ -247,7 +257,9 @@ auto css_tokeniser::consume_ident() -> struct css_parser_token
while (i < input.size()) {
auto c = input[i];
- if (!is_plain_ident(c)) {
+ auto is_plain_c = allow_middle_minus ? is_plain_ident(c) :
+ is_plain_ident_start(c);
+ if (!is_plain_c) {
if (c == '\\' && i + 1 < input.size ()) {
/* Escape token */
need_escape = true;
@@ -383,6 +395,7 @@ auto css_tokeniser::consume_number() -> struct css_parser_token
//auto conv_res = std::from_chars(&input[offset], &input[i], num);
std::string numbuf{&input[offset], (i - offset)};
num = std::stod(numbuf);
+ offset = i;
auto ret = make_token<css_parser_token::token_type::number_token>(num);
@@ -393,11 +406,11 @@ auto css_tokeniser::consume_number() -> struct css_parser_token
offset = i;
}
- else if (is_plain_ident(input[i])) {
+ else if (is_plain_ident_start(input[i])) {
auto dim_token = consume_ident();
if (dim_token.type == css_parser_token::token_type::ident_token) {
- if (!dim_token.adjust_dim(dim_token)) {
+ if (!ret.adjust_dim(dim_token)) {
auto sv = std::get<std::string_view>(dim_token.value);
msg_debug_css("cannot apply dimension from the token %*s; number value = %.1f",
(int)sv.size(), sv.begin(), num);
@@ -408,17 +421,12 @@ auto css_tokeniser::consume_number() -> struct css_parser_token
else {
/* We have no option but to uncosume ident token in this case */
msg_debug_css("got invalid ident like token after number, unconsume it");
- offset = i;
}
}
else {
- /* Plain number */
- offset = i;
+ /* Plain number, nothing to do */
}
}
- else {
- offset = i;
- }
return ret;
}
@@ -655,7 +663,7 @@ auto css_tokeniser::next_token(void) -> struct css_parser_token
break;
case '@':
if (i + 3 < input.size()) {
- if (is_plain_ident(input[i + 1]) &&
+ if (is_plain_ident_start(input[i + 1]) &&
is_plain_ident(input[i + 2]) && is_plain_ident(input[i + 3])) {
offset = i + 1;
auto ident_token = consume_ident();
@@ -680,8 +688,9 @@ auto css_tokeniser::next_token(void) -> struct css_parser_token
case '#':
/* TODO: make it more conformant */
if (i + 2 < input.size()) {
- if (is_plain_ident(input[i + 1]) &&
- is_plain_ident(input[i + 2])) {
+ auto next_c = input[i + 1], next_next_c = input[i + 2];
+ if ((is_plain_ident(next_c) || next_c == '-') &&
+ (is_plain_ident(next_next_c) || next_next_c == '-')) {
offset = i + 1;
auto ident_token = consume_ident();
@@ -708,7 +717,7 @@ auto css_tokeniser::next_token(void) -> struct css_parser_token
if (g_ascii_isdigit(c)) {
return consume_number();
}
- else if (is_plain_ident(c)) {
+ else if (is_plain_ident_start(c)) {
return consume_ident();
}
else {
More information about the Commits
mailing list