commit c8c91b2: [Minor] Some more fixes to spaces normalisation
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Jun 23 11:21:04 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-06-23 12:19:22 +0100
URL: https://github.com/rspamd/rspamd/commit/c8c91b2f1ee7ee321c68219b9eb515359a5ae962 (HEAD -> master)
[Minor] Some more fixes to spaces normalisation
---
src/libserver/html/html.cxx | 2 +-
src/libserver/html/html_entities.cxx | 14 +++++---------
2 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 20a38ee09..694a172b2 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1824,7 +1824,7 @@ TEST_CASE("html text extraction")
" <div class=\"moz-forward-container\"><br>\n"
" <br>\n"
" test</div>"
- "</body>", "\ntest\n"},
+ "</body>", "\n\n\ntest\n"},
};
rspamd_url_init(NULL);
diff --git a/src/libserver/html/html_entities.cxx b/src/libserver/html/html_entities.cxx
index d024c12e1..b0e682807 100644
--- a/src/libserver/html/html_entities.cxx
+++ b/src/libserver/html/html_entities.cxx
@@ -2550,13 +2550,9 @@ decode_html_entitles_inplace(char *s, std::size_t len, bool norm_spaces)
}
}
- if (norm_spaces && g_ascii_isspace(*t)) {
- do {
+ if (norm_spaces) {
+ while (t > s && g_ascii_isspace(*(t - 1))) {
t --;
- } while (t > s && g_ascii_isspace(*t));
-
- if (!g_ascii_isspace(*t)) {
- t++; /* Preserve last space character */
}
}
@@ -2573,13 +2569,13 @@ TEST_SUITE("html") {
{"abc def", "abc def"},
{"abc\ndef", "abc def"},
{"abc\n \tdef", "abc def"},
- {" abc def ", " abc def "},
+ {" abc def ", "abc def"},
{"FOO&gt;BAR", "FOO>BAR"},
{"FOO&gtBAR", "FOO>BAR"},
{"FOO&gt BAR", "FOO>BAR"},
{"FOO&gt;;;BAR", "FOO>;;BAR"},
- {"I'm &notit; ", "I'm ¬it; "},
- {"I'm &notin; ", "I'm ∉ "},
+ {"I'm &notit; ", "I'm ¬it;"},
+ {"I'm &notin; ", "I'm ∉"},
{"FOO& BAR", "FOO& BAR"},
{"FOO&&&&gt;BAR", "FOO&&&>BAR"},
{"FOO&#41;BAR", "FOO)BAR"},
More information about the Commits
mailing list