commit c8c91b2: [Minor] Some more fixes to spaces normalisation

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Jun 23 11:21:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-23 12:19:22 +0100
URL: https://github.com/rspamd/rspamd/commit/c8c91b2f1ee7ee321c68219b9eb515359a5ae962 (HEAD -> master)

[Minor] Some more fixes to spaces normalisation

---
 src/libserver/html/html.cxx          |  2 +-
 src/libserver/html/html_entities.cxx | 14 +++++---------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 20a38ee09..694a172b2 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -1824,7 +1824,7 @@ TEST_CASE("html text extraction")
 			 "    <div class=\"moz-forward-container\"><br>\n"
 			 "      <br>\n"
 			 "      test</div>"
-			 "</body>", "\ntest\n"},
+			 "</body>", "\n\n\ntest\n"},
 	};
 
 	rspamd_url_init(NULL);
diff --git a/src/libserver/html/html_entities.cxx b/src/libserver/html/html_entities.cxx
index d024c12e1..b0e682807 100644
--- a/src/libserver/html/html_entities.cxx
+++ b/src/libserver/html/html_entities.cxx
@@ -2550,13 +2550,9 @@ decode_html_entitles_inplace(char *s, std::size_t len, bool norm_spaces)
 		}
 	}
 
-	if (norm_spaces && g_ascii_isspace(*t)) {
-		do {
+	if (norm_spaces) {
+		while (t > s && g_ascii_isspace(*(t - 1))) {
 			t --;
-		} while (t > s && g_ascii_isspace(*t));
-
-		if (!g_ascii_isspace(*t)) {
-			t++; /* Preserve last space character */
 		}
 	}
 
@@ -2573,13 +2569,13 @@ TEST_SUITE("html") {
 				{"abc     def", "abc def"},
 				{"abc\ndef", "abc def"},
 				{"abc\n \tdef", "abc def"},
-				{"    abc def   ", " abc def "},
+				{"    abc def   ", "abc def"},
 				{"FOO&gt;BAR", "FOO>BAR"},
 				{"FOO&gtBAR", "FOO>BAR"},
 				{"FOO&gt BAR", "FOO>BAR"},
 				{"FOO&gt;;;BAR", "FOO>;;BAR"},
-				{"I'm &notit; ", "I'm ¬it; "},
-				{"I'm &notin; ", "I'm ∉ "},
+				{"I'm &notit; ", "I'm ¬it;"},
+				{"I'm &notin; ", "I'm ∉"},
 				{"FOO& BAR", "FOO& BAR"},
 				{"FOO&&&&gt;BAR", "FOO&&&>BAR"},
 				{"FOO&#41;BAR", "FOO)BAR"},


More information about the Commits mailing list