commit d504e72: [Minor] Fix some base tag parsing issues

Vsevolod Stakhov vsevolod at highsecure.ru
Mon Jul 26 16:07:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-07-26 17:05:11 +0100
URL: https://github.com/rspamd/rspamd/commit/d504e72e0d768de1f24b02d18a165b1dddf5140d (HEAD -> master)

[Minor] Fix some base tag parsing issues

---
 src/libserver/html/html.cxx | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 088aad369..97a8640c5 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -567,6 +567,35 @@ html_parse_tag_content(rspamd_mempool_t *pool,
 	parser_env.cur_state = state;
 }
 
+/**
+ * Checks whether `st` starts with a url scheme, i.e. a non-empty run of ASCII
+ * alphanumerics followed by ':'. Only `mailto:` and schemes whose ':' is
+ * directly followed by '/' or '\' are accepted.
+ * @param st string to inspect (the href value at the visible call site)
+ * @return true if `st` looks like an absolute url, false otherwise
+ */
+static inline auto
+html_is_absolute_url(std::string_view st) -> bool
+{
+	/* First character that cannot be a part of the scheme */
+	auto alnum_pos = std::find_if(std::begin(st), std::end(st),
+			[](auto c) {return !g_ascii_isalnum(c);});

+	/* The delimiter itself must be ':' and the scheme must not be empty */
+	if (alnum_pos == std::begin(st) || alnum_pos == std::end(st) || *alnum_pos != ':') {
+		return false;
+	}

+	if (st.substr(0, std::distance(std::begin(st), alnum_pos)) == "mailto") {
+		return true;
+	}

+	std::advance(alnum_pos, 1);

+	/* Include even malformed urls, e.g. `http:/host` or `http:\host` */
+	return alnum_pos != std::end(st) && (*alnum_pos == '/' || *alnum_pos == '\\');
+}
+
 static auto
 html_process_url_tag(rspamd_mempool_t *pool,
 					 struct html_tag *tag,
@@ -586,7 +615,7 @@ html_process_url_tag(rspamd_mempool_t *pool,
 			 * slash
 			 */
 
-			if (rspamd_substring_search(href_value.data(), href_value.size(), "://", 3) == -1) {
+			if (!html_is_absolute_url(href_value)) {
 
 				if (href_value.size() >= sizeof("data:") &&
 					g_ascii_strncasecmp(href_value.data(), "data:", sizeof("data:") - 1) == 0) {


More information about the Commits mailing list