commit 420a261: [Minor] Set special flag for non A generated html urls

Vsevolod Stakhov vsevolod at highsecure.ru
Tue Jan 11 20:28:06 UTC 2022


Author: Vsevolod Stakhov
Date: 2022-01-11 20:25:27 +0000
URL: https://github.com/rspamd/rspamd/commit/420a261aa40ed55163fd638aa71561207574eea8

[Minor] Set special flag for non A generated html urls

---
 src/libserver/html/html.cxx     | 18 ++++++++++++++----
 src/libserver/html/html_url.cxx | 14 +++++++++++---
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/libserver/html/html.cxx b/src/libserver/html/html.cxx
index 79120d109..4711a3caf 100644
--- a/src/libserver/html/html.cxx
+++ b/src/libserver/html/html.cxx
@@ -677,10 +677,17 @@ html_process_url_tag(rspamd_mempool_t *pool,
 			}
 		}
 
-		auto url = html_process_url(pool, href_value);
+		auto url = html_process_url(pool, href_value).value_or(nullptr);
 
-		if (url && std::holds_alternative<std::monostate>(tag->extra)) {
-			tag->extra = url.value();
+		if (url) {
+			if (tag->id != Tag_A) {
+				/* URLs coming from tags other than <a> are flagged as special */
+				url->flags |= RSPAMD_URL_FLAG_SPECIAL;
+			}
+
+			if (std::holds_alternative<std::monostate>(tag->extra)) {
+				tag->extra = url;
+			}
 		}
 
 		return url;
@@ -949,7 +956,10 @@ html_process_img_tag(rspamd_mempool_t *pool,
 	}
 
 	hc->images.push_back(img);
-	tag->extra = img;
+
+	if (std::holds_alternative<std::monostate>(tag->extra)) {
+		tag->extra = img;
+	}
 }
 
 static auto
diff --git a/src/libserver/html/html_url.cxx b/src/libserver/html/html_url.cxx
index 93fef68d1..34775ba13 100644
--- a/src/libserver/html/html_url.cxx
+++ b/src/libserver/html/html_url.cxx
@@ -276,9 +276,17 @@ html_check_displayed_url(rspamd_mempool_t *pool,
 			 * HTML part, we assume that it is also
 			 * hint only.
 			 */
-			if (turl->flags &
-				RSPAMD_URL_FLAG_FROM_TEXT) {
-				turl->flags |= displayed_url->flags;
+			if (turl->flags & RSPAMD_URL_FLAG_FROM_TEXT) {
+
+				/*
+				 * The same URL is used for both href and displayed url.
+				 * A url must not be both target and display at once (that
+				 * breaks logic in many places), so if it is already marked
+				 * as a display url we do not propagate html flags to it
+				 */
+				if (!(turl->flags & RSPAMD_URL_FLAG_DISPLAY_URL)) {
+					turl->flags |= displayed_url->flags;
+				}
 				turl->flags &= ~RSPAMD_URL_FLAG_FROM_TEXT;
 			}
 


More information about the Commits mailing list