commit e0befa6: [Test] Add unit tests for unfolding

Vsevolod Stakhov vsevolod at rspamd.com
Mon Jul 29 17:49:21 UTC 2024


Author: Vsevolod Stakhov
Date: 2023-11-25 13:42:03 +0000
URL: https://github.com/rspamd/rspamd/commit/e0befa616f7f0fdfc823b8a442f398e8c649cd95 (refs/pull/4716/head)

[Test] Add unit tests for unfolding

---
 src/libmime/mime_headers.c     | 74 +++++++++++++++++++++++++++++++++++++++++-
 src/libmime/mime_headers.h     |  8 +++++
 test/rspamd_cxx_unit_utils.hxx | 51 +++++++++++++++++++++++++++--
 3 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 2b6b2acc0..e250e84f1 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -1025,6 +1025,76 @@ rspamd_message_headers_new(void)
 	return nhdrs;
 }
 
+gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len)
+{
+	/*
+	 * Unfold in place: CR, LF or CRLF followed by a whitespace run becomes one ' '.
+	 * t - tortoise (destination), h - hare (source); t never gets ahead of h.
+	 */
+	char *t = hdr, *h = hdr, *end = (hdr + len);
+	enum {
+		copy_chars, /* plain content: bytes are copied until a CR or LF shows up */
+		folding_cr, /* a CR has been consumed but not written */
+		folding_lf, /* an LF (possibly after a CR) has been consumed but not written */
+		folding_ws, /* inside the whitespace run that follows a line break */
+	} state = copy_chars;
+
+	while (h < end) {
+		switch (state) {
+		case copy_chars:
+			if (*h == '\r') {
+				state = folding_cr;
+				h++;
+			}
+			else if (*h == '\n') {
+				state = folding_lf;
+				h++;
+			}
+			else {
+				*t++ = *h++;
+			}
+			break;
+		case folding_cr:
+			if (*h == '\n') { /* CRLF pair */
+				state = folding_lf;
+				h++;
+			}
+			else if (g_ascii_isspace(*h)) { /* bare CR directly followed by whitespace */
+				state = folding_ws;
+				h++;
+			}
+			else {
+				/* Not a folding: write the skipped CR back; *h is re-examined in copy_chars */
+				*t++ = '\r';
+				state = copy_chars;
+			}
+			break;
+		case folding_lf:
+			if (g_ascii_isspace(*h)) {
+				state = folding_ws;
+				h++;
+			}
+			else {
+				/* Not a folding: write an LF back (a CR that preceded it is not restored) */
+				*t++ = '\n';
+				state = copy_chars;
+			}
+			break;
+		case folding_ws:
+			if (!g_ascii_isspace(*h)) { /* run is over: emit the single replacement space */
+				*t++ = ' ';
+				state = copy_chars;
+			}
+			else {
+				h++; /* swallow the rest of the whitespace run */
+			}
+			break;
+		}
+	}
+
+	return t - hdr; /* bytes kept; a line break or fold still pending at end of input is dropped */
+}
+
 void rspamd_message_set_modified_header(struct rspamd_task *task,
 										struct rspamd_mime_headers_table *hdrs,
 										const gchar *hdr_name,
@@ -1201,8 +1271,10 @@ void rspamd_message_set_modified_header(struct rspamd_task *task,
 					nhdr->name = hdr_elt->name;
 					nhdr->value = rspamd_mempool_alloc(task->task_pool,
 													   raw_len + 1);
+					/* Strlcpy will ensure that value will have no embedded \0 */
 					rspamd_strlcpy(nhdr->value, raw_value, raw_len + 1);
-					/* TODO: unfold header value, sigh */
+					gsize value_len = rspamd_message_header_unfold_inplace(nhdr->value, raw_len);
+					nhdr->value[value_len] = '\0';
 
 					/* Deal with the raw value */
 					size_t namelen = strlen(hdr_elt->name);
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index ffa863e8b..60015a20e 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -185,6 +185,14 @@ bool rspamd_mime_headers_foreach(const struct rspamd_mime_headers_table *,
  */
 gsize rspamd_strip_smtp_comments_inplace(gchar *input, gsize len);
 
+/**
+ * Unfold header in place: a line break followed by whitespace becomes a single space
+ * @param hdr header value, rewritten in place (no NUL terminator is written)
+ * @param len length of the header value in bytes
+ * @return new unfolded length, never greater than len
+ */
+gsize rspamd_message_header_unfold_inplace(char *hdr, gsize len);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/test/rspamd_cxx_unit_utils.hxx b/test/rspamd_cxx_unit_utils.hxx
index 3e53a6d33..126253fd6 100644
--- a/test/rspamd_cxx_unit_utils.hxx
+++ b/test/rspamd_cxx_unit_utils.hxx
@@ -1,11 +1,11 @@
-/*-
- * Copyright 2021 Vsevolod Stakhov
+/*
+ * Copyright 2023 Vsevolod Stakhov
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
- *   http://www.apache.org/licenses/LICENSE-2.0
+ *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -159,6 +159,51 @@ TEST_SUITE("rspamd_utils")
 			rspamd_fstring_free(fstr);
 		}
 	}
+
+	TEST_CASE("rspamd_message_header_unfold_inplace")
+	{
+		std::vector<std::pair<std::string, std::string>> cases{ // {folded input, expected unfolded output}
+			{"abc", "abc"},
+			{"abc\r\n def", "abc def"},
+			{"abc\r\n\tdef", "abc def"},
+			{"abc\r\n\tdef\r\n\tghi", "abc def ghi"},
+			{"abc\r\n\tdef\r\n\tghi\r\n", "abc def ghi"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\t", "abc def ghi"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl", "abc def ghi jkl"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n", "abc def ghi jkl"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\t", "abc def ghi jkl"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno", "abc def ghi jkl mno"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n", "abc def ghi jkl mno"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\t", "abc def ghi jkl mno"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr", "abc def ghi jkl mno pqr"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n", "abc def ghi jkl mno pqr"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\t", "abc def ghi jkl mno pqr"},
+			{"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu", "abc def ghi jkl mno pqr stu"},
+			// Newline at the end: the trailing CRLF is expected to be dropped
+			{
+				"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n", "abc def ghi jkl mno pqr stu"},
+			// Whitespace at the end: a fold with nothing after it is expected to add no space
+			{
+				"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n\t", "abc def ghi jkl mno pqr stu"},
+			// Multiple whitespace characters at the end: still no trailing space expected
+			{
+				"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu\r\n\t   ", "abc def ghi jkl mno pqr stu"},
+			// Multiple spaces in middle: the three spaces before the fold stay, the fold adds one more
+			{
+				"abc\r\n\tdef\r\n\tghi\r\n\tjkl\r\n\tmno\r\n\tpqr\r\n\tstu   \r\n\t   a", "abc def ghi jkl mno pqr stu    a"},
+		};
+
+		for (const auto &c: cases) {
+			SUBCASE(("unfold header " + c.second).c_str()) // NOTE(review): expected strings repeat across cases, so these names are not unique - confirm doctest still runs every case
+			{
+				auto *cpy = new char[c.first.size()]; // mutable scratch copy: the function rewrites its buffer and c is const
+				memcpy(cpy, c.first.data(), c.first.size());
+				auto nlen = rspamd_message_header_unfold_inplace(cpy, c.first.size());
+				CHECK(std::string{cpy, nlen} == c.second); // compare only the nlen bytes kept; cpy carries no terminator
+				delete[] cpy;
+			}
+		}
+	}
 }
 
 #endif


More information about the Commits mailing list