commit de7ac4e: [Minor] Add a simple routine to remove smtp comments in place

Vsevolod Stakhov vsevolod at highsecure.ru
Fri Jun 18 12:42:04 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-06-18 13:35:05 +0100
URL: https://github.com/rspamd/rspamd/commit/de7ac4e37284fcd241060213619297df41a71dce

[Minor] Add a simple routine to remove smtp comments in place

---
 src/libmime/mime_headers.c     | 87 ++++++++++++++++++++++++++++++++++++++++++
 src/libmime/mime_headers.h     |  8 ++++
 test/rspamd_cxx_unit.cxx       |  3 +-
 test/rspamd_cxx_unit_utils.hxx | 51 +++++++++++++++++++++++++
 4 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/src/libmime/mime_headers.c b/src/libmime/mime_headers.c
index 9dc336cf7..0bca94024 100644
--- a/src/libmime/mime_headers.c
+++ b/src/libmime/mime_headers.c
@@ -1967,4 +1967,91 @@ rspamd_message_set_modified_header (struct rspamd_task *task,
 			}
 		}
 	}
+}
+
+/**
+ * Removes rfc822 comments, i.e. `(...)` sequences, from a string in place.
+ *
+ * Comments can be nested, so the nesting depth is tracked and a comment is
+ * finished only when its outermost brace is closed. A backslash quotes the
+ * next character: outside of comments the backslash is dropped and the quoted
+ * character is copied verbatim; inside comments both characters are skipped,
+ * so a quoted brace never changes the nesting depth.
+ *
+ * Examples:
+ *   "ab(foo)c"  -> "abc"
+ *   "a((b))c"   -> "ac"   (nested comment)
+ *   "a(b()c)d"  -> "ad"   (text after an inner comment is still a comment)
+ *   "abc(foo))" -> "abc)" (unbalanced closing brace is kept)
+ *   "abc(foo()" -> "abc"  (unterminated comment lasts till the end)
+ *
+ * @param input buffer to process, it is rewritten in place
+ * @param len length of the input
+ * @return length of the data left in the buffer (it is not NUL terminated)
+ */
+gsize
+rspamd_strip_smtp_comments_inplace (gchar *input, gsize len)
+{
+	enum parser_state {
+		parse_normal,
+		parse_comment,
+		parse_quoted_copy,
+		parse_quoted_ignore,
+	} state = parse_normal;
+	gchar *d = input, *end = input + len, *start = input;
+	gchar t;
+	/* Number of braces that are opened but not closed yet */
+	gsize depth = 0;
+
+	while (input < end) {
+		/*
+		 * At most one byte is written per byte read, so `d` never overtakes
+		 * `input` and it is safe to rewrite the buffer in place
+		 */
+		t = *input++;
+		switch (state) {
+		case parse_normal:
+			if (t == '(') {
+				/* A comment begins: nothing is copied until it is closed */
+				depth = 1;
+				state = parse_comment;
+			}
+			else if (t == '\\') {
+				/* Drop the backslash itself, the next character is copied */
+				state = parse_quoted_copy;
+			}
+			else {
+				/* Ordinary character, including an unbalanced `)` */
+				*d++ = t;
+			}
+			break;
+		case parse_comment:
+			if (t == '(') {
+				/* Nested comment */
+				depth ++;
+			}
+			else if (t == ')') {
+				if (--depth == 0) {
+					/* The outermost comment is closed */
+					state = parse_normal;
+				}
+			}
+			else if (t == '\\') {
+				/* Quoted braces must not affect the depth */
+				state = parse_quoted_ignore;
+			}
+			break;
+		case parse_quoted_copy:
+			/* Quoted character outside of a comment: keep it as is */
+			*d++ = t;
+			state = parse_normal;
+			break;
+		case parse_quoted_ignore:
+			/* Quoted character inside a comment: skip it */
+			state = parse_comment;
+			break;
+		}
+	}
+
+	return (d - start);
 }
\ No newline at end of file
diff --git a/src/libmime/mime_headers.h b/src/libmime/mime_headers.h
index ad8f1b68f..f24b0d6c6 100644
--- a/src/libmime/mime_headers.h
+++ b/src/libmime/mime_headers.h
@@ -206,6 +206,14 @@ struct rspamd_mime_headers_table * rspamd_message_headers_ref (struct rspamd_mim
  */
 struct rspamd_mime_headers_table* rspamd_message_headers_new (void);
 
+/**
+ * Strip rfc822 comments, i.e. `(...)` sequences, from a string in place
+ * @param input buffer to process, it is rewritten in place
+ * @param len length of the input
+ * @return new length of the input
+ */
+gsize rspamd_strip_smtp_comments_inplace (gchar *input, gsize len);
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/test/rspamd_cxx_unit.cxx b/test/rspamd_cxx_unit.cxx
index 9d37ff56a..e67060dca 100644
--- a/test/rspamd_cxx_unit.cxx
+++ b/test/rspamd_cxx_unit.cxx
@@ -19,9 +19,10 @@
 #include <memory>
 
 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
-
 #include "doctest/doctest.h"
 
+#include "rspamd_cxx_unit_utils.hxx"
+
 static gboolean verbose = false;
 static const GOptionEntry entries[] =
 		{
diff --git a/test/rspamd_cxx_unit_utils.hxx b/test/rspamd_cxx_unit_utils.hxx
new file mode 100644
index 000000000..f9aa86a95
--- /dev/null
+++ b/test/rspamd_cxx_unit_utils.hxx
@@ -0,0 +1,51 @@
+/*-
+ * Copyright 2021 Vsevolod Stakhov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Detached unit tests for the utils */
+
+#ifndef RSPAMD_RSPAMD_CXX_UNIT_UTILS_HXX
+#define RSPAMD_RSPAMD_CXX_UNIT_UTILS_HXX
+
+#define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
+#include "doctest/doctest.h"
+
+#include "libmime/mime_headers.h"
+#include <vector>
+#include <utility>
+#include <string>
+
+TEST_CASE("rspamd_strip_smtp_comments_inplace") {
+	/* Pairs of {input, expected result of the comments stripping} */
+	std::vector<std::pair<std::string, std::string>> cases{
+			{"abc", "abc"},
+			{"abc(foo)", "abc"},
+			{"abc(foo()", "abc"},
+			{"abc(foo))", "abc)"},
+			{"abc(foo(bar))", "abc"},
+			{"(bar)abc(foo)", "abc"},
+			{"ab(ololo)c(foo)", "abc"},
+			{"ab(trol\\\1lo)c(foo)", "abc"},
+			{"\\ab(trol\\\1lo)c(foo)", "abc"},
+	};
+	for (const auto &c : cases) {
+		/* The vector owns the mutable copy of the input: nothing leaks */
+		std::vector<char> cpy(c.first.begin(), c.first.end());
+		auto nlen = rspamd_strip_smtp_comments_inplace(cpy.data(), cpy.size());
+		CHECK(std::string{cpy.data(), nlen} == c.second);
+	}
+}
+
+#endif


More information about the Commits mailing list