commit 7245099: [Project] Add constant iterators

Vsevolod Stakhov vsevolod at highsecure.ru
Wed Sep 29 20:14:05 UTC 2021


Author: Vsevolod Stakhov
Date: 2021-09-29 21:13:20 +0100
URL: https://github.com/rspamd/rspamd/commit/72450991eeb435a5bc1fa74da31fc862bb431e4a (HEAD -> master)

[Project] Add constant iterators

---
 src/libmime/mime_string.cxx |  38 +++++--
 src/libmime/mime_string.hxx | 258 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 284 insertions(+), 12 deletions(-)

diff --git a/src/libmime/mime_string.cxx b/src/libmime/mime_string.cxx
index 1785e9188..96f829574 100644
--- a/src/libmime/mime_string.cxx
+++ b/src/libmime/mime_string.cxx
@@ -20,34 +20,35 @@
 #include "unicode/uchar.h"
 
 TEST_SUITE("mime_string") {
+using namespace rspamd::mime;
 TEST_CASE("mime_string unfiltered ctors")
 {
 	SUBCASE("empty") {
-		rspamd::mime_string st;
+		mime_string st;
 		CHECK(st.size() == 0);
 		CHECK(st == "");
 	}
 	SUBCASE("unfiltered valid") {
-		rspamd::mime_string st{std::string_view("abcd")};
+		mime_string st{std::string_view("abcd")};
 		CHECK(st == "abcd");
 	}
 	SUBCASE("unfiltered zero character") {
-		rspamd::mime_string st{"abc\0d", 5};
+		mime_string st{"abc\0d", 5};
 		CHECK(st.has_zeroes());
 		CHECK(st == "abcd");
 	}
 	SUBCASE("unfiltered invalid character - middle") {
-		rspamd::mime_string st{std::string("abc\234d")};
+		mime_string st{std::string("abc\234d")};
 		CHECK(st.has_invalid());
 		CHECK(st == "abc\uFFFDd");
 	}
 	SUBCASE("unfiltered invalid character - end") {
-		rspamd::mime_string st{std::string("abc\234")};
+		mime_string st{std::string("abc\234")};
 		CHECK(st.has_invalid());
 		CHECK(st == "abc\uFFFD");
 	}
 	SUBCASE("unfiltered invalid character - start") {
-		rspamd::mime_string st{std::string("\234abc")};
+		mime_string st{std::string("\234abc")};
 		CHECK(st.has_invalid());
 		CHECK(st == "\uFFFDabc");
 	}
@@ -68,32 +69,47 @@ TEST_CASE("mime_string filtered ctors")
 	};
 
 	SUBCASE("empty") {
-		rspamd::mime_string st{std::string_view(""), tolower_filter};
+		mime_string st{std::string_view(""), tolower_filter};
 		CHECK(st.size() == 0);
 		CHECK(st == "");
 	}
 	SUBCASE("filtered valid") {
-		rspamd::mime_string st{std::string("AbCdУ"), tolower_filter};
+		mime_string st{std::string("AbCdУ"), tolower_filter};
 		CHECK(st == "abcdу");
 	}
 	SUBCASE("filtered invalid + filtered") {
-		rspamd::mime_string st{std::string("abcd\234\1"), print_filter};
+		mime_string st{std::string("abcd\234\1"), print_filter};
 		CHECK(st == "abcd\uFFFD");
 	}
 }
+/* Exercises assign_if_valid() with an accepted and a rejected input */
 TEST_CASE("mime_string assign")
 {
 	SUBCASE("assign from valid") {
-		rspamd::mime_string st;
+		mime_string st;
 
+		/* Accepted input: the call reports success and the content is stored */
 		CHECK(st.assign_if_valid(std::string("test")));
 		CHECK(st == "test");
 	}
 	SUBCASE("assign from invalid") {
-		rspamd::mime_string st;
+		mime_string st;
 
+		/* Rejected input (stray \234 byte): the string must stay empty */
 		CHECK(!st.assign_if_valid(std::string("test\234t")));
 		CHECK(st == "");
 	}
 }
+
+/*
+ * Range-for over a mime_string built from pure ASCII input must yield the
+ * input characters in order, and exactly as many of them as the input has.
+ */
+TEST_CASE("mime_string iterators")
+{
+	SUBCASE("unfiltered iterator ascii") {
+		auto in = std::string("abcd");
+		mime_string st{in};
+		CHECK(st == "abcd");
+
+		std::string::size_type seen = 0;
+		for (auto &&c : st) {
+			/* Never index past the reference string if too much is yielded */
+			if (seen < in.size()) {
+				CHECK(c == in[seen]);
+			}
+			seen++;
+		}
+		/* Without this check the loop passes vacuously when nothing is yielded */
+		CHECK(seen == in.size());
+	}
+}
 }
\ No newline at end of file
diff --git a/src/libmime/mime_string.hxx b/src/libmime/mime_string.hxx
index c15dfd566..32eafde19 100644
--- a/src/libmime/mime_string.hxx
+++ b/src/libmime/mime_string.hxx
@@ -28,7 +28,7 @@
 #include "unicode/utf8.h"
 #include "contrib/fastutf8/fastutf8.h"
 
-namespace rspamd {
+namespace rspamd::mime {
 /*
  * The motivation for another string is to have utf8 valid string replacing
  * all bad things with the U+FFFD replacement character and filtering \0 and other
@@ -64,12 +64,227 @@ bool operator !(mime_string_flags fl)
 	return fl == mime_string_flags::MIME_STRING_DEFAULT;
 }
 
+/**
+ * Codepoint iterator base class.
+ *
+ * Stores a non-owning pointer to the container and an offset (`idx`) into
+ * `cont_instance->data()`. The offset is moved with ICU's U8_*_UNSAFE macros,
+ * so one step covers one whole UTF-8 sequence. Those macros perform no bounds
+ * or validity checks: the buffer must hold well-formed UTF-8 and the caller
+ * must keep the iterator inside the string.
+ * A default constructed iterator has a null container and must not be
+ * dereferenced or moved.
+ */
+template<typename Container, bool Raw = false>
+struct iterator_base
+{
+	template<typename, typename>
+	friend class basic_mime_string;
+
+public:
+	using value_type = typename Container::value_type;
+	using difference_type = typename Container::difference_type;
+	using codepoint_type = typename Container::codepoint_type;
+	using reference_type = codepoint_type;
+	using iterator_category = std::bidirectional_iterator_tag;
+
+	/* Comparison looks at the offset only, the container is not compared */
+	bool operator==(const iterator_base &it) const noexcept
+	{
+		return idx == it.idx;
+	}
+
+	bool operator!=(const iterator_base &it) const noexcept
+	{
+		return idx != it.idx;
+	}
+
+	/**
+	 * @param index offset inside `instance->data()` to start from
+	 * @param instance container to iterate over (not owned)
+	 */
+	iterator_base(difference_type index, Container *instance) noexcept:
+			idx(index), cont_instance(instance) {}
+	iterator_base() noexcept = default;
+	iterator_base(const iterator_base &) noexcept = default;
+
+	iterator_base &operator=(const iterator_base &) noexcept = default;
+
+	/** @return the container this iterator was created for (may be null) */
+	Container *get_instance() const noexcept
+	{
+		return cont_instance;
+	}
+
+	/** @return the code point decoded at the current offset; `idx` is left untouched */
+	codepoint_type get_value() const noexcept {
+		/* Decode on a copy of the offset as U8_NEXT_UNSAFE advances it */
+		auto i = idx;
+		codepoint_type uc;
+		U8_NEXT_UNSAFE(cont_instance->data(), i, uc);
+		return uc;
+	}
+
+protected:
+	/* Zero initialised so that default constructed iterators compare reliably */
+	difference_type		idx = 0;
+	Container*			cont_instance = nullptr;
+protected:
+	/** Move by `n` code points; negative `n` moves backwards, zero is a no-op */
+	void advance(difference_type n) noexcept {
+		if (n > 0) {
+			U8_FWD_N_UNSAFE(cont_instance->data(), idx, n);
+		}
+		else if (n < 0) {
+			U8_BACK_N_UNSAFE(cont_instance->data(), idx, (-n));
+		}
+	}
+	/** Step over the code point at the current offset */
+	void increment() noexcept {
+		/* The decoded value is not needed, only the updated offset */
+		codepoint_type uc;
+		U8_NEXT_UNSAFE(cont_instance->data(), idx, uc);
+	}
+
+	/** Step back to the start of the previous code point */
+	void decrement() noexcept {
+		codepoint_type uc;
+		U8_PREV_UNSAFE(cont_instance->data(), idx, uc);
+	}
+};
+
+/**
+ * Partial specialisation for the raw iterator base.
+ *
+ * Unlike the codepoint flavour, the offset here is moved by plain arithmetic,
+ * so every step covers exactly one `value_type` element of the container and
+ * dereferencing yields that element without any decoding.
+ * No bounds checks are performed; a default constructed iterator has a null
+ * container and must not be dereferenced.
+ */
+template<typename Container>
+struct iterator_base<Container, true>
+{
+	/* Same friend as in the primary template */
+	template<typename, typename>
+	friend class basic_mime_string;
+
+public:
+	using value_type = typename Container::value_type;
+	using difference_type = typename Container::difference_type;
+	using reference_type = value_type;
+	using iterator_category = std::bidirectional_iterator_tag;
+
+	/* Comparison looks at the offset only, the container is not compared */
+	bool operator==( const iterator_base& it ) const noexcept { return idx == it.idx; }
+	bool operator!=( const iterator_base& it ) const noexcept { return idx != it.idx; }
+
+	/**
+	 * @param index element offset inside the container to start from
+	 * @param instance container to iterate over (not owned)
+	 */
+	iterator_base(difference_type index, Container *instance) noexcept:
+			idx(index), cont_instance(instance) {}
+
+	iterator_base() noexcept = default;
+	iterator_base( const iterator_base& ) noexcept = default;
+	iterator_base& operator=( const iterator_base& ) noexcept = default;
+	/** @return the container this iterator was created for (may be null) */
+	Container* get_instance() const noexcept { return cont_instance; }
+
+	/**
+	 * @return the element stored at the current offset
+	 *
+	 * Read through data(), as the codepoint flavour does: std::basic_string
+	 * has no at(index, std::nothrow) overload, so the previous form could not
+	 * be instantiated.
+	 */
+	value_type get_value() const noexcept { return cont_instance->data()[idx]; }
+protected:
+	/* Zero initialised so that default constructed iterators compare reliably */
+	difference_type		idx = 0;
+	Container*			cont_instance = nullptr;
+
+protected:
+
+	//! Advance the iterator n times (negative values allowed!)
+	void advance( difference_type n ) noexcept {
+		idx += n;
+	}
+
+	void increment() noexcept { idx ++; }
+	void decrement() noexcept { idx --; }
+};
+
+/*
+ * Forward declarations: iterator mentions const_iterator in a deleted
+ * constructor before const_iterator is defined.
+ */
+template<typename Container, bool Raw> struct iterator;
+template<typename Container, bool Raw> struct const_iterator;
+
+/**
+ * Iterator over a container: adds the increment/decrement and arithmetic
+ * operators on top of iterator_base, which does the actual stepping.
+ * Dereferencing returns a value (not a reference), so elements cannot be
+ * modified through the iterator.
+ */
+template<typename Container, bool Raw = false>
+struct iterator : iterator_base<Container, Raw> {
+	/** Builds an iterator for `instance` positioned at offset `index` */
+	iterator(typename iterator::difference_type index, Container *instance) noexcept:
+			iterator_base<Container, Raw>(index, instance)
+	{
+	}
+	iterator() noexcept = default;
+	iterator(const iterator &) noexcept = default;
+
+	iterator &operator=(const iterator &) noexcept = default;
+	/* A const_iterator must never be turned back into a plain iterator */
+	iterator(const const_iterator<Container, Raw> &) = delete;
+
+	/* Pre-increment: step forward, return self */
+	iterator &operator++() noexcept
+	{
+		this->increment();
+		return *this;
+	}
+
+	/* Post-increment: step forward, return the position held before */
+	iterator operator++(int) noexcept
+	{
+		iterator previous{*this};
+		this->increment();
+		return previous;
+	}
+
+	/* Pre-decrement: step backward, return self */
+	iterator &operator--() noexcept
+	{
+		this->decrement();
+		return *this;
+	}
+
+	/* Post-decrement: step backward, return the position held before */
+	iterator operator--(int) noexcept
+	{
+		iterator previous{*this};
+		this->decrement();
+		return previous;
+	}
+
+	/* Copy of this iterator moved forward by n steps */
+	iterator operator+(typename iterator::difference_type n) const noexcept
+	{
+		iterator moved{*this};
+		moved += n;
+		return moved;
+	}
+
+	/* Move this iterator forward by n steps */
+	iterator &operator+=(typename iterator::difference_type n) noexcept
+	{
+		this->advance(n);
+		return *this;
+	}
+
+	/* Copy of this iterator moved backward by n steps */
+	iterator operator-(typename iterator::difference_type n) const noexcept
+	{
+		iterator moved{*this};
+		moved -= n;
+		return moved;
+	}
+
+	/* Move this iterator backward by n steps */
+	iterator &operator-=(typename iterator::difference_type n) noexcept
+	{
+		this->advance(-n);
+		return *this;
+	}
+
+	/* Value at the current position, as produced by the base class */
+	typename iterator::reference_type operator*() const noexcept
+	{
+		return iterator_base<Container, Raw>::get_value();
+	}
+};
+
+/**
+ * Iterator that can be created from a const container.
+ *
+ * It reuses iterator for all the stepping logic. As the base classes store a
+ * non-const `Container *`, constness is cast away on construction; this
+ * class itself only reads from the container.
+ * NOTE(review): get_instance() and the inherited ++/--/+/- operators still
+ * hand out the non-const pointer / a plain iterator - confirm this is intended.
+ */
+template<typename Container, bool Raw>
+struct const_iterator : iterator<Container, Raw> {
+	/** Builds a const iterator for `instance` positioned at offset `index` */
+	const_iterator(typename iterator_base<Container, Raw>::difference_type index, const Container *instance) noexcept:
+			iterator<Container, Raw>(index, const_cast<Container *>(instance))
+	{
+	}
+
+	/* Implicit conversion from a mutable iterator is allowed */
+	const_iterator(const iterator<Container, Raw> &other) noexcept:
+			iterator<Container, Raw>(other)
+	{
+	}
+
+	const_iterator() noexcept = default;
+
+	const_iterator(const const_iterator &) noexcept = default;
+
+	const_iterator &operator=(const const_iterator &) noexcept = default;
+
+	/*
+	 * Returned by value: a top-level const on the return type would be
+	 * ignored by the compiler (and reported by -Wignored-qualifiers).
+	 */
+	typename iterator<Container, Raw>::reference_type operator*() const noexcept
+	{
+		return this->get_value();
+	}
+};
+
 template<class T, class Allocator>
 class basic_mime_string : private Allocator {
 public:
 	using storage_type = std::basic_string<T, std::char_traits<T>, Allocator>;
 	using view_type = std::basic_string_view<T, std::char_traits<T>>;
 	using filter_type = fu2::function_view<UChar32 (UChar32)>;
+	using codepoint_type = UChar32;
+	using value_type = T;
+	using difference_type = std::ptrdiff_t;
+	using iterator = rspamd::mime::iterator<basic_mime_string, false>;
+	using const_iterator = rspamd::mime::const_iterator<basic_mime_string, false>;
+	using raw_iterator = rspamd::mime::iterator<basic_mime_string, true>;
+	using raw_const_iterator = rspamd::mime::const_iterator<basic_mime_string, true>;
 	/* Ctors */
 	basic_mime_string() noexcept : Allocator() {}
 	explicit basic_mime_string(const Allocator& alloc) noexcept : Allocator(alloc) {}
@@ -204,6 +419,47 @@ public:
 		return false;
 	}
 
+	/** Codepoint iterator positioned at offset 0 of this string */
+	inline iterator begin() noexcept
+	{
+		return iterator{0, this};
+	}
+
+	/** Const codepoint iterator positioned at offset 0 of this string */
+	inline const_iterator begin() const noexcept
+	{
+		return const_iterator{0, this};
+	}
+
+	/** Raw (element by element) iterator positioned at offset 0 */
+	inline raw_iterator raw_begin() noexcept
+	{
+		return raw_iterator{0, this};
+	}
+
+	/** Const raw (element by element) iterator positioned at offset 0 */
+	inline raw_const_iterator raw_begin() const noexcept
+	{
+		return raw_const_iterator{0, this};
+	}
+
+	/**
+	 * Codepoint iterator positioned at offset size(), i.e. past the last element.
+	 * NOTE(review): assumes size() is expressed in the same units as the
+	 * iterator offset - confirm.
+	 */
+	inline iterator end() noexcept
+	{
+		/* size() is unsigned, the iterator offset is signed */
+		return {static_cast<difference_type>(size()), this};
+	}
+
+	/**
+	 * Const codepoint iterator positioned at offset size(), i.e. past the
+	 * last element.
+	 * NOTE(review): assumes size() is expressed in the same units as the
+	 * iterator offset - confirm.
+	 */
+	inline const_iterator end() const noexcept
+	{
+		/* size() is unsigned, the iterator offset is signed */
+		return {static_cast<difference_type>(size()), this};
+	}
+
+	/** Raw iterator positioned at offset size(), i.e. past the last element */
+	inline raw_iterator raw_end() noexcept
+	{
+		/* size() is unsigned, the iterator offset is signed */
+		return {static_cast<difference_type>(size()), this};
+	}
+
+	/** Const raw iterator positioned at offset size(), i.e. past the last element */
+	inline raw_const_iterator raw_end() const noexcept
+	{
+		/* size() is unsigned, the iterator offset is signed */
+		return {static_cast<difference_type>(size()), this};
+	}
+
+	/* For doctest stringify */
 	friend std::ostream& operator<< (std::ostream& os, const T& value) {
 		os << value.storage;
 		return os;


More information about the Commits mailing list