commit 7245099: [Project] Add constant iterators
Vsevolod Stakhov
vsevolod at highsecure.ru
Wed Sep 29 20:14:05 UTC 2021
Author: Vsevolod Stakhov
Date: 2021-09-29 21:13:20 +0100
URL: https://github.com/rspamd/rspamd/commit/72450991eeb435a5bc1fa74da31fc862bb431e4a (HEAD -> master)
[Project] Add constant iterators
---
src/libmime/mime_string.cxx | 38 +++++--
src/libmime/mime_string.hxx | 258 +++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 284 insertions(+), 12 deletions(-)
diff --git a/src/libmime/mime_string.cxx b/src/libmime/mime_string.cxx
index 1785e9188..96f829574 100644
--- a/src/libmime/mime_string.cxx
+++ b/src/libmime/mime_string.cxx
@@ -20,34 +20,35 @@
#include "unicode/uchar.h"
TEST_SUITE("mime_string") {
+using namespace rspamd::mime;
TEST_CASE("mime_string unfiltered ctors")
{
SUBCASE("empty") {
- rspamd::mime_string st;
+ mime_string st;
CHECK(st.size() == 0);
CHECK(st == "");
}
SUBCASE("unfiltered valid") {
- rspamd::mime_string st{std::string_view("abcd")};
+ mime_string st{std::string_view("abcd")};
CHECK(st == "abcd");
}
SUBCASE("unfiltered zero character") {
- rspamd::mime_string st{"abc\0d", 5};
+ mime_string st{"abc\0d", 5};
CHECK(st.has_zeroes());
CHECK(st == "abcd");
}
SUBCASE("unfiltered invalid character - middle") {
- rspamd::mime_string st{std::string("abc\234d")};
+ mime_string st{std::string("abc\234d")};
CHECK(st.has_invalid());
CHECK(st == "abc\uFFFDd");
}
SUBCASE("unfiltered invalid character - end") {
- rspamd::mime_string st{std::string("abc\234")};
+ mime_string st{std::string("abc\234")};
CHECK(st.has_invalid());
CHECK(st == "abc\uFFFD");
}
SUBCASE("unfiltered invalid character - start") {
- rspamd::mime_string st{std::string("\234abc")};
+ mime_string st{std::string("\234abc")};
CHECK(st.has_invalid());
CHECK(st == "\uFFFDabc");
}
@@ -68,32 +69,47 @@ TEST_CASE("mime_string filtered ctors")
};
SUBCASE("empty") {
- rspamd::mime_string st{std::string_view(""), tolower_filter};
+ mime_string st{std::string_view(""), tolower_filter};
CHECK(st.size() == 0);
CHECK(st == "");
}
SUBCASE("filtered valid") {
- rspamd::mime_string st{std::string("AbCdУ"), tolower_filter};
+ mime_string st{std::string("AbCdУ"), tolower_filter};
CHECK(st == "abcdу");
}
SUBCASE("filtered invalid + filtered") {
- rspamd::mime_string st{std::string("abcd\234\1"), print_filter};
+ mime_string st{std::string("abcd\234\1"), print_filter};
CHECK(st == "abcd\uFFFD");
}
}
TEST_CASE("mime_string assign")
{
SUBCASE("assign from valid") {
- rspamd::mime_string st;
+ mime_string st;
CHECK(st.assign_if_valid(std::string("test")));
CHECK(st == "test");
}
SUBCASE("assign from invalid") {
- rspamd::mime_string st;
+ mime_string st;
CHECK(!st.assign_if_valid(std::string("test\234t")));
CHECK(st == "");
}
}
+
+TEST_CASE("mime_string iterators")
+{
+	/* Code point iteration over pure ASCII input must visit every input byte once, in order */
+	SUBCASE("unfiltered iterator ascii") {
+		auto in = std::string("abcd");
+		mime_string st{in};
+		CHECK(st == "abcd");
+
+		std::size_t i = 0;
+		for (auto &&c : st) {
+			/* Stop the test case if the range is longer than the input instead of indexing past `in` */
+			REQUIRE(i < in.size());
+			CHECK(c == in[i++]);
+		}
+		/* Without this an empty range (begin() == end()) would pass vacuously */
+		CHECK(i == in.size());
+	}
+}
}
\ No newline at end of file
diff --git a/src/libmime/mime_string.hxx b/src/libmime/mime_string.hxx
index c15dfd566..32eafde19 100644
--- a/src/libmime/mime_string.hxx
+++ b/src/libmime/mime_string.hxx
@@ -28,7 +28,7 @@
#include "unicode/utf8.h"
#include "contrib/fastutf8/fastutf8.h"
-namespace rspamd {
+namespace rspamd::mime {
/*
* The motivation for another string is to have utf8 valid string replacing
* all bad things with the U+FFFD replacement character and filtering \0 and other
@@ -64,12 +64,227 @@ bool operator !(mime_string_flags fl)
return fl == mime_string_flags::MIME_STRING_DEFAULT;
}
+/**
+ * Code point iterator base class.
+ *
+ * Keeps an offset into `cont_instance->data()` and moves it with the ICU
+ * U8_*_UNSAFE macros, which assume well-formed UTF-8 and perform no bounds
+ * checks.
+ */
+template<typename Container, bool Raw = false>
+struct iterator_base
+{
+	template<typename, typename>
+	friend class basic_mime_string;
+
+public:
+	using value_type = typename Container::value_type;
+	using difference_type = typename Container::difference_type;
+	using codepoint_type = typename Container::codepoint_type;
+	using reference_type = codepoint_type;
+	using iterator_category = std::bidirectional_iterator_tag;
+
+	/* Only the offsets are compared; the container pointer is not consulted */
+	bool operator==(const iterator_base &it) const noexcept
+	{
+		return idx == it.idx;
+	}
+
+	bool operator!=(const iterator_base &it) const noexcept
+	{
+		return idx != it.idx;
+	}
+
+	iterator_base(difference_type index, Container *instance) noexcept:
+			idx(index), cont_instance(instance) {}
+	iterator_base() noexcept = default;
+	iterator_base(const iterator_base &) noexcept = default;
+
+	iterator_base &operator=(const iterator_base &) noexcept = default;
+
+	/* Container this iterator was created for (nullptr when default-constructed) */
+	Container *get_instance() const noexcept
+	{
+		return cont_instance;
+	}
+
+	/* Decodes the code point at the current offset; works on a copy of the offset, so the iterator does not move */
+	codepoint_type get_value() const noexcept {
+		auto i = idx;
+		codepoint_type uc;
+		U8_NEXT_UNSAFE(cont_instance->data(), i, uc);
+		return uc;
+	}
+
+protected:
+	/* Zero-initialised so that a default-constructed iterator holds a determinate offset */
+	difference_type idx = 0;
+	Container* cont_instance = nullptr;
+
+	/* Moves by n code points; a negative n moves backwards */
+	void advance(difference_type n) noexcept {
+		if (n > 0) {
+			U8_FWD_N_UNSAFE(cont_instance->data(), idx, n);
+		}
+		else if (n < 0) {
+			U8_BACK_N_UNSAFE(cont_instance->data(), idx, (-n));
+		}
+	}
+
+	/* Steps over one code point; the decoded value is required by the macro and discarded */
+	void increment() noexcept {
+		codepoint_type uc;
+		U8_NEXT_UNSAFE(cont_instance->data(), idx, uc);
+	}
+
+	/* Steps back over one code point; the decoded value is required by the macro and discarded */
+	void decrement() noexcept {
+		codepoint_type uc;
+		U8_PREV_UNSAFE(cont_instance->data(), idx, uc);
+	}
+};
+
+/**
+ * Partial spec for raw Byte-based iterator base.
+ *
+ * The position is a plain index into `cont_instance->data()`; every step moves
+ * by exactly one element and nothing is decoded.
+ */
+template<typename Container>
+struct iterator_base<Container, true>
+{
+	/* Same friend as the code point flavour of iterator_base */
+	template<typename, typename>
+	friend class basic_mime_string;
+
+public:
+	using value_type = typename Container::value_type;
+	using difference_type = typename Container::difference_type;
+	using reference_type = value_type;
+	using iterator_category = std::bidirectional_iterator_tag;
+
+	/* Only the indices are compared; the container pointer is not consulted */
+	bool operator==( const iterator_base& it ) const noexcept { return idx == it.idx; }
+	bool operator!=( const iterator_base& it ) const noexcept { return idx != it.idx; }
+
+	iterator_base(difference_type index, Container *instance) noexcept:
+			idx(index), cont_instance(instance) {}
+
+	iterator_base() noexcept = default;
+	iterator_base( const iterator_base& ) noexcept = default;
+	iterator_base& operator=( const iterator_base& ) noexcept = default;
+	Container* get_instance() const noexcept { return cont_instance; }
+
+	/*
+	 * Element at the current index, read through the container's public data()
+	 * accessor (std::basic_string has no `at(pos, std::nothrow)` overload).
+	 * No bounds check is performed.
+	 */
+	value_type get_value() const noexcept { return cont_instance->data()[idx]; }
+protected:
+	/* Zero-initialised so that a default-constructed iterator holds a determinate index */
+	difference_type idx = 0;
+	Container* cont_instance = nullptr;
+
+	//! Advance the iterator n times (negative values allowed!)
+	void advance( difference_type n ) noexcept {
+		idx += n;
+	}
+
+	void increment() noexcept { idx ++; }
+	void decrement() noexcept { idx --; }
+};
+
+template<typename Container, bool Raw> struct iterator; // forward declaration; the definition supplies the `Raw = false` default
+template<typename Container, bool Raw> struct const_iterator; // forward declaration; named by iterator's deleted converting constructor
+
+/**
+ * Iterator over `Container`. Position handling and dereferencing are delegated
+ * to iterator_base<Container, Raw>; dereferencing yields a value rather than a
+ * reference into the container.
+ */
+template<typename Container, bool Raw = false>
+struct iterator : iterator_base<Container, Raw> {
+private:
+	using base_type = iterator_base<Container, Raw>;
+	using offset_type = typename base_type::difference_type;
+
+public:
+	iterator() noexcept = default;
+	iterator(const iterator &) noexcept = default;
+	iterator(offset_type index, Container *instance) noexcept:
+			base_type(index, instance)
+	{
+	}
+	/* Disallow creating from const_iterator */
+	iterator(const const_iterator<Container, Raw> &) = delete;
+
+	iterator &operator=(const iterator &) noexcept = default;
+
+	/* Prefix increment: step forward, return self */
+	iterator &operator++() noexcept
+	{
+		this->increment();
+		return *this;
+	}
+
+	/* Postfix increment: step forward, return the position held before the step */
+	iterator operator++(int) noexcept
+	{
+		iterator previous{*this};
+		this->increment();
+		return previous;
+	}
+
+	/* Prefix decrement: step backward, return self */
+	iterator &operator--() noexcept
+	{
+		this->decrement();
+		return *this;
+	}
+
+	/* Postfix decrement: step backward, return the position held before the step */
+	iterator operator--(int) noexcept
+	{
+		iterator previous{*this};
+		this->decrement();
+		return previous;
+	}
+
+	/* In-place move by n steps (n may be negative) */
+	iterator &operator+=(offset_type n) noexcept
+	{
+		this->advance(n);
+		return *this;
+	}
+
+	/* In-place move by -n steps */
+	iterator &operator-=(offset_type n) noexcept
+	{
+		this->advance(-n);
+		return *this;
+	}
+
+	/* Copy of this iterator moved by n steps */
+	iterator operator+(offset_type n) const noexcept
+	{
+		iterator shifted{*this};
+		shifted.advance(n);
+		return shifted;
+	}
+
+	/* Copy of this iterator moved by -n steps */
+	iterator operator-(offset_type n) const noexcept
+	{
+		iterator shifted{*this};
+		shifted.advance(-n);
+		return shifted;
+	}
+
+	/* Value at the current position, as produced by the base's get_value() */
+	typename base_type::reference_type operator*() const noexcept
+	{
+		return this->get_value();
+	}
+};
+
+/**
+ * Iterator handed out for const containers. It reuses `iterator` as its base
+ * and can be created implicitly from one; the opposite conversion is deleted
+ * in `iterator`.
+ * NOTE(review): the stepping and arithmetic operators are inherited, so they
+ * return the base `iterator` type rather than `const_iterator` - confirm this
+ * is intended.
+ */
+template<typename Container, bool Raw>
+struct const_iterator : iterator<Container, Raw> {
+private:
+	using mutable_type = iterator<Container, Raw>;
+
+public:
+	const_iterator() noexcept = default;
+
+	const_iterator(const const_iterator &) noexcept = default;
+
+	/* Implicit conversion from the non-const iterator */
+	const_iterator(const mutable_type &other) noexcept:
+			mutable_type(other)
+	{
+	}
+
+	/* The base stores a non-const container pointer, hence the const_cast */
+	const_iterator(typename iterator_base<Container, Raw>::difference_type index, const Container *instance) noexcept:
+			mutable_type(index, const_cast<Container *>(instance))
+	{
+	}
+
+	const_iterator &operator=(const const_iterator &) noexcept = default;
+
+	/* Value at the current position, as produced by the base's get_value() */
+	const typename mutable_type::reference_type operator*() const noexcept
+	{
+		return this->get_value();
+	}
+};
+
template<class T, class Allocator>
class basic_mime_string : private Allocator {
public:
using storage_type = std::basic_string<T, std::char_traits<T>, Allocator>;
using view_type = std::basic_string_view<T, std::char_traits<T>>;
using filter_type = fu2::function_view<UChar32 (UChar32)>;
+ using codepoint_type = UChar32;
+ using value_type = T;
+ using difference_type = std::ptrdiff_t;
+ using iterator = rspamd::mime::iterator<basic_mime_string, false>;
+ using const_iterator = rspamd::mime::const_iterator<basic_mime_string, false>;
+ using raw_iterator = rspamd::mime::iterator<basic_mime_string, true>;
+ using raw_const_iterator = rspamd::mime::const_iterator<basic_mime_string, true>;
/* Ctors */
basic_mime_string() noexcept : Allocator() {}
explicit basic_mime_string(const Allocator& alloc) noexcept : Allocator(alloc) {}
@@ -204,6 +419,47 @@ public:
return false;
}
+	/* Code point iterator positioned at offset 0 of this string */
+	inline iterator begin() noexcept
+	{
+		return iterator{0, this};
+	}
+
+	/* Const code point iterator positioned at offset 0 of this string */
+	inline const_iterator begin() const noexcept
+	{
+		return const_iterator{0, this};
+	}
+
+	/* Raw (byte-based) iterator positioned at offset 0 of this string */
+	inline raw_iterator raw_begin() noexcept
+	{
+		return raw_iterator{0, this};
+	}
+
+	/* Const raw (byte-based) iterator positioned at offset 0 of this string */
+	inline raw_const_iterator raw_begin() const noexcept
+	{
+		return raw_const_iterator{0, this};
+	}
+
+	/* Code point iterator positioned at offset size(), i.e. the end marker for begin() */
+	inline iterator end() noexcept
+	{
+		/* Named cast instead of a C-style cast for the size_t -> difference_type conversion */
+		return {static_cast<difference_type>(size()), this};
+	}
+
+	/* Const code point iterator positioned at offset size(), i.e. the end marker for begin() const */
+	inline const_iterator end() const noexcept
+	{
+		/* Named cast instead of a C-style cast for the size_t -> difference_type conversion */
+		return {static_cast<difference_type>(size()), this};
+	}
+
+	/* Raw (byte-based) iterator positioned at offset size(), i.e. the end marker for raw_begin() */
+	inline raw_iterator raw_end() noexcept
+	{
+		/* Named cast instead of a C-style cast for the size_t -> difference_type conversion */
+		return {static_cast<difference_type>(size()), this};
+	}
+
+	/* Const raw (byte-based) iterator positioned at offset size(), i.e. the end marker for raw_begin() const */
+	inline raw_const_iterator raw_end() const noexcept
+	{
+		/* Named cast instead of a C-style cast for the size_t -> difference_type conversion */
+		return {static_cast<difference_type>(size()), this};
+	}
+
+ /* For doctest stringify */
friend std::ostream& operator<< (std::ostream& os, const T& value) {
os << value.storage;
return os;
More information about the Commits
mailing list