diff --git a/source/include/parselink/msgpack/core/format.h b/source/include/parselink/msgpack/core/format.h
index 54dd031..1cdcdc8 100644
--- a/source/include/parselink/msgpack/core/format.h
+++ b/source/include/parselink/msgpack/core/format.h
@@ -59,6 +59,20 @@ struct map_desc {
 
 namespace format {
 
+    // Classification of the format type. Used by the token API to distinguish
+    // different formats.
+    enum class type : std::uint8_t {
+        invalid,
+        unsigned_int,
+        signed_int,
+        string,
+        bytes,
+        nil,
+        boolean,
+        array,
+        map
+    };
+
     // Flags that may control the behavior of readers/writers.
     enum flag {
         apply_mask = 1
diff --git a/source/include/parselink/msgpack/token/reader.h b/source/include/parselink/msgpack/token/reader.h
index 89aa178..ba61494 100644
--- a/source/include/parselink/msgpack/token/reader.h
+++ b/source/include/parselink/msgpack/token/reader.h
@@ -22,27 +22,108 @@
 #define msgpack_token_reader_8daff350a0b1a519
 
 #include "type.h"
+#include "../core/error.h"
+#include "../util/endianness.h"
+#include "../core/format.h"
 
 #include
 
 
 namespace msgpack {
 
+namespace detail {
+
+// Copies `size` big-endian bytes starting at `inp` into an array in host
+// order and advances `inp` past them.
+constexpr inline decltype(auto) read(std::size_t size, auto& inp)
+    noexcept {
+    // Value-initialised so that the bytes beyond `size` are zero rather than
+    // indeterminate when a narrow value is read.
+    std::array value{};
+    be_to_host(inp, inp + size, std::begin(value));
+    inp += size;
+    return value;
+}
+
+// Reads `size` bytes at `inp` and wraps them in a token holding the
+// token_traits storage type.
+// NOTE(review): the bytes are placed at the start of the array and the rest
+// stay zero, so narrow negative values look like they are not sign-extended
+// and big-endian hosts look unhandled - confirm against be_to_host.
+template
+constexpr inline decltype(auto) read_value(std::size_t size, auto& inp) {
+    using value_type = typename token_traits::storage_type;
+    return token{std::bit_cast(read(size, inp))};
+}
+
+} // namespace detail
+
 class token_reader {
 public:
-    enum class error {
-        end_of_message,
-        incomplete_message
-    };
-
     constexpr token_reader(std::span src) noexcept :
-        data_(src), end_(src.size()) {}
+        data_(src), curr_{src.begin()}, end_(src.end()) {}
+
+    // Number of bytes between the cursor and the end of the buffer, clamped
+    // so that it is never negative.
+    constexpr auto remaining() noexcept {
+        using dist_type = decltype(std::distance(curr_, end_));
+        return std::max(dist_type(0), std::distance(curr_, end_));
+    }
 
     // Read the next token. If the reader currently points to the end of the
     // byte buffer, then end_of_message is returned, and if there is still
     // some data present in the buffer, then incomplete_message is returned,
     // potentially hinting that further buffering is required.
-    constexpr tl::expected read_one() noexcept;
+    constexpr tl::expected read_one() noexcept {
+        if (curr_ >= end_) {
+            return tl::make_unexpected(error::end_of_message);
+        }
+
+        // Look up how many payload bytes follow the marker. Each width has
+        // its own break so that no marker inherits the width of a later
+        // one, and the 32- and 64-bit formats use their full 4 and 8 bytes.
+        long int size = 0;
+        auto const id = *curr_;
+        switch (id) {
+        case format::uint8::marker:
+        case format::int8::marker:
+            size = 1;
+            break;
+        case format::uint16::marker:
+        case format::int16::marker:
+            size = 2;
+            break;
+        case format::uint32::marker:
+        case format::int32::marker:
+            size = 4;
+            break;
+        case format::uint64::marker:
+        case format::int64::marker:
+            size = 8;
+            break;
+        default:
+            return tl::make_unexpected(error::not_implemented);
+        }
+
+        // The marker is only consumed once its whole payload is known to be
+        // buffered, so a short read leaves the cursor on the marker.
+        if (remaining() - 1 < size) {
+            return tl::make_unexpected(error::incomplete_message);
+        }
+        ++curr_;
+
+        switch (id) {
+        case format::int8::marker:
+        case format::int16::marker:
+        case format::int32::marker:
+        case format::int64::marker:
+            // Signed integer formats.
+            return detail::read_value(size, curr_);
+        default:
+            // Unsigned integer formats.
+            return detail::read_value(size, curr_);
+        }
+    }
 
     // Read multiple tokens from the byte buffer. The number of tokens parsed
     // can be surmised from the returned span of tokens. If the reader
@@ -53,8 +134,8 @@ public:
         std::span token_buffer) noexcept;
 private:
     std::span data_;
-    std::size_t curr_;
-    std::size_t end_;
+    decltype(data_)::iterator curr_;
+    decltype(data_)::iterator end_;
 };
 
 } // namespace msgpack
diff --git a/source/include/parselink/msgpack/token/type.h b/source/include/parselink/msgpack/token/type.h
index 238a4ba..d058951 100644
--- a/source/include/parselink/msgpack/token/type.h
+++ b/source/include/parselink/msgpack/token/type.h
@@ -53,15 +53,16 @@ namespace msgpack {
 //
 // Of course, this means custom code!
 
-template
-requires (sizeof(SizeType) + sizeof(std::underlying_type_t) <= sizeof(uintptr_t))
+template
+requires (sizeof(Size) + sizeof(std::underlying_type_t) <= sizeof(uintptr_t))
 class size_and_enum {
 public:
-    using size_type = SizeType;
+    using size_type = Size;
     using enum_type = E;
     using enum_int_type = std::underlying_type_t;
-    constexpr static uintptr_t enum_mask = ((1 << (sizeof(enum_int_type) * 8)) - 1);
-    constexpr static uintptr_t size_shift = (sizeof(SizeType) * 8);
+    constexpr static uintptr_t enum_mask =
+        ((uintptr_t{1} << (sizeof(enum_int_type) * 8)) - 1);
+    constexpr static uintptr_t size_shift = (sizeof(Size) * 8);
     constexpr static uintptr_t size_mask = ~enum_mask;
 
     // Constructors
@@ -81,10 +82,12 @@ public:
 
     // Mutators
     constexpr auto set_size(size_type size) noexcept {
-        value = (static_cast(size) << size_shift) | (value & enum_mask);
+        value = (static_cast(size) << size_shift) |
+            (value & enum_mask);
     }
     constexpr auto set_enum(enum_type enum_value) noexcept {
-        value = (static_cast(enum_value) & enum_mask) | (value & size_mask);
+        value = (static_cast(enum_value) & enum_mask) |
+            (value & size_mask);
     }
 
     constexpr auto set_both(size_type size, enum_type enum_value) noexcept {
@@ -104,16 +107,37 @@ private:
     uintptr_t value{};
 };
 
-enum class token_type : std::uint8_t {
-    invalid,
-    unsigned_int,
-    signed_int,
-    string,
-    bytes,
-    nil,
-    boolean,
-    array,
-    map
+// Associates each kind of token payload with the type used to hold it in
+// token_base's value union; needs_swap is declared only for the two integer
+// storage types.
+template
+struct token_traits;
+
+template <>
+struct token_traits {
+    using storage_type = std::uint64_t;
+    constexpr static bool needs_swap = true;
+};
+
+template <>
+struct token_traits {
+    using storage_type = std::int64_t;
+    constexpr static bool needs_swap = true;
+};
+
+template <>
+struct token_traits {
+    using storage_type = char const*;
+};
+
+template <>
+struct token_traits {
+    using storage_type = std::byte const*;
+};
+
+template <>
+struct token_traits {
+    using storage_type = bool;
 };
 
 template
@@ -123,6 +147,7 @@ class token_base;
 template <>
 class token_base<8> {
 public:
+    constexpr static std::size_t word_size = 8;
     token_base() noexcept = default;
     token_base(token_base const& other) noexcept :
         value_(other.value_)
@@ -131,13 +156,13 @@ public:
     template
     explicit token_base(T value) noexcept {
         if constexpr (std::is_same_v) {
-            size_and_type_.set_enum(token_type::boolean);
+            size_and_type_.set_enum(format::type::boolean);
             value_.b = value;
         } else if constexpr (std::is_signed_v) {
-            size_and_type_.set_enum(token_type::signed_int);
+            size_and_type_.set_enum(format::type::signed_int);
             value_.i = value;
         } else {
-            size_and_type_.set_enum(token_type::unsigned_int);
+            size_and_type_.set_enum(format::type::unsigned_int);
             value_.u = value;
         }
     }
@@ -145,7 +170,7 @@ public:
     template T>
     explicit token_base(T const& value) noexcept {
         std::string_view sv(value);
-        size_and_type_.set_enum(token_type::string);
+        size_and_type_.set_enum(format::type::string);
         size_and_type_.set_size(sv.size());
         value_.str = sv.data();
     }
@@ -153,12 +178,12 @@ public:
     template > T>
     explicit token_base(T const& value) noexcept {
         std::span bv(value);
-        size_and_type_.set_enum(token_type::bytes);
+        size_and_type_.set_enum(format::type::bytes);
         size_and_type_.set_size(bv.size());
         value_.bp = bv.data();
     }
 
-    constexpr token_type type() const noexcept {
+    constexpr format::type type() const noexcept {
         return size_and_type_.get_enum();
     }
 
@@ -168,15 +193,15 @@ public:
     template
     constexpr tl::expected get() const noexcept {
         constexpr auto expected_type = std::is_same_v ?
-            token_type::boolean : std::is_signed_v ?
-            token_type::signed_int : token_type::unsigned_int;
+            format::type::boolean : std::is_signed_v ?
+            format::type::signed_int : format::type::unsigned_int;
         if (type() != expected_type) {
             return tl::make_unexpected(error::wrong_type);
         }
 
-        if constexpr (expected_type == token_type::boolean) {
+        if constexpr (expected_type == format::type::boolean) {
             return T(value_.b);
-        } else if constexpr (expected_type == token_type::signed_int) {
+        } else if constexpr (expected_type == format::type::signed_int) {
             if (std::numeric_limits::max() < value_.i ||
                 std::numeric_limits::lowest() > value_.i) {
                 return tl::make_unexpected(error::will_truncate);
@@ -192,21 +217,21 @@ public:
 
 private:
     union {
-        std::uint64_t u;
-        std::int64_t i;
-        char const* str;
-        std::byte const* bp;
-        bool b;
+        typename token_traits::storage_type u;
+        typename token_traits::storage_type i;
+        typename token_traits::storage_type str;
+        typename token_traits::storage_type bp;
+        typename token_traits::storage_type b;
         token_base* obj;
     } value_;
 
-    size_and_enum size_and_type_{};
+    size_and_enum size_and_type_{};
 };
 
 template<>
 inline tl::expected
 token_base<8>::get() const noexcept {
-    if (type() != token_type::string) {
+    if (type() != format::type::string) {
         return tl::make_unexpected(error::wrong_type);
     }
     return std::string{value_.str, size_and_type_.get_size()};
@@ -216,7 +241,7 @@ template<>
 constexpr tl::expected
 token_base<8>::get() const
     noexcept {
-    if (type() != token_type::string) {
+    if (type() != format::type::string) {
         return tl::make_unexpected(error::wrong_type);
     }
     return std::string_view{value_.str, size_and_type_.get_size()};
@@ -227,7 +252,7 @@ inline tl::expected, error>
 token_base<8>::get() const
     noexcept {
     tl::expected, error> result;
-    if (type() != token_type::bytes) {
+    if (type() != format::type::bytes) {
         result = tl::make_unexpected(error::wrong_type);
     } else {
         result = std::vector(value_.bp,
@@ -240,7 +265,7 @@ template<>
 constexpr tl::expected, error>
 token_base<8>::get() const
     noexcept {
-    if (type() != token_type::bytes) {
+    if (type() != format::type::bytes) {
         return tl::make_unexpected(error::wrong_type);
     }
     return std::span(value_.bp, size_and_type_.get_size());
diff --git a/source/include/parselink/msgpack/util/endianness.h b/source/include/parselink/msgpack/util/endianness.h
index e31ed09..4e7151f 100644
--- a/source/include/parselink/msgpack/util/endianness.h
+++ b/source/include/parselink/msgpack/util/endianness.h
@@ -51,8 +51,8 @@ struct host_endianness_check {
  * Helper function that swaps bytes of arbitrary lengths by copying input to
  * output in reverse order.
  */
-template
-constexpr void byte_swap(Iter begin, Iter end, Iter dest) {
+template
+constexpr void byte_swap(Iter begin, Iter end, OutIter dest) {
     while (begin != end) {
         --end;
         *dest = *end;
@@ -77,6 +77,20 @@ constexpr auto maybe_swap(std::array val) noexcept {
     }
 }
 
+/**
+ * Copies [begin, end) to dest unchanged when From and To are the same
+ * endianness, and in reverse byte order otherwise.
+ */
+template
+requires (From != endianness::other && To != endianness::other)
+constexpr void maybe_swap_iter(Iter begin, Iter end, OutIter dest) noexcept {
+    if constexpr (From == To) {
+        std::copy(begin, end, dest);
+    } else {
+        byte_swap(begin, end, dest);
+    }
+}
+
 /**
  * A helper function for converting a data type into an std::array for use
  * with byte swap operations. May also come in use for reinterpreting data
@@ -137,6 +151,15 @@ constexpr T be_to_host(T val) noexcept {
     return detail::swap(val);
 }
 
+/**
+ * Range overload of be_to_host: writes the bytes of [begin, end) to dest via
+ * detail::maybe_swap_iter.
+ */
+template
+constexpr void be_to_host(Iter begin, Iter end, OutIter dest) noexcept {
+    return detail::maybe_swap_iter(begin, end, dest);
+}
+
 template
 constexpr T host_to_be(T val) noexcept {
     return detail::swap(val);
diff --git a/tests/msgpack/BUILD b/tests/msgpack/BUILD
index 0cc356d..113d3b7 100644
--- a/tests/msgpack/BUILD
+++ b/tests/msgpack/BUILD
@@ -38,3 +38,10 @@ cc_test(
     deps = ["test_deps"],
 )
 
+cc_test(
+    name = "token_reader",
+    srcs = [
+        "test_token_reader.cpp",
+    ],
+    deps = ["test_deps"],
+)
diff --git a/tests/msgpack/test_token.cpp b/tests/msgpack/test_token.cpp
index 7a7358d..36eb2e4 100644
--- a/tests/msgpack/test_token.cpp
+++ b/tests/msgpack/test_token.cpp
@@ -1,4 +1,4 @@
-#include
+#include
 
 #include
 
@@ -61,11 +61,11 @@ suite size_and_enum = [] {
 suite assignment_and_access = [] {
     "token::token()"_test = [] {
         msgpack::token obj;
-        expect(obj.type() == msgpack::token_type::invalid);
+        expect(obj.type() == msgpack::format::type::invalid);
     };
     "token::token(bool)"_test = [] {
         msgpack::token obj(true);
-        expect(obj.type() == msgpack::token_type::boolean);
+        expect(obj.type() == msgpack::format::type::boolean);
         auto retrieved = obj.get();
         expect(retrieved && *retrieved);
         expect(wrong_types(obj));
@@ -73,7 +73,7 @@ suite assignment_and_access = [] {
     "token::token(std::int8_t)"_test = [] {
         std::int8_t val = 0x32;
         msgpack::token obj(val);
-        expect(obj.type() == msgpack::token_type::signed_int);
+        expect(obj.type() == msgpack::format::type::signed_int);
         auto retrieved = obj.get();
         expect(retrieved && *retrieved == val);
         expect(wrong_types(obj));
@@ -81,7 +81,7 @@ suite assignment_and_access = [] {
     "token::token(std::uint8_t)"_test = [] {
         std::uint8_t val = 0xaa;
         msgpack::token obj(val);
-        expect(obj.type() == msgpack::token_type::unsigned_int);
+        expect(obj.type() == msgpack::format::type::unsigned_int);
         auto retrieved = obj.get();
         expect(retrieved && *retrieved == val);
         expect(wrong_types(obj));
@@ -91,7 +91,7 @@ suite assignment_and_access = [] {
         {
             char const* val = "hello world";
             msgpack::token obj(val);
-            expect(obj.type() == msgpack::token_type::string);
+            expect(obj.type() == msgpack::format::type::string);
             auto retrieved = obj.get();
             expect(bool(retrieved));
             if (*retrieved != std::string_view(val)) {
@@ -107,7 +107,7 @@ suite assignment_and_access = [] {
     "token::token(std::string)"_test = [] {
         std::string val = "std::string";
         msgpack::token obj(val);
-        expect(obj.type() == msgpack::token_type::string);
+        expect(obj.type() == msgpack::format::type::string);
         auto retrieved = obj.get();
         expect(bool(retrieved));
         expect(*retrieved == val);
@@ -119,7 +119,7 @@ suite assignment_and_access = [] {
         {
             auto val = make_bytes(0x32, 0xff, 0xaa, 0xce);
             msgpack::token obj(val);
-            expect(obj.type() == msgpack::token_type::bytes);
+            expect(obj.type() == msgpack::format::type::bytes);
             auto retrieved = obj.get>();
             expect(bool(retrieved));
             expect(std::equal(retrieved->begin(), retrieved->end(),
@@ -137,14 +137,14 @@ suite assignment_and_access = [] {
 suite int_truncation = [] {
     "unsigned truncation"_test = [] {
         msgpack::token obj(0xffffffffu);
-        expect(obj.type() == msgpack::token_type::unsigned_int);
+        expect(obj.type() == msgpack::format::type::unsigned_int);
         auto retrieved = obj.get();
         auto err = tl::make_unexpected(msgpack::error::will_truncate);
         expect(retrieved == err);
     };
     "signed truncation"_test = [] {
         msgpack::token obj(-0xffff);
-        expect(obj.type() == msgpack::token_type::signed_int);
+        expect(obj.type() == msgpack::format::type::signed_int);
         auto retrieved = obj.get();
         auto err = tl::make_unexpected(msgpack::error::will_truncate);
         expect(retrieved == err);
diff --git a/tests/msgpack/test_token_reader.cpp b/tests/msgpack/test_token_reader.cpp
new file mode 100644
index 0000000..976ab76
--- /dev/null
+++ b/tests/msgpack/test_token_reader.cpp
@@ -0,0 +1,48 @@
+#include
+
+#include
+
+using namespace boost::ut;
+
+namespace {
+    template
+    constexpr std::array make_bytes(Bytes &&...bytes) {
+        return {std::byte(std::forward(bytes))...};
+    }
+
+    template
+    constexpr bool wrong_types(auto const& obj) {
+        auto err = tl::make_unexpected(msgpack::error::wrong_type);
+        if (obj.template get() != err) return false;
+        if constexpr (sizeof...(Others)) {
+            return wrong_types(obj);
+        } else {
+            return true;
+        }
+    }
+
+    template
+    std::ostream &operator<<(std::ostream &os, tl::expected const &exp) {
+        if (exp.has_value()) {
+            os << "Value: '" << *exp << "'";
+        } else {
+            os << "Error";
+        }
+        return os;
+    }
+}
+
+suite reader = [] {
+    "construction"_test = [] {
+        // One complete 0xce-prefixed token followed by a lone 0xce marker
+        // with no payload behind it.
+        constexpr auto payload = make_bytes(0xce, 0x01, 0x02, 0x03, 0x09, 0xce);
+        msgpack::token_reader reader(payload);
+        auto token = reader.read_one();
+        expect(token && token->type() == msgpack::format::type::unsigned_int);
+        expect(token->get() == tl::make_unexpected(msgpack::error::will_truncate));
+        expect(token->get() == 0x01020309);
+        token = reader.read_one();
+        expect(token == tl::make_unexpected(msgpack::error::incomplete_message));
+    };
+};