Implement integer parsing for token reader API

This commit is contained in:
Kurt Sassenrath 2023-09-13 22:15:48 -07:00
parent 89cbbb0f30
commit 94e77e7519
7 changed files with 222 additions and 59 deletions

View File

@ -59,6 +59,20 @@ struct map_desc {
namespace format {
// Classification of the format type. Used by the token API to distinguish
// different formats.
enum class type : std::uint8_t {
// Zero value; a default-constructed token reports this type.
invalid,
// Unsigned integer; tokens store it as std::uint64_t.
unsigned_int,
// Signed integer; tokens store it as std::int64_t.
signed_int,
// Character data; tokens store a char const* plus a size.
string,
// Raw binary data; tokens store a std::byte const* plus a size.
bytes,
// NOTE(review): presumably the MessagePack nil value — confirm.
nil,
// Boolean; tokens store it as bool.
boolean,
// NOTE(review): presumably the array and map container formats — confirm
// how tokens represent them.
array,
map
};
// Flags that may control the behavior of readers/writers.
enum flag {
apply_mask = 1

View File

@ -22,27 +22,87 @@
#define msgpack_token_reader_8daff350a0b1a519
#include "type.h"
#include "../core/error.h"
#include "../util/endianness.h"
#include "../core/format.h"
#include <tl/expected.hpp>
namespace msgpack {
namespace detail {
constexpr inline decltype(auto) read(std::size_t size, auto& inp)
noexcept {
std::array<std::byte, token::word_size> value;
be_to_host(inp, inp + size, std::begin(value));
inp += size;
return value;
}
template <format::type FormatType>
constexpr inline decltype(auto) read_value(std::size_t size, auto& inp) {
using value_type = typename token_traits<FormatType>::storage_type;
return token{std::bit_cast<value_type>(read(size, inp))};
}
} // namespace detail
class token_reader {
public:
// Reasons read_one() can fail to produce a token.
enum class error {
// The read position is already at the end of the byte buffer.
end_of_message,
// A marker was read, but fewer payload bytes remain than it requires.
incomplete_message
// NOTE(review): read_one() also returns error::not_implemented, which is
// not declared in this enum — confirm which `error` type is intended.
};
// Creates a reader positioned at the first byte of `src`. Only a view of
// the bytes is stored, so `src` must refer to storage that outlives the
// reader.
constexpr token_reader(std::span<std::byte const> src) noexcept :
    data_(src), curr_{src.begin()}, end_(src.end()) {}
constexpr auto remaining() noexcept {
using dist_type = decltype(std::distance(curr_, end_));
return std::max(dist_type(0), std::distance(curr_, end_));
}
// Read the next token.
//
// Returns end_of_message when the reader already points to the end of the
// byte buffer, and incomplete_message when a marker is present but the
// buffer holds fewer payload bytes than that marker requires, hinting that
// further buffering is needed. Markers that are not handled yet (everything
// other than the fixed-width integer formats) yield not_implemented.
//
// The read position only advances when a token is returned; a failed read
// consumes nothing, so the same marker is seen again on the next call.
constexpr tl::expected<token, error> read_one() noexcept {
    if (curr_ >= end_) {
        return tl::make_unexpected(error::end_of_message);
    }

    // Payload width in bytes, selected by the marker. Each case must break:
    // falling through would leave every format with the last width.
    std::size_t size = 0;
    bool is_signed = false;
    switch (*curr_) {
    case format::uint8::marker:
        size = 1;
        break;
    case format::uint16::marker:
        size = 2;
        break;
    case format::uint32::marker:
        size = 4;
        break;
    case format::uint64::marker:
        size = 8;
        break;
    case format::int8::marker:
        size = 1;
        is_signed = true;
        break;
    case format::int16::marker:
        size = 2;
        is_signed = true;
        break;
    case format::int32::marker:
        size = 4;
        is_signed = true;
        break;
    case format::int64::marker:
        size = 8;
        is_signed = true;
        break;
    default:
        return tl::make_unexpected(error::not_implemented);
    }

    // remaining() still includes the marker byte (so it is at least 1 here).
    auto const payload_available =
        static_cast<std::size_t>(remaining()) - 1;
    if (payload_available < size) {
        return tl::make_unexpected(error::incomplete_message);
    }

    // The whole token is available: consume the marker, then the payload.
    ++curr_;
    if (is_signed) {
        return detail::read_value<format::type::signed_int>(size, curr_);
    }
    return detail::read_value<format::type::unsigned_int>(size, curr_);
}
// Read multiple tokens from the byte buffer. The number of tokens parsed
// can be surmised from the returned span of tokens. If the reader
@ -53,8 +113,8 @@ public:
std::span<token> token_buffer) noexcept;
private:
// Non-owning view of the bytes being parsed.
std::span<std::byte const> data_;
// Read position: the next byte to be consumed.
decltype(data_)::iterator curr_;
// One past the last byte of data_.
decltype(data_)::iterator end_;
};
} // namespace msgpack

View File

@ -53,15 +53,16 @@ namespace msgpack {
//
// Of course, this means custom code!
template <std::integral SizeType, typename E>
requires (sizeof(SizeType) + sizeof(std::underlying_type_t<E>) <= sizeof(uintptr_t))
template <std::integral Size, typename E>
requires (sizeof(Size) + sizeof(std::underlying_type_t<E>) <= sizeof(uintptr_t))
class size_and_enum {
public:
using size_type = SizeType;
using size_type = Size;
using enum_type = E;
using enum_int_type = std::underlying_type_t<E>;
constexpr static uintptr_t enum_mask = ((1 << (sizeof(enum_int_type) * 8)) - 1);
constexpr static uintptr_t size_shift = (sizeof(SizeType) * 8);
constexpr static uintptr_t enum_mask =
((1 << (sizeof(enum_int_type) * 8)) - 1);
constexpr static uintptr_t size_shift = (sizeof(Size) * 8);
constexpr static uintptr_t size_mask = ~enum_mask;
// Constructors
@ -81,10 +82,12 @@ public:
// Mutators
// Replaces the stored size with `size`, preserving the enum bits.
constexpr auto set_size(size_type size) noexcept {
    value = (static_cast<uintptr_t>(size) << size_shift) |
            (value & enum_mask);
}
// Replaces the stored enum with `enum_value`, preserving the size bits.
constexpr auto set_enum(enum_type enum_value) noexcept {
    value = (static_cast<uintptr_t>(enum_value) & enum_mask) |
            (value & size_mask);
}
constexpr auto set_both(size_type size, enum_type enum_value) noexcept {
@ -104,16 +107,34 @@ private:
uintptr_t value{};
};
enum class token_type : std::uint8_t {
invalid,
unsigned_int,
signed_int,
string,
bytes,
nil,
boolean,
array,
map
template <format::type>
struct token_traits;
template <>
struct token_traits<format::type::unsigned_int> {
using storage_type = std::uint64_t;
constexpr static bool needs_swap = true;
};
template <>
struct token_traits<format::type::signed_int> {
using storage_type = std::int64_t;
constexpr static bool needs_swap = true;
};
template <>
struct token_traits<format::type::string> {
using storage_type = char const*;
};
template <>
struct token_traits<format::type::bytes> {
using storage_type = std::byte const*;
};
template <>
struct token_traits<format::type::boolean> {
using storage_type = bool;
};
template <size_t WordSize>
@ -123,6 +144,7 @@ class token_base;
template <>
class token_base<8> {
public:
constexpr static std::size_t word_size = 8;
token_base() noexcept = default;
token_base(token_base const& other) noexcept
: value_(other.value_)
@ -131,13 +153,13 @@ public:
// Constructs a token from an integral value. bool becomes a boolean token,
// other signed types become signed_int and unsigned types unsigned_int; the
// value is stored in the matching member of value_.
template <std::integral T>
explicit token_base(T value) noexcept {
    if constexpr (std::is_same_v<T, bool>) {
        size_and_type_.set_enum(format::type::boolean);
        value_.b = value;
    } else if constexpr (std::is_signed_v<T>) {
        size_and_type_.set_enum(format::type::signed_int);
        value_.i = value;
    } else {
        size_and_type_.set_enum(format::type::unsigned_int);
        value_.u = value;
    }
}
@ -145,7 +167,7 @@ public:
// Constructs a string token from anything convertible to std::string_view.
// Only the pointer and length are stored; the characters are not copied, so
// they must outlive the token.
template <std::convertible_to<std::string_view> T>
explicit token_base(T const& value) noexcept {
    std::string_view sv(value);
    size_and_type_.set_enum(format::type::string);
    size_and_type_.set_size(sv.size());
    value_.str = sv.data();
}
@ -153,12 +175,12 @@ public:
// Constructs a bytes token from anything convertible to a span of const
// bytes. Only the pointer and length are stored; the bytes are not copied,
// so they must outlive the token.
template <std::convertible_to<std::span<std::byte const>> T>
explicit token_base(T const& value) noexcept {
    std::span<std::byte const> bv(value);
    size_and_type_.set_enum(format::type::bytes);
    size_and_type_.set_size(bv.size());
    value_.bp = bv.data();
}
constexpr token_type type() const noexcept {
constexpr format::type type() const noexcept {
return size_and_type_.get_enum();
}
@ -168,15 +190,15 @@ public:
template<std::integral T>
constexpr tl::expected<T, error> get() const noexcept {
constexpr auto expected_type = std::is_same_v<T, bool> ?
token_type::boolean : std::is_signed_v<T> ?
token_type::signed_int : token_type::unsigned_int;
format::type::boolean : std::is_signed_v<T> ?
format::type::signed_int : format::type::unsigned_int;
if (type() != expected_type) {
return tl::make_unexpected(error::wrong_type);
}
if constexpr (expected_type == token_type::boolean) {
if constexpr (expected_type == format::type::boolean) {
return T(value_.b);
} else if constexpr (expected_type == token_type::signed_int) {
} else if constexpr (expected_type == format::type::signed_int) {
if (std::numeric_limits<T>::max() < value_.i ||
std::numeric_limits<T>::lowest() > value_.i) {
return tl::make_unexpected(error::will_truncate);
@ -192,21 +214,21 @@ public:
private:
// Payload storage. Which member is meaningful is determined by the format
// type recorded in size_and_type_; member types come from token_traits.
union {
    typename token_traits<format::type::unsigned_int>::storage_type u;
    typename token_traits<format::type::signed_int>::storage_type i;
    typename token_traits<format::type::string>::storage_type str;
    typename token_traits<format::type::bytes>::storage_type bp;
    typename token_traits<format::type::boolean>::storage_type b;
    token_base* obj;
} value_;
// Format type of the token, packed together with the payload size that
// the string and bytes constructors record.
size_and_enum<std::uint32_t, format::type> size_and_type_{};
};
template<>
inline tl::expected<std::string, error> token_base<8>::get()
const noexcept
{
if (type() != token_type::string) {
if (type() != format::type::string) {
return tl::make_unexpected(error::wrong_type);
}
return std::string{value_.str, size_and_type_.get_size()};
@ -216,7 +238,7 @@ template<>
constexpr tl::expected<std::string_view, error> token_base<8>::get()
const noexcept
{
if (type() != token_type::string) {
if (type() != format::type::string) {
return tl::make_unexpected(error::wrong_type);
}
return std::string_view{value_.str, size_and_type_.get_size()};
@ -227,7 +249,7 @@ inline tl::expected<std::vector<std::byte>, error> token_base<8>::get()
const noexcept
{
tl::expected<std::vector<std::byte>, error> result;
if (type() != token_type::bytes) {
if (type() != format::type::bytes) {
result = tl::make_unexpected(error::wrong_type);
} else {
result = std::vector<std::byte>(value_.bp,
@ -240,7 +262,7 @@ template<>
constexpr tl::expected<std::span<std::byte const>, error>
token_base<8>::get() const noexcept
{
if (type() != token_type::bytes) {
if (type() != format::type::bytes) {
return tl::make_unexpected(error::wrong_type);
}
return std::span<std::byte const>(value_.bp, size_and_type_.get_size());

View File

@ -51,8 +51,8 @@ struct host_endianness_check {
* Helper function that swaps bytes of arbitrary lengths by copying input to
* output in reverse order.
*/
template <typename Iter>
constexpr void byte_swap(Iter begin, Iter end, Iter dest) {
template <typename Iter, typename OutIter>
constexpr void byte_swap(Iter begin, Iter end, OutIter dest) {
while (begin != end) {
--end;
*dest = *end;
@ -77,6 +77,16 @@ constexpr auto maybe_swap(std::array<std::byte, N> val) noexcept {
}
}
/**
 * Copies the range [begin, end) to dest, reversing the byte order when the
 * source and destination endianness differ and copying it unchanged when
 * they are the same.
 */
template <endianness From, endianness To, typename Iter, typename OutIter>
requires (From != endianness::other && To != endianness::other)
constexpr void maybe_swap_iter(Iter begin, Iter end, OutIter dest) noexcept {
    constexpr bool orders_differ = (From != To);
    if constexpr (orders_differ) {
        byte_swap(begin, end, dest);
    } else {
        std::copy(begin, end, dest);
    }
}
/**
* A helper function for converting a data type into an std::array for use
* with byte swap operations. May also come in use for reinterpreting data
@ -137,6 +147,11 @@ constexpr T be_to_host(T val) noexcept {
return detail::swap<endian::big, endian::host>(val);
}
/**
 * Iterator overload: copies the big-endian bytes in [begin, end) to dest in
 * host byte order.
 */
template <typename Iter, typename OutIter>
constexpr void be_to_host(Iter begin, Iter end, OutIter dest) noexcept {
    detail::maybe_swap_iter<endian::big, endian::host>(begin, end, dest);
}
template <typename T>
constexpr T host_to_be(T val) noexcept {
return detail::swap<endian::host, endian::big>(val);

View File

@ -38,3 +38,10 @@ cc_test(
deps = ["test_deps"],
)
# Unit tests for the token reader, built from test_token_reader.cpp.
cc_test(
name = "token_reader",
srcs = [
"test_token_reader.cpp",
],
deps = ["test_deps"],
)

View File

@ -1,4 +1,4 @@
#include <msgpack/token/type.h>
#include <msgpack/token.h>
#include <boost/ut.hpp>
@ -61,11 +61,11 @@ suite size_and_enum = [] {
suite assignment_and_access = [] {
// A default-constructed token must report the invalid format type.
"token::token()"_test = [] {
    msgpack::token obj;
    expect(obj.type() == msgpack::format::type::invalid);
};
"token::token(bool)"_test = [] {
msgpack::token obj(true);
expect(obj.type() == msgpack::token_type::boolean);
expect(obj.type() == msgpack::format::type::boolean);
auto retrieved = obj.get<bool>();
expect(retrieved && *retrieved);
expect(wrong_types<int, unsigned, char, std::string_view>(obj));
@ -73,7 +73,7 @@ suite assignment_and_access = [] {
"token::token(std::int8_t)"_test = [] {
std::int8_t val = 0x32;
msgpack::token obj(val);
expect(obj.type() == msgpack::token_type::signed_int);
expect(obj.type() == msgpack::format::type::signed_int);
auto retrieved = obj.get<std::int8_t>();
expect(retrieved && *retrieved == val);
expect(wrong_types<bool, unsigned, std::string_view>(obj));
@ -81,7 +81,7 @@ suite assignment_and_access = [] {
"token::token(std::uint8_t)"_test = [] {
std::uint8_t val = 0xaa;
msgpack::token obj(val);
expect(obj.type() == msgpack::token_type::unsigned_int);
expect(obj.type() == msgpack::format::type::unsigned_int);
auto retrieved = obj.get<std::uint8_t>();
expect(retrieved && *retrieved == val);
expect(wrong_types<bool, int, std::string_view>(obj));
@ -91,7 +91,7 @@ suite assignment_and_access = [] {
{
char const* val = "hello world";
msgpack::token obj(val);
expect(obj.type() == msgpack::token_type::string);
expect(obj.type() == msgpack::format::type::string);
auto retrieved = obj.get<std::string_view>();
expect(bool(retrieved));
if (*retrieved != std::string_view(val)) {
@ -107,7 +107,7 @@ suite assignment_and_access = [] {
"token::token(std::string)"_test = [] {
std::string val = "std::string";
msgpack::token obj(val);
expect(obj.type() == msgpack::token_type::string);
expect(obj.type() == msgpack::format::type::string);
auto retrieved = obj.get<std::string_view>();
expect(bool(retrieved));
expect(*retrieved == val);
@ -118,8 +118,7 @@ suite assignment_and_access = [] {
std::vector<std::byte> extracted_val;
{
auto val = make_bytes(0x32, 0xff, 0xaa, 0xce);
msgpack::token obj(val);
expect(obj.type() == msgpack::token_type::bytes);
msgpack::token obj(val); expect(obj.type() == msgpack::format::type::bytes);
auto retrieved = obj.get<std::span<std::byte const>>();
expect(bool(retrieved));
expect(std::equal(retrieved->begin(), retrieved->end(),
@ -137,14 +136,14 @@ suite assignment_and_access = [] {
suite int_truncation = [] {
// Requesting a 32-bit unsigned value as std::uint8_t must fail with
// will_truncate instead of silently dropping the high bits.
"unsigned truncation"_test = [] {
    msgpack::token obj(0xffffffffu);
    expect(obj.type() == msgpack::format::type::unsigned_int);
    auto retrieved = obj.get<std::uint8_t>();
    auto err = tl::make_unexpected(msgpack::error::will_truncate);
    expect(retrieved == err);
};
"signed truncation"_test = [] {
msgpack::token obj(-0xffff);
expect(obj.type() == msgpack::token_type::signed_int);
expect(obj.type() == msgpack::format::type::signed_int);
auto retrieved = obj.get<std::int8_t>();
auto err = tl::make_unexpected(msgpack::error::will_truncate);
expect(retrieved == err);

View File

@ -0,0 +1,46 @@
#include <msgpack/token.h>
#include <boost/ut.hpp>
using namespace boost::ut;
namespace {
// Builds a std::array of std::byte from the given values, one element per
// argument, in argument order.
template <typename... Bytes>
constexpr std::array<std::byte, sizeof...(Bytes)> make_bytes(Bytes &&...bytes) {
    using result_type = std::array<std::byte, sizeof...(Bytes)>;
    return result_type{static_cast<std::byte>(std::forward<Bytes>(bytes))...};
}
template <typename First, typename... Others>
constexpr bool wrong_types(auto const& obj) {
auto err = tl::make_unexpected(msgpack::error::wrong_type);
if (obj.template get<First>() != err) return false;
if constexpr (sizeof...(Others)) {
return wrong_types<Others...>(obj);
} else {
return true;
}
}
// Streams a tl::expected for test diagnostics: the contained value wrapped
// in quotes when one is present, otherwise the literal text "Error".
template <typename T, typename E>
std::ostream &operator<<(std::ostream &os, tl::expected<T, E> const &exp) {
    if (!exp.has_value()) {
        os << "Error";
        return os;
    }
    os << "Value: '" << *exp << "'";
    return os;
}
}
// Tests for msgpack::token_reader::read_one().
suite reader = [] {
"construction"_test = [] {
// Payload: a 0xce marker followed by the four bytes 01 02 03 09, then a
// second 0xce marker with no payload bytes after it.
constexpr auto payload = make_bytes(0xce, 0x01, 0x02, 0x03, 0x09, 0xce);
msgpack::token_reader reader(payload);
// The first read is expected to yield an unsigned integer token.
auto token = reader.read_one();
expect(token && token->type() == msgpack::format::type::unsigned_int);
// The value does not fit in 8 bits, so a narrowing get must be refused.
expect(token->get<std::uint8_t>() == tl::make_unexpected(msgpack::error::will_truncate));
// The four payload bytes are interpreted most-significant byte first.
expect(token->get<std::uint32_t>() == 0x01020309);
// The trailing marker has no payload, so the second read must report an
// incomplete message.
token = reader.read_one();
expect(token == tl::make_unexpected(msgpack::error::incomplete_message));
};
};