Implement integer parsing for token reader API
This commit is contained in:
parent
89cbbb0f30
commit
94e77e7519
@ -59,6 +59,20 @@ struct map_desc {
|
||||
|
||||
namespace format {
|
||||
|
||||
// Classification of the format type. Used by the token API to distinguish
|
||||
// different formats.
|
||||
// Each enumerator fits in one byte: the underlying type is std::uint8_t.
enum class type : std::uint8_t {
|
||||
// First enumerator, hence value 0.
invalid,
|
||||
// Integer tokens are split by signedness.
unsigned_int,
|
||||
signed_int,
|
||||
// Text and raw binary payloads are distinct kinds.
string,
|
||||
bytes,
|
||||
nil,
|
||||
boolean,
|
||||
// Container kinds.
array,
|
||||
map
|
||||
};
|
||||
|
||||
// Flags that may control the behavior of readers/writers.
|
||||
enum flag {
|
||||
apply_mask = 1
|
||||
|
||||
@ -22,27 +22,87 @@
|
||||
#define msgpack_token_reader_8daff350a0b1a519
|
||||
|
||||
#include "type.h"
|
||||
#include "../core/error.h"
|
||||
#include "../util/endianness.h"
|
||||
#include "../core/format.h"
|
||||
|
||||
#include <tl/expected.hpp>
|
||||
|
||||
namespace msgpack {
|
||||
|
||||
namespace detail {
|
||||
|
||||
constexpr inline decltype(auto) read(std::size_t size, auto& inp)
|
||||
noexcept {
|
||||
std::array<std::byte, token::word_size> value;
|
||||
be_to_host(inp, inp + size, std::begin(value));
|
||||
inp += size;
|
||||
return value;
|
||||
}
|
||||
|
||||
template <format::type FormatType>
|
||||
constexpr inline decltype(auto) read_value(std::size_t size, auto& inp) {
|
||||
using value_type = typename token_traits<FormatType>::storage_type;
|
||||
return token{std::bit_cast<value_type>(read(size, inp))};
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
class token_reader {
|
||||
public:
|
||||
|
||||
enum class error {
|
||||
end_of_message,
|
||||
incomplete_message
|
||||
};
|
||||
|
||||
constexpr token_reader(std::span<std::byte const> src) noexcept :
|
||||
data_(src), end_(src.size()) {}
|
||||
data_(src), curr_{src.begin()}, end_(src.end()) {}
|
||||
|
||||
constexpr auto remaining() noexcept {
|
||||
using dist_type = decltype(std::distance(curr_, end_));
|
||||
return std::max(dist_type(0), std::distance(curr_, end_));
|
||||
}
|
||||
|
||||
// Read the next token. If the reader currently points to the end of the
|
||||
// byte buffer, then end_of_message is returned, and if there is still
|
||||
// some data present in the buffer, then incomplete_message is returned,
|
||||
// potentially hinting that further buffering is required.
|
||||
constexpr tl::expected<token, error> read_one() noexcept;
|
||||
// Reads the next token from the buffer.
//
// Returns:
//   - the decoded token for the fixed-width integer formats
//     (uint8/16/32/64 and int8/16/32/64);
//   - error::end_of_message when the cursor is already at the end;
//   - error::incomplete_message when the marker is present but its payload is
//     not fully buffered; the cursor is then left on the marker, so the token
//     can be re-read from the same position once more data is available;
//   - error::not_implemented for any other marker (the marker byte is
//     consumed in that case).
constexpr tl::expected<token, error> read_one() noexcept {
    if (curr_ >= end_) {
        return tl::make_unexpected(error::end_of_message);
    }

    auto const token_start = curr_;
    auto const id = *curr_;
    ++curr_;

    // Payload width in bytes and signedness, selected by the marker byte.
    // Every case ends in `break`: falling through would overwrite the width.
    std::size_t size = 0;
    bool is_signed = false;
    switch (id) {
    case format::uint8::marker:
        size = 1;
        break;
    case format::uint16::marker:
        size = 2;
        break;
    case format::uint32::marker:
        size = 4;
        break;
    case format::uint64::marker:
        size = 8;
        break;
    case format::int8::marker:
        size = 1;
        is_signed = true;
        break;
    case format::int16::marker:
        size = 2;
        is_signed = true;
        break;
    case format::int32::marker:
        size = 4;
        is_signed = true;
        break;
    case format::int64::marker:
        size = 8;
        is_signed = true;
        break;
    default:
        return tl::make_unexpected(error::not_implemented);
    }

    // remaining() is clamped to be non-negative, so the cast is lossless.
    if (static_cast<std::size_t>(remaining()) < size) {
        // Un-consume the marker so the reader still points at the token start.
        curr_ = token_start;
        return tl::make_unexpected(error::incomplete_message);
    }

    if (is_signed) {
        return detail::read_value<format::type::signed_int>(size, curr_);
    }
    return detail::read_value<format::type::unsigned_int>(size, curr_);
}
|
||||
|
||||
|
||||
// Read multiple tokens from the byte buffer. The number of tokens parsed
|
||||
// can be surmised from the returned span of tokens. If the reader
|
||||
@ -53,8 +113,8 @@ public:
|
||||
std::span<token> token_buffer) noexcept;
|
||||
private:
|
||||
std::span<std::byte const> data_;
|
||||
std::size_t curr_;
|
||||
std::size_t end_;
|
||||
decltype(data_)::iterator curr_;
|
||||
decltype(data_)::iterator end_;
|
||||
};
|
||||
|
||||
} // namespace msgpack
|
||||
|
||||
@ -53,15 +53,16 @@ namespace msgpack {
|
||||
//
|
||||
// Of course, this means custom code!
|
||||
|
||||
template <std::integral SizeType, typename E>
|
||||
requires (sizeof(SizeType) + sizeof(std::underlying_type_t<E>) <= sizeof(uintptr_t))
|
||||
template <std::integral Size, typename E>
|
||||
requires (sizeof(Size) + sizeof(std::underlying_type_t<E>) <= sizeof(uintptr_t))
|
||||
class size_and_enum {
|
||||
public:
|
||||
using size_type = SizeType;
|
||||
using size_type = Size;
|
||||
using enum_type = E;
|
||||
using enum_int_type = std::underlying_type_t<E>;
|
||||
constexpr static uintptr_t enum_mask = ((1 << (sizeof(enum_int_type) * 8)) - 1);
|
||||
constexpr static uintptr_t size_shift = (sizeof(SizeType) * 8);
|
||||
constexpr static uintptr_t enum_mask =
|
||||
((1 << (sizeof(enum_int_type) * 8)) - 1);
|
||||
constexpr static uintptr_t size_shift = (sizeof(Size) * 8);
|
||||
constexpr static uintptr_t size_mask = ~enum_mask;
|
||||
|
||||
// Constructors
|
||||
@ -81,10 +82,12 @@ public:
|
||||
|
||||
// Mutators
|
||||
constexpr auto set_size(size_type size) noexcept {
|
||||
value = (static_cast<uintptr_t>(size) << size_shift) | (value & enum_mask);
|
||||
value = (static_cast<uintptr_t>(size) << size_shift) |
|
||||
(value & enum_mask);
|
||||
}
|
||||
constexpr auto set_enum(enum_type enum_value) noexcept {
|
||||
value = (static_cast<uintptr_t>(enum_value) & enum_mask) | (value & size_mask);
|
||||
value = (static_cast<uintptr_t>(enum_value) & enum_mask) |
|
||||
(value & size_mask);
|
||||
}
|
||||
|
||||
constexpr auto set_both(size_type size, enum_type enum_value) noexcept {
|
||||
@ -104,16 +107,34 @@ private:
|
||||
uintptr_t value{};
|
||||
};
|
||||
|
||||
enum class token_type : std::uint8_t {
|
||||
invalid,
|
||||
unsigned_int,
|
||||
signed_int,
|
||||
string,
|
||||
bytes,
|
||||
nil,
|
||||
boolean,
|
||||
array,
|
||||
map
|
||||
// Maps a format::type to compile-time properties of the matching token kind.
// Only a declaration is given for the primary template here; members come
// from the explicit specialisations that follow.
template <format::type>
|
||||
struct token_traits;
|
||||
|
||||
// Unsigned integers are stored as a 64-bit unsigned value.
template <>
|
||||
struct token_traits<format::type::unsigned_int> {
|
||||
using storage_type = std::uint64_t;
|
||||
// NOTE(review): presumably signals that the value needs an endianness
// conversion when read/written; no use of it is visible here — confirm.
constexpr static bool needs_swap = true;
|
||||
};
|
||||
|
||||
// Signed integers are stored as a 64-bit signed value.
template <>
|
||||
struct token_traits<format::type::signed_int> {
|
||||
using storage_type = std::int64_t;
|
||||
// NOTE(review): see the note on the unsigned specialisation — confirm intent.
constexpr static bool needs_swap = true;
|
||||
};
|
||||
|
||||
// Strings are stored as a pointer to constant characters (no needs_swap member).
template <>
|
||||
struct token_traits<format::type::string> {
|
||||
using storage_type = char const*;
|
||||
};
|
||||
|
||||
// Byte blobs are stored as a pointer to constant bytes (no needs_swap member).
template <>
|
||||
struct token_traits<format::type::bytes> {
|
||||
using storage_type = std::byte const*;
|
||||
};
|
||||
|
||||
// Booleans are stored as a plain bool (no needs_swap member).
template <>
|
||||
struct token_traits<format::type::boolean> {
|
||||
using storage_type = bool;
|
||||
};
|
||||
|
||||
template <size_t WordSize>
|
||||
@ -123,6 +144,7 @@ class token_base;
|
||||
template <>
|
||||
class token_base<8> {
|
||||
public:
|
||||
constexpr static std::size_t word_size = 8;
|
||||
token_base() noexcept = default;
|
||||
token_base(token_base const& other) noexcept
|
||||
: value_(other.value_)
|
||||
@ -131,13 +153,13 @@ public:
|
||||
template <std::integral T>
|
||||
explicit token_base(T value) noexcept {
|
||||
if constexpr (std::is_same_v<T, bool>) {
|
||||
size_and_type_.set_enum(token_type::boolean);
|
||||
size_and_type_.set_enum(format::type::boolean);
|
||||
value_.b = value;
|
||||
} else if constexpr (std::is_signed_v<T>) {
|
||||
size_and_type_.set_enum(token_type::signed_int);
|
||||
size_and_type_.set_enum(format::type::signed_int);
|
||||
value_.i = value;
|
||||
} else {
|
||||
size_and_type_.set_enum(token_type::unsigned_int);
|
||||
size_and_type_.set_enum(format::type::unsigned_int);
|
||||
value_.u = value;
|
||||
}
|
||||
}
|
||||
@ -145,7 +167,7 @@ public:
|
||||
template <std::convertible_to<std::string_view> T>
|
||||
explicit token_base(T const& value) noexcept {
|
||||
std::string_view sv(value);
|
||||
size_and_type_.set_enum(token_type::string);
|
||||
size_and_type_.set_enum(format::type::string);
|
||||
size_and_type_.set_size(sv.size());
|
||||
value_.str = sv.data();
|
||||
}
|
||||
@ -153,12 +175,12 @@ public:
|
||||
template <std::convertible_to<std::span<std::byte const>> T>
|
||||
explicit token_base(T const& value) noexcept {
|
||||
std::span<std::byte const> bv(value);
|
||||
size_and_type_.set_enum(token_type::bytes);
|
||||
size_and_type_.set_enum(format::type::bytes);
|
||||
size_and_type_.set_size(bv.size());
|
||||
value_.bp = bv.data();
|
||||
}
|
||||
|
||||
constexpr token_type type() const noexcept {
|
||||
constexpr format::type type() const noexcept {
|
||||
return size_and_type_.get_enum();
|
||||
}
|
||||
|
||||
@ -168,15 +190,15 @@ public:
|
||||
template<std::integral T>
|
||||
constexpr tl::expected<T, error> get() const noexcept {
|
||||
constexpr auto expected_type = std::is_same_v<T, bool> ?
|
||||
token_type::boolean : std::is_signed_v<T> ?
|
||||
token_type::signed_int : token_type::unsigned_int;
|
||||
format::type::boolean : std::is_signed_v<T> ?
|
||||
format::type::signed_int : format::type::unsigned_int;
|
||||
|
||||
if (type() != expected_type) {
|
||||
return tl::make_unexpected(error::wrong_type);
|
||||
}
|
||||
if constexpr (expected_type == token_type::boolean) {
|
||||
if constexpr (expected_type == format::type::boolean) {
|
||||
return T(value_.b);
|
||||
} else if constexpr (expected_type == token_type::signed_int) {
|
||||
} else if constexpr (expected_type == format::type::signed_int) {
|
||||
if (std::numeric_limits<T>::max() < value_.i ||
|
||||
std::numeric_limits<T>::lowest() > value_.i) {
|
||||
return tl::make_unexpected(error::will_truncate);
|
||||
@ -192,21 +214,21 @@ public:
|
||||
|
||||
private:
|
||||
union {
|
||||
std::uint64_t u;
|
||||
std::int64_t i;
|
||||
char const* str;
|
||||
std::byte const* bp;
|
||||
bool b;
|
||||
typename token_traits<format::type::unsigned_int>::storage_type u;
|
||||
typename token_traits<format::type::signed_int>::storage_type i;
|
||||
typename token_traits<format::type::string>::storage_type str;
|
||||
typename token_traits<format::type::bytes>::storage_type bp;
|
||||
typename token_traits<format::type::boolean>::storage_type b;
|
||||
token_base* obj;
|
||||
} value_;
|
||||
size_and_enum<std::uint32_t, token_type> size_and_type_{};
|
||||
size_and_enum<std::uint32_t, format::type> size_and_type_{};
|
||||
};
|
||||
|
||||
template<>
|
||||
inline tl::expected<std::string, error> token_base<8>::get()
|
||||
const noexcept
|
||||
{
|
||||
if (type() != token_type::string) {
|
||||
if (type() != format::type::string) {
|
||||
return tl::make_unexpected(error::wrong_type);
|
||||
}
|
||||
return std::string{value_.str, size_and_type_.get_size()};
|
||||
@ -216,7 +238,7 @@ template<>
|
||||
constexpr tl::expected<std::string_view, error> token_base<8>::get()
|
||||
const noexcept
|
||||
{
|
||||
if (type() != token_type::string) {
|
||||
if (type() != format::type::string) {
|
||||
return tl::make_unexpected(error::wrong_type);
|
||||
}
|
||||
return std::string_view{value_.str, size_and_type_.get_size()};
|
||||
@ -227,7 +249,7 @@ inline tl::expected<std::vector<std::byte>, error> token_base<8>::get()
|
||||
const noexcept
|
||||
{
|
||||
tl::expected<std::vector<std::byte>, error> result;
|
||||
if (type() != token_type::bytes) {
|
||||
if (type() != format::type::bytes) {
|
||||
result = tl::make_unexpected(error::wrong_type);
|
||||
} else {
|
||||
result = std::vector<std::byte>(value_.bp,
|
||||
@ -240,7 +262,7 @@ template<>
|
||||
constexpr tl::expected<std::span<std::byte const>, error>
|
||||
token_base<8>::get() const noexcept
|
||||
{
|
||||
if (type() != token_type::bytes) {
|
||||
if (type() != format::type::bytes) {
|
||||
return tl::make_unexpected(error::wrong_type);
|
||||
}
|
||||
return std::span<std::byte const>(value_.bp, size_and_type_.get_size());
|
||||
|
||||
@ -51,8 +51,8 @@ struct host_endianness_check {
|
||||
* Helper function that swaps bytes of arbitrary lengths by copying input to
|
||||
* output in reverse order.
|
||||
*/
|
||||
template <typename Iter>
|
||||
constexpr void byte_swap(Iter begin, Iter end, Iter dest) {
|
||||
template <typename Iter, typename OutIter>
|
||||
constexpr void byte_swap(Iter begin, Iter end, OutIter dest) {
|
||||
while (begin != end) {
|
||||
--end;
|
||||
*dest = *end;
|
||||
@ -77,6 +77,16 @@ constexpr auto maybe_swap(std::array<std::byte, N> val) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
template <endianness From, endianness To, typename Iter, typename OutIter>
|
||||
requires (From != endianness::other && To != endianness::other)
|
||||
constexpr void maybe_swap_iter(Iter begin, Iter end, OutIter dest) noexcept {
|
||||
if constexpr (From == To) {
|
||||
std::copy(begin, end, dest);
|
||||
} else {
|
||||
byte_swap(begin, end, dest);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A helper function for converting a data type into an std::array for use
|
||||
* with byte swap operations. May also come in use for reinterpreting data
|
||||
@ -137,6 +147,11 @@ constexpr T be_to_host(T val) noexcept {
|
||||
return detail::swap<endian::big, endian::host>(val);
|
||||
}
|
||||
|
||||
template <typename Iter, typename OutIter>
|
||||
constexpr void be_to_host(Iter begin, Iter end, OutIter dest) noexcept {
|
||||
return detail::maybe_swap_iter<endian::big, endian::host>(begin, end, dest);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
constexpr T host_to_be(T val) noexcept {
|
||||
return detail::swap<endian::host, endian::big>(val);
|
||||
|
||||
@ -38,3 +38,10 @@ cc_test(
|
||||
deps = ["test_deps"],
|
||||
)
|
||||
|
||||
# Test target "token_reader": builds test_token_reader.cpp against the
# "test_deps" dependency.
cc_test(
|
||||
name = "token_reader",
|
||||
srcs = [
|
||||
"test_token_reader.cpp",
|
||||
],
|
||||
deps = ["test_deps"],
|
||||
)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#include <msgpack/token/type.h>
|
||||
#include <msgpack/token.h>
|
||||
|
||||
#include <boost/ut.hpp>
|
||||
|
||||
@ -61,11 +61,11 @@ suite size_and_enum = [] {
|
||||
suite assignment_and_access = [] {
|
||||
"token::token()"_test = [] {
|
||||
msgpack::token obj;
|
||||
expect(obj.type() == msgpack::token_type::invalid);
|
||||
expect(obj.type() == msgpack::format::type::invalid);
|
||||
};
|
||||
"token::token(bool)"_test = [] {
|
||||
msgpack::token obj(true);
|
||||
expect(obj.type() == msgpack::token_type::boolean);
|
||||
expect(obj.type() == msgpack::format::type::boolean);
|
||||
auto retrieved = obj.get<bool>();
|
||||
expect(retrieved && *retrieved);
|
||||
expect(wrong_types<int, unsigned, char, std::string_view>(obj));
|
||||
@ -73,7 +73,7 @@ suite assignment_and_access = [] {
|
||||
"token::token(std::int8_t)"_test = [] {
|
||||
std::int8_t val = 0x32;
|
||||
msgpack::token obj(val);
|
||||
expect(obj.type() == msgpack::token_type::signed_int);
|
||||
expect(obj.type() == msgpack::format::type::signed_int);
|
||||
auto retrieved = obj.get<std::int8_t>();
|
||||
expect(retrieved && *retrieved == val);
|
||||
expect(wrong_types<bool, unsigned, std::string_view>(obj));
|
||||
@ -81,7 +81,7 @@ suite assignment_and_access = [] {
|
||||
"token::token(std::uint8_t)"_test = [] {
|
||||
std::uint8_t val = 0xaa;
|
||||
msgpack::token obj(val);
|
||||
expect(obj.type() == msgpack::token_type::unsigned_int);
|
||||
expect(obj.type() == msgpack::format::type::unsigned_int);
|
||||
auto retrieved = obj.get<std::uint8_t>();
|
||||
expect(retrieved && *retrieved == val);
|
||||
expect(wrong_types<bool, int, std::string_view>(obj));
|
||||
@ -91,7 +91,7 @@ suite assignment_and_access = [] {
|
||||
{
|
||||
char const* val = "hello world";
|
||||
msgpack::token obj(val);
|
||||
expect(obj.type() == msgpack::token_type::string);
|
||||
expect(obj.type() == msgpack::format::type::string);
|
||||
auto retrieved = obj.get<std::string_view>();
|
||||
expect(bool(retrieved));
|
||||
if (*retrieved != std::string_view(val)) {
|
||||
@ -107,7 +107,7 @@ suite assignment_and_access = [] {
|
||||
"token::token(std::string)"_test = [] {
|
||||
std::string val = "std::string";
|
||||
msgpack::token obj(val);
|
||||
expect(obj.type() == msgpack::token_type::string);
|
||||
expect(obj.type() == msgpack::format::type::string);
|
||||
auto retrieved = obj.get<std::string_view>();
|
||||
expect(bool(retrieved));
|
||||
expect(*retrieved == val);
|
||||
@ -118,8 +118,7 @@ suite assignment_and_access = [] {
|
||||
std::vector<std::byte> extracted_val;
|
||||
{
|
||||
auto val = make_bytes(0x32, 0xff, 0xaa, 0xce);
|
||||
msgpack::token obj(val);
|
||||
expect(obj.type() == msgpack::token_type::bytes);
|
||||
msgpack::token obj(val); expect(obj.type() == msgpack::format::type::bytes);
|
||||
auto retrieved = obj.get<std::span<std::byte const>>();
|
||||
expect(bool(retrieved));
|
||||
expect(std::equal(retrieved->begin(), retrieved->end(),
|
||||
@ -137,14 +136,14 @@ suite assignment_and_access = [] {
|
||||
suite int_truncation = [] {
|
||||
"unsigned truncation"_test = [] {
|
||||
msgpack::token obj(0xffffffffu);
|
||||
expect(obj.type() == msgpack::token_type::unsigned_int);
|
||||
expect(obj.type() == msgpack::format::type::unsigned_int);
|
||||
auto retrieved = obj.get<std::uint8_t>();
|
||||
auto err = tl::make_unexpected(msgpack::error::will_truncate);
|
||||
expect(retrieved == err);
|
||||
};
|
||||
"signed truncation"_test = [] {
|
||||
msgpack::token obj(-0xffff);
|
||||
expect(obj.type() == msgpack::token_type::signed_int);
|
||||
expect(obj.type() == msgpack::format::type::signed_int);
|
||||
auto retrieved = obj.get<std::int8_t>();
|
||||
auto err = tl::make_unexpected(msgpack::error::will_truncate);
|
||||
expect(retrieved == err);
|
||||
|
||||
46
tests/msgpack/test_token_reader.cpp
Normal file
46
tests/msgpack/test_token_reader.cpp
Normal file
@ -0,0 +1,46 @@
|
||||
#include <msgpack/token.h>
|
||||
|
||||
#include <boost/ut.hpp>
|
||||
|
||||
using namespace boost::ut;
|
||||
|
||||
namespace {
|
||||
// Builds a std::array of std::byte from the given values, one element per
// argument, each converted to std::byte.
template <typename... Bytes>
constexpr std::array<std::byte, sizeof...(Bytes)> make_bytes(Bytes &&...bytes) {
    using result_type = std::array<std::byte, sizeof...(Bytes)>;
    return result_type{static_cast<std::byte>(std::forward<Bytes>(bytes))...};
}
|
||||
|
||||
template <typename First, typename... Others>
|
||||
constexpr bool wrong_types(auto const& obj) {
|
||||
auto err = tl::make_unexpected(msgpack::error::wrong_type);
|
||||
if (obj.template get<First>() != err) return false;
|
||||
if constexpr (sizeof...(Others)) {
|
||||
return wrong_types<Others...>(obj);
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename E>
|
||||
std::ostream &operator<<(std::ostream &os, tl::expected<T, E> const &exp) {
|
||||
if (exp.has_value()) {
|
||||
os << "Value: '" << *exp << "'";
|
||||
} else {
|
||||
os << "Error";
|
||||
}
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
||||
// Tests for msgpack::token_reader::read_one.
suite reader = [] {
|
||||
// NOTE(review): despite its name this case exercises read_one(), not just
// construction of the reader.
"construction"_test = [] {
|
||||
// One complete integer token (marker 0xce followed by four payload bytes),
// then a lone 0xce marker with no payload after it.
constexpr auto payload = make_bytes(0xce, 0x01, 0x02, 0x03, 0x09, 0xce);
|
||||
msgpack::token_reader reader(payload);
|
||||
// First read: expected to decode an unsigned integer token.
auto token = reader.read_one();
|
||||
expect(token && token->type() == msgpack::format::type::unsigned_int);
|
||||
// The decoded value does not fit in 8 bits, so a narrowing get must fail.
expect(token->get<std::uint8_t>() == tl::make_unexpected(msgpack::error::will_truncate));
|
||||
// The four payload bytes are interpreted most-significant byte first.
expect(token->get<std::uint32_t>() == 0x01020309);
|
||||
// Second read: only the trailing marker is left, so the message is incomplete.
token = reader.read_one();
|
||||
expect(token == tl::make_unexpected(msgpack::error::incomplete_message));
|
||||
};
|
||||
};
|
||||
Loading…
Reference in New Issue
Block a user