parselink-old/include/parselink/msgpack/token/reader.h

269 lines
10 KiB
C++

//-----------------------------------------------------------------------------
// ___ __ _ _
// / _ \__ _ _ __ ___ ___ / /(_)_ __ | | __
// / /_)/ _` | '__/ __|/ _ \/ / | | '_ \| |/ /
// / ___/ (_| | | \__ \ __/ /__| | | | | <
// \/ \__,_|_| |___/\___\____/_|_| |_|_|\_\ .
//
//-----------------------------------------------------------------------------
// Author: Kurt Sassenrath
// Module: msgpack
//
// Token-based reader for unpacking MessagePack data. Ensure lifetime of the
// MessagePack buffer exceeds the lifetime of parsed tokens.
//
// TBD: How best to handle arrays and maps.
//
// Copyright (c) 2023 Kurt Sassenrath.
//
// License TBD.
//-----------------------------------------------------------------------------
#ifndef msgpack_token_reader_8daff350a0b1a519
#define msgpack_token_reader_8daff350a0b1a519
#include "../core/error.h"
#include "../core/format.h"
#include "../util/endianness.h"
#include "type.h"
#include <tl/expected.hpp>
namespace msgpack {
namespace detail {
// Reinterprets `bytes` as a 64-bit integer and sign-extends the low `size`
// bytes of it to a full std::int64_t.
//
// size:  width in bytes of the encoded integer. 0 is treated like 1, so a
//        value carried in a single byte is sign-extended from 8 bits.
// bytes: an object bit-castable to std::int64_t holding the value.
//
// Returns the sign-extended value, or 0 for any width other than 0, 1, 2, 4
// or 8.
constexpr std::int64_t sign_extend(std::size_t size, auto bytes) noexcept {
    auto const raw = std::bit_cast<std::int64_t>(bytes);

    // Narrowing to the signed type of the requested width keeps the low bytes;
    // converting back to the 64-bit return type performs the sign extension.
    if (size <= 1) {
        return static_cast<std::int8_t>(raw);
    }
    if (size == 2) {
        return static_cast<std::int16_t>(raw);
    }
    if (size == 4) {
        return static_cast<std::int32_t>(raw);
    }
    if (size == 8) {
        return raw;
    }

    // Unsupported width.
    return 0;
}
constexpr decltype(auto) read(std::size_t size, auto& inp) noexcept {
std::array<std::byte, token::word_size> value{};
be_to_host(inp, inp + size, std::begin(value));
inp += size;
return value;
}
template <format::type FormatType>
constexpr inline decltype(auto) make_token(auto bytes) {
using value_type = typename token_traits<FormatType>::storage_type;
return token{std::bit_cast<value_type>(bytes)};
}
// Define in token/type.h instead? Avoid instantiations in the core API.
// Maps the leading byte of an encoded MessagePack object to the traits of the
// format it identifies.
//
// id: the format byte.
//
// Returns a reference to the matching format's traits, or format::no_traits
// when the byte does not correspond to any format handled here.
constexpr inline format::traits const& traits_lookup(std::byte id) {
    // True when `id`, with the bits selected by `mask` cleared, equals
    // `marker`.
    auto const matches = [id](auto mask, auto marker) {
        return (id & ~mask) == marker;
    };

    // Fix formats share the id byte with their payload, so they are
    // recognised by their marker bits. The order of these checks is kept
    // exactly as-is.
    if (matches(format::negative_fixint::mask,
                format::negative_fixint::marker)) {
        return format::get_traits<format::negative_fixint>();
    }
    if (matches(format::positive_fixint::mask,
                format::positive_fixint::marker)) {
        return format::get_traits<format::positive_fixint>();
    }
    if (matches(format::fixstr::mask, format::fixstr::marker)) {
        return format::get_traits<format::fixstr>();
    }
    if (matches(format::fixmap::mask, format::fixmap::marker)) {
        return format::get_traits<format::fixmap>();
    }
    if (matches(format::fixarray::mask, format::fixarray::marker)) {
        return format::get_traits<format::fixarray>();
    }

    // Every remaining format is identified by the whole byte.
    switch (id) {
    // Unsigned integers.
    case format::uint8::marker:
        return format::get_traits<format::uint8>();
    case format::uint16::marker:
        return format::get_traits<format::uint16>();
    case format::uint32::marker:
        return format::get_traits<format::uint32>();
    case format::uint64::marker:
        return format::get_traits<format::uint64>();

    // Signed integers.
    case format::int8::marker:
        return format::get_traits<format::int8>();
    case format::int16::marker:
        return format::get_traits<format::int16>();
    case format::int32::marker:
        return format::get_traits<format::int32>();
    case format::int64::marker:
        return format::get_traits<format::int64>();

    // Strings.
    case format::str8::marker:
        return format::get_traits<format::str8>();
    case format::str16::marker:
        return format::get_traits<format::str16>();
    case format::str32::marker:
        return format::get_traits<format::str32>();

    // Binary blobs.
    case format::bin8::marker:
        return format::get_traits<format::bin8>();
    case format::bin16::marker:
        return format::get_traits<format::bin16>();
    case format::bin32::marker:
        return format::get_traits<format::bin32>();

    // Containers.
    case format::array16::marker:
        return format::get_traits<format::array16>();
    case format::array32::marker:
        return format::get_traits<format::array32>();
    case format::map16::marker:
        return format::get_traits<format::map16>();
    case format::map32::marker:
        return format::get_traits<format::map32>();

    // Singletons.
    case format::nil::marker:
        return format::get_traits<format::nil>();
    case format::invalid::marker:
        return format::get_traits<format::invalid>();

    // Both boolean encodings (marker and marker | 1) share one traits entry.
    case format::boolean::marker:
    case format::boolean::marker | std::byte{1}:
        return format::get_traits<format::boolean>();

    default:
        return format::no_traits;
    }
}
} // namespace detail
class token_reader {
public:
constexpr token_reader(std::span<std::byte const> src) noexcept
: data_(src)
, curr_{} {}
constexpr auto current() const noexcept {
return std::next(data_.begin(), curr_);
}
constexpr auto end() const noexcept { return data_.end(); }
constexpr auto remaining(auto itr) noexcept {
using dist_type = decltype(std::distance(itr, data_.end()));
return std::max(dist_type(0), std::distance(itr, data_.end()));
}
constexpr auto remaining() noexcept { return remaining(current()); }
// Read the next token. If the reader currently points to the end of the
// byte buffer, then end_of_message is returned, and if there is still
// some data present in the buffer, then incomplete_message is returned,
// potentially hinting that further buffering is required.
constexpr tl::expected<token, error> read_one() noexcept {
token tok;
if (remaining() == 0) {
return tl::make_unexpected(error::end_of_message);
}
// curr_ will be updated after everything succeeds.
auto curr = current();
std::size_t var_size = 0;
auto id = *curr++;
auto const& traits = detail::traits_lookup(id);
if (traits == format::no_traits) {
return tl::make_unexpected(error::not_implemented);
}
if (remaining(curr) < traits.size) {
return tl::make_unexpected(error::incomplete_message);
}
// This is either the value of the format, or the size of the format.
auto first_bytes = [&] {
if (traits.size) {
return detail::read(traits.size, curr);
} else {
auto lsb = id;
if (traits.flags & format::flag::apply_mask) {
lsb &= std::byte(traits.mask);
}
return std::array<std::byte, token::word_size>{lsb};
}
}();
// This indicates first_bytes is the size of the format payload.
if (!(traits.flags & format::flag::fixed_size)) {
var_size = std::bit_cast<std::size_t>(first_bytes);
if (std::size_t(remaining(curr)) < var_size) {
return tl::make_unexpected(error::incomplete_message);
}
// For variable-sized types we don't actually read anything yet.
}
switch (traits.token_type) {
case format::type::boolean:
tok = token{bool(first_bytes[0] & traits.mask)};
break;
case format::type::invalid: tok = token{invalid{}}; break;
case format::type::nil: tok = token{nil{}}; break;
case format::type::unsigned_int:
tok = detail::make_token<format::type::unsigned_int>(
first_bytes);
break;
case format::type::signed_int: {
auto value = detail::sign_extend(traits.size, first_bytes);
tok = token{value};
break;
}
case format::type::string: {
using type = token_traits<format::type::string>::storage_type;
auto ptr = reinterpret_cast<type>(&*curr);
tok = token{std::string_view{ptr, var_size}};
curr += var_size;
break;
}
case format::type::binary: {
using type = token_traits<format::type::binary>::storage_type;
auto ptr = reinterpret_cast<type>(&*curr);
tok = token{std::span<std::byte const>{ptr, var_size}};
curr += var_size;
break;
}
case format::type::array: {
tok = token{array_desc(var_size)};
break;
}
case format::type::map: {
tok = token{map_desc(var_size)};
break;
}
default: return tl::make_unexpected(error::not_implemented);
}
curr_ = std::distance(data_.begin(), curr);
return {tok};
}
// Read multiple tokens from the byte buffer, returning a subspan with the
// number of tokens parsed, or a relevant error. If the reader currently
// points to the end of the byte buffer, then error::end_of_message is
// returned, and if there is not enough data to fully parse a token, then
// incomplete_message is returned.
constexpr tl::expected<std::span<token>, error> read_some(
std::span<token> token_buffer) noexcept {
auto tok = token_buffer.begin();
error err = error::end_of_message;
while (tok != token_buffer.end()) {
auto result = read_one().map([&tok](auto&& t) {
*tok = t;
++tok;
});
if (!result) {
err = result.error();
break;
}
}
auto parsed = std::distance(token_buffer.begin(), tok);
if (parsed == 0) {
return tl::make_unexpected(err);
}
return token_buffer.subspan(0, parsed);
}
template <typename T>
constexpr tl::expected<T, error> read() {}
private:
std::span<std::byte const> data_;
decltype(data_)::iterator::difference_type curr_;
};
} // namespace msgpack
#endif // msgpack_token_reader_8daff350a0b1a519