//----------------------------------------------------------------------------- // ___ __ _ _ // / _ \__ _ _ __ ___ ___ / /(_)_ __ | | __ // / /_)/ _` | '__/ __|/ _ \/ / | | '_ \| |/ / // / ___/ (_| | | \__ \ __/ /__| | | | | < // \/ \__,_|_| |___/\___\____/_|_| |_|_|\_\ . // //----------------------------------------------------------------------------- // Author: Kurt Sassenrath // Module: msgpack // // Token-based reader for unpacking MessagePack data. Ensure lifetime of the // MessagePack buffer exceeds the lifetime of parsed tokens. // // TBD: How best to handle arrays and maps. // // Copyright (c) 2023 Kurt Sassenrath. // // License TBD. //----------------------------------------------------------------------------- #ifndef msgpack_token_reader_8daff350a0b1a519 #define msgpack_token_reader_8daff350a0b1a519 #include "type.h" #include "../core/error.h" #include "../util/endianness.h" #include "../core/format.h" #include namespace msgpack { namespace detail { constexpr std::int64_t sign_extend(std::size_t size, auto bytes) noexcept { switch (size) { case 0: case 1: return std::int8_t(std::bit_cast(bytes)); case 2: return std::int16_t(std::bit_cast(bytes)); case 4: return std::int32_t(std::bit_cast(bytes)); case 8: return std::bit_cast(bytes); default: break; // Not reachable. } return {}; } constexpr decltype(auto) read(std::size_t size, auto& inp) noexcept { std::array value{}; be_to_host(inp, inp + size, std::begin(value)); inp += size; return value; } template constexpr inline decltype(auto) make_token(auto bytes) { using value_type = typename token_traits::storage_type; return token{std::bit_cast(bytes)}; } // Define in token/type.h instead? Avoid instantiations in the core API. constexpr inline format::traits const& traits_lookup(std::byte id) { // Check for fix formats first. if ((id & ~format::negative_fixint::mask) == format::negative_fixint::marker) { return format::get_traits(); } else if ((id & ~format::positive_fixint::mask) == format::positive_fixint::marker) { return format::get_traits(); } else if ((id & ~format::fixstr::mask) == format::fixstr::marker) { return format::get_traits(); } else if ((id & ~format::fixmap::mask) == format::fixmap::marker) { return format::get_traits(); } else if ((id & ~format::fixarray::mask) == format::fixarray::marker) { return format::get_traits(); } switch (id) { case format::uint8::marker: return format::get_traits(); case format::uint16::marker: return format::get_traits(); case format::uint32::marker: return format::get_traits(); case format::uint64::marker: return format::get_traits(); case format::int8::marker: return format::get_traits(); case format::int16::marker: return format::get_traits(); case format::int32::marker: return format::get_traits(); case format::int64::marker: return format::get_traits(); case format::str8::marker: return format::get_traits(); case format::str16::marker: return format::get_traits(); case format::str32::marker: return format::get_traits(); case format::bin8::marker: return format::get_traits(); case format::bin16::marker: return format::get_traits(); case format::bin32::marker: return format::get_traits(); case format::array16::marker: return format::get_traits(); case format::array32::marker: return format::get_traits(); case format::map16::marker: return format::get_traits(); case format::map32::marker: return format::get_traits(); case format::nil::marker: return format::get_traits(); case format::invalid::marker: return format::get_traits(); case format::boolean::marker: case format::boolean::marker | std::byte{1}: return format::get_traits(); default: break; } return format::no_traits; } } // namespace detail class token_reader { public: constexpr token_reader(std::span src) noexcept : data_(src), curr_{} {} constexpr auto current() const noexcept { return std::next(data_.begin(), curr_); } constexpr auto remaining(auto itr) noexcept { using dist_type = decltype(std::distance(itr, data_.end())); return std::max(dist_type(0), std::distance(itr, data_.end())); } constexpr auto remaining() noexcept { return remaining(current()); } // Read the next token. If the reader currently points to the end of the // byte buffer, then end_of_message is returned, and if there is still // some data present in the buffer, then incomplete_message is returned, // potentially hinting that further buffering is required. constexpr tl::expected read_one() noexcept { token tok; if (remaining() == 0) { return tl::make_unexpected(error::end_of_message); } // curr_ will be updated after everything succeeds. auto curr = current(); std::size_t var_size = 0; auto id = *curr++; auto const& traits = detail::traits_lookup(id); if (traits == format::no_traits) { return tl::make_unexpected(error::not_implemented); } if (remaining(curr) < traits.size) { return tl::make_unexpected(error::incomplete_message); } // This is either the value of the format, or the size of the format. auto first_bytes = [&]{ if (traits.size) { return detail::read(traits.size, curr); } else { auto lsb = id; if (traits.flags & format::flag::apply_mask) { lsb &= std::byte(traits.mask); } return std::array{lsb}; } }(); // This indicates first_bytes is the size of the format payload. if (!(traits.flags & format::flag::fixed_size)) { var_size = std::bit_cast(first_bytes); if (std::size_t(remaining(curr)) < var_size) { return tl::make_unexpected(error::incomplete_message); } // For variable-sized types we don't actually read anything yet. } switch (traits.token_type) { case format::type::boolean: tok = token{bool(first_bytes[0] & traits.mask)}; break; case format::type::invalid: tok = token{invalid{}}; break; case format::type::nil: tok = token{nil{}}; break; case format::type::unsigned_int: tok = detail::make_token(first_bytes); break; case format::type::signed_int: { auto value = detail::sign_extend(traits.size, first_bytes); tok = token{value}; break; } case format::type::string: { using type = token_traits::storage_type; auto ptr = reinterpret_cast(&*curr); tok = token{std::string_view{ptr, var_size}}; curr += var_size; break; } case format::type::binary: { using type = token_traits::storage_type; auto ptr = reinterpret_cast(&*curr); tok = token{std::span{ptr, var_size}}; curr += var_size; break; } case format::type::array: { tok = token{array_desc(var_size)}; break; } case format::type::map: { tok = token{map_desc(var_size)}; break; } default: return tl::make_unexpected(error::not_implemented); } curr_ = std::distance(data_.begin(), curr); return {tok}; } // Read multiple tokens from the byte buffer, returning a subspan with the // number of tokens parsed, or a relevant error. If the reader currently // points to the end of the byte buffer, then error::end_of_message is // returned, and if there is not enough data to fully parse a token, then // incomplete_message is returned. constexpr tl::expected, error> read_some( std::span token_buffer) noexcept { auto tok = token_buffer.begin(); error err = error::end_of_message; while (tok != token_buffer.end()) { auto result = read_one().map([&tok](auto&& t){ *tok = t; ++tok; }); if (!result) { err = result.error(); break; } } auto parsed = std::distance(token_buffer.begin(), tok); if (parsed == 0) { return tl::make_unexpected(err); } return token_buffer.subspan(0, parsed); } template constexpr tl::expected read() { } private: std::span data_; decltype(data_)::iterator::difference_type curr_; }; } // namespace msgpack #endif // msgpack_token_reader_8daff350a0b1a519