Deductive writer POC. Start packer implementation.

This commit is contained in:
Kurt Sassenrath 2023-11-22 22:54:21 -08:00
parent 84942171ea
commit ebe2b070b9
6 changed files with 534 additions and 40 deletions

View File

@ -29,8 +29,8 @@ IncludeCategories:
SortPriority: 2
CaseSensitive: false
- Regex: '.*'
Priority: 1
SortPriority: 0
Priority: 3
SortPriority: 3
IncludeIsMainRegex: '(_test)?$'
IndentAccessModifiers: false
IndentCaseLabels: true

View File

@ -0,0 +1,181 @@
//-----------------------------------------------------------------------------
// ___ __ _ _
// / _ \__ _ _ __ ___ ___ / /(_)_ __ | | __
// / /_)/ _` | '__/ __|/ _ \/ / | | '_ \| |/ /
// / ___/ (_| | | \__ \ __/ /__| | | | | <
// \/ \__,_|_| |___/\___\____/_|_| |_|_|\_\ .
//
//-----------------------------------------------------------------------------
// Author: Kurt Sassenrath
// Module: msgpack
//
// Default packer implementation, which aims to deduce the best format to use
// for a given value. For example, if a 32-bit unsigned integer type only
// contains the value 5, a uint32 format would serialize into:
//
// 0xce, 0x00, 0x00, 0x00, 0x05
//
// Instead, the packer will note that this value could be stored in a positive
// fixint, which is simply:
//
// 0x05
//
// The same optimization will be applied to variable-length types, like strings,
// bytes, arrays, and maps.
//
// This flexibility comes at the cost of CPU instructions. For embedded targets,
// writer (to be renamed verbatim_packer in the future) may be a better choice.
//
// Future goals for this particular packer:
// 1. Support containers/ranges seamlessly.
// 2. Support packing of trivial POD structures without an explicit
// pack_adapter.
//
// Copyright (c) 2023 Kurt Sassenrath.
//
// License TBD.
//-----------------------------------------------------------------------------
#ifndef msgpack_core_packer_1d5939e9c1498568
#define msgpack_core_packer_1d5939e9c1498568
#include "parselink/msgpack/core/error.h"
#include "parselink/msgpack/core/format.h"
#include "parselink/msgpack/util/endianness.h"
#include <tl/expected.hpp>
#include <limits>
#include <type_traits>
namespace msgpack {
namespace detail {
// Generic helper for writing the trailing `sz` bytes of an integral value.
//
// `value` is converted with host_to_be() and viewed as a byte array of
// sizeof(T) elements; the last `sz` of those bytes are copied to `out`, and
// the advanced iterator is returned.
//
// The pack_adapter specializations in this file pass pack_adapter<T>::size()
// as `sz`. Should a caller ever request more bytes than T holds, the extra
// leading bytes are emitted as sign-extension padding (0xff for negative
// signed values, 0x00 otherwise) instead of reading before the start of the
// byte array.
template <typename T, typename Itr>
constexpr auto write_integral(T value, Itr out, std::size_t sz) noexcept {
    auto const bytes = ::detail::as_bytes(host_to_be(value));
    if (sz > bytes.size()) {
        bool negative = false;
        if constexpr (std::is_signed_v<T>) negative = value < 0;
        auto const fill = negative ? std::byte{0xff} : std::byte{0x00};
        // Emit padding until the remaining width fits inside the array.
        for (; sz > bytes.size(); --sz) *out++ = fill;
    }
    for (auto it = bytes.end() - sz; it != bytes.end(); ++it) {
        *out++ = *it;
    }
    return out;
}
// Each format needs some number of bytes after the marker, holding either the
// value itself (integer formats) or the length (variable-length formats).
// The primary template is empty; the specializations below supply
// num_bytes() and marker().
template <format::type T>
struct pack_helper {};

// Unsigned integers: value-in-marker, or a 1/2/4/8 byte payload.
template <>
struct pack_helper<format::type::unsigned_int> {
    // Number of payload bytes following the marker. Zero means the value is
    // carried by the marker byte itself.
    static constexpr std::size_t num_bytes(std::uint64_t value) noexcept {
        auto const fixint_limit = std::uint64_t(format::positive_fixint::mask);
        if (value > fixint_limit) {
            // Significant bits rounded up to whole bytes, then to the next
            // power of two.
            auto const octets =
                    std::uint64_t((std::bit_width(value) + 7) / 8);
            return std::bit_ceil(octets);
        }
        return 0;
    }

    // Marker byte matching the width chosen by num_bytes().
    static constexpr std::byte marker(std::uint64_t value) noexcept {
        auto const width = num_bytes(value);
        if (width == 0) return static_cast<std::byte>(value);
        if (width == 1) return format::uint8::marker;
        if (width == 2) return format::uint16::marker;
        if (width == 4) return format::uint32::marker;
        return format::uint64::marker;
    }
};
// Signed integers: value-in-marker for [-32, -1], otherwise a 1/2/4/8 byte
// payload. Non-negative values always take at least one payload byte here.
template <>
struct pack_helper<format::type::signed_int> {
    // Number of payload bytes following the marker.
    static constexpr std::size_t num_bytes(std::int64_t value) noexcept {
        if (value >= -32 && value < 0) return 0;
        auto const bits = static_cast<std::uint64_t>(value);
        bool const negative = (bits >> 63) != 0;
        // Leading bits that merely repeat the sign carry no information.
        auto const redundant = static_cast<std::uint64_t>(
                negative ? std::countl_one(bits) : std::countl_zero(bits));
        // One sign bit plus the remaining significant bits.
        std::uint64_t const width =
                1 + std::numeric_limits<std::uint64_t>::digits - redundant;
        return std::bit_ceil((width + 7) >> 3);
    }

    // Marker byte matching the width chosen by num_bytes().
    static constexpr std::byte marker(std::int64_t value) noexcept {
        auto const width = num_bytes(value);
        if (width == 0) return static_cast<std::byte>(value);
        if (width == 1) return format::int8::marker;
        if (width == 2) return format::int16::marker;
        if (width == 4) return format::int32::marker;
        return format::int64::marker;
    }
};
// Strings: the length is carried in the marker when small enough, otherwise
// in a 1/2/4 byte length field.
template <>
struct pack_helper<format::type::string> {
    // Number of bytes needed to encode a length of `value`; zero means the
    // length is folded into the marker byte.
    static constexpr std::size_t num_bytes(std::uint64_t value) noexcept {
        if (value > std::uint32_t(format::fixstr::mask)) {
            auto const octets =
                    std::uint32_t((std::bit_width(value) + 7) / 8);
            return std::bit_ceil(octets);
        }
        return 0;
    }

    // Marker byte for a string of this size. Any length-field width other
    // than 0, 1 or 2 maps to str32.
    static constexpr auto marker(std::string_view value) noexcept {
        auto const length_bytes = num_bytes(value.size());
        if (length_bytes == 0) {
            return format::fixstr::marker | std::byte(value.size());
        }
        if (length_bytes == 1) return format::str8::marker;
        if (length_bytes == 2) return format::str16::marker;
        return format::str32::marker;
    }
};
} // namespace detail
// Pack adapter is the basis for packing native values into MessagePack format.
//
// The primary template is intentionally empty. Specializations are expected to
// provide static size(), write() and marker() members, which is what the
// builtin_packable_type concept checks for.
template <typename T>
struct pack_adapter {};
// Satisfied when pack_adapter<T> exposes the three static members the packer
// relies on, with exactly these result types:
//   size(t)     -> std::size_t
//   write(t, b) -> std::byte* (the same type as the output pointer passed in)
//   marker(t)   -> std::byte
template <typename T>
concept builtin_packable_type = requires(T const& t, std::byte* b) {
{ pack_adapter<T>::size(t) } -> std::same_as<std::size_t>;
{ pack_adapter<T>::write(t, b) } -> std::same_as<decltype(b)>;
{ pack_adapter<T>::marker(t) } -> std::same_as<std::byte>;
};
// Currently an alias of builtin_packable_type.
template <typename T>
concept packable_type = builtin_packable_type<T>;
// Shared base for the built-in pack_adapter specializations: forwards size()
// and marker() to detail::pack_helper<F>.
template <format::type F>
struct builtin_pack_adapter {
    static constexpr auto format_type = F;

    // Result of pack_helper<F>::num_bytes() for `value`.
    static constexpr auto size(auto value) noexcept {
        using helper = detail::pack_helper<F>;
        return helper::num_bytes(value);
    }

    // Result of pack_helper<F>::marker() for `value`.
    static constexpr auto marker(auto value) noexcept {
        using helper = detail::pack_helper<F>;
        return helper::marker(value);
    }
};
// Signed integral types: size() and marker() come from the signed_int helper.
template <std::signed_integral T>
struct pack_adapter<T> : builtin_pack_adapter<format::type::signed_int> {
    // Writes the payload bytes of `value` (without the marker) to `out` and
    // returns the advanced iterator.
    template <typename Itr>
    static constexpr Itr write(T value, Itr out) noexcept {
        auto const payload_bytes = size(value);
        return detail::write_integral(value, out, payload_bytes);
    }
};
// Unsigned integral types: size() and marker() come from the unsigned_int
// helper.
template <std::unsigned_integral T>
struct pack_adapter<T> : builtin_pack_adapter<format::type::unsigned_int> {
    // Writes the payload bytes of `value` (without the marker) to `out` and
    // returns the advanced iterator.
    template <typename Itr>
    static constexpr Itr write(T value, Itr out) noexcept {
        auto const payload_bytes = size(value);
        return detail::write_integral(value, out, payload_bytes);
    }
};
} // namespace msgpack
#endif // msgpack_core_packer_1d5939e9c1498568

View File

@ -18,14 +18,14 @@
#ifndef msgpack_core_writer_ce48a51aa6ed0858
#define msgpack_core_writer_ce48a51aa6ed0858
#include <tl/expected.hpp>
#include "../util/endianness.h"
#include "error.h"
#include "format.h"
#include <limits>
#include <type_traits>
#include <tl/expected.hpp>
namespace msgpack {
enum class writer_error {
@ -52,19 +52,147 @@ constexpr inline decltype(auto) write_bytes(
return out;
}
namespace detail {
// Bytes required to encode a string length of `v`; zero means the length is
// stored in the marker byte.
constexpr std::size_t bytes_needed_for_str(std::uint32_t v) {
    auto const fixstr_limit = std::uint32_t(format::fixstr::mask);
    if (v > fixstr_limit) {
        // Whole bytes of significant bits, rounded up to a power of two.
        auto const octets = std::uint32_t((std::bit_width(v) + 7) / 8);
        return std::bit_ceil(octets);
    }
    return 0;
}
// Payload bytes required for an unsigned value; zero means the value is
// stored in the marker byte.
constexpr std::size_t bytes_needed(std::uint64_t v) {
    auto const fixint_limit = std::uint64_t(format::positive_fixint::mask);
    if (v > fixint_limit) {
        // Whole bytes of significant bits, rounded up to a power of two.
        auto const octets = std::uint64_t((std::bit_width(v) + 7) / 8);
        return std::bit_ceil(octets);
    }
    return 0;
}
// Payload bytes required for a signed value; zero for values in [-32, -1],
// which are stored in the marker byte.
constexpr std::size_t bytes_needed(std::int64_t v) {
    bool const negative = v < 0;
    if (negative && v >= -32) return 0;
    auto const bits = std::uint64_t(v);
    // Leading bits that only repeat the sign.
    int const redundant =
            negative ? std::countl_one(bits) : std::countl_zero(bits);
    // One sign bit plus the remaining significant bits.
    int const width =
            1 + std::numeric_limits<std::uint64_t>::digits - redundant;
    return std::bit_ceil(std::uint64_t((width + 7) / 8));
}
// Marker byte for a signed value, chosen from the payload width reported by
// bytes_needed(std::int64_t).
constexpr std::byte get_format_signed(auto value) {
    auto const width = bytes_needed(std::int64_t{value});
    if (width == 0) return std::byte(value);
    if (width == 1) return format::int8::marker;
    if (width == 2) return format::int16::marker;
    if (width == 4) return format::int32::marker;
    return format::int64::marker;
}
// Marker byte for an unsigned value, chosen from the payload width reported by
// bytes_needed(std::uint64_t).
constexpr std::byte get_format(auto value) {
    auto const width = bytes_needed(std::uint64_t{value});
    if (width == 0) return std::byte(value);
    if (width == 1) return format::uint8::marker;
    if (width == 2) return format::uint16::marker;
    if (width == 4) return format::uint32::marker;
    return format::uint64::marker;
}
} // namespace detail
// Fixed-width write adapter. The primary template is empty; specializations
// provide size() and write().
template <typename T>
struct write_adapter {};
// Integral types are written at their full width: size() is sizeof(T) and
// write() passes every byte of host_to_be(t) to write_bytes().
template <std::integral T>
struct write_adapter<T> {
static constexpr auto size(T) noexcept { return sizeof(T); }
static constexpr auto size(T t) noexcept { return sizeof(T); }
template <typename Itr>
static constexpr auto write(T t, Itr out) noexcept {
return write_bytes(detail::raw_cast(host_to_be(t)), out);
return write_bytes(::detail::raw_cast(host_to_be(t)), out);
}
};
// Write adapter that picks the smallest encoding for a value. The primary
// template is empty; specializations provide size(), write() and
// format_hint().
template <typename T>
struct deduced_write_adapter {};

// Signed integral types.
template <std::signed_integral T>
struct deduced_write_adapter<T> {
    // Payload bytes following the marker (zero when the value lives in the
    // marker byte).
    static constexpr auto size(T value) noexcept {
        auto const widened = std::int64_t{value};
        return detail::bytes_needed(widened);
    }

    // Copies the trailing size(value) bytes of host_to_be(value) to `out`
    // and returns the advanced iterator.
    template <typename Itr>
    static constexpr auto write(T value, Itr out) noexcept {
        auto const bytes = ::detail::as_bytes(host_to_be(value));
        for (auto it = bytes.end() - size(value); it != bytes.end(); ++it) {
            *out++ = *it;
        }
        return out;
    }

    // Marker byte for `value`.
    static constexpr auto format_hint(T value) noexcept {
        return detail::get_format_signed(value);
    }
};
// Unsigned integral types.
template <std::unsigned_integral T>
struct deduced_write_adapter<T> {
    // Payload bytes following the marker (zero when the value lives in the
    // marker byte).
    static constexpr auto size(T value) noexcept {
        auto const widened = std::uint64_t{value};
        return detail::bytes_needed(widened);
    }

    // Copies the trailing `sz` bytes of host_to_be(value) to `out` and
    // returns the advanced iterator.
    template <typename Itr>
    static constexpr auto write(T value, Itr out, std::size_t sz) noexcept {
        auto const bytes = ::detail::as_bytes(host_to_be(value));
        for (auto it = bytes.end() - sz; it != bytes.end(); ++it) {
            *out++ = *it;
        }
        return out;
    }

    // Same as above, with the width deduced from the value.
    template <typename Itr>
    static constexpr auto write(T value, Itr out) noexcept {
        return write(value, out, size(value));
    }

    // Marker byte for `value`.
    static constexpr auto format_hint(T value) noexcept {
        return detail::get_format(value);
    }
};
// Strings: an optional big-endian length field followed by the raw characters.
//
// NOTE(review): detail::bytes_needed_for_str() takes a std::uint32_t, so sizes
// above 2^32 - 1 are narrowed before the width is computed. Callers are
// presumably expected to reject such strings beforehand -- confirm.
template <>
struct deduced_write_adapter<std::string_view> {
    // Bytes following the marker: length field (possibly empty) + characters.
    static constexpr auto size(std::string_view value) noexcept {
        return value.size() + detail::bytes_needed_for_str(value.size());
    }

    // Writes the length field (if any) and the characters to `out`, returning
    // the iterator one past the last byte written.
    template <typename Itr>
    static constexpr auto write(std::string_view value, Itr out) noexcept {
        auto const length_bytes = detail::bytes_needed_for_str(value.size());
        if (length_bytes) {
            out = deduced_write_adapter<std::uint64_t>::write(
                    value.size(), out, length_bytes);
        }
        // data() is valid for an empty view, whereas dereferencing begin()
        // is not.
        auto const* beg = reinterpret_cast<std::byte const*>(value.data());
        // std::copy already yields the advanced output iterator.
        return std::copy(beg, beg + value.size(), out);
    }

    // Marker byte: fixstr carries the length in the marker itself; any
    // length-field width other than 0, 1 or 2 maps to str32.
    static constexpr auto format_hint(std::string_view value) noexcept {
        switch (detail::bytes_needed_for_str(value.size())) {
            case 0: return format::fixstr::marker | std::byte(value.size());
            case 1: return format::str8::marker;
            case 2: return format::str16::marker;
            case 4:
            default: return format::str32::marker;
        }
    }
};

// Anything implicitly convertible to std::string_view reuses the
// std::string_view adapter.
template <std::convertible_to<std::string_view> T>
struct deduced_write_adapter<T> : deduced_write_adapter<std::string_view> {};
template <>
struct write_adapter<std::string_view> {
static constexpr auto size(std::string_view str) noexcept {
@ -76,7 +204,7 @@ struct write_adapter<std::string_view> {
std::byte const* beg =
reinterpret_cast<std::byte const*>(&*str.begin());
std::copy(beg, beg + str.size(), out);
return out += str.size();
return out + str.size();
}
};
@ -157,6 +285,13 @@ constexpr inline expected<typename F::first_type> pack_first(
}
}
// Satisfied when deduced_write_adapter<T> exposes the three static members
// used by the deduced write path, with exactly these result types:
//   size(t)        -> std::size_t
//   write(t, b)    -> std::byte* (the same type as the output pointer)
//   format_hint(t) -> std::byte
template <typename T>
concept v2_adapted = requires(T const& t, std::byte* b) {
{ deduced_write_adapter<T>::size(t) } -> std::same_as<std::size_t>;
{ deduced_write_adapter<T>::write(t, b) } -> std::same_as<decltype(b)>;
{ deduced_write_adapter<T>::format_hint(t) } -> std::same_as<std::byte>;
};
template <format_type F, typename Itr>
constexpr inline expected<Itr> write(
typename F::value_type&& value, Itr out, Itr const end) {
@ -228,7 +363,6 @@ public:
constexpr expected<tl::monostate> write(
typename F::value_type&& v) noexcept {
using value_type = typename F::value_type;
if (curr == end) return tl::make_unexpected(error::out_of_space);
auto result = detail::write<F>(std::forward<value_type>(v), curr, end);
if (!result) {
return tl::make_unexpected(result.error());
@ -238,10 +372,20 @@ public:
return tl::monostate{};
}
template <typename T>
requires requires { typename detail::format_hint<T>::type; }
constexpr expected<tl::monostate> write(T&& v) {
return write<typename detail::format_hint<T>::type>(std::forward<T>(v));
// Deduced-type write, automatically chooses smallest representative format
template <detail::v2_adapted T>
constexpr expected<tl::monostate> write2(T&& v) {
using diff_type = std::iterator_traits<decltype(curr)>::difference_type;
auto const space_needed =
diff_type(1 + deduced_write_adapter<T>::size(v));
if (space_needed > std::distance(curr, end)) {
return tl::make_unexpected(error::out_of_space);
}
*curr++ = deduced_write_adapter<T>::format_hint(v);
if (space_needed > 1) {
curr = deduced_write_adapter<T>::write(v, curr);
}
return tl::monostate{};
}
// Returns the current write position (a copy of `curr`).
constexpr auto pos() const noexcept { return curr; }

View File

@ -22,11 +22,12 @@
#include "parselink/msgpack/core/error.h"
#include "parselink/msgpack/core/format.h"
#include "parselink/msgpack/util/endianness.h"
#include <limits>
#include <type_traits>
#include <tl/expected.hpp>
#include <limits>
#include <type_traits>
namespace msgpack {
enum class writer_error {
@ -53,23 +54,6 @@ constexpr inline decltype(auto) write_bytes(
return out;
}
#if 0
// Figure out the smallest
namespace detail {
constexpr auto const& deduce_format(std::uint64_t value) {
if (value <= format::positive_fixint::mask) return format::
}
template <std::integral T>
struct write_adapter<T> {
static constexpr auto size(T t) noexcept {
}
}
} // namespace detail
#endif
template <typename T>
struct write_adapter {};

View File

@ -99,6 +99,11 @@ constexpr auto raw_cast(T val) noexcept {
return u.data;
}
// Reinterprets the object representation of `val` as an array of sizeof(T)
// bytes, in the host's in-memory order.
template <typename T>
constexpr auto as_bytes(T val) noexcept {
    using byte_array = std::array<std::byte, sizeof(T)>;
    return std::bit_cast<byte_array>(val);
}
/**
* A helper function for converting a std::array into some arbitrary type.
* Beware, this must be used on trivial data types.

View File

@ -2,6 +2,9 @@
#include <boost/ut.hpp>
#include <string>
#include <fmt/format.h>
#include <fmt/ranges.h>
using namespace boost::ut;
namespace format = msgpack::format;
@ -102,17 +105,17 @@ suite writer = [] {
std::array<std::byte, 2> payload;
auto constexpr expected = make_bytes(0x32, 0x55);
msgpack::writer writer(payload);
auto result = writer.write<fmt>(std::uint8_t{0x32});
expect(!!result);
expect(!!writer.write<fmt>(std::uint8_t{0x32}));
expect(writer.tell() == 1);
expect(*writer.subspan().begin() == std::byte{0x32});
expect(*writer.subspan().begin() == *expected.begin());
expect(writer.write<fmt>(std::uint8_t{0x82})
== tl::make_unexpected(error::bad_value));
writer.write(std::uint8_t{0x55});
expect(!!writer.write<fmt>(std::uint8_t{0x55}));
expect(writer.tell() == 2);
expect(equal(writer.subspan(), expected));
expect(writer.write(std::uint8_t{0x01})
expect(writer.write<fmt>(std::uint8_t{0x82})
== tl::make_unexpected(error::out_of_space));
expect(writer.write<fmt>(std::uint8_t{0x32})
== tl::make_unexpected(error::out_of_space));
};
@ -123,12 +126,13 @@ suite writer = [] {
std::array<std::byte, 4> payload;
auto constexpr expected = make_bytes(0xcc, 0x32, 0xcc, 0x82);
msgpack::writer writer(payload);
expect(!!writer.write<fmt>(std::uint8_t{0x32}));
auto result = writer.write<fmt>(std::uint8_t{0x32});
expect(!!result);
expect(writer.tell() == 2);
expect(equal(writer.subspan(), std::span{expected.begin(), 2}));
expect(!!writer.write<fmt>(std::uint8_t{0x82}));
expect(equal(writer.subspan(), expected));
expect(writer.write(std::uint8_t{0x01})
expect(writer.write<fmt>(std::uint8_t{0x01})
== tl::make_unexpected(error::out_of_space));
};
@ -208,7 +212,7 @@ suite writer = [] {
expect(equal(writer.subspan(), std::span{expected.begin(), 2}));
expect(!!writer.write<fmt>(std::int8_t{-5}));
expect(equal(writer.subspan(), expected));
expect(writer.write(std::uint8_t{0x01})
expect(writer.write<fmt>(0x01)
== tl::make_unexpected(error::out_of_space));
};
@ -431,3 +435,179 @@ suite writer = [] {
expect(equal(writer.subspan(), expected));
};
};
// Deduced writer tests
namespace {
// Per-type fixtures for the deduced writer tests. Each specialization offers:
//   values  - inputs, in the order they are written
//   payload - the expected encoding of all values, concatenated in that order
//   valid() - whether a given input is usable for the type under test
template <typename T>
struct test_data {};
// Signed integers. The values are ordered so that every input representable
// in a narrower type precedes those that are not; test_deduced() stops at the
// first input that is out of range for T.
template <std::signed_integral T>
struct test_data<T> {
static constexpr auto values = std::to_array<std::int64_t>({
-1, // negative fixint
-32, // negative fixint
-33, // int8
-128, // int8
0, // int8
127, // int8
128, // int16
-129, // int16
-32768, // int16
32767, // int16
-32769, // int32
32768, // int32
-2147483648, // int32
2147483647, // int32
-2147483649, // int64
2147483648, // int64
std::numeric_limits<std::int64_t>::lowest(), // int64
std::numeric_limits<std::int64_t>::max(), // int64
});
// Expected bytes: marker first, then the payload most-significant byte first.
static constexpr auto payload = make_bytes(0xff, // negative fixint
0xe0, // negative fixint
0xd0, 0xdf, // int8
0xd0, 0x80, // int8
0xd0, 0x0, // int8
0xd0, 0x7f, // int8
0xd1, 0x00, 0x80, // int16
0xd1, 0xff, 0x7f, // int16
0xd1, 0x80, 0x00, // int16
0xd1, 0x7f, 0xff, // int16
0xd2, 0xff, 0xff, 0x7f, 0xff, // int32
0xd2, 0x00, 0x00, 0x80, 0x00, // int32
0xd2, 0x80, 0x00, 0x00, 0x00, // int32
0xd2, 0x7f, 0xff, 0xff, 0xff, // int32
0xd3, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, // int64
0xd3, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, // int64
0xd3, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // int64
0xd3, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff // int64
);
// True when `value` lies within T's representable range.
static constexpr auto valid(auto value) noexcept {
return value <= std::numeric_limits<T>::max()
&& value >= std::numeric_limits<T>::lowest();
}
};
// Unsigned integers. Values are in ascending order, so test_deduced() can
// stop at the first input that exceeds T's maximum.
// NOTE(review): 0x79 is labelled as the upper positive-fixint sample, but the
// largest positive fixint in MessagePack is 0x7f; consider exercising that
// boundary as well.
template <std::unsigned_integral T>
struct test_data<T> {
static constexpr auto values = std::to_array<std::uint64_t>({
0x00, // positive fixint
0x79, // positive fixint
0x80, // uint8
0xff, // uint8
0x100, // uint16
0xffff, // uint16
0x10000, // uint32
0xffffffff, // uint32
0x100000000, // uint64
0xffffffffffffffff // uint64
});
// Expected bytes: marker first, then the payload most-significant byte first.
static constexpr auto payload = make_bytes(0x00, // positive fixint
0x79, // positive fixint
0xcc, 0x80, // uint8
0xcc, 0xff, // uint8
0xcd, 0x01, 0x00, // uint16
0xcd, 0xff, 0xff, // uint16
0xce, 0x00, 0x01, 0x00, 0x00, // uint32
0xce, 0xff, 0xff, 0xff, 0xff, // uint32
0xcf, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, // uint64
0xcf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff // uint64
);
// True when `value` does not exceed T's maximum.
static constexpr auto valid(auto value) noexcept {
return value <= std::numeric_limits<T>::max();
}
};
// Strings: lengths 0, 1 and 31 are expected as fixstr, length 32 as str8.
template <>
struct test_data<std::string_view> {
static constexpr auto values = std::to_array<std::string_view>({
"", // fixstr
"0", // fixstr
"0123456789abcdef0123456789abcde", // fixstr
"0123456789abcdef0123456789abcdef", // str8
});
// Expected bytes: marker (plus a one-byte length for str8) followed by the
// characters as ASCII codes.
static constexpr auto payload = make_bytes(0xa0, // fixstr
0xa1, 0x30, // fixstr
// fixstr
0xbf, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x30, 0x31, 0x32, 0x33, 0x34,
0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65,
// str8
0xd9, 0x20, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x30, 0x31, 0x32, 0x33,
0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65,
0x66);
// True when the string's size fits in 32 bits.
static constexpr auto valid(auto value) noexcept {
return value.size() <= std::numeric_limits<std::uint32_t>::max();
}
};
template <typename T>
bool test_deduced() noexcept {
constexpr auto const& expected_payload = test_data<T>::payload;
std::array<std::byte, expected_payload.size()> payload;
msgpack::writer writer(payload);
for (auto const& value : test_data<T>::values) {
if (!test_data<T>::valid(value)) break;
expect(!!writer.write2(T(value)));
auto expect = std::span(expected_payload.begin(), writer.tell());
auto correct = equal(writer.subspan(), expect);
if (!correct) {
fmt::print("Deduction failed for '{}'\n", T(value));
fmt::print("\tActual: {::#04x}\n", writer.subspan());
fmt::print("\tExpect: {::#04x}\n", expect);
return false;
}
}
return true;
}
} // anonymous namespace
// Deduced writer suite: one case per supported value type. Each case runs
// test_deduced<T>(), which feeds the test_data<T> fixtures through
// writer::write2() and checks the encoded bytes.
suite deduced_writer = [] {
    "writer::write<std::uint8_t> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::uint8_t>());
    };
    "writer::write<std::uint16_t> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::uint16_t>());
    };
    "writer::write<std::uint32_t> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::uint32_t>());
    };
    "writer::write<std::uint64_t> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::uint64_t>());
    };
    "writer::write<std::int8_t> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::int8_t>());
    };
    "writer::write<std::int16_t> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::int16_t>());
    };
    "writer::write<std::int32_t> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::int32_t>());
    };
    // Must instantiate the 64-bit case; the int32 instantiation is already
    // covered by the test above.
    "writer::write<std::int64_t> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::int64_t>());
    };
    "writer::write<std::string_view> (deduced + compressed)"_test = [] {
        expect(test_deduced<std::string_view>());
    };
};