diff --git a/.clang-format b/.clang-format index 97e9d15..15e9f82 100644 --- a/.clang-format +++ b/.clang-format @@ -29,8 +29,8 @@ IncludeCategories: SortPriority: 2 CaseSensitive: false - Regex: '.*' - Priority: 1 - SortPriority: 0 + Priority: 3 + SortPriority: 3 IncludeIsMainRegex: '(_test)?$' IndentAccessModifiers: false IndentCaseLabels: true diff --git a/include/parselink/msgpack/core/packer.h b/include/parselink/msgpack/core/packer.h new file mode 100644 index 0000000..56cd5b1 --- /dev/null +++ b/include/parselink/msgpack/core/packer.h @@ -0,0 +1,181 @@ +//----------------------------------------------------------------------------- +// ___ __ _ _ +// / _ \__ _ _ __ ___ ___ / /(_)_ __ | | __ +// / /_)/ _` | '__/ __|/ _ \/ / | | '_ \| |/ / +// / ___/ (_| | | \__ \ __/ /__| | | | | < +// \/ \__,_|_| |___/\___\____/_|_| |_|_|\_\ . +// +//----------------------------------------------------------------------------- +// Author: Kurt Sassenrath +// Module: msgpack +// +// Default packer implementation, which aims to deduce the best format to use +// for a given value. For example, if a 32-bit unsigned integer type only +// contains the value 5, a uint32 format would serialize into: +// +// 0xce, 0x00, 0x00, 0x00, 0x05 +// +// Instead, the packer will note that this value could be stored in a positive +// fixint, which is simply: +// +// 0x05 +// +// The same optimization will be applied to variable-length types, like strings, +// bytes, arrays, and maps. +// +// This flexibility comes at the cost of CPU instructions. For embedded targets, +// writer (to be renamed verbatim_packer in the future) may be a better choice. +// +// Future goals for this particular packer: +// 1. Support containers/ranges seamlessly. +// 2. Support packing of trivial POD structures without an explicit +// pack_adapter. +// +// Copyright (c) 2023 Kurt Sassenrath. +// +// License TBD. 
+//-----------------------------------------------------------------------------
+#ifndef msgpack_core_packer_1d5939e9c1498568
+#define msgpack_core_packer_1d5939e9c1498568
+
+#include "parselink/msgpack/core/error.h"
+#include "parselink/msgpack/core/format.h"
+#include "parselink/msgpack/util/endianness.h"
+
+#include
+
+#include
+#include
+
+namespace msgpack {
+
+namespace detail {
+
+// Writes the trailing `sz` bytes of the big-endian form of `value` to `out`.
+template
+constexpr auto write_integral(T value, Itr out, std::size_t sz) noexcept {
+    auto bytes = ::detail::as_bytes(host_to_be(value));
+    for (std::size_t i = 0; i < sz; ++i) {
+        *out++ = *(bytes.end() - sz + i);
+    }
+    return out; // one past the last byte written
+}
+
+// Depending on the format, a number of bytes will be necessary to represent
+// either the value (integer formats) or the length (variable length formats).
+template
+struct pack_helper {};
+
+template <>
+struct pack_helper {
+    static constexpr std::size_t num_bytes(std::uint64_t value) noexcept {
+        if (value <= std::uint64_t(format::positive_fixint::mask)) return 0;
+        return std::bit_ceil(std::uint64_t((std::bit_width(value) + 7) >> 3));
+    }
+
+    static constexpr std::byte marker(std::uint64_t value) noexcept {
+        switch (num_bytes(value)) {
+            case 0: return static_cast(value); // value is the marker
+            case 1: return format::uint8::marker;
+            case 2: return format::uint16::marker;
+            case 4: return format::uint32::marker;
+            default: return format::uint64::marker;
+        }
+    }
+};
+
+template <>
+struct pack_helper {
+    static constexpr std::size_t num_bytes(std::int64_t value) noexcept {
+        // Probably a better way to do this.
+        if (value < 0 && value >= -32) return 0; // negative fixint
+        auto underlying = static_cast(value);
+
+        // save a branch; these should be cheap on modern hardware.
+        std::uint64_t counts[2] = {
+                static_cast(std::countl_zero(underlying)),
+                static_cast(std::countl_one(underlying))};
+
+        std::uint64_t width = 1 + std::numeric_limits::digits
+                              - counts[underlying >> 63];
+
+        return std::bit_ceil((width + 7) >> 3);
+    }
+
+    static constexpr std::byte marker(std::int64_t value) noexcept {
+        switch (num_bytes(value)) {
+            case 0: return static_cast(value); // value is the marker
+            case 1: return format::int8::marker;
+            case 2: return format::int16::marker;
+            case 4: return format::int32::marker;
+            default: return format::int64::marker;
+        }
+    }
+};
+
+template <>
+struct pack_helper {
+    static constexpr std::size_t num_bytes(std::uint64_t value) noexcept {
+        if (value <= std::uint32_t(format::fixstr::mask)) return 0;
+        return std::bit_ceil(std::uint32_t((std::bit_width(value) + 7) >> 3));
+    }
+
+    static constexpr auto marker(std::string_view value) noexcept {
+        switch (num_bytes(value.size())) {
+            case 0: return format::fixstr::marker | std::byte(value.size());
+            case 1: return format::str8::marker;
+            case 2: return format::str16::marker;
+            case 4: // same marker as default
+            default: return format::str32::marker;
+        }
+    }
+};
+
+} // namespace detail
+
+// Pack adapter is the basis for packing native values into MessagePack format.
+template
+struct pack_adapter {};
+
+template
+concept builtin_packable_type = requires(T const& t, std::byte* b) {
+    { pack_adapter::size(t) } -> std::same_as;
+    { pack_adapter::write(t, b) } -> std::same_as;
+    { pack_adapter::marker(t) } -> std::same_as;
+};
+
+template
+concept packable_type = builtin_packable_type;
+
+template
+struct builtin_pack_adapter {
+    static constexpr auto format_type = F;
+
+    static constexpr auto size(auto value) noexcept {
+        return detail::pack_helper::num_bytes(value);
+    }
+
+    static constexpr auto marker(auto value) noexcept {
+        return detail::pack_helper::marker(value);
+    }
+};
+
+template
+struct pack_adapter : builtin_pack_adapter {
+    template
+    static constexpr Itr write(T value, Itr out) noexcept {
+        return detail::write_integral(value, out, size(value));
+    }
+};
+
+template
+struct pack_adapter : builtin_pack_adapter {
+    template
+    static constexpr Itr write(T value, Itr out) noexcept {
+        return detail::write_integral(value, out, size(value));
+    }
+};
+
+} // namespace msgpack
+
+#endif // msgpack_core_packer_1d5939e9c1498568
diff --git a/include/parselink/msgpack/core/writer.h b/include/parselink/msgpack/core/writer.h
index 245ad7e..bd902cc 100644
--- a/include/parselink/msgpack/core/writer.h
+++ b/include/parselink/msgpack/core/writer.h
@@ -18,14 +18,14 @@
 #ifndef msgpack_core_writer_ce48a51aa6ed0858
 #define msgpack_core_writer_ce48a51aa6ed0858
 
+#include
+
 #include "../util/endianness.h"
 #include "error.h"
 #include "format.h"
 
 #include
 #include
-#include
-
 namespace msgpack {
 
 enum class writer_error {
@@ -52,19 +52,147 @@ constexpr inline decltype(auto) write_bytes(
     return out;
 }
 
+namespace detail {
+
+constexpr std::size_t bytes_needed_for_str(std::uint32_t v) {
+    if (v <= std::uint32_t(format::fixstr::mask)) return 0; // fixstr
+    return std::bit_ceil(std::uint32_t((std::bit_width(v) + 7) >> 3));
+}
+
+constexpr std::size_t bytes_needed(std::uint64_t v) {
+    if (v <= std::uint64_t(format::positive_fixint::mask)) return 0;
+    return std::bit_ceil(std::uint64_t((std::bit_width(v) + 7) >> 3));
+}
+
+constexpr std::size_t bytes_needed(std::int64_t v) {
+    if (v < 0 && v >= -32) return 0; // negative fixint
+    auto width = 1 + std::numeric_limits::digits
+                 - (v < 0 ? std::countl_one(std::uint64_t(v))
+                          : std::countl_zero(std::uint64_t(v)));
+    return std::bit_ceil(std::uint64_t((width + 7) >> 3));
+}
+
+constexpr std::byte get_format_signed(auto value) {
+    switch (bytes_needed(std::int64_t{value})) {
+        case 0: return std::byte(value); // value is the marker
+        case 1: return format::int8::marker;
+        case 2: return format::int16::marker;
+        case 4: return format::int32::marker;
+        default: return format::int64::marker;
+    }
+}
+
+constexpr std::byte get_format(auto value) {
+    switch (bytes_needed(std::uint64_t{value})) {
+        case 0: return std::byte(value); // value is the marker
+        case 1: return format::uint8::marker;
+        case 2: return format::uint16::marker;
+        case 4: return format::uint32::marker;
+        default: return format::uint64::marker;
+    }
+}
+
+} // namespace detail
+
 template
 struct write_adapter {};
 
 template
 struct write_adapter {
-    static constexpr auto size(T) noexcept { return sizeof(T); }
+    static constexpr auto size(T t) noexcept { return sizeof(T); }
 
     template
     static constexpr auto write(T t, Itr out) noexcept {
-        return write_bytes(detail::raw_cast(host_to_be(t)), out);
+        return write_bytes(::detail::raw_cast(host_to_be(t)), out);
     }
 };
 
+template
+struct deduced_write_adapter {};
+
+template
+struct deduced_write_adapter {
+    static constexpr auto size(T value) noexcept {
+        return detail::bytes_needed(std::int64_t{value});
+    }
+
+    template
+    static constexpr auto write(T value, Itr out) noexcept {
+        auto bytes = ::detail::as_bytes(host_to_be(value));
+        auto sz = size(value);
+        for (std::size_t i = 0; i < sz; ++i) {
+            *out++ = *(bytes.end() - sz + i);
+        }
+        return out;
+    }
+
+    static constexpr auto format_hint(T value) noexcept {
+        return detail::get_format_signed(value);
+    }
+};
+
+template
+struct deduced_write_adapter {
+    static constexpr auto size(T value) noexcept {
+        return detail::bytes_needed(std::uint64_t{value});
+    }
+
+    template
+    static constexpr auto write(T value, Itr out) noexcept {
+        auto bytes = ::detail::as_bytes(host_to_be(value));
+        auto sz = size(value);
+        for (std::size_t i = 0; i < sz; ++i) {
+            *out++ = *(bytes.end() - sz + i);
+        }
+        return out;
+    }
+
+    template
+    static constexpr auto write(T value, Itr out, std::size_t sz) noexcept {
+        auto bytes = ::detail::as_bytes(host_to_be(value));
+        for (std::size_t i = 0; i < sz; ++i) {
+            *out++ = *(bytes.end() - sz + i);
+        }
+        return out;
+    }
+
+    static constexpr auto format_hint(T value) noexcept {
+        return detail::get_format(value);
+    }
+};
+
+template <>
+struct deduced_write_adapter {
+    static constexpr auto size(std::string_view value) noexcept {
+        return value.size() + detail::bytes_needed_for_str(value.size());
+    }
+
+    template
+    static constexpr auto write(std::string_view value, Itr out) noexcept {
+        auto bytes_needed = detail::bytes_needed_for_str(value.size());
+        if (bytes_needed) {
+            out = deduced_write_adapter::write(
+                    value.size(), out, bytes_needed);
+        }
+        // data() stays valid for an empty view; *begin() would not.
+        auto const* beg = reinterpret_cast<std::byte const*>(value.data());
+        return std::copy(beg, beg + value.size(), out);
+    }
+
+    static constexpr auto format_hint(std::string_view value) noexcept {
+        switch (detail::bytes_needed_for_str(value.size())) {
+            case 0: return format::fixstr::marker | std::byte(value.size());
+            case 1: return format::str8::marker;
+            case 2: return format::str16::marker;
+            case 4: // same marker as default
+            default: return format::str32::marker;
+        }
+    }
+};
+
+template T>
+struct deduced_write_adapter : deduced_write_adapter {};
+
 template <>
 struct write_adapter {
     static constexpr auto size(std::string_view str) noexcept {
@@ -76,7 +204,7 @@ struct write_adapter {
         std::byte const* beg =
                 reinterpret_cast(&*str.begin());
         std::copy(beg, beg + str.size(), out);
-        return out += str.size();
+        return out + str.size();
     }
 };
 
@@ -157,6 +285,13 @@ constexpr inline expected pack_first(
     }
 }
 
+template
+concept v2_adapted = requires(T const& t, std::byte* b) {
+    { deduced_write_adapter::size(t) } -> std::same_as;
+    { deduced_write_adapter::write(t, b) } -> std::same_as;
+    { deduced_write_adapter::format_hint(t) } -> std::same_as;
+};
+
 template
 constexpr inline expected write(
         typename F::value_type&& value, Itr out, Itr const end) {
@@ -228,7 +363,6 @@ public:
     constexpr expected write(
             typename F::value_type&& v) noexcept {
         using value_type = typename F::value_type;
-        if (curr == end) return tl::make_unexpected(error::out_of_space);
         auto result = detail::write(std::forward(v), curr, end);
         if (!result) {
             return tl::make_unexpected(result.error());
@@ -238,10 +372,20 @@ public:
         return tl::monostate{};
     }
 
-    template
-    requires requires { typename detail::format_hint::type; }
-    constexpr expected write(T&& v) {
-        return write::type>(std::forward(v));
+    // Deduced-type write, automatically chooses smallest representative format
+    template
+    constexpr expected write2(T&& v) {
+        using diff_type = std::iterator_traits::difference_type;
+        auto const space_needed =
+                diff_type(1 + deduced_write_adapter::size(v));
+        if (space_needed > std::distance(curr, end)) {
+            return tl::make_unexpected(error::out_of_space);
+        }
+        *curr++ = deduced_write_adapter::format_hint(v); // marker byte
+        if (space_needed > 1) { // more than the marker to emit
+            curr = deduced_write_adapter::write(v, curr);
+        }
+        return tl::monostate{};
     }
 
     constexpr auto pos() const noexcept { return curr; }
diff --git a/include/parselink/msgpack/token/writer.h b/include/parselink/msgpack/token/writer.h
index 7789256..4440b10 100644
--- a/include/parselink/msgpack/token/writer.h
+++ b/include/parselink/msgpack/token/writer.h
@@ -22,11 +22,12 @@
 #include "parselink/msgpack/core/error.h"
 #include "parselink/msgpack/core/format.h"
 #include "parselink/msgpack/util/endianness.h"
-#include
-#include
 
 #include
+
+#include
+#include
+
 namespace msgpack {
 
 enum class writer_error {
@@ -53,23 +54,6 @@ constexpr inline decltype(auto) write_bytes(
     return out;
 }
 
-#if 0
-// Figure out the smallest
-namespace detail {
-
-    constexpr auto const& deduce_format(std::uint64_t value) {
-        if (value <= format::positive_fixint::mask) return format::
-    }
-
-    template
-    struct write_adapter {
-        static constexpr auto size(T t) noexcept {
-
-        }
-    }
-} // namespace detail
-#endif
-
 template
 struct write_adapter {};
 
diff --git a/include/parselink/msgpack/util/endianness.h b/include/parselink/msgpack/util/endianness.h
index e265aa2..853628f 100644
--- a/include/parselink/msgpack/util/endianness.h
+++ b/include/parselink/msgpack/util/endianness.h
@@ -99,6 +99,11 @@ constexpr auto raw_cast(T val) noexcept {
     return u.data;
 }
 
+template
+constexpr auto as_bytes(T val) noexcept {
+    return std::bit_cast>(val);
+}
+
 /**
  * A helper function for converting a std::array into some arbitrary type.
  * Beware, this must be used on trivial data types.
diff --git a/tests/msgpack/test_writer.cpp b/tests/msgpack/test_writer.cpp
index c814be7..1020e89 100644
--- a/tests/msgpack/test_writer.cpp
+++ b/tests/msgpack/test_writer.cpp
@@ -2,6 +2,9 @@
 #include
 #include
 
+#include
+#include
+
 using namespace boost::ut;
 
 namespace format = msgpack::format;
@@ -102,17 +105,17 @@ suite writer = [] {
         std::array payload;
         auto constexpr expected = make_bytes(0x32, 0x55);
         msgpack::writer writer(payload);
-        auto result = writer.write(std::uint8_t{0x32});
-        expect(!!result);
+        expect(!!writer.write(std::uint8_t{0x32}));
         expect(writer.tell() == 1);
-        expect(*writer.subspan().begin() == std::byte{0x32});
+        expect(*writer.subspan().begin() == *expected.begin());
         expect(writer.write(std::uint8_t{0x82})
                 == tl::make_unexpected(error::bad_value));
-
-        writer.write(std::uint8_t{0x55});
+        expect(!!writer.write(std::uint8_t{0x55}));
         expect(writer.tell() == 2);
         expect(equal(writer.subspan(), expected));
-        expect(writer.write(std::uint8_t{0x01})
+        expect(writer.write(std::uint8_t{0x82})
+                == tl::make_unexpected(error::out_of_space));
+        expect(writer.write(std::uint8_t{0x32})
                 == tl::make_unexpected(error::out_of_space));
     };
 
@@ -123,12 +126,13 @@ suite writer = [] {
         std::array payload;
         auto constexpr expected = make_bytes(0xcc, 0x32, 0xcc, 0x82);
         msgpack::writer writer(payload);
-        expect(!!writer.write(std::uint8_t{0x32}));
+        auto result = writer.write(std::uint8_t{0x32});
+        expect(!!result);
         expect(writer.tell() == 2);
         expect(equal(writer.subspan(), std::span{expected.begin(), 2}));
         expect(!!writer.write(std::uint8_t{0x82}));
         expect(equal(writer.subspan(), expected));
-        expect(writer.write(std::uint8_t{0x01})
+        expect(writer.write(std::uint8_t{0x01})
                 == tl::make_unexpected(error::out_of_space));
     };
 
@@ -208,7 +212,7 @@ suite writer = [] {
         expect(equal(writer.subspan(), std::span{expected.begin(), 2}));
         expect(!!writer.write(std::int8_t{-5}));
         expect(equal(writer.subspan(), expected));
-        expect(writer.write(std::uint8_t{0x01})
+        expect(writer.write(0x01)
                 == tl::make_unexpected(error::out_of_space));
     };
 
@@ -431,3 +435,179 @@ suite writer = [] {
         expect(equal(writer.subspan(), expected));
     };
 };
+
+// Deduced writer tests
+namespace {
+
+template
+struct test_data {};
+
+template
+struct test_data {
+    static constexpr auto values = std::to_array({
+            -1, // negative fixint
+            -32, // negative fixint
+            -33, // int8
+            -128, // int8
+            0, // int8
+            127, // int8
+            128, // int16
+            -129, // int16
+            -32768, // int16
+            32767, // int16
+            -32769, // int32
+            32768, // int32
+            -2147483648, // int32
+            2147483647, // int32
+            -2147483649, // int64
+            2147483648, // int64
+            std::numeric_limits::lowest(), // int64
+            std::numeric_limits::max(), // int64
+    });
+
+    static constexpr auto payload = make_bytes(0xff, // negative fixint
+            0xe0, // negative fixint
+            0xd0, 0xdf, // int8
+            0xd0, 0x80, // int8
+            0xd0, 0x0, // int8
+            0xd0, 0x7f, // int8
+            0xd1, 0x00, 0x80, // int16
+            0xd1, 0xff, 0x7f, // int16
+            0xd1, 0x80, 0x00, // int16
+            0xd1, 0x7f, 0xff, // int16
+            0xd2, 0xff, 0xff, 0x7f, 0xff, // int32
+            0xd2, 0x00, 0x00, 0x80, 0x00, // int32
+            0xd2, 0x80, 0x00, 0x00, 0x00, // int32
+            0xd2, 0x7f, 0xff, 0xff, 0xff, // int32
+            0xd3, 0xff, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, // int64
+            0xd3, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, // int64
+            0xd3, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // int64
+            0xd3, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff // int64
+    );
+
+    static constexpr auto valid(auto value) noexcept {
+        return value <= std::numeric_limits::max()
+               && value >= std::numeric_limits::lowest();
+    }
+};
+
+template
+struct test_data {
+    static constexpr auto values = std::to_array({
+            0x00, // positive fixint
+            0x79, // positive fixint
+            0x80, // uint8
+            0xff, // uint8
+            0x100, // uint16
+            0xffff, // uint16
+            0x10000, // uint32
+            0xffffffff, // uint32
+            0x100000000, // uint64
+            0xffffffffffffffff // uint64
+    });
+
+    static constexpr auto payload = make_bytes(0x00, // positive fixint
+            0x79, // positive fixint
+            0xcc, 0x80, // uint8
+            0xcc, 0xff, // uint8
+            0xcd, 0x01, 0x00, // uint16
+            0xcd, 0xff, 0xff, // uint16
+            0xce, 0x00, 0x01, 0x00, 0x00, // uint32
+            0xce, 0xff, 0xff, 0xff, 0xff, // uint32
+            0xcf, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, // uint64
+            0xcf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff // uint64
+    );
+
+    static constexpr auto valid(auto value) noexcept {
+        return value <= std::numeric_limits::max();
+    }
+};
+
+template <>
+struct test_data {
+    static constexpr auto values = std::to_array({
+            "", // fixstr
+            "0", // fixstr
+            "0123456789abcdef0123456789abcde", // fixstr
+            "0123456789abcdef0123456789abcdef", // str8
+    });
+
+    static constexpr auto payload = make_bytes(0xa0, // fixstr
+            0xa1, 0x30, // fixstr
+
+            // fixstr
+            0xbf, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+            0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x30, 0x31, 0x32, 0x33, 0x34,
+            0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65,
+
+            // str8
+            0xd9, 0x20, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+            0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x30, 0x31, 0x32, 0x33,
+            0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65,
+            0x66);
+
+    static constexpr auto valid(auto value) noexcept {
+        return value.size() <= std::numeric_limits::max();
+    }
+};
+
+template
+bool test_deduced() noexcept {
+    constexpr auto const& expected_payload = test_data::payload;
+    std::array payload;
+
+    msgpack::writer writer(payload);
+    for (auto const& value : test_data::values) {
+        if (!test_data::valid(value)) break; // values are ordered by width
+        expect(!!writer.write2(T(value)));
+        auto expected = std::span(expected_payload.begin(), writer.tell());
+        auto correct = equal(writer.subspan(), expected);
+        if (!correct) {
+            fmt::print("Deduction failed for '{}'\n", T(value));
+            fmt::print("\tActual: {::#04x}\n", writer.subspan());
+            fmt::print("\tExpect: {::#04x}\n", expected);
+            return false;
+        }
+    }
+    return true;
+}
+
+} // anonymous namespace
+
+suite deduced_writer = [] {
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+
+    "writer::write (deduced + compressed)"_test = [] {
+        expect(test_deduced());
+    };
+};