diff --git a/include/bitcoin/system/hash/sha/algorithm.hpp b/include/bitcoin/system/hash/sha/algorithm.hpp index 3c11a9c058..46ee4de435 100644 --- a/include/bitcoin/system/hash/sha/algorithm.hpp +++ b/include/bitcoin/system/hash/sha/algorithm.hpp @@ -113,6 +113,9 @@ class algorithm static void accumulate(state_t& state, iblocks_t&& blocks) NOEXCEPT; static constexpr void accumulate(state_t& state, const block_t& block) NOEXCEPT; static constexpr digest_t normalize(const state_t& state) NOEXCEPT; + + template + static constexpr digest_t finalize(state_t& state) NOEXCEPT; static constexpr digest_t finalize(state_t& state, size_t blocks) NOEXCEPT; static constexpr digest_t finalize_second(const state_t& state) NOEXCEPT; static constexpr digest_t finalize_double(state_t& state, size_t blocks) NOEXCEPT; @@ -166,9 +169,6 @@ class algorithm template = true> using wstate_t = std_array; - ////template = true> - ////using wblock_t = std_array; - /// Other types. /// ----------------------------------------------------------------------- @@ -232,8 +232,8 @@ class algorithm /// ----------------------------------------------------------------------- INLINE static constexpr void input(buffer_t& buffer, const block_t& block) NOEXCEPT; - INLINE static constexpr void input_left(buffer_t& buffer, const half_t& half) NOEXCEPT; - INLINE static constexpr void input_right(buffer_t& buffer, const half_t& half) NOEXCEPT; + INLINE static constexpr void input_left(auto& buffer, const half_t& half) NOEXCEPT; + INLINE static constexpr void input_right(auto& buffer, const half_t& half) NOEXCEPT; INLINE static constexpr digest_t output(const state_t& state) NOEXCEPT; /// Padding. @@ -260,7 +260,8 @@ class algorithm /// Double hashing. /// ----------------------------------------------------------------------- - static constexpr void reinput(auto& buffer, const auto& state) NOEXCEPT; + static constexpr void reinput_left(auto& buffer, const auto& left) NOEXCEPT; + static constexpr void reinput_right(auto& buffer, const auto& right) NOEXCEPT; /// Iteration (message scheduling vectorized for multiple blocks). /// ----------------------------------------------------------------------- @@ -385,13 +386,30 @@ class algorithm xint128_t message) NOEXCEPT; template - INLINE static void native_rounds(xint128_t& lo, xint128_t& hi, + static void native_rounds(xint128_t& lo, xint128_t& hi, const block_t& block) NOEXCEPT; - static void native_(state_t& state, iblocks_t& blocks) NOEXCEPT; - static void native_(state_t& state, const block_t& block) NOEXCEPT; - INLINE static void native_preswapped(state_t& state, - const words_t& block) NOEXCEPT; + template + static void native_transform(state_t& state, const auto& block) NOEXCEPT; + static void native_transform(state_t& state, iblocks_t& blocks) NOEXCEPT; + + template + static digest_t native_finalize(state_t& state) NOEXCEPT; + static digest_t native_finalize(state_t& state, size_t blocks) NOEXCEPT; + static digest_t native_finalize(state_t& state, const words_t& pad) NOEXCEPT; + + static digest_t native_finalize_second(const state_t& half) NOEXCEPT; + static digest_t native_finalize_double(state_t& half, size_t blocks) NOEXCEPT; + + ////static digest_t native_hash(const block_t& block) NOEXCEPT; + static digest_t native_hash(const half_t& half) NOEXCEPT; + static digest_t native_hash(const half_t& left, const half_t& right) NOEXCEPT; + + static digest_t native_double_hash(const block_t& block) NOEXCEPT; + static digest_t native_double_hash(const half_t& half) NOEXCEPT; + static digest_t native_double_hash(const half_t& left, const half_t& right) NOEXCEPT; + + public: /// Summary public values. diff --git a/include/bitcoin/system/have.hpp b/include/bitcoin/system/have.hpp index 79da053996..e23a7ed2ce 100644 --- a/include/bitcoin/system/have.hpp +++ b/include/bitcoin/system/have.hpp @@ -130,7 +130,6 @@ /// XCPU architecture intrinsics sse41, avx2, avx512f, sha-ni. /// This assumes that avx512 implies avx2 and that all imply sse41. -/// All require runtime evaluation for the binary is portable across XCPUs. #if defined(HAVE_XCPU) // TODO: CLANG/GCC compile test and set -msse4 -mavx2 -mavx512f -msha. #if defined(WITH_SHANI) diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_double.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_double.ipp index cf1be17fd7..95aec5abb3 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_double.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_double.ipp @@ -22,7 +22,7 @@ // Double hashing. // ============================================================================ // No hash(state_t) optimizations for sha160 (requires chunk_t/half_t). -// State input directly to buffer (reinput) eliminates two endianness calls. +// State put directly to buffer (reinput) eliminates two endianness transforms. namespace libbitcoin { namespace system { @@ -33,30 +33,57 @@ namespace sha { TEMPLATE INLINE constexpr void CLASS:: -reinput(auto& buffer, const auto& state) NOEXCEPT +reinput_left(auto& buffer, const auto& left) NOEXCEPT { - static_assert(SHA::strength != 160); + using words = decltype(buffer); + static_assert(array_count >= SHA::state_words); if (std::is_constant_evaluated()) { - buffer[0] = state[0]; - buffer[1] = state[1]; - buffer[2] = state[2]; - buffer[3] = state[3]; - buffer[4] = state[4]; - buffer[5] = state[5]; - buffer[6] = state[6]; - buffer[7] = state[7]; + buffer.at(0) = left.at(0); + buffer.at(1) = left.at(1); + buffer.at(2) = left.at(2); + buffer.at(3) = left.at(3); + buffer.at(4) = left.at(4); + buffer.at(5) = left.at(5); + buffer.at(6) = left.at(6); + buffer.at(7) = left.at(7); } else { - using word = array_element; - array_cast(buffer) = state; + using word = array_element; + array_cast(buffer) = left; + } +} + +TEMPLATE +INLINE constexpr void CLASS:: +reinput_right(auto& buffer, const auto& right) NOEXCEPT +{ + using words = decltype(buffer); + static_assert(array_count >= SHA::state_words); + + if (std::is_constant_evaluated()) + { + buffer.at(8) = right.at(0); + buffer.at(9) = right.at(1); + buffer.at(10) = right.at(2); + buffer.at(11) = right.at(3); + buffer.at(12) = right.at(4); + buffer.at(13) = right.at(5); + buffer.at(14) = right.at(6); + buffer.at(15) = right.at(7); + } + else + { + using word = array_element; + array_cast(buffer) = right; } } // public // ---------------------------------------------------------------------------- +// These benefit from avoiding state endian transition and reusing buffer. TEMPLATE template @@ -68,18 +95,18 @@ double_hash(const ablocks_t& blocks) NOEXCEPT auto state = H::get; iterate(state, blocks); - buffer_t buffer{}; - schedule_n(buffer); - compress(state, buffer); - - // Second hash - reinput(buffer, state); - pad_half(buffer); - schedule(buffer); - state = H::get; - compress(state, buffer); - - return output(state); + if (std::is_constant_evaluated()) + { + return finalize_double(state, Size); + } + else if constexpr (native && SHA::strength == 256) + { + return native_finalize_double(state, Size); + } + else + { + return finalize_double(state, Size); + } } TEMPLATE @@ -94,18 +121,14 @@ double_hash(iblocks_t&& blocks) NOEXCEPT auto state = H::get; iterate(state, blocks); - buffer_t buffer{}; - schedule_n(buffer, count); - compress(state, buffer); - - // Second hash - reinput(buffer, state); - pad_half(buffer); - schedule(buffer); - state = H::get; - compress(state, buffer); - - return output(state); + if constexpr (native && SHA::strength == 256) + { + return native_finalize_double(state, count); + } + else + { + return finalize_double(state, count); + } } TEMPLATE @@ -114,23 +137,38 @@ double_hash(const block_t& block) NOEXCEPT { static_assert(is_same_type); - auto state = H::get; + const auto hash2 = [](const block_t& block) NOEXCEPT + { + auto state = H::get; + buffer_t buffer{}; + input(buffer, block); + schedule(buffer); + compress(state, buffer); + schedule_1(buffer); + compress(state, buffer); + + // Second hash + reinput_left(buffer, state); + pad_half(buffer); + schedule(buffer); + state = H::get; + compress(state, buffer); + + return output(state); + }; - buffer_t buffer{}; - input(buffer, block); - schedule(buffer); - compress(state, buffer); - schedule_1(buffer); - compress(state, buffer); - - // Second hash - reinput(buffer, state); - pad_half(buffer); - schedule(buffer); - state = H::get; - compress(state, buffer); - - return output(state); + if (std::is_constant_evaluated()) + { + return hash2(block); + } + else if constexpr (native && SHA::strength == 256) + { + return native_double_hash(block); + } + else + { + return hash2(block); + } } TEMPLATE @@ -139,22 +177,37 @@ double_hash(const half_t& half) NOEXCEPT { static_assert(is_same_type); - auto state = H::get; - - buffer_t buffer{}; - input_left(buffer, half); - pad_half(buffer); - schedule(buffer); - compress(state, buffer); - - // Second hash - reinput(buffer, state); - pad_half(buffer); - schedule(buffer); - state = H::get; - compress(state, buffer); + const auto hash2 = [](const half_t& half) NOEXCEPT + { + auto state = H::get; + buffer_t buffer{}; + input_left(buffer, half); + pad_half(buffer); + schedule(buffer); + compress(state, buffer); + + // Second hash + reinput_left(buffer, state); + pad_half(buffer); + schedule(buffer); + state = H::get; + compress(state, buffer); + + return output(state); + }; - return output(state); + if (std::is_constant_evaluated()) + { + return hash2(half); + } + else if constexpr (native && SHA::strength == 256) + { + return native_double_hash(half); + } + else + { + return hash2(half); + } } TEMPLATE @@ -163,24 +216,39 @@ double_hash(const half_t& left, const half_t& right) NOEXCEPT { static_assert(is_same_type); - auto state = H::get; + const auto hash2 = [](const half_t& left, const half_t& right) NOEXCEPT + { + auto state = H::get; + buffer_t buffer{}; + input_left(buffer, left); + input_right(buffer, right); + schedule(buffer); + compress(state, buffer); + schedule_1(buffer); + compress(state, buffer); + + // Second hash + reinput_left(buffer, state); + pad_half(buffer); + schedule(buffer); + state = H::get; + compress(state, buffer); + + return output(state); + }; - buffer_t buffer{}; - input_left(buffer, left); - input_right(buffer, right); - schedule(buffer); - compress(state, buffer); - schedule_1(buffer); - compress(state, buffer); - - // Second hash - reinput(buffer, state); - pad_half(buffer); - schedule(buffer); - state = H::get; - compress(state, buffer); - - return output(state); + if (std::is_constant_evaluated()) + { + return hash2(left, right); + } + else if constexpr (native && SHA::strength == 256) + { + return native_double_hash(left, right); + } + else + { + return hash2(left, right); + } } } // namespace sha diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp index 17eff4e206..1be09fd30d 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_iterate.ipp @@ -248,14 +248,14 @@ INLINE void CLASS:: iterate_native(state_t& state, const ablocks_t& blocks) NOEXCEPT { iblocks_t iblocks{ array_cast(blocks) }; - native_(state, iblocks); + native_transform(state, iblocks); } TEMPLATE INLINE void CLASS:: iterate_native(state_t& state, iblocks_t& blocks) NOEXCEPT { - native_(state, blocks); + native_transform(state, blocks); } // Dispatch and normal forms. diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp index 72f3853beb..338c97306c 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp @@ -372,7 +372,7 @@ merkle_hash_vector(idigests_t& digests, iblocks_t& blocks) NOEXCEPT compress_(xstate, xbuffer); // Second hash - reinput(xbuffer, xstate); + reinput_left(xbuffer, xstate); pad_half(xbuffer); schedule_(xbuffer); xstate = initial; @@ -404,9 +404,18 @@ merkle_hash_vector(digests_t& digests) NOEXCEPT // Merkle hash vector dispatch. if constexpr (use_x512) merkle_hash_vector(idigests, iblocks); + + // Use if shani is not available or at least 32 blocks. if constexpr (use_x256) - merkle_hash_vector(idigests, iblocks); - if constexpr (use_x128) + { + if constexpr (!native) + merkle_hash_vector(idigests, iblocks); + else if (start >= 32_size) + merkle_hash_vector(idigests, iblocks); + } + + // Only use if shani is not available. + if constexpr (use_x128 && !native) merkle_hash_vector(idigests, iblocks); // iblocks.size() is reduced by vectorization. @@ -461,7 +470,6 @@ merkle_hash(digests_t& digests) NOEXCEPT #endif if constexpr (vector) { - // TODO: test vector vs. native performance for the 4 lane scenario. // Merkle block vectorization is applied at 16/8/4 lanes (as available) // and falls back to native/normal (as available) for 3/2/1 lanes. merkle_hash_vector(digests); diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp index dd6277ca11..798ecce9d3 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp @@ -96,9 +96,15 @@ round_4(xint128_t& state0, xint128_t& state1, xint128_t message) NOEXCEPT state0 = mm_sha256rnds2_epu32(state0, state1, mm_shuffle_epi32(wk, 0x0e)); } +// Platform agnostic. +// ---------------------------------------------------------------------------- +// Individual state vars are used vs. array to ensure register persistence. +// This creates bifurcations in this template because of the lack of a buffer +// and the differing optimal locations for applying endianness conversions. + TEMPLATE template -INLINE void CLASS:: +void CLASS:: native_rounds(xint128_t& lo, xint128_t& hi, const block_t& block) NOEXCEPT { const auto& wblock = array_cast(block); @@ -170,17 +176,22 @@ native_rounds(xint128_t& lo, xint128_t& hi, const block_t& block) NOEXCEPT hi = add(hi, start_hi); } +// Transforms perform scheduling and compression with optional endianness +// conversion of the block input. State is normalized, which requires some +// additional shuffle/unshuffle calls between transformations of same state. +// State output is not finalized, which is endianness conversion to digest_t. +// ---------------------------------------------------------------------------- + TEMPLATE void CLASS:: -native_(state_t& state, iblocks_t& blocks) NOEXCEPT +native_transform(state_t& state, iblocks_t& blocks) NOEXCEPT { - // Individual state vars are used vs. array to ensure register persistence. auto& wstate = array_cast(state); auto lo = load(wstate[0]); auto hi = load(wstate[1]); shuffle(lo, hi); - for (auto& block : blocks) + for (auto& block: blocks) native_rounds(lo, hi, block); unshuffle(lo, hi); @@ -189,34 +200,173 @@ native_(state_t& state, iblocks_t& blocks) NOEXCEPT } TEMPLATE +template void CLASS:: -native_(state_t& state, const block_t& block) NOEXCEPT +native_transform(state_t& state, const auto& block) NOEXCEPT { auto& wstate = array_cast(state); auto lo = load(wstate[0]); auto hi = load(wstate[1]); shuffle(lo, hi); - native_rounds(lo, hi, block); + native_rounds(lo, hi, array_cast(block)); unshuffle(lo, hi); store(wstate[0], lo); store(wstate[1], hi); } +// Finalization creates and/or applies a given padding block to the state +// accumulation and performs big-endian conversion from state_t to digest_t. +// As padding blocks are generated and therefore do not require endianness +// conversion, those calls are not applied when transforming the pad block. +// ---------------------------------------------------------------------------- + TEMPLATE -INLINE void CLASS:: -native_preswapped(state_t& state, const words_t& block) NOEXCEPT +typename CLASS::digest_t CLASS:: +native_finalize(state_t& state, const words_t& pad) NOEXCEPT { auto& wstate = array_cast(state); auto lo = load(wstate[0]); auto hi = load(wstate[1]); shuffle(lo, hi); + native_rounds(lo, hi, array_cast(pad)); + unshuffle(lo, hi); - // This override is for padding (big-endian, preswapped data). - native_rounds(lo, hi, array_cast(block)); + // digest is copied so that state remains valid (LE). + digest_t digest{}; + auto& wdigest = array_cast(digest); + store(wdigest[0], byteswap(lo)); + store(wdigest[1], byteswap(hi)); + return digest; +} - unshuffle(lo, hi); - store(wstate[0], lo); - store(wstate[1], hi); +TEMPLATE +template +typename CLASS::digest_t CLASS:: +native_finalize(state_t& state) NOEXCEPT +{ + // We could use Blocks to cache padding but given the padding blocks are + // unscheduled when performing native transformations there's no benefit. + return native_finalize(state, Blocks); +} + +TEMPLATE +typename CLASS::digest_t CLASS:: +native_finalize(state_t& state, size_t blocks) NOEXCEPT +{ + return native_finalize(state, pad_blocks(blocks)); +} + +TEMPLATE +typename CLASS::digest_t CLASS:: +native_finalize_second(const state_t& state) NOEXCEPT +{ + // No hash(state_t) optimizations for sha160 (requires chunk_t/half_t). + static_assert(is_same_type); + + // Hash a state value and finalize it. + auto state2 = H::get; + words_t block{}; + reinput_left(block, state); // swapped + pad_half(block); // swapped + return native_finalize(state2, block); // no block swap (swaps state) +} + +TEMPLATE +typename CLASS::digest_t CLASS:: +native_finalize_double(state_t& state, size_t blocks) NOEXCEPT +{ + // Complete first hash by transforming padding, but don't convert state. + auto block = pad_blocks(blocks); + native_transform(state, block); // no swap + + // This is native_finalize_second() but reuses the initial block. + auto state2 = H::get; + reinput_left(block, state); // swapped + pad_half(block); // swapped + return native_finalize(state2, block); // no block swap (swaps state) +} + +// Hash functions start with BE data and end with BE digest_t. +// ---------------------------------------------------------------------------- + +TEMPLATE +typename CLASS::digest_t CLASS:: +native_hash(const half_t& half) NOEXCEPT +{ + // No hash(state_t) optimizations for sha160 (requires chunk_t/half_t). + static_assert(is_same_type); + + auto state = H::get; + words_t block{}; + input_left(block, half); // swaps + pad_half(block); // swapped + return native_finalize(state, block); // no block swap (swaps state) +} + +TEMPLATE +typename CLASS::digest_t CLASS:: +native_hash(const half_t& left, const half_t& right) NOEXCEPT +{ + auto state = H::get; + words_t block{}; + reinput_left(block, array_cast(left)); // unswapped + reinput_right(block, array_cast(right)); // unswapped + native_transform(state, block); // swap + return native_finalize(state); // no block swap (swaps state) +} + +// Double hash functions start with BE data and end with BE digest_t. +// ---------------------------------------------------------------------------- + +TEMPLATE +typename CLASS::digest_t CLASS:: +native_double_hash(const block_t& block) NOEXCEPT +{ + auto state = H::get; + native_transform(state, block); // swap + native_transform(state, pad_block()); // swapped + + // Second hash + words_t block2{}; + reinput_left(block2, state); // swapped + pad_half(block2); // swapped + state = H::get; // [reuse state var] + return native_finalize(state, block2); // no block swap (swaps state) +} + +TEMPLATE +typename CLASS::digest_t CLASS:: +native_double_hash(const half_t& half) NOEXCEPT +{ + auto state = H::get; + words_t block{}; + input_left(block, half); // swaps + pad_half(block); // swapped + native_transform(state, block); // no block swap + + // Second hash + reinput_left(block, state); // swapped + pad_half(block); // swapped + state = H::get; // [reuse state var] + return native_finalize(state, block); // no block swap (swaps state) +} + +TEMPLATE +typename CLASS::digest_t CLASS:: +native_double_hash(const half_t& left, const half_t& right) NOEXCEPT +{ + auto state = H::get; + words_t block{}; + reinput_left(block, array_cast(left)); // unswapped + reinput_right(block, array_cast(right)); // unswapped + native_transform(state, block); // swap + native_transform(state, pad_block()); // swapped + + // Second hash + reinput_left(block, state); // swapped + pad_half(block); // swapped + state = H::get; // [reuse state var] + return native_finalize(state, block); // no block swap (swaps state) } } // namespace sha diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp index 4f0ae899a6..9101cce7ce 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp @@ -113,7 +113,7 @@ input(buffer_t& buffer, const block_t& block) NOEXCEPT TEMPLATE INLINE constexpr void CLASS:: -input_left(buffer_t& buffer, const half_t& half) NOEXCEPT +input_left(auto& buffer, const half_t& half) NOEXCEPT { using word = array_element; @@ -167,7 +167,7 @@ input_left(buffer_t& buffer, const half_t& half) NOEXCEPT TEMPLATE INLINE constexpr void CLASS:: -input_right(buffer_t& buffer, const half_t& half) NOEXCEPT +input_right(auto& buffer, const half_t& half) NOEXCEPT { using word = array_element; diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_single.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_single.ipp index ee29582ce0..06383ec21b 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_single.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_single.ipp @@ -34,138 +34,89 @@ template constexpr typename CLASS::digest_t CLASS:: hash(const ablocks_t& blocks) NOEXCEPT { - if (std::is_constant_evaluated()) - { - auto state = H::get; - iterate(state, blocks); - buffer_t buffer{}; - schedule_n(buffer); - compress(state, buffer); - return output(state); - } - else if constexpr (native && SHA::strength == 256) - { - auto state = H::get; - iterate(state, blocks); - native_preswapped(state, pad_blocks(Size)); - return output(state); - } - else - { - auto state = H::get; - iterate(state, blocks); - buffer_t buffer{}; - schedule_n(buffer); - compress(state, buffer); - return output(state); - } + auto state = H::get; + iterate(state, blocks); + return finalize(state); } TEMPLATE typename CLASS::digest_t CLASS:: hash(iblocks_t&& blocks) NOEXCEPT { - if constexpr (native && SHA::strength == 256) + // Save block count, as iterable decrements. + const auto count = blocks.size(); + auto state = H::get; + iterate(state, blocks); + return finalize(state, count); +} + +TEMPLATE +constexpr typename CLASS::digest_t CLASS:: +hash(const block_t& block) NOEXCEPT +{ + return hash(ablocks_t{ block }); +} + +TEMPLATE +constexpr typename CLASS::digest_t CLASS:: +hash(const half_t& half) NOEXCEPT +{ + const auto hash1 = [](const half_t& half) NOEXCEPT { - // Save block count, as iterable decrements. - const auto count = blocks.size(); auto state = H::get; - iterate(state, blocks); - native_preswapped(state, pad_blocks(count)); + buffer_t buffer{}; + input_left(buffer, half); + pad_half(buffer); + schedule(buffer); + compress(state, buffer); return output(state); + }; + + if (std::is_constant_evaluated()) + { + return hash1(half); + } + else if constexpr (native && SHA::strength == 256) + { + return native_hash(half); } else { - // Save block count, as iterable decrements. - const auto count = blocks.size(); - auto state = H::get; - iterate(state, blocks); - buffer_t buffer{}; - schedule_n(buffer, count); - compress(state, buffer); - return output(state); + return hash1(half); } } TEMPLATE constexpr typename CLASS::digest_t CLASS:: -hash(const block_t& block) NOEXCEPT +hash(const half_t& left, const half_t& right) NOEXCEPT { - if (std::is_constant_evaluated()) + const auto hash1 = [](const half_t& left, const half_t& right) NOEXCEPT { auto state = H::get; buffer_t buffer{}; - input(buffer, block); + input_left(buffer, left); + input_right(buffer, right); schedule(buffer); compress(state, buffer); schedule_1(buffer); compress(state, buffer); return output(state); + }; + + if (std::is_constant_evaluated()) + { + return hash1(left, right); } else if constexpr (native && SHA::strength == 256) { - ////auto state = H::get; - ////auto& wstate = array_cast(state); - ////auto lo = load(wstate[0]); - ////auto hi = load(wstate[1]); - ////shuffle(lo, hi); - ////native_(lo, hi, block); - ////native_(lo, hi, pad_1()); - ////unshuffle(lo, hi); - ////byteswap(lo); - ////byteswap(hi); - ////store(wstate[0], lo); - ////store(wstate[1], hi); - ////return array_cast(state); - - // Simpler but repeats shuffle/unshuffle, re-loads state, and unloads - // state before byteswap. - auto state = H::get; - native_(state, block); - native_preswapped(state, pad_block()); - return output(state); + return native_hash(left, right); } else { - auto state = H::get; - buffer_t buffer{}; - input(buffer, block); - schedule(buffer); - compress(state, buffer); - schedule_1(buffer); - compress(state, buffer); - return output(state); + return hash1(left, right); } } -TEMPLATE -constexpr typename CLASS::digest_t CLASS:: -hash(const half_t& half) NOEXCEPT -{ - auto state = H::get; - buffer_t buffer{}; - input_left(buffer, half); - pad_half(buffer); - schedule(buffer); - compress(state, buffer); - return output(state); -} - -TEMPLATE -constexpr typename CLASS::digest_t CLASS:: -hash(const half_t& left, const half_t& right) NOEXCEPT -{ - auto state = H::get; - buffer_t buffer{}; - input_left(buffer, left); - input_right(buffer, right); - schedule(buffer); - compress(state, buffer); - schedule_1(buffer); - compress(state, buffer); - return output(state); -} - } // namespace sha } // namespace system } // namespace libbitcoin diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_stream.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_stream.ipp index cc57b5e302..d4a821cada 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_stream.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_stream.ipp @@ -40,10 +40,7 @@ TEMPLATE constexpr void CLASS:: accumulate(state_t& state, const block_t& block) NOEXCEPT { - buffer_t buffer{}; - input(buffer, block); - schedule(buffer); - compress(state, buffer); + iterate(state, ablocks_t{ block }); } TEMPLATE @@ -54,14 +51,56 @@ normalize(const state_t& state) NOEXCEPT } TEMPLATE +template constexpr typename CLASS::digest_t CLASS:: -finalize(state_t& state, size_t blocks) NOEXCEPT +finalize(state_t& state) NOEXCEPT { - buffer_t buffer{}; - schedule_n(buffer, blocks); - compress(state, buffer); + const auto finalize1 = [](state_t& state) NOEXCEPT + { + buffer_t buffer{}; + schedule_n(buffer); + compress(state, buffer); + return output(state); + }; + + if (std::is_constant_evaluated()) + { + return finalize1(state); + } + else if constexpr (native && SHA::strength == 256) + { + return native_finalize(state); + } + else + { + return finalize1(state); + } +} - return output(state); +TEMPLATE +constexpr typename CLASS::digest_t CLASS:: +finalize(state_t& state, size_t blocks) NOEXCEPT +{ + const auto finalize1 = [](state_t& state, size_t blocks) NOEXCEPT + { + buffer_t buffer{}; + schedule_n(buffer, blocks); + compress(state, buffer); + return output(state); + }; + + if (std::is_constant_evaluated()) + { + return finalize1(state, blocks); + } + else if constexpr (native && SHA::strength == 256) + { + return native_finalize(state, blocks); + } + else + { + return finalize1(state, blocks); + } } TEMPLATE @@ -71,34 +110,65 @@ finalize_second(const state_t& state) NOEXCEPT // No hash(state_t) optimizations for sha160 (requires chunk_t/half_t). static_assert(is_same_type); - auto state2 = H::get; - - buffer_t buffer{}; - reinput(buffer, state); - pad_half(buffer); - schedule(buffer); - compress(state2, buffer); - - return output(state2); + // This hashes a hash result (state) without the endianness conversion. + const auto finalize2 = [](const state_t& state) NOEXCEPT + { + auto state2 = H::get; + buffer_t buffer{}; + reinput_left(buffer, state); + pad_half(buffer); + schedule(buffer); + compress(state2, buffer); + return output(state2); + }; + + if (std::is_constant_evaluated()) + { + return finalize2(state); + } + else if constexpr (native && SHA::strength == 256) + { + return native_finalize_second(state); + } + else + { + return finalize2(state); + } } TEMPLATE constexpr typename CLASS::digest_t CLASS:: finalize_double(state_t& state, size_t blocks) NOEXCEPT { - // The state out parameter is updated for first hash. - buffer_t buffer{}; - schedule_n(buffer, blocks); - compress(state, buffer); - - // Second hash - reinput(buffer, state); - pad_half(buffer); - schedule(buffer); - auto state2 = H::get; - compress(state2, buffer); - - return output(state2); + // Pad a hash state from a number of blocks. + const auto finalize2 = [](state_t& state, size_t blocks) NOEXCEPT + { + buffer_t buffer{}; + schedule_n(buffer, blocks); + compress(state, buffer); + + // This is finalize_second() but reuses the initial buffer. + auto state2 = H::get; + reinput_left(buffer, state); + pad_half(buffer); + schedule(buffer); + compress(state2, buffer); + + return output(state2); + }; + + if (std::is_constant_evaluated()) + { + return finalize2(state, blocks); + } + else if constexpr (native && SHA::strength == 256) + { + return native_finalize_double(state, blocks); + } + else + { + return finalize2(state, blocks); + } } } // namespace sha diff --git a/test/hash/performance/performance.cpp b/test/hash/performance/performance.cpp index ba5e7c891d..bc04927ced 100644 --- a/test/hash/performance/performance.cpp +++ b/test/hash/performance/performance.cpp @@ -77,45 +77,86 @@ struct mr ////static constexpr size_t s = 32; }; -BOOST_AUTO_TEST_CASE(performance__sha256a_none__merkle) -{ - auto complete = true; - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - BOOST_CHECK(complete); -} - -BOOST_AUTO_TEST_CASE(performance__sha256a_vect__merkle) -{ - auto complete = true; - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - complete &= test_merkle(std::cout); - BOOST_CHECK(complete); -} +////BOOST_AUTO_TEST_CASE(performance__sha256a_none__merkle) +////{ +//// auto complete = true; +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// BOOST_CHECK(complete); +////} +//// +////BOOST_AUTO_TEST_CASE(performance__sha256a_vect__merkle) +////{ +//// auto complete = true; +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// BOOST_CHECK(complete); +////} +//// +////BOOST_AUTO_TEST_CASE(performance__sha256a_comp__merkle) +////{ +//// auto complete = true; +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// BOOST_CHECK(complete); +////} + +//// +////BOOST_AUTO_TEST_CASE(performance__sha256a_both__merkle) +////{ +//// auto complete = true; +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// complete &= test_merkle(std::cout); +//// BOOST_CHECK(complete); +////} // !using shax (see performahce.hpp) @@ -129,7 +170,7 @@ BOOST_AUTO_TEST_CASE(performance__sha256a_vect__merkle) //// complete &= base::test_hash(std::cout); //// BOOST_CHECK(complete); ////} -//// + ////BOOST_AUTO_TEST_CASE(performance__sha256a_none) ////{ //// auto complete = true; @@ -151,6 +192,28 @@ BOOST_AUTO_TEST_CASE(performance__sha256a_vect__merkle) //// complete &= test_accumulator(std::cout); //// BOOST_CHECK(complete); ////} +//// +////BOOST_AUTO_TEST_CASE(performance__sha256a_comp) +////{ +//// auto complete = true; +//// complete &= test_accumulator(std::cout); +//// complete &= test_accumulator(std::cout); +//// complete &= test_accumulator(std::cout); +//// complete &= test_accumulator(std::cout); +//// complete &= test_accumulator(std::cout); +//// BOOST_CHECK(complete); +////} +//// +////BOOST_AUTO_TEST_CASE(performance__sha256a_both) +////{ +//// auto complete = true; +//// complete &= test_accumulator(std::cout); +//// complete &= test_accumulator(std::cout); +//// complete &= test_accumulator(std::cout); +//// complete &= test_accumulator(std::cout); +//// complete &= test_accumulator(std::cout); +//// BOOST_CHECK(complete); +////} ////BOOST_AUTO_TEST_CASE(performance__rmd160__baseline) ////{