diff --git a/doc/design.md b/doc/design.md index cb11a38c..19d034f5 100644 --- a/doc/design.md +++ b/doc/design.md @@ -157,7 +157,11 @@ The IADD_RS instruction utilizes the address calculation logic of CPUs and can b Because integer division is not fully pipelined in CPUs and can be made faster in ASICs, the IMUL_RCP instruction requires only one division per program to calculate the reciprocal. This forces an ASIC to include a hardware divider without giving them a performance advantage during program execution. -#### 2.4.3 ISWAP_R +#### 2.4.3 IROR_R/IROL_R + +Rotation instructions are split between rotate right and rotate left with a 4:1 ratio. Rotate right has a higher frequency because some architectures (like ARM) don't support rotate left natively (it must be emulated using rotate right). + +#### 2.4.4 ISWAP_R This instruction can be executed efficiently by CPUs that support register renaming/move elimination. diff --git a/doc/specs.md b/doc/specs.md index 6f9ef39c..fdb55e41 100644 --- a/doc/specs.md +++ b/doc/specs.md @@ -567,8 +567,8 @@ For integer instructions, the destination is always an integer register (registe |2/256|INEG_R|R|-|-|`dst = -dst`| |15/256|IXOR_R|R|R|`src = imm32`|`dst = dst ^ src`| |5/256|IXOR_M|R|R|`src = 0`|`dst = dst ^ [mem]`| -|10/256|IROR_R|R|R|`src = imm32`|`dst = dst >>> src`| -|0/256|IROL_R|R|R|`src = imm32`|`dst = dst <<< src`| +|8/256|IROR_R|R|R|`src = imm32`|`dst = dst >>> src`| +|2/256|IROL_R|R|R|`src = imm32`|`dst = dst <<< src`| |4/256|ISWAP_R|R|R|`src = dst`|`temp = src; src = dst; dst = temp`| #### 5.2.1 IADD_RS @@ -616,13 +616,13 @@ All floating point operations are rounded according to the current value of the |frequency|instruction|dst|src|operation| |-|-|-|-|-| -|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`| -|20/256|FADD_R|F|A|`(dst0, dst1) = (dst0 + src0, dst1 + src1)`| +|4/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`| +|16/256|FADD_R|F|A|`(dst0, dst1) = (dst0 + src0, dst1 + src1)`| |5/256|FADD_M|F|R|`(dst0, 
dst1) = (dst0 + [mem][0], dst1 + [mem][1])`| -|20/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`| +|16/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`| |5/256|FSUB_M|F|R|`(dst0, dst1) = (dst0 - [mem][0], dst1 - [mem][1])`| |6/256|FSCAL_R|F|-|(dst0, dst1) = (-2x0 * dst0, -2x1 * dst1)| -|20/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`| +|32/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`| |4/256|FDIV_M|E|R|`(dst0, dst1) = (dst0 / [mem][0], dst1 / [mem][1])`| |6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`| diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index 2d4f31ea..780b37b0 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -38,6 +38,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "utility.hpp" #include "../randomx.h" #include "../blake2/endian.h" +#include "../common.hpp" +#ifdef _WIN32 +#include +#include +#endif const uint8_t blockTemplate_[] = { 0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14, @@ -84,6 +89,19 @@ void printUsage(const char* executable) { std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl; } +struct MemoryException : public std::exception { +}; +struct CacheAllocException : public MemoryException { + const char * what() const throw () { + return "Cache allocation failed"; + } +}; +struct DatasetAllocException : public MemoryException { + const char * what() const throw () { + return "Dataset allocation failed"; + } +}; + void mine(randomx_vm* vm, std::atomic& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread) { uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)]; uint8_t blockTemplate[sizeof(blockTemplate_)]; @@ -118,7 +136,7 @@ int main(int argc, char** argv) { store32(&seed, seedValue); - std::cout << "RandomX benchmark" << std::endl; + std::cout << "RandomX benchmark v1.0.4" << std::endl; if (help || (!miningMode 
&& !verificationMode)) { printUsage(argv[0]); @@ -171,19 +189,20 @@ int main(int argc, char** argv) { std::cout << " ..." << std::endl; try { + if (jit && !RANDOMX_HAVE_COMPILER) { + throw std::runtime_error("JIT compilation is not supported on this platform"); + } + Stopwatch sw(true); cache = randomx_alloc_cache(flags); if (cache == nullptr) { - if (jit) { - throw std::runtime_error("JIT compilation is not supported or cache allocation failed"); - } - throw std::runtime_error("Cache allocation failed"); + throw CacheAllocException(); } randomx_init_cache(cache, &seed, sizeof(seed)); if (miningMode) { dataset = randomx_alloc_dataset(flags); if (dataset == nullptr) { - throw std::runtime_error("Dataset allocation failed"); + throw DatasetAllocException(); } uint32_t datasetItemCount = randomx_dataset_item_count(); if (initThreadCount > 1) { @@ -241,7 +260,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if (noncesCount == 1000 && seedValue == 0) - std::cout << "Reference result: a925d346195ef38048e714709e0b24a88fef565fa02fa97127e00fac08ee6eb8" << std::endl; + std::cout << "Reference result: 38d47ea494480bff8d621189e8e92747288bb1da6c75dc401f2ab4b6807b6010" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; } @@ -249,6 +268,20 @@ int main(int argc, char** argv) { std::cout << "Performance: " << noncesCount / elapsed << " hashes per second" << std::endl; } } + catch (MemoryException& e) { + std::cout << "ERROR: " << e.what() << std::endl; + if (largePages) { +#ifdef _WIN32 + std::cout << "To use large pages, please enable the \"Lock Pages in Memory\" policy and reboot." << std::endl; + if (!IsWindows8OrGreater()) { + std::cout << "Additionally, you have to run the benchmark from elevated command prompt." 
<< std::endl; + } +#else + std::cout << "To use large pages, please run: sudo sysctl -w vm.nr_hugepages=1250" << std::endl; +#endif + } + return 1; + } catch (std::exception& e) { std::cout << "ERROR: " << e.what() << std::endl; return 1; diff --git a/src/tests/tests.cpp b/src/tests/tests.cpp index 6584217a..b3ac8145 100644 --- a/src/tests/tests.cpp +++ b/src/tests/tests.cpp @@ -14,6 +14,7 @@ #include "../reciprocal.h" #include "../intrin_portable.h" #include "../jit_compiler.hpp" +#include "../aes_hash.hpp" struct CacheKey { void* key; @@ -146,6 +147,13 @@ int main() { assert(datasetItem[0] == 0x145a5091f7853099); }); + runTest("AesGenerator1R", true, []() { + char state[64] = { 0 }; + hex2bin("6c19536eb2de31b6c0065f7f116e86f960d8af0c57210a6584c3237b9d064dc7", 64, state); + fillAes1Rx4(state, sizeof(state), state); + assert(equalsHex(state, "fa89397dd6ca422513aeadba3f124b5540324c4ad4b6db434394307a17c833ab")); + }); + runTest("randomx_reciprocal", true, []() { assert(randomx_reciprocal(3) == 12297829382473034410U); assert(randomx_reciprocal(13) == 11351842506898185609U); @@ -959,35 +967,34 @@ int main() { auto test_a = [&] { char hash[RANDOMX_HASH_SIZE]; calcStringHash("test key 000", "This is a test", &hash); - assert(equalsHex(hash, "207d7cedf2a16590bd33d758e413ad129ce9888e05417984f46296252a7ba3d0")); + assert(equalsHex(hash, "b33f8d10a8655d6f1925e3754adeb0a6da4c2f48a81cd4c220a412f1ef016a15")); }; auto test_b = [&] { char hash[RANDOMX_HASH_SIZE]; calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash); - assert(equalsHex(hash, "76dd2da840d56d38153e0beaca33e7f862c5ead91a052380d99f3a62bf84579b")); + assert(equalsHex(hash, "62ac336786ad3a7aff990beb2f643bd748d81dba585a52149d0baebdea0e9823")); }; auto test_c = [&] { char hash[RANDOMX_HASH_SIZE]; calcStringHash("test key 000", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash); - assert(equalsHex(hash, "109f6a405efe09d302336dce4389127e33aa62d4c782aca7797a628e87839a61")); + 
assert(equalsHex(hash, "6c550ebe765f7b784d2c183552fbb6048b58f17a3f115baf2b968724eb2f7a23")); }; auto test_d = [&] { char hash[RANDOMX_HASH_SIZE]; calcStringHash("test key 001", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash); - assert(equalsHex(hash, "3cbb82edf9541ab80233cdc47384cea719c8567a8bbaca8f3ff038488ce9c16c")); + assert(equalsHex(hash, "cb602b9c498b67e31e519fbdc07e288de46f949b14ad620380df6250eaffbd4e")); }; auto test_e = [&] { char hash[RANDOMX_HASH_SIZE]; calcHexHash("test key 001", "0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416", &hash); - + //std::cout << std::endl; //outputHex(std::cout, (const char*)hash, sizeof(hash)); //std::cout << std::endl; - - assert(equalsHex(hash, "e003ef128b1f96d99d4a0490e03253ef11186002a8ec018cbd4e07b8ec8c82e8")); + assert(equalsHex(hash, "f60caf300917760337e8ce51487484e6a33d4aaa15aa79c985efb4ea00390918")); }; runTest("Hash test 1a (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_a); diff --git a/src/tests/utility.hpp b/src/tests/utility.hpp index d294f674..2b3c4b1a 100644 --- a/src/tests/utility.hpp +++ b/src/tests/utility.hpp @@ -52,7 +52,7 @@ char parseNibble(char hex) { return hex; } -void hex2bin(char *in, int length, char *out) { +void hex2bin(const char *in, int length, char *out) { for (int i = 0; i < length; i += 2) { char nibble1 = parseNibble(*in++); char nibble2 = parseNibble(*in++); @@ -67,7 +67,7 @@ constexpr bool stringsEqual(char const * a, char const * b) { template bool equalsHex(const void* hash, const char (&hex)[N]) { char reference[N / 2]; - hex2bin((char*)hex, N - 1, reference); + hex2bin(hex, N - 1, reference); return memcmp(hash, reference, sizeof(reference)) == 0; }