diff --git a/doc/design.md b/doc/design.md
index cb11a38c..19d034f5 100644
--- a/doc/design.md
+++ b/doc/design.md
@@ -157,7 +157,11 @@ The IADD_RS instruction utilizes the address calculation logic of CPUs and can b
Because integer division is not fully pipelined in CPUs and can be made faster in ASICs, the IMUL_RCP instruction requires only one division per program to calculate the reciprocal. This forces an ASIC to include a hardware divider without giving them a performance advantage during program execution.
-#### 2.4.3 ISWAP_R
+#### 2.4.3 IROR_R/IROL_R
+
+Rotation instructions are split between rotate right and rotate left with a 4:1 ratio. Rotate right has a higher frequency because some architectures (like ARM) don't support rotate left natively (it must be emulated using rotate right).
+
+#### 2.4.4 ISWAP_R
This instruction can be executed efficiently by CPUs that support register renaming/move elimination.
diff --git a/doc/specs.md b/doc/specs.md
index 6f9ef39c..fdb55e41 100644
--- a/doc/specs.md
+++ b/doc/specs.md
@@ -567,8 +567,8 @@ For integer instructions, the destination is always an integer register (registe
|2/256|INEG_R|R|-|-|`dst = -dst`|
|15/256|IXOR_R|R|R|`src = imm32`|`dst = dst ^ src`|
|5/256|IXOR_M|R|R|`src = 0`|`dst = dst ^ [mem]`|
-|10/256|IROR_R|R|R|`src = imm32`|`dst = dst >>> src`|
-|0/256|IROL_R|R|R|`src = imm32`|`dst = dst <<< src`|
+|8/256|IROR_R|R|R|`src = imm32`|`dst = dst >>> src`|
+|2/256|IROL_R|R|R|`src = imm32`|`dst = dst <<< src`|
|4/256|ISWAP_R|R|R|`src = dst`|`temp = src; src = dst; dst = temp`|
#### 5.2.1 IADD_RS
@@ -616,13 +616,13 @@ All floating point operations are rounded according to the current value of the
|frequency|instruction|dst|src|operation|
|-|-|-|-|-|
-|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
-|20/256|FADD_R|F|A|`(dst0, dst1) = (dst0 + src0, dst1 + src1)`|
+|4/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
+|16/256|FADD_R|F|A|`(dst0, dst1) = (dst0 + src0, dst1 + src1)`|
|5/256|FADD_M|F|R|`(dst0, dst1) = (dst0 + [mem][0], dst1 + [mem][1])`|
-|20/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`|
+|16/256|FSUB_R|F|A|`(dst0, dst1) = (dst0 - src0, dst1 - src1)`|
|5/256|FSUB_M|F|R|`(dst0, dst1) = (dst0 - [mem][0], dst1 - [mem][1])`|
|6/256|FSCAL_R|F|-|<code>(dst0, dst1) = (-2<sup>x0</sup> * dst0, -2<sup>x1</sup> * dst1)</code>|
-|20/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`|
+|32/256|FMUL_R|E|A|`(dst0, dst1) = (dst0 * src0, dst1 * src1)`|
|4/256|FDIV_M|E|R|`(dst0, dst1) = (dst0 / [mem][0], dst1 / [mem][1])`|
|6/256|FSQRT_R|E|-|`(dst0, dst1) = (√dst0, √dst1)`|
diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp
index 2d4f31ea..780b37b0 100644
--- a/src/tests/benchmark.cpp
+++ b/src/tests/benchmark.cpp
@@ -38,6 +38,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "utility.hpp"
#include "../randomx.h"
#include "../blake2/endian.h"
+#include "../common.hpp"
+#ifdef _WIN32
+#include
+#include
+#endif
const uint8_t blockTemplate_[] = {
0x07, 0x07, 0xf7, 0xa4, 0xf0, 0xd6, 0x05, 0xb3, 0x03, 0x26, 0x08, 0x16, 0xba, 0x3f, 0x10, 0x90, 0x2e, 0x1a, 0x14,
@@ -84,6 +89,19 @@ void printUsage(const char* executable) {
std::cout << " --seed S seed for cache initialization (default: 0)" << std::endl;
}
+struct MemoryException : public std::exception { // Common base for allocation failures; main() catches this type separately to print a large-pages hint.
+};
+struct CacheAllocException : public MemoryException { // Thrown by main() when randomx_alloc_cache() returns nullptr.
+ const char * what() const throw () { // Overrides std::exception::what(); the empty throw() spec declares it non-throwing.
+ return "Cache allocation failed";
+ }
+};
+struct DatasetAllocException : public MemoryException { // Thrown by main() when randomx_alloc_dataset() returns nullptr.
+ const char * what() const throw () { // Overrides std::exception::what(); the empty throw() spec declares it non-throwing.
+ return "Dataset allocation failed";
+ }
+};
+
void mine(randomx_vm* vm, std::atomic& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread) {
uint64_t hash[RANDOMX_HASH_SIZE / sizeof(uint64_t)];
uint8_t blockTemplate[sizeof(blockTemplate_)];
@@ -118,7 +136,7 @@ int main(int argc, char** argv) {
store32(&seed, seedValue);
- std::cout << "RandomX benchmark" << std::endl;
+ std::cout << "RandomX benchmark v1.0.4" << std::endl;
if (help || (!miningMode && !verificationMode)) {
printUsage(argv[0]);
@@ -171,19 +189,20 @@ int main(int argc, char** argv) {
std::cout << " ..." << std::endl;
try {
+ if (jit && !RANDOMX_HAVE_COMPILER) {
+ throw std::runtime_error("JIT compilation is not supported on this platform");
+ }
+
Stopwatch sw(true);
cache = randomx_alloc_cache(flags);
if (cache == nullptr) {
- if (jit) {
- throw std::runtime_error("JIT compilation is not supported or cache allocation failed");
- }
- throw std::runtime_error("Cache allocation failed");
+ throw CacheAllocException();
}
randomx_init_cache(cache, &seed, sizeof(seed));
if (miningMode) {
dataset = randomx_alloc_dataset(flags);
if (dataset == nullptr) {
- throw std::runtime_error("Dataset allocation failed");
+ throw DatasetAllocException();
}
uint32_t datasetItemCount = randomx_dataset_item_count();
if (initThreadCount > 1) {
@@ -241,7 +260,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0)
- std::cout << "Reference result: a925d346195ef38048e714709e0b24a88fef565fa02fa97127e00fac08ee6eb8" << std::endl;
+ std::cout << "Reference result: 38d47ea494480bff8d621189e8e92747288bb1da6c75dc401f2ab4b6807b6010" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
}
@@ -249,6 +268,20 @@ int main(int argc, char** argv) {
std::cout << "Performance: " << noncesCount / elapsed << " hashes per second" << std::endl;
}
}
+ catch (MemoryException& e) {
+ std::cout << "ERROR: " << e.what() << std::endl;
+ if (largePages) {
+#ifdef _WIN32
+ std::cout << "To use large pages, please enable the \"Lock Pages in Memory\" policy and reboot." << std::endl;
+ if (!IsWindows8OrGreater()) {
+ std::cout << "Additionally, you have to run the benchmark from elevated command prompt." << std::endl;
+ }
+#else
+ std::cout << "To use large pages, please run: sudo sysctl -w vm.nr_hugepages=1250" << std::endl;
+#endif
+ }
+ return 1;
+ }
catch (std::exception& e) {
std::cout << "ERROR: " << e.what() << std::endl;
return 1;
diff --git a/src/tests/tests.cpp b/src/tests/tests.cpp
index 6584217a..b3ac8145 100644
--- a/src/tests/tests.cpp
+++ b/src/tests/tests.cpp
@@ -14,6 +14,7 @@
#include "../reciprocal.h"
#include "../intrin_portable.h"
#include "../jit_compiler.hpp"
+#include "../aes_hash.hpp"
struct CacheKey {
void* key;
@@ -146,6 +147,13 @@ int main() {
assert(datasetItem[0] == 0x145a5091f7853099);
});
+ runTest("AesGenerator1R", true, []() {
+ char state[64] = { 0 };
+ hex2bin("6c19536eb2de31b6c0065f7f116e86f960d8af0c57210a6584c3237b9d064dc7", 64, state);
+ fillAes1Rx4(state, sizeof(state), state);
+ assert(equalsHex(state, "fa89397dd6ca422513aeadba3f124b5540324c4ad4b6db434394307a17c833ab"));
+ });
+
runTest("randomx_reciprocal", true, []() {
assert(randomx_reciprocal(3) == 12297829382473034410U);
assert(randomx_reciprocal(13) == 11351842506898185609U);
@@ -959,35 +967,34 @@ int main() {
auto test_a = [&] {
char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "This is a test", &hash);
- assert(equalsHex(hash, "207d7cedf2a16590bd33d758e413ad129ce9888e05417984f46296252a7ba3d0"));
+ assert(equalsHex(hash, "b33f8d10a8655d6f1925e3754adeb0a6da4c2f48a81cd4c220a412f1ef016a15"));
};
auto test_b = [&] {
char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "Lorem ipsum dolor sit amet", &hash);
- assert(equalsHex(hash, "76dd2da840d56d38153e0beaca33e7f862c5ead91a052380d99f3a62bf84579b"));
+ assert(equalsHex(hash, "62ac336786ad3a7aff990beb2f643bd748d81dba585a52149d0baebdea0e9823"));
};
auto test_c = [&] {
char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 000", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash);
- assert(equalsHex(hash, "109f6a405efe09d302336dce4389127e33aa62d4c782aca7797a628e87839a61"));
+ assert(equalsHex(hash, "6c550ebe765f7b784d2c183552fbb6048b58f17a3f115baf2b968724eb2f7a23"));
};
auto test_d = [&] {
char hash[RANDOMX_HASH_SIZE];
calcStringHash("test key 001", "sed do eiusmod tempor incididunt ut labore et dolore magna aliqua", &hash);
- assert(equalsHex(hash, "3cbb82edf9541ab80233cdc47384cea719c8567a8bbaca8f3ff038488ce9c16c"));
+ assert(equalsHex(hash, "cb602b9c498b67e31e519fbdc07e288de46f949b14ad620380df6250eaffbd4e"));
};
auto test_e = [&] {
char hash[RANDOMX_HASH_SIZE];
calcHexHash("test key 001", "0b0b98bea7e805e0010a2126d287a2a0cc833d312cb786385a7c2f9de69d25537f584a9bc9977b00000000666fd8753bf61a8631f12984e3fd44f4014eca629276817b56f32e9b68bd82f416", &hash);
-
+ //std::cout << std::endl;
//outputHex(std::cout, (const char*)hash, sizeof(hash));
//std::cout << std::endl;
-
- assert(equalsHex(hash, "e003ef128b1f96d99d4a0490e03253ef11186002a8ec018cbd4e07b8ec8c82e8"));
+ assert(equalsHex(hash, "f60caf300917760337e8ce51487484e6a33d4aaa15aa79c985efb4ea00390918"));
};
runTest("Hash test 1a (interpreter)", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), test_a);
diff --git a/src/tests/utility.hpp b/src/tests/utility.hpp
index d294f674..2b3c4b1a 100644
--- a/src/tests/utility.hpp
+++ b/src/tests/utility.hpp
@@ -52,7 +52,7 @@ char parseNibble(char hex) {
return hex;
}
-void hex2bin(char *in, int length, char *out) {
+void hex2bin(const char *in, int length, char *out) {
for (int i = 0; i < length; i += 2) {
char nibble1 = parseNibble(*in++);
char nibble2 = parseNibble(*in++);
@@ -67,7 +67,7 @@ constexpr bool stringsEqual(char const * a, char const * b) {
template // NOTE(review): the template parameter list declaring N appears to be missing from this excerpt — confirm against the real file.
bool equalsHex(const void* hash, const char (&hex)[N]) { // Returns true when the bytes at `hash` match the binary decoding of the hex string `hex`.
char reference[N / 2]; // Buffer for the decoded bytes (two hex digits per byte).
- hex2bin((char*)hex, N - 1, reference);
+ hex2bin(hex, N - 1, reference); // Decodes N - 1 hex digits (presumably N counts the literal's terminating NUL — confirm); no cast needed now that hex2bin takes const char*.
return memcmp(hash, reference, sizeof(reference)) == 0; // Only the first N / 2 bytes of `hash` are compared.
}