From f7c99c5b580987a2d4793bac3d073702b7cb81e9 Mon Sep 17 00:00:00 2001 From: tevador <37503146+tevador@users.noreply.github.com> Date: Sat, 1 Jun 2019 11:13:30 +0200 Subject: [PATCH] Use 4 AES rounds for program generation (#46) --- doc/specs.md | 109 ++++++++++++++++++++++++++-------------- src/aes_hash.cpp | 89 ++++++++++++++++++++++++++++---- src/aes_hash.hpp | 3 ++ src/tests/benchmark.cpp | 2 +- src/virtual_machine.cpp | 2 +- 5 files changed, 156 insertions(+), 49 deletions(-) diff --git a/doc/specs.md b/doc/specs.md index ed1d1364..7872cd89 100644 --- a/doc/specs.md +++ b/doc/specs.md @@ -23,9 +23,11 @@ RandomX is a proof of work (PoW) algorithm which was designed to close the gap b **Argon2d** is a tradeoff-resistant variant of [Argon2](https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf), a memory-hard password derivation function. -**AesGenerator** refers to an AES-based pseudo-random number generator described in chapter 3.2. It's initialized with a 512-bit seed value and is capable of producing more than 10 bytes per clock cycle. +**AesGenerator1R** refers to an AES-based pseudo-random number generator described in chapter 3.2. It's initialized with a 512-bit seed value and is capable of producing more than 10 bytes per clock cycle. -**AesHash** refers to an AES-based fingerprinting function described in chapter 3.3. It's capable of processing more than 10 bytes per clock cycle and produces a 512-bit output. +**AesGenerator4R** is a slower but more secure AES-based pseudo-random number generator described in chapter 3.3. It's initialized with a 512-bit seed value. + +**AesHash1R** refers to an AES-based fingerprinting function described in chapter 3.4. It's capable of processing more than 10 bytes per clock cycle and produces a 512-bit output. **BlakeGenerator** refers to a custom pseudo-random number generator described in chapter 3.4. It's based on the Blake2b hashing function. 
@@ -88,15 +90,16 @@ The algorithm consists of the following steps: 1. The Dataset is initialized using the key value `K` (see chapter 7 for details). 1. 64-byte seed `S` is calculated as `S = Hash512(H)`. -1. AesGenerator is initialized with state `S`. -1. The Scratchpad is filled with `RANDOMX_SCRATCHPAD_L3` random bytes obtained from the AesGenerator. +1. Let `gen1 = AesGenerator1R(S)`. +1. The Scratchpad is filled with `RANDOMX_SCRATCHPAD_L3` random bytes using generator `gen1`. +1. Let `gen4 = AesGenerator4R(gen1.state)` (use the final state of `gen1`). 1. The value of the VM register `fprc` is set to 0 (default rounding mode - see chapter 4.3). -1. The VM is programmed using `128 + 8 * RANDOMX_PROGRAM_SIZE` random bytes from the AesGenerator (see chapter 4.5). +1. The VM is programmed using `128 + 8 * RANDOMX_PROGRAM_SIZE` random bytes using generator `gen4` (see chapter 4.5). 1. The VM is executed (see chapter 4.6). 1. New 64-byte seed is calculated as `S = Hash512(RegisterFile)`. -1. AesGenerator is reinitialized with seed `S`. -1. Steps 6-9 are performed a total of `RANDOMX_PROGRAM_COUNT` times. The last iteration skips steps 8 and 9. -1. Scratchpad fingerprint is calculated as `A = AesHash(Scratchpad)`. +1. Set `gen4.state = S` (modify the state of the generator). +1. Steps 7-10 are performed a total of `RANDOMX_PROGRAM_COUNT` times. The last iteration skips steps 9 and 10. +1. Scratchpad fingerprint is calculated as `A = AesHash1R(Scratchpad)`. 1. The binary values of the VM registers `a0`-`a3` (4×16 bytes) are set to the value of `A`. 1. Result is calculated as `R = Hash256(RegisterFile)`. @@ -110,24 +113,21 @@ Two of the custom functions are based on the [Advanced Encryption Standard](http **AES decryption round** refers to the application of inverse ShiftRows, inverse SubBytes and inverse MixColumns transformations followed by a XOR with the round key. 
-### 3.2 AesGenerator +### 3.2 AesGenerator1R -AesGenerator produces a sequence of pseudo-random bytes. +AesGenerator1R produces a sequence of pseudo-random bytes. -The internal state of AesGenerator consists of 64 bytes arranged into four columns of 16 bytes each. During each output iteration, every column is decrypted (columns 0, 2) or encrypted (columns 1, 3) with one AES round using the following round keys (one key per column): +The internal state of the generator consists of 64 bytes arranged into four columns of 16 bytes each. During each output iteration, every column is decrypted (columns 0, 2) or encrypted (columns 1, 3) with one AES round using the following round keys (one key per column): ``` -key0 = 2d ec ee 84 d5 f6 4f 45 32 91 32 ca e3 a2 20 df -key1 = d0 63 7b 01 78 c5 0f f1 7f 38 d0 fe 71 59 eb 1d -key2 = 52 7a 7d 32 a1 70 2c 2f b4 ce 17 a5 b3 26 c9 df -key3 = d3 77 8d 5c 5e da 17 3d a9 e0 ec a0 1c f3 1c 34 +key0 = 53 a5 ac 6d 09 66 71 62 2b 55 b5 db 17 49 f4 b4 +key1 = 07 af 7c 6d 0d 71 6a 84 78 d3 25 17 4e dc a1 0d +key2 = f1 62 12 3f c6 7e 94 9f 4f 79 c0 f4 45 e3 20 3e +key3 = 35 81 ef 6a 7c 31 ba b1 88 4c 31 16 54 91 16 49 ``` -These keys were generated by calculating Blake2b hash with 256-bit output of these ASCII strings (first 128 bits of the hash are used): +These keys were generated as: ``` -"RandomX Generator key0" -"RandomX Generator key1" -"RandomX Generator key2" -"RandomX Generator key3" +key0, key1, key2, key3 = Hash512("RandomX AesGenerator1R keys") ``` @@ -142,25 +142,61 @@ state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B) state0' state1' state2' state3' ``` -### 3.3 AesHash +### 3.3 AesGenerator4R + +AesGenerator4R works the same way as AesGenerator1R, except that each column goes through 4 AES rounds per output iteration and all four columns share the same round key in each round (`key0` in the first round through `key3` in the last): + +``` +state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B) + | | | | + AES decrypt AES encrypt AES decrypt AES encrypt + (key0) (key0) (key0) (key0) + | | | | + v v v v + AES decrypt AES encrypt AES decrypt AES encrypt + (key1) (key1) 
(key1) (key1) + | | | | + v v v v + AES decrypt AES encrypt AES decrypt AES encrypt + (key2) (key2) (key2) (key2) + | | | | + v v v v + AES decrypt AES encrypt AES decrypt AES encrypt + (key3) (key3) (key3) (key3) + | | | | + v v v v + state0' state1' state2' state3' +``` + +AesGenerator4R uses the following 4 round keys: + +``` +key0 = 5d 46 90 f8 a6 e4 fb 7f b7 82 1f 14 95 9e 35 cf +key1 = 50 c4 55 6a 8a 27 e8 fe c3 5a 5c bd dc ff 41 67 +key2 = a4 47 4c 11 e4 fd 24 d5 d2 9a 27 a7 ac 4a 32 3d +key3 = 2a 3a 0c 81 ff ae a9 99 d9 db d3 42 08 db f6 76 +``` +These keys were generated as: +``` +key0, key1, key2, key3 = Hash512("RandomX AesGenerator4R keys") +``` + +### 3.4 AesHash1R -AesHash calculates a 512-bit fingerprint of its input. +AesHash1R calculates a 512-bit fingerprint of its input. -AesHash has a 64-byte internal state, which is arranged into four columns of 16 bytes each. The initial state is: +AesHash1R has a 64-byte internal state, which is arranged into four columns of 16 bytes each. The initial state is: ``` -state0 = 00 8e 77 c4 ab f5 7a 88 67 d1 46 11 fd 26 31 8d -state1 = 4b ef 34 b8 89 af 95 1b 2b 63 da 58 a1 9f fe 19 -state2 = 3a dd 42 77 00 3a 28 ab 44 d7 5a c3 74 cd b2 1b -state3 = 9a 44 8b e1 cc 97 5d dc 57 3c 59 49 8a a5 30 bb +state0 = 0d 2c b5 92 de 56 a8 9f 47 db 82 cc ad 3a 98 d7 +state1 = 6e 99 8d 33 98 b7 c7 15 5a 12 9e f5 57 80 e7 ac +state2 = 17 00 77 6a d0 c7 62 ae 6b 50 79 50 e4 7c a0 e8 +state3 = 0c 24 0a 63 8d 82 ad 07 05 00 a1 79 48 49 99 7e ``` -The initial state vectors were generated by calculating Blake2b hash with 256-bit output of these ASCII strings (first 128 bits of the hash are used): +The initial state vectors were generated as: ``` -"RandomX Finalizer state0" -"RandomX Finalizer state1" -"RandomX Finalizer state2" -"RandomX Finalizer state3" +state0, state1, state2, state3 = Hash512("RandomX AesHash1R state") ``` The input is processed in 64-byte blocks. 
Each input block is considered to be a set of four AES round keys `key0`, `key1`, `key2`, `key3`. Each state column is encrypted (columns 0, 2) or decrypted (columns 1, 3) with one AES round using the corresponding round key: @@ -178,14 +214,13 @@ state0 (16 B) state1 (16 B) state2 (16 B) state3 (16 B) When all input bytes have been processed, the state is processed with two additional AES rounds with the following extra keys (one key per round, same pair of keys for all columns): ``` -xkey0 = 47 f2 cb 11 9c 92 5a 2a 3d 59 c5 e4 83 12 95 83 -xkey1 = 95 6c 81 ce 0b ef 7b 47 23 25 bc ab b2 5b 21 ff +xkey0 = 89 83 fa f6 9f 94 24 8b bf 56 dc 90 01 02 89 06 +xkey1 = d1 63 b2 61 3c e0 f4 51 c6 43 10 ee 9b f9 18 ed ``` -The extra keys were generated by calculating Blake2b hash with 256-bit output of these ASCII strings (first 128 bits of the hash are used): +The extra keys were generated as: ``` -"RandomX Finalizer xkey0" -"RandomX Finalizer xkey1" +xkey0, xkey1 = Hash256("RandomX AesHash1R xkeys") ``` ``` diff --git a/src/aes_hash.cpp b/src/aes_hash.cpp index ff1af9a2..c1239aac 100644 --- a/src/aes_hash.cpp +++ b/src/aes_hash.cpp @@ -28,6 +28,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "soft_aes.h" +#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d +#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e +#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017 +#define AES_HASH_1R_STATE3 0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c + +#define AES_HASH_1R_XKEY0 0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389 +#define AES_HASH_1R_XKEY1 0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1 + /* Calculate a 512-bit hash of 'input' using 4 lanes of AES. 
The input is treated as a set of round keys for the encryption @@ -49,10 +57,10 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { rx_vec_i128 in0, in1, in2, in3; //intial state - state0 = rx_set_int_vec_i128(0x8d3126fd, 0x1146d167, 0x887af5ab, 0xc4778e00); - state1 = rx_set_int_vec_i128(0x19fe9fa1, 0x58da632b, 0x1b95af89, 0xb834ef4b); - state2 = rx_set_int_vec_i128(0x1bb2cd74, 0xc35ad744, 0xab283a00, 0x7742dd3a); - state3 = rx_set_int_vec_i128(0xbb30a58a, 0x49593c57, 0xdc5d97cc, 0xe18b449a); + state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0); + state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1); + state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2); + state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3); //process 64 bytes at a time in 4 lanes while (inptr < inputEnd) { @@ -70,8 +78,8 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { } //two extra rounds to achieve full diffusion - rx_vec_i128 xkey0 = rx_set_int_vec_i128(0x83951283, 0xe4c5593d, 0x2a5a929c, 0x11cbf247); - rx_vec_i128 xkey1 = rx_set_int_vec_i128(0xff215bb2, 0xabbc2523, 0x477bef0b, 0xce816c95); + rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0); + rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1); state0 = aesenc(state0, xkey0); state1 = aesdec(state1, xkey0); @@ -93,6 +101,11 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash) { template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); template void hashAes1Rx4(const void *input, size_t inputSize, void *hash); +#define AES_GEN_1R_KEY0 0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553 +#define AES_GEN_1R_KEY1 0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07 +#define AES_GEN_1R_KEY2 0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1 +#define AES_GEN_1R_KEY3 0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135 + /* Fill 'buffer' with pseudorandom data based on 512-bit 'state'. 
The state is encrypted using a single AES round per 16 bytes of output @@ -111,10 +124,10 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { rx_vec_i128 state0, state1, state2, state3; rx_vec_i128 key0, key1, key2, key3; - key0 = rx_set_int_vec_i128(0xdf20a2e3, 0xca329132, 0x454ff6d5, 0x84eeec2d); - key1 = rx_set_int_vec_i128(0x1deb5971, 0xfed0387f, 0xf10fc578, 0x017b63d0); - key2 = rx_set_int_vec_i128(0xdfc926b3, 0xa517ceb4, 0x2f2c70a1, 0x327d7a52); - key3 = rx_set_int_vec_i128(0x341cf31c, 0xa0ece0a9, 0x3d17da5e, 0x5c8d77d3); + key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0); + key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1); + key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2); + key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3); state0 = rx_load_vec_i128((rx_vec_i128*)state + 0); state1 = rx_load_vec_i128((rx_vec_i128*)state + 1); @@ -143,3 +156,59 @@ void fillAes1Rx4(void *state, size_t outputSize, void *buffer) { template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); +/* Round keys of AesGenerator4R (doc/specs.md, chapter 3.3). Each macro expands to four 32-bit words that are passed to rx_set_int_vec_i128; relative to the byte strings printed in the spec, the words are listed from the last four bytes to the first, each read as a little-endian integer. */ +#define AES_GEN_4R_KEY0 0xcf359e95, 0x141f82b7, 0x7ffbe4a6, 0xf890465d +#define AES_GEN_4R_KEY1 0x6741ffdc, 0xbd5c5ac3, 0xfee8278a, 0x6a55c450 +#define AES_GEN_4R_KEY2 0x3d324aac, 0xa7279ad2, 0xd524fde4, 0x114c47a4 +#define AES_GEN_4R_KEY3 0x76f6db08, 0x42d3dbd9, 0x99a9aeff, 0x810c3a2a +/* Fill buffer with pseudorandom data derived from the four 16-byte columns loaded from state. For every 64 bytes of output each column goes through four AES rounds keyed with key0, key1, key2, key3 in that order; columns 0 and 2 use aesdec, columns 1 and 3 use aesenc. Output is written in whole 64-byte blocks while outptr < outputEnd, so outputSize is presumably expected to be a multiple of 64 (TODO confirm against callers). This function never stores through the state pointer, so the advanced columns are not handed back to the caller. */ +template +void fillAes4Rx4(void *state, size_t outputSize, void *buffer) { + const uint8_t* outptr = (uint8_t*)buffer; + const uint8_t* outputEnd = outptr + outputSize; +/* NOTE(review): outptr is declared const uint8_t* yet the stores in the loop write through it after a cast to rx_vec_i128*; a non-const uint8_t* would match how it is used. */ + rx_vec_i128 state0, state1, state2, state3; + rx_vec_i128 key0, key1, key2, key3; + + key0 = rx_set_int_vec_i128(AES_GEN_4R_KEY0); + key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1); + key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2); + key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3); + + state0 = rx_load_vec_i128((rx_vec_i128*)state + 0); + state1 = rx_load_vec_i128((rx_vec_i128*)state + 1); + state2 = rx_load_vec_i128((rx_vec_i128*)state + 2); + state3 = 
rx_load_vec_i128((rx_vec_i128*)state + 3); +/* Each pass: four rounds on all four columns, then one 64-byte block of output. */ + while (outptr < outputEnd) { + state0 = aesdec(state0, key0); + state1 = aesenc(state1, key0); + state2 = aesdec(state2, key0); + state3 = aesenc(state3, key0); + + state0 = aesdec(state0, key1); + state1 = aesenc(state1, key1); + state2 = aesdec(state2, key1); + state3 = aesenc(state3, key1); + + state0 = aesdec(state0, key2); + state1 = aesenc(state1, key2); + state2 = aesdec(state2, key2); + state3 = aesenc(state3, key2); + + state0 = aesdec(state0, key3); + state1 = aesenc(state1, key3); + state2 = aesdec(state2, key3); + state3 = aesenc(state3, key3); + + rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0); + rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1); + rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2); + rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3); + + outptr += 64; + } +} + +template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); +template void fillAes4Rx4(void *state, size_t outputSize, void *buffer); diff --git a/src/aes_hash.hpp b/src/aes_hash.hpp index b7cc199c..b4d0e940 100644 --- a/src/aes_hash.hpp +++ b/src/aes_hash.hpp @@ -35,3 +35,6 @@ void hashAes1Rx4(const void *input, size_t inputSize, void *hash); template void fillAes1Rx4(void *state, size_t outputSize, void *buffer); + +template +void fillAes4Rx4(void *state, size_t outputSize, void *buffer); diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp index 5b33719d..73437132 100644 --- a/src/tests/benchmark.cpp +++ b/src/tests/benchmark.cpp @@ -241,7 +241,7 @@ int main(int argc, char** argv) { std::cout << "Calculated result: "; result.print(std::cout); if (noncesCount == 1000 && seedValue == 0) - std::cout << "Reference result: 0063222e8c4c687cc7c91ea86f3747d8dbd53af6bdf937167736b9284e4d7dac" << std::endl; + std::cout << "Reference result: 669ae4f2e5e2c0d9cc232ff2c37d41ae113fa302bbf983d9f3342879831b4edf" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms 
per hash" << std::endl; } diff --git a/src/virtual_machine.cpp b/src/virtual_machine.cpp index 6af1ed4a..d73a0247 100644 --- a/src/virtual_machine.cpp +++ b/src/virtual_machine.cpp @@ -127,7 +127,7 @@ namespace randomx { template void VmBase::generateProgram(void* seed) { - fillAes1Rx4(seed, sizeof(program), &program); + fillAes4Rx4(seed, sizeof(program), &program);/* fills the whole program object, sizeof(program) bytes, from the 4-round generator starting at the state that seed points to */ } template class VmBase, false>;