Skip to content

Commit

Permalink
Updated JIT compiler and assembly generator for new int -> float conv…
Browse files Browse the repository at this point in the history
…ersion
  • Loading branch information
tevador committed Feb 24, 2019
1 parent 790b382 commit d9bc6cf
Show file tree
Hide file tree
Showing 10 changed files with 56 additions and 49 deletions.
5 changes: 3 additions & 2 deletions doc/isa-ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ For floating point instructions, the destination can be a group F or group E reg

Memory operands are loaded as 8-byte values from the address indicated by `src`. The 8 byte value is interpreted as two 32-bit signed integers and implicitly converted to floating point format. The lower and upper memory operands are marked as `[src][0]` and `[src][1]`.

Memory operands for group E registers are loaded as described above, then their sign bit is cleared and their exponent value is set to `0x30F` (corresponds to 2<sup>-240</sup>).

|frequency|instruction|dst|src|operation|
|-|-|-|-|-|
|8/256|FSWAP_R|F+E|-|`(dst0, dst1) = (dst1, dst0)`|
Expand All @@ -58,8 +60,7 @@ This instruction negates the number and multiplies it by <code>2<sup>x</sup></co
The mathematical operation described above is equivalent to a bitwise XOR of the binary representation with the value of `0x81F0000000000000`.

#### Denormal and NaN values
Due to restrictions on the values of the floating point registers, no operation results in `NaN`.
`FDIV_M` can produce a denormal result. In that case, the result is set to `DBL_MIN = 2.22507385850720138309e-308`, which is the smallest positive normal number.
Due to restrictions on the values of the floating point registers, no operation results in `NaN` or a denormal number.

#### Rounding
All floating point instructions give correctly rounded results. The rounding mode depends on the value of the `fprc` register:
Expand Down
4 changes: 2 additions & 2 deletions src/AssemblyGeneratorX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,9 @@ namespace RandomX {
instr.dst %= 4;
genAddressReg(instr);
asmCode << "\tcvtdq2pd xmm12, qword ptr [rsi+rax]" << std::endl;
asmCode << "\tandps xmm12, xmm14" << std::endl;
asmCode << "\tandps xmm12, xmm13" << std::endl;
asmCode << "\torps xmm12, xmm14" << std::endl;
asmCode << "\tdivpd " << regE[instr.dst] << ", xmm12" << std::endl;
asmCode << "\tmaxpd " << regE[instr.dst] << ", " << dblMin << std::endl;
traceflt(instr);
}

Expand Down
10 changes: 4 additions & 6 deletions src/JitCompilerX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ namespace RandomX {
; xmm10 -> "a2"
; xmm11 -> "a3"
; xmm12 -> temporary
; xmm13 -> DBL_MIN
; xmm14 -> absolute value mask 0x7fffffffffffffff7fffffffffffffff
; xmm15 -> sign mask 0x80000000000000008000000000000000
; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
*/

Expand Down Expand Up @@ -165,7 +165,7 @@ namespace RandomX {
static const uint8_t JMP = 0xe9;
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0f, 0x54, 0xe6 };
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };

Expand Down Expand Up @@ -556,8 +556,6 @@ namespace RandomX {
emit(REX_ANDPS_XMM12);
emit(REX_DIVPD);
emitByte(0xe4 + 8 * instr.dst);
emit(REX_MAXPD);
emitByte(0xe5 + 8 * instr.dst);
}

void JitCompilerX86::h_FSQRT_R(Instruction& instr) {
Expand Down
12 changes: 8 additions & 4 deletions src/asm/program_loop_load.inc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm14
andps xmm5, xmm14
andps xmm6, xmm14
andps xmm7, xmm14
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14
8 changes: 4 additions & 4 deletions src/asm/program_loop_store.inc
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rcx
mulpd xmm0, xmm4
mulpd xmm1, xmm5
mulpd xmm2, xmm6
mulpd xmm3, xmm7
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
xorpd xmm3, xmm7
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2
Expand Down
6 changes: 3 additions & 3 deletions src/asm/program_prologue_linux.inc
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr minDbl[rip]
movapd xmm14, xmmword ptr absMask[rip]
movapd xmm15, xmmword ptr signMask[rip]
movapd xmm13, xmmword ptr mantissaMask[rip]
movapd xmm14, xmmword ptr exp240[rip]
movapd xmm15, xmmword ptr scaleMask[rip]

jmp DECL(randomx_program_loop_begin)
6 changes: 3 additions & 3 deletions src/asm/program_prologue_win64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [minDbl]
movapd xmm14, xmmword ptr [absMask]
movapd xmm15, xmmword ptr [signMask]
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]

jmp randomx_program_loop_begin
10 changes: 5 additions & 5 deletions src/asm/program_xmm_constants.inc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
mantissaMask:
db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
exp240:
db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
scaleMask:
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129
42 changes: 23 additions & 19 deletions src/executeProgram-win64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ executeProgram PROC
; xmm10 -> "a2"
; xmm11 -> "a3"
; xmm12 -> temporary
; xmm13 -> DBL_MIN
; xmm14 -> absolute value mask
; xmm15 -> sign mask
; xmm13 -> mantissa mask = 0x000fffffffffffff000fffffffffffff
; xmm14 -> exponent 2**-240 = 0x30f000000000000030f0000000000000
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000

; store callee-saved registers
push rbx
Expand Down Expand Up @@ -103,18 +103,18 @@ executeProgram PROC
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]
movapd xmm13, xmmword ptr [minDbl]
movapd xmm14, xmmword ptr [absMask]
movapd xmm15, xmmword ptr [signMask]
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]

jmp program_begin

ALIGN 64
minDbl:
db 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 16, 0
absMask:
db 255, 255, 255, 255, 255, 255, 255, 127, 255, 255, 255, 255, 255, 255, 255, 127
signMask:
mantissaMask:
db 255, 255, 255, 255, 255, 255, 15, 0, 255, 255, 255, 255, 255, 255, 15, 0
exp240:
db 0, 0, 0, 0, 0, 0, 240, 48, 0, 0, 0, 0, 0, 0, 240, 48
scaleMask:
db 0, 0, 0, 0, 0, 0, 240, 129, 0, 0, 0, 0, 0, 0, 240, 129

ALIGN 64
Expand Down Expand Up @@ -145,10 +145,14 @@ program_begin:
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm14
andps xmm5, xmm14
andps xmm6, xmm14
andps xmm7, xmm14
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14

;# 256 instructions
include program.inc
Expand Down Expand Up @@ -181,10 +185,10 @@ IF 1
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rcx
mulpd xmm0, xmm4
mulpd xmm1, xmm5
mulpd xmm2, xmm6
mulpd xmm3, xmm7
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
xorpd xmm3, xmm7
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ int main(int argc, char** argv) {
std::cout << "Calculated result: ";
result.print(std::cout);
if(programCount == 1000)
std::cout << "Reference result: d3ae5a9365196ed48bb98ebfc3316498e29443ea7f056ecbd272f749c6af7730" << std::endl;
std::cout << "Reference result: e1b4144293ff9ab5aa4c98f2389bb18950d8c3fd874891ac64628e028a286006" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / programCount << " ms per hash" << std::endl;
}
Expand Down

0 comments on commit d9bc6cf

Please sign in to comment.