Skip to content

Commit

Permalink
timetravel algo
Browse files Browse the repository at this point in the history
+ new kernels jh512-80 groestl-80 and cubehash-80

Signed-off-by: Tanguy Pruvot <[email protected]>
  • Loading branch information
tpruvot committed Mar 7, 2017
1 parent 3ede61b commit 07ebcb5
Show file tree
Hide file tree
Showing 15 changed files with 959 additions and 44 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
x11/cuda_x11_luffa512_Cubehash.cu x11/x11evo.cu \
x11/cuda_x11_luffa512_Cubehash.cu x11/x11evo.cu x11/timetravel.cu \
x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \
x15/x14.cu x15/x15.cu x15/cuda_x14_shabal512.cu x15/cuda_x15_whirlpool.cu \
x15/whirlpool.cu \
Expand Down
2 changes: 2 additions & 0 deletions algos.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ enum sha_algos {
ALGO_SKEIN,
ALGO_SKEIN2,
ALGO_S3,
ALGO_TIMETRAVEL,
ALGO_X11EVO,
ALGO_X11,
ALGO_X13,
Expand Down Expand Up @@ -101,6 +102,7 @@ static const char *algo_names[] = {
"skein",
"skein2",
"s3",
"timetravel",
"x11evo",
"x11",
"x13",
Expand Down
1 change: 1 addition & 0 deletions bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ void algo_free_all(int thr_id)
//free_sha256d(thr_id);
free_scrypt(thr_id);
free_scrypt_jane(thr_id);
free_timetravel(thr_id);
}

// benchmark all algos (called once per mining thread)
Expand Down
6 changes: 6 additions & 0 deletions ccminer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ Options:\n\
skein Skein SHA2 (Skeincoin)\n\
skein2 Double Skein (Woodcoin)\n\
s3 S3 (1Coin)\n\
timetravel Machinecoin permuted x8\n\
vanilla Blake256-8 (VNL)\n\
veltor Thorsriddle streebog\n\
whirlcoin Old Whirlcoin (Whirlpool algo)\n\
Expand Down Expand Up @@ -1619,6 +1620,7 @@ static bool stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
case ALGO_LBRY:
case ALGO_LYRA2v2:
case ALGO_LYRA2Z:
case ALGO_TIMETRAVEL:
work_set_target(work, sctx->job.diff / (256.0 * opt_difficulty));
break;
case ALGO_KECCAK:
Expand Down Expand Up @@ -2121,6 +2123,7 @@ static void *miner_thread(void *userdata)
case ALGO_HEAVY:
case ALGO_LYRA2v2:
case ALGO_S3:
case ALGO_TIMETRAVEL:
case ALGO_X11EVO:
case ALGO_X11:
case ALGO_X13:
Expand Down Expand Up @@ -2333,6 +2336,9 @@ static void *miner_thread(void *userdata)
case ALGO_WILDKECCAK:
rc = scanhash_wildkeccak(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_TIMETRAVEL:
rc = scanhash_timetravel(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_X11EVO:
rc = scanhash_x11evo(thr_id, &work, max_nonce, &hashes_done);
break;
Expand Down
1 change: 1 addition & 0 deletions ccminer.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@
<CudaCompile Include="x11\fresh.cu" />
<CudaCompile Include="x11\sib.cu" />
<CudaCompile Include="x11\s3.cu" />
<CudaCompile Include="x11\timetravel.cu" />
<CudaCompile Include="x11\veltor.cu" />
<CudaCompile Include="x11\x11.cu" />
<CudaCompile Include="x11\x11evo.cu" />
Expand Down
3 changes: 3 additions & 0 deletions ccminer.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,9 @@
<CudaCompile Include="x11\s3.cu">
<Filter>Source Files\CUDA\x11</Filter>
</CudaCompile>
<CudaCompile Include="x11\timetravel.cu">
<Filter>Source Files\CUDA\x11</Filter>
</CudaCompile>
<CudaCompile Include="x11\veltor.cu">
<Filter>Source Files\CUDA\x11</Filter>
</CudaCompile>
Expand Down
3 changes: 3 additions & 0 deletions miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ extern int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, unsig
extern int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_timetravel(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_vanilla(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int8_t blake_rounds);
extern int scanhash_veltor(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_whirl(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
Expand Down Expand Up @@ -360,6 +361,7 @@ extern void free_sib(int thr_id);
extern void free_skeincoin(int thr_id);
extern void free_skein2(int thr_id);
extern void free_s3(int thr_id);
extern void free_timetravel(int thr_id);
extern void free_vanilla(int thr_id);
extern void free_veltor(int thr_id);
extern void free_whirl(int thr_id);
Expand Down Expand Up @@ -882,6 +884,7 @@ void sibhash(void *output, const void *input);
void skeincoinhash(void *output, const void *input);
void skein2hash(void *output, const void *input);
void s3hash(void *output, const void *input);
void timetravel_hash(void *output, const void *input);
void veltorhash(void *output, const void *input);
void wcoinhash(void *state, const void *input);
void whirlxHash(void *state, const void *input);
Expand Down
83 changes: 83 additions & 0 deletions quark/cuda_jh512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,86 @@ void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce,

// Setup function
__host__ void quark_jh512_cpu_init(int thr_id, uint32_t threads) {}

#define WANT_JH80
#ifdef WANT_JH80

__constant__
static uint32_t c_PaddedMessage80[20]; // padded message (80 bytes)

__host__
void jh512_setBlock_80(int thr_id, uint32_t *endiandata)
{
cudaMemcpyToSymbol(c_PaddedMessage80, endiandata, sizeof(c_PaddedMessage80), 0, cudaMemcpyHostToDevice);
}

__global__
void jh512_gpu_hash_80(const uint32_t threads, const uint32_t startNounce, uint32_t * g_outhash)
{
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint32_t h[20];
AS_UINT4(&h[ 0]) = AS_UINT4(&c_PaddedMessage80[ 0]);
AS_UINT4(&h[ 4]) = AS_UINT4(&c_PaddedMessage80[ 4]);
AS_UINT4(&h[ 8]) = AS_UINT4(&c_PaddedMessage80[ 8]);
AS_UINT4(&h[12]) = AS_UINT4(&c_PaddedMessage80[12]);
AS_UINT2(&h[16]) = AS_UINT2(&c_PaddedMessage80[16]);
h[18] = c_PaddedMessage80[18];
h[19] = cuda_swab32(startNounce + thread);

uint32_t x[8][4] = { /* init */
{ 0x964bd16f, 0x17aa003e, 0x052e6a63, 0x43d5157a },
{ 0x8d5e228a, 0x0bef970c, 0x591234e9, 0x61c3b3f2 },
{ 0xc1a01d89, 0x1e806f53, 0x6b05a92a, 0x806d2bea },
{ 0xdbcc8e58, 0xa6ba7520, 0x763a0fa9, 0xf73bf8ba },
{ 0x05e66901, 0x694ae341, 0x8e8ab546, 0x5ae66f2e },
{ 0xd0a74710, 0x243c84c1, 0xb1716e3b, 0x99c15a2d },
{ 0xecf657cf, 0x56f8b19d, 0x7c8806a7, 0x56b11657 },
{ 0xdffcc2e3, 0xfb1785e6, 0x78465a54, 0x4bdd8ccc }
};

// 1 (could be precomputed)
#pragma unroll
for (int i = 0; i < 16; i++)
x[i/4][i & 3] ^= h[i];
E8(x);
#pragma unroll
for (int i = 0; i < 16; i++)
x[(i+16)/4][(i+16) & 3] ^= h[i];

// 2 (16 bytes with nonce)
#pragma unroll
for (int i = 0; i < 4; i++)
x[0][i] ^= h[16+i];
x[1][0] ^= 0x80U;
E8(x);
#pragma unroll
for (int i = 0; i < 4; i++)
x[4][i] ^= h[16+i];
x[5][0] ^= 0x80U;

// 3 close
x[3][3] ^= 0x80020000U; // 80 bytes = 640bits (0x280)
E8(x);
x[7][3] ^= 0x80020000U;

uint32_t *Hash = &g_outhash[(size_t)16 * thread];
AS_UINT4(&Hash[ 0]) = AS_UINT4(&x[4][0]);
AS_UINT4(&Hash[ 4]) = AS_UINT4(&x[5][0]);
AS_UINT4(&Hash[ 8]) = AS_UINT4(&x[6][0]);
AS_UINT4(&Hash[12]) = AS_UINT4(&x[7][0]);
}
}

__host__
void jh512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNounce, uint32_t *d_hash)
{
const uint32_t threadsperblock = 256;
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);

jh512_gpu_hash_80 <<<grid, block>>> (threads, startNounce, d_hash);
}

#endif
95 changes: 95 additions & 0 deletions quark/cuda_quark_groestl512.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
#include "groestl_transf_quad.h"
#endif

#define WANT_GROESTL80
#ifdef WANT_GROESTL80
__constant__ static uint32_t c_Message80[20];
#endif

#include "cuda_quark_groestl512_sm2.cuh"

__global__ __launch_bounds__(TPB, THF)
Expand Down Expand Up @@ -114,3 +119,93 @@ void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNo
quark_groestl512_sm20_hash_64(thr_id, threads, startNounce, d_nonceVector, d_hash, order);
}

// --------------------------------------------------------------------------------------------------------------------------------------------

#ifdef WANT_GROESTL80

__host__
void groestl512_setBlock_80(int thr_id, uint32_t *endiandata)
{
cudaMemcpyToSymbol(c_Message80, endiandata, sizeof(c_Message80), 0, cudaMemcpyHostToDevice);
}

__global__ __launch_bounds__(TPB, THF)
void groestl512_gpu_hash_80_quad(const uint32_t threads, const uint32_t startNounce, uint32_t * g_outhash)
{
#if __CUDA_ARCH__ >= 300
// BEWARE : 4-WAY CODE (one hash need 4 threads)
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x) >> 2;
if (thread < threads)
{
const uint32_t thr = threadIdx.x & 0x3; // % THF

/*| M0 M1 M2 M3 M4 | M5 M6 M7 | (input)
--|----------------|----------|
T0| 0 4 8 12 16 | 80 |
T1| 1 5 17 | |
T2| 2 6 18 | |
T3| 3 7 Nc | 01 |
--|----------------|----------| TPR */

uint32_t message[8];

#pragma unroll 5
for(int k=0; k<5; k++) message[k] = c_Message80[thr + (k * THF)];

#pragma unroll 3
for(int k=5; k<8; k++) message[k] = 0;

if (thr == 0) message[5] = 0x80U;
if (thr == 3) {
message[4] = cuda_swab32(startNounce + thread);
message[7] = 0x01000000U;
}

uint32_t msgBitsliced[8];
to_bitslice_quad(message, msgBitsliced);

uint32_t state[8];
groestl512_progressMessage_quad(state, msgBitsliced);

uint32_t hash[16];
from_bitslice_quad(state, hash);

if (thr == 0) { /* 4 threads were done */
const off_t hashPosition = thread;
//if (!thread) hash[15] = 0xFFFFFFFF;
uint4 *outpt = (uint4*) &g_outhash[hashPosition << 4];
uint4 *phash = (uint4*) hash;
outpt[0] = phash[0];
outpt[1] = phash[1];
outpt[2] = phash[2];
outpt[3] = phash[3];
}
}
#endif
}

__host__
void groestl512_cuda_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNounce, uint32_t *d_hash)
{
int dev_id = device_map[thr_id];

if (device_sm[dev_id] >= 300 && cuda_arch[dev_id] >= 300) {
const uint32_t threadsperblock = TPB;
const uint32_t factor = THF;

dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
dim3 block(threadsperblock);

groestl512_gpu_hash_80_quad <<<grid, block>>> (threads, startNounce, d_hash);

} else {

const uint32_t threadsperblock = 256;
dim3 grid((threads + threadsperblock-1)/threadsperblock);
dim3 block(threadsperblock);

groestl512_gpu_hash_80_sm2 <<<grid, block>>> (threads, startNounce, d_hash);
}
}

#endif
Loading

0 comments on commit 07ebcb5

Please sign in to comment.