9 changed files with 112 additions and 192 deletions
@ -1,22 +0,0 @@ |
|||||
#define copy(dst, src, count) for (uint32_t i = 0; i < count; i++) { (dst)[i] = (src)[i]; } |
|
||||
|
|
||||
#define ACCESSES 64 |
|
||||
#define THREADS_PER_HASH (128 / 16) |
|
||||
#define FNV_PRIME 0x01000193 |
|
||||
|
|
||||
#define fnv(x,y) ((x) * FNV_PRIME ^(y)) |
|
||||
|
|
||||
__device__ uint4 fnv4(uint4 a, uint4 b) |
|
||||
{ |
|
||||
uint4 c; |
|
||||
c.x = a.x * FNV_PRIME ^ b.x; |
|
||||
c.y = a.y * FNV_PRIME ^ b.y; |
|
||||
c.z = a.z * FNV_PRIME ^ b.z; |
|
||||
c.w = a.w * FNV_PRIME ^ b.w; |
|
||||
return c; |
|
||||
} |
|
||||
|
|
||||
__device__ uint32_t fnv_reduce(uint4 v) |
|
||||
{ |
|
||||
return fnv(fnv(fnv(v.x, v.y), v.z), v.w); |
|
||||
} |
|
@ -1,119 +0,0 @@ |
|||||
#include "ethash_cuda_miner_kernel_globals.h" |
|
||||
#include "ethash_cuda_miner_kernel.h" |
|
||||
#include "keccak.cuh" |
|
||||
#include "dagger.cuh" |
|
||||
|
|
||||
typedef union |
|
||||
{ |
|
||||
hash64_t init; |
|
||||
hash32_t mix; |
|
||||
} compute_hash_share; |
|
||||
|
|
||||
__device__ hash64_t init_hash(uint64_t nonce) |
|
||||
{ |
|
||||
hash64_t init; |
|
||||
|
|
||||
// sha3_512(header .. nonce) |
|
||||
uint64_t state[25]; |
|
||||
|
|
||||
state[0] = d_header.uint64s[0]; |
|
||||
state[1] = d_header.uint64s[1]; |
|
||||
state[2] = d_header.uint64s[2]; |
|
||||
state[3] = d_header.uint64s[3]; |
|
||||
state[4] = nonce; |
|
||||
|
|
||||
keccak_f1600_init((uint2 *)state); |
|
||||
copy(init.uint64s, state, 8); |
|
||||
return init; |
|
||||
} |
|
||||
|
|
||||
__device__ uint32_t inner_loop(uint4 mix, uint32_t thread_id, uint32_t* share) |
|
||||
{ |
|
||||
// share init0 |
|
||||
if (thread_id == 0) |
|
||||
*share = mix.x; |
|
||||
|
|
||||
uint32_t init0 = *share; |
|
||||
|
|
||||
uint32_t a = 0; |
|
||||
|
|
||||
do |
|
||||
{ |
|
||||
|
|
||||
bool update_share = thread_id == ((a >> 2) & (THREADS_PER_HASH - 1)); |
|
||||
|
|
||||
//#pragma unroll 4 |
|
||||
for (uint32_t i = 0; i < 4; i++) |
|
||||
{ |
|
||||
|
|
||||
if (update_share) |
|
||||
{ |
|
||||
uint32_t m[4] = { mix.x, mix.y, mix.z, mix.w }; |
|
||||
*share = fnv(init0 ^ (a + i), m[i]) % d_dag_size; |
|
||||
} |
|
||||
__threadfence_block(); |
|
||||
|
|
||||
#if __CUDA_ARCH__ >= 350 |
|
||||
mix = fnv4(mix, __ldg((&d_dag[*share])->uint4s + thread_id)); |
|
||||
#else |
|
||||
mix = fnv4(mix, (&d_dag[*share])->uint4s[thread_id]); |
|
||||
#endif |
|
||||
|
|
||||
} |
|
||||
|
|
||||
} while ((a += 4) != ACCESSES); |
|
||||
|
|
||||
return fnv_reduce(mix); |
|
||||
} |
|
||||
|
|
||||
__device__ hash32_t final_hash(hash64_t const* init, hash32_t const* mix) |
|
||||
{ |
|
||||
uint64_t state[25]; |
|
||||
|
|
||||
hash32_t hash; |
|
||||
|
|
||||
// keccak_256(keccak_512(header..nonce) .. mix); |
|
||||
copy(state, init->uint64s, 8); |
|
||||
copy(state + 8, mix->uint64s, 4); |
|
||||
|
|
||||
keccak_f1600_final((uint2 *)state); |
|
||||
|
|
||||
// copy out |
|
||||
copy(hash.uint64s, state, 4); |
|
||||
return hash; |
|
||||
} |
|
||||
|
|
||||
__device__ hash32_t compute_hash( |
|
||||
uint64_t nonce |
|
||||
) |
|
||||
{ |
|
||||
extern __shared__ compute_hash_share share[]; |
|
||||
|
|
||||
// Compute one init hash per work item. |
|
||||
hash64_t init = init_hash(nonce); |
|
||||
|
|
||||
// Threads work together in this phase in groups of 8. |
|
||||
uint32_t const thread_id = threadIdx.x & (THREADS_PER_HASH - 1); |
|
||||
uint32_t const hash_id = threadIdx.x >> 3; |
|
||||
|
|
||||
hash32_t mix; |
|
||||
|
|
||||
for (int i = 0; i < THREADS_PER_HASH; i++) |
|
||||
{ |
|
||||
// share init with other threads |
|
||||
if (i == thread_id) |
|
||||
share[hash_id].init = init; |
|
||||
|
|
||||
uint4 thread_init = share[hash_id].init.uint4s[thread_id & 3]; |
|
||||
|
|
||||
uint32_t thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uint32s); |
|
||||
|
|
||||
share[hash_id].mix.uint32s[thread_id] = thread_mix; |
|
||||
|
|
||||
|
|
||||
if (i == thread_id) |
|
||||
mix = share[hash_id].mix; |
|
||||
} |
|
||||
|
|
||||
return final_hash(&init, &mix); |
|
||||
} |
|
Loading…
Reference in new issue