You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
107 lines
2.7 KiB
107 lines
2.7 KiB
10 years ago
|
#include "ethash_cu_miner_kernel_globals.h"
|
||
|
#include "ethash_cu_miner_kernel.h"
|
||
|
#include "keccak.cuh"
|
||
|
#include "dagger.cuh"
|
||
|
|
||
|
__device__ uint64_t compute_hash_shuffle(
|
||
|
uint2 const* g_header,
|
||
|
hash128_t const* g_dag,
|
||
|
uint64_t nonce
|
||
|
)
|
||
|
{
|
||
|
// sha3_512(header .. nonce)
|
||
|
uint2 state[25];
|
||
|
|
||
|
state[0] = g_header[0];
|
||
|
state[1] = g_header[1];
|
||
|
state[2] = g_header[2];
|
||
|
state[3] = g_header[3];
|
||
|
state[4] = vectorize(nonce);
|
||
|
state[5] = vectorize(0x0000000000000001ULL);
|
||
|
for (uint32_t i = 6; i < 25; i++)
|
||
|
{
|
||
|
state[i] = make_uint2(0, 0);
|
||
|
}
|
||
|
state[8] = vectorize(0x8000000000000000ULL);
|
||
|
keccak_f1600_block(state,8);
|
||
|
|
||
|
// Threads work together in this phase in groups of 8.
|
||
|
const int thread_id = threadIdx.x & (THREADS_PER_HASH - 1);
|
||
|
const int start_lane = threadIdx.x & ~(THREADS_PER_HASH - 1);
|
||
|
const int mix_idx = thread_id & 3;
|
||
|
|
||
|
uint4 mix;
|
||
|
uint2 shuffle[8];
|
||
|
|
||
|
for (int i = 0; i < THREADS_PER_HASH; i++)
|
||
|
{
|
||
|
// share init among threads
|
||
|
for (int j = 0; j < 8; j++) {
|
||
|
shuffle[j].x = __shfl(state[j].x, start_lane + i);
|
||
|
shuffle[j].y = __shfl(state[j].y, start_lane + i);
|
||
|
}
|
||
|
|
||
|
// ugly but avoids local reads/writes
|
||
|
if (mix_idx < 2) {
|
||
|
if (mix_idx == 0)
|
||
|
mix = vectorize2(shuffle[0], shuffle[1]);
|
||
|
else
|
||
|
mix = vectorize2(shuffle[2], shuffle[3]);
|
||
|
}
|
||
|
else {
|
||
|
if (mix_idx == 2)
|
||
|
mix = vectorize2(shuffle[4], shuffle[5]);
|
||
|
else
|
||
|
mix = vectorize2(shuffle[6], shuffle[7]);
|
||
|
}
|
||
|
|
||
|
uint32_t init0 = __shfl(shuffle[0].x, start_lane);
|
||
|
|
||
|
for (uint32_t a = 0; a < ACCESSES; a += 4)
|
||
|
{
|
||
|
int t = ((a >> 2) & (THREADS_PER_HASH - 1));
|
||
|
|
||
|
for (uint32_t b = 0; b < 4; b++)
|
||
|
{
|
||
|
if (thread_id == t)
|
||
|
{
|
||
|
shuffle[0].x = fnv(init0 ^ (a + b), ((uint32_t *)&mix)[b]) % d_dag_size;
|
||
|
}
|
||
|
shuffle[0].x = __shfl(shuffle[0].x, start_lane + t);
|
||
|
|
||
|
mix = fnv4(mix, g_dag[shuffle[0].x].uint4s[thread_id]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
uint32_t thread_mix = fnv_reduce(mix);
|
||
|
|
||
|
// update mix accross threads
|
||
|
shuffle[0].x = __shfl(thread_mix, start_lane + 0);
|
||
|
shuffle[0].y = __shfl(thread_mix, start_lane + 1);
|
||
|
shuffle[1].x = __shfl(thread_mix, start_lane + 2);
|
||
|
shuffle[1].y = __shfl(thread_mix, start_lane + 3);
|
||
|
shuffle[2].x = __shfl(thread_mix, start_lane + 4);
|
||
|
shuffle[2].y = __shfl(thread_mix, start_lane + 5);
|
||
|
shuffle[3].x = __shfl(thread_mix, start_lane + 6);
|
||
|
shuffle[3].y = __shfl(thread_mix, start_lane + 7);
|
||
|
|
||
|
if (i == thread_id) {
|
||
|
//move mix into state:
|
||
|
state[8] = shuffle[0];
|
||
|
state[9] = shuffle[1];
|
||
|
state[10] = shuffle[2];
|
||
|
state[11] = shuffle[3];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// keccak_256(keccak_512(header..nonce) .. mix);
|
||
|
state[12] = vectorize(0x0000000000000001ULL);
|
||
|
for (uint32_t i = 13; i < 25; i++)
|
||
|
{
|
||
|
state[i] = vectorize(0ULL);
|
||
|
}
|
||
|
state[16] = vectorize(0x8000000000000000);
|
||
|
keccak_f1600_block(state, 1);
|
||
|
|
||
|
return devectorize(state[0]);
|
||
|
}
|