diff --git a/libethash-cuda/dagger_shuffled.cuh b/libethash-cuda/dagger_shuffled.cuh index 03846fb44..0918ff4f4 100644 --- a/libethash-cuda/dagger_shuffled.cuh +++ b/libethash-cuda/dagger_shuffled.cuh @@ -76,6 +76,7 @@ __device__ uint64_t compute_hash_shuffle( uint32_t thread_mix = fnv_reduce(mix); // update mix accross threads + /* shuffle[0].x = __shfl(thread_mix, start_lane + 0); shuffle[0].y = __shfl(thread_mix, start_lane + 1); shuffle[1].x = __shfl(thread_mix, start_lane + 2); @@ -84,14 +85,30 @@ __device__ uint64_t compute_hash_shuffle( shuffle[2].y = __shfl(thread_mix, start_lane + 5); shuffle[3].x = __shfl(thread_mix, start_lane + 6); shuffle[3].y = __shfl(thread_mix, start_lane + 7); - + */ if (i == thread_id) { //move mix into state: - state[8] = shuffle[0]; - state[9] = shuffle[1]; - state[10] = shuffle[2]; - state[11] = shuffle[3]; + state[8].x = __shfl(thread_mix, start_lane + 0); + state[8].y = __shfl(thread_mix, start_lane + 1); + state[9].x = __shfl(thread_mix, start_lane + 2); + state[9].y = __shfl(thread_mix, start_lane + 3); + state[10].x = __shfl(thread_mix, start_lane + 4); + state[10].y = __shfl(thread_mix, start_lane + 5); + state[11].x = __shfl(thread_mix, start_lane + 6); + state[11].y = __shfl(thread_mix, start_lane + 7); + } + /* + else { + shuffle[0].x = __shfl(thread_mix, start_lane + 0); + shuffle[0].y = __shfl(thread_mix, start_lane + 1); + shuffle[1].x = __shfl(thread_mix, start_lane + 2); + shuffle[1].y = __shfl(thread_mix, start_lane + 3); + shuffle[2].x = __shfl(thread_mix, start_lane + 4); + shuffle[2].y = __shfl(thread_mix, start_lane + 5); + shuffle[3].x = __shfl(thread_mix, start_lane + 6); + shuffle[3].y = __shfl(thread_mix, start_lane + 7); } + */ } // keccak_256(keccak_512(header..nonce) .. mix);