From 970646119531079a535dcf804a193442fd0bc837 Mon Sep 17 00:00:00 2001 From: Genoil Date: Tue, 1 Sep 2015 21:53:41 +0200 Subject: [PATCH 1/2] small fiex. still not sure if works --- libethash-cuda/dagger_shuffled.cuh | 2 +- libethash-cuda/keccak.cuh | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libethash-cuda/dagger_shuffled.cuh b/libethash-cuda/dagger_shuffled.cuh index 479b7f7e4..36275c4e4 100644 --- a/libethash-cuda/dagger_shuffled.cuh +++ b/libethash-cuda/dagger_shuffled.cuh @@ -32,7 +32,7 @@ __device__ uint64_t compute_hash_shuffle( uint4 mix; uint2 shuffle[8]; - + for (int i = 0; i < THREADS_PER_HASH; i++) { // share init among threads diff --git a/libethash-cuda/keccak.cuh b/libethash-cuda/keccak.cuh index 861a0200c..fe98082f9 100644 --- a/libethash-cuda/keccak.cuh +++ b/libethash-cuda/keccak.cuh @@ -105,7 +105,7 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size) // squeeze this in here /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ u = s[0]; v = s[1]; - s[0] = chi(s[0], v, s[2]); + s[0] = chi(s[0], s[1], s[2]); /* iota: a[0,0] ^= round constant */ s[0] ^= vectorize(keccak_round_constants[i]); @@ -119,7 +119,7 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size) u = s[5]; v = s[6]; - s[5] = chi(s[5], v, s[7]); + s[5] = chi(s[5], s[6], s[7]); s[6] = chi(s[6], s[7], s[8]); s[7] = chi(s[7], s[8], s[9]); @@ -129,21 +129,21 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size) s[9] = chi(s[9], u, v); u = s[10]; v = s[11]; - s[10] = chi(s[10], v, s[12]); + s[10] = chi(s[10], s[11], s[12]); s[11] = chi(s[11], s[12], s[13]); s[12] = chi(s[12], s[13], s[14]); s[13] = chi(s[13], s[14], u); s[14] = chi(s[14], u, v); u = s[15]; v = s[16]; - s[15] = chi(s[15], v, s[17]); - s[16] = chi(s[16], s[12], s[18]); - s[17] = chi(s[17], s[13], s[19]); - s[18] = chi(s[18], s[14], u); + s[15] = chi(s[15], s[16], s[17]); + s[16] = chi(s[16], s[17], s[18]); + s[17] = chi(s[17], s[18], s[19]); + s[18] = chi(s[18], s[19], u); s[19] = chi(s[19], u, v); u = s[20]; v = s[21]; - s[20] = chi(s[20], v, s[22]); + s[20] = chi(s[20], s[21], s[22]); s[21] = chi(s[21], s[22], s[23]); s[22] = chi(s[22], s[23], s[24]); s[23] = chi(s[23], s[24], u); From d84d08f4fe26dda4aaeee9995092269b01944d7b Mon Sep 17 00:00:00 2001 From: Genoil Date: Tue, 1 Sep 2015 22:13:13 +0200 Subject: [PATCH 2/2] makelist.. --- libethash-cuda/CMakeLists.txt | 28 ++++++++++++++++++++++++++++ libethash-cuda/keccak.cuh | 14 ++++++++------ 2 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 libethash-cuda/CMakeLists.txt diff --git a/libethash-cuda/CMakeLists.txt b/libethash-cuda/CMakeLists.txt new file mode 100644 index 000000000..7a3f8b753 --- /dev/null +++ b/libethash-cuda/CMakeLists.txt @@ -0,0 +1,28 @@ +set(EXECUTABLE ethash-cuda) + +FIND_PACKAGE(CUDA REQUIRED) + +file(GLOB SRC_LIST "*.cpp" "*.cu") +file(GLOB HEADERS "*.h" "*.cuh") + +set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--std=c++11;--disable-warnings;--ptxas-options=-v;-use_fast_math;-lineinfo) + +LIST(APPEND CUDA_NVCC_FLAGS_RELEASE -O3) +LIST(APPEND CUDA_NVCC_FLAGS_DEBUG -G) + +if(COMPUTE) + LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${COMPUTE},code=sm_${COMPUTE}) +else(COMPUTE) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_20;-gencode arch=compute_30,code=sm_30;-gencode arch=compute_32,code=sm_32;-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52) +endif(COMPUTE) + + + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CUDA_INCLUDE_DIRS}) +include_directories(..) +CUDA_ADD_LIBRARY(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS}) +TARGET_LINK_LIBRARIES(${EXECUTABLE} ${CUDA_LIBRARIES} ethash) + +install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) +install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) \ No newline at end of file diff --git a/libethash-cuda/keccak.cuh b/libethash-cuda/keccak.cuh index fe98082f9..77e36bbcf 100644 --- a/libethash-cuda/keccak.cuh +++ b/libethash-cuda/keccak.cuh @@ -26,7 +26,14 @@ uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uin uint2 f = lop3xor(a,b,c); return lop3xor(d,e,f); } +#else +__device__ __forceinline__ +uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uint2 e) { + return a ^ b ^ c ^ d ^ e; +} +#endif +#if __CUDA_ARCH__ >= 500 __device__ __forceinline__ uint2 chi(const uint2 a, const uint2 b, const uint2 c) { uint2 result; @@ -34,12 +41,7 @@ uint2 chi(const uint2 a, const uint2 b, const uint2 c) { asm("lop3.b32 %0, %1, %2, %3, 0x82;" : "=r"(result.y) : "r"(a.y), "r"(b.y), "r"(c.y)); return result; } - -#else -__device__ __forceinline__ -uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uint2 e) { - return a ^ b ^ c ^ d ^ e; -} +#else __device__ __forceinline__ uint2 chi(const uint2 a, const uint2 b, const uint2 c) { return a ^ (~b) & c;