# Patch summary
#
# 1. libethash-cuda/CMakeLists.txt (new file, 28 lines)
#    - Names the target "ethash-cuda" and requires the CUDA package.
#    - Collects *.cpp/*.cu sources and *.h/*.cuh headers by glob.
#    - Appends nvcc flags (C++11, warnings disabled, verbose ptxas, fast math,
#      line info), -O3 for release and -G for debug.
#    - If COMPUTE is set, generates code for that single compute capability;
#      otherwise for 20, 30, 32, 35, 50 and 52.
#    - Builds a static library with CUDA_ADD_LIBRARY, links it against
#      ${CUDA_LIBRARIES} and ethash, and installs the library and its headers.
#
# 2. libethash-cuda/keccak.cuh
#    - Moves the plain-XOR fallback xor5() into an "#else" placed directly after
#      the lop3xor-based xor5() and closes that conditional with "#endif".
#      (The "#if" that opens this conditional lies above the hunk and is not
#      shown here.)
#    - Gives chi() its own "#if __CUDA_ARCH__ >= 500" / "#else" pair: the
#      lop3.b32 inline-asm version in the "#if" branch, the
#      "a ^ (~b) & c" version in the "#else" branch.
#
# NOTE(review): leading whitespace inside the hunks is reconstructed with tabs;
# confirm it against the target files before applying.
#
diff --git a/libethash-cuda/CMakeLists.txt b/libethash-cuda/CMakeLists.txt
new file mode 100644
index 000000000..7a3f8b753
--- /dev/null
+++ b/libethash-cuda/CMakeLists.txt
@@ -0,0 +1,28 @@
+set(EXECUTABLE ethash-cuda)
+
+FIND_PACKAGE(CUDA REQUIRED)
+
+file(GLOB SRC_LIST "*.cpp" "*.cu")
+file(GLOB HEADERS "*.h" "*.cuh")
+
+set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--std=c++11;--disable-warnings;--ptxas-options=-v;-use_fast_math;-lineinfo)
+
+LIST(APPEND CUDA_NVCC_FLAGS_RELEASE -O3)
+LIST(APPEND CUDA_NVCC_FLAGS_DEBUG -G)
+
+if(COMPUTE)
+	LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${COMPUTE},code=sm_${COMPUTE})
+else(COMPUTE)
+	set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_20;-gencode arch=compute_30,code=sm_30;-gencode arch=compute_32,code=sm_32;-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52)
+endif(COMPUTE)
+
+
+
+include_directories(${CMAKE_CURRENT_BINARY_DIR})
+include_directories(${CUDA_INCLUDE_DIRS})
+include_directories(..)
+CUDA_ADD_LIBRARY(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS})
+TARGET_LINK_LIBRARIES(${EXECUTABLE} ${CUDA_LIBRARIES} ethash)
+
+install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib )
+install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} )
\ No newline at end of file
diff --git a/libethash-cuda/keccak.cuh b/libethash-cuda/keccak.cuh
index fe98082f9..77e36bbcf 100644
--- a/libethash-cuda/keccak.cuh
+++ b/libethash-cuda/keccak.cuh
@@ -26,7 +26,14 @@ uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uin
 	uint2 f = lop3xor(a,b,c);
 	return lop3xor(d,e,f);
 }
+#else
+__device__ __forceinline__
+uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uint2 e) {
+	return a ^ b ^ c ^ d ^ e;
+}
+#endif
 
+#if __CUDA_ARCH__ >= 500
 __device__ __forceinline__
 uint2 chi(const uint2 a, const uint2 b, const uint2 c) {
 	uint2 result;
@@ -34,12 +41,7 @@ uint2 chi(const uint2 a, const uint2 b, const uint2 c) {
 	asm("lop3.b32 %0, %1, %2, %3, 0x82;" : "=r"(result.y) : "r"(a.y), "r"(b.y), "r"(c.y));
 	return result;
 }
-
-#else
-__device__ __forceinline__
-uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uint2 e) {
-	return a ^ b ^ c ^ d ^ e;
-}
+#else
 __device__ __forceinline__
 uint2 chi(const uint2 a, const uint2 b, const uint2 c) {
 	return a ^ (~b) & c;