From dcb0cc96ab8cb92b20b2840abf9ae977e1cdf54a Mon Sep 17 00:00:00 2001 From: Jan Willem Penterman Date: Wed, 2 Sep 2015 11:01:17 +0200 Subject: [PATCH 1/3] fixed multiple GPU support --- ethminer/MinerAux.h | 19 ++++++++------- libethash-cuda/ethash_cuda_miner.cpp | 36 ++++++++++++++++------------ libethash-cuda/ethash_cuda_miner.h | 1 + libethcore/EthashCUDAMiner.cpp | 5 ++-- libethcore/EthashCUDAMiner.h | 1 - 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index 1a2182e25..29a69b3ba 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -375,23 +375,24 @@ public: else if (m_minerType == MinerType::CUDA) { #if ETH_ETHASHCUDA || !ETH_TRUE + if (m_cudaDeviceCount == 0) + { + m_cudaDevices[0] = 0; + m_cudaDeviceCount = 1; + } + EthashCUDAMiner::setDevices(m_cudaDevices, m_cudaDeviceCount); + m_miningThreads = m_cudaDeviceCount; + + EthashCUDAMiner::setNumInstances(m_miningThreads); if (!EthashCUDAMiner::configureGPU( m_localWorkSize, m_globalWorkSizeMultiplier, m_numStreams, - m_openclDevice, m_extraGPUMemory, m_cudaHighCPULoad, m_currentBlock )) exit(1); - if (m_cudaDeviceCount != 0) - { - EthashCUDAMiner::setDevices(m_cudaDevices, m_cudaDeviceCount); - m_miningThreads = m_cudaDeviceCount; - } - EthashCUDAMiner::setNumInstances(m_miningThreads); - #else cerr << "Selected CUDA mining without having compiled with -DETHASHCUDA=1 or -DBUNDLE=cudaminer" << endl; exit(1); @@ -486,7 +487,7 @@ private: sealers["opencl"] = GenericFarm::SealerDescriptor{&EthashGPUMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashGPUMiner(ci); }}; #endif #if ETH_ETHASHCUDA - sealers["cuda"] = GenericFarm::SealerDescriptor{ &EthashGPUMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashCUDAMiner(ci); } }; + sealers["cuda"] = GenericFarm::SealerDescriptor{ &EthashCUDAMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashCUDAMiner(ci); } }; #endif f.setSealers(sealers); f.onSolutionFound([&](EthashProofOfWork::Solution) { return false; }); diff --git a/libethash-cuda/ethash_cuda_miner.cpp b/libethash-cuda/ethash_cuda_miner.cpp index 7308334ac..965cd77fc 100644 --- a/libethash-cuda/ethash_cuda_miner.cpp +++ b/libethash-cuda/ethash_cuda_miner.cpp @@ -129,6 +129,7 @@ unsigned ethash_cuda_miner::getNumDevices() } bool ethash_cuda_miner::configureGPU( + int * _devices, unsigned _blockSize, unsigned _gridSize, unsigned _numStreams, @@ -148,24 +149,29 @@ bool ethash_cuda_miner::configureGPU( uint64_t requiredSize = dagSize + _extraGPUMemory; for (unsigned int i = 0; i < getNumDevices(); i++) { - cudaDeviceProp props; - CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i)); - if (props.totalGlobalMem >= requiredSize) + if (_devices[i] != -1) { - ETHCUDA_LOG( - "Found suitable CUDA device [" << string(props.name) - << "] with " << props.totalGlobalMem << " bytes of GPU memory" - ); - return true; + cudaDeviceProp props; + CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, _devices[i])); + if (props.totalGlobalMem >= requiredSize) + { + ETHCUDA_LOG( + "Found suitable CUDA device [" << string(props.name) + << "] with " << props.totalGlobalMem << " bytes of GPU memory" + ); + } + else + { + ETHCUDA_LOG( + "CUDA device " << string(props.name) + << " has insufficient GPU memory." << to_string(props.totalGlobalMem) << + " bytes of memory found < " << to_string(requiredSize) << " bytes of memory required" + ); + return false; + } } - - ETHCUDA_LOG( - "CUDA device " << string(props.name) - << " has insufficient GPU memory." << to_string(props.totalGlobalMem) << - " bytes of memory found < " << to_string(requiredSize) << " bytes of memory required" - ); } - return false; + return true; } unsigned ethash_cuda_miner::s_extraRequiredGPUMem; diff --git a/libethash-cuda/ethash_cuda_miner.h b/libethash-cuda/ethash_cuda_miner.h index 2b97ab294..d89096cfb 100644 --- a/libethash-cuda/ethash_cuda_miner.h +++ b/libethash-cuda/ethash_cuda_miner.h @@ -26,6 +26,7 @@ public: static unsigned getNumDevices(); static void listDevices(); static bool configureGPU( + int * _devices, unsigned _blockSize, unsigned _gridSize, unsigned _numStreams, diff --git a/libethcore/EthashCUDAMiner.cpp b/libethcore/EthashCUDAMiner.cpp index 69b2338b8..a5d82c430 100644 --- a/libethcore/EthashCUDAMiner.cpp +++ b/libethcore/EthashCUDAMiner.cpp @@ -146,7 +146,7 @@ void EthashCUDAMiner::workLoop() delete m_miner; m_miner = new ethash_cuda_miner; - unsigned device = instances() > 1 ? (s_devices[index()] > -1 ? s_devices[index()] : index()) : s_deviceId; + unsigned device = s_devices[index()] > -1 ? s_devices[index()] : index(); EthashAux::FullType dag; while (true) @@ -202,19 +202,18 @@ bool EthashCUDAMiner::configureGPU( unsigned _blockSize, unsigned _gridSize, unsigned _numStreams, - unsigned _deviceId, unsigned _extraGPUMemory, bool _highcpu, uint64_t _currentBlock ) { - s_deviceId = _deviceId; if (_blockSize != 32 && _blockSize != 64 && _blockSize != 128) { cout << "Given localWorkSize of " << toString(_blockSize) << "is invalid. Must be either 32,64 or 128" << endl; return false; } if (!ethash_cuda_miner::configureGPU( + s_devices, _blockSize, _gridSize, _numStreams, diff --git a/libethcore/EthashCUDAMiner.h b/libethcore/EthashCUDAMiner.h index b587aad86..565ea5be2 100644 --- a/libethcore/EthashCUDAMiner.h +++ b/libethcore/EthashCUDAMiner.h @@ -51,7 +51,6 @@ namespace eth unsigned _blockSize, unsigned _gridSize, unsigned _numStreams, - unsigned _deviceId, unsigned _extraGPUMemory, bool _highcpu, uint64_t _currentBlock From 3fbe74bcc308cd2116ceffde7d7362627e292392 Mon Sep 17 00:00:00 2001 From: Jan Willem Penterman Date: Wed, 2 Sep 2015 11:43:24 +0200 Subject: [PATCH 2/3] missing makelist --- libethash-cuda/CMakeLists.txt | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 libethash-cuda/CMakeLists.txt diff --git a/libethash-cuda/CMakeLists.txt b/libethash-cuda/CMakeLists.txt new file mode 100644 index 000000000..7e11d97fe --- /dev/null +++ b/libethash-cuda/CMakeLists.txt @@ -0,0 +1,29 @@ +set(EXECUTABLE ethash-cuda) + +FIND_PACKAGE(CUDA REQUIRED) + +file(GLOB SRC_LIST "*.cpp" "*.cu") +file(GLOB HEADERS "*.h" "*.cuh") + +set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--std=c++11;--disable-warnings;--ptxas-options=-v;-use_fast_math;-lineinfo) + +LIST(APPEND CUDA_NVCC_FLAGS_RELEASE -O3) +LIST(APPEND CUDA_NVCC_FLAGS_DEBUG -G) + +if(COMPUTE) + LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${COMPUTE},code=sm_${COMPUTE}) +else(COMPUTE) + set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_20;-gencode arch=compute_30,code=sm_30;-gencode arch=compute_32,code=sm_32;-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52) +endif(COMPUTE) + + + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CUDA_INCLUDE_DIRS}) +include_directories(..) +CUDA_ADD_LIBRARY(${EXECUTABLE} STATIC ${SRC_LIST} ${HEADERS}) +TARGET_LINK_LIBRARIES(${EXECUTABLE} ${CUDA_LIBRARIES} ethash) + +install( TARGETS ${EXECUTABLE} RUNTIME DESTINATION bin ARCHIVE DESTINATION lib LIBRARY DESTINATION lib ) +install( FILES ${HEADERS} DESTINATION include/${EXECUTABLE} ) + From 59dc0628d652f06a5ae335b09b9a1c8ea12a9587 Mon Sep 17 00:00:00 2001 From: Jan Willem Penterman Date: Wed, 2 Sep 2015 13:11:06 +0200 Subject: [PATCH 3/3] reverted possible speedup. --- libethash-cuda/dagger_shuffled.cuh | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/libethash-cuda/dagger_shuffled.cuh b/libethash-cuda/dagger_shuffled.cuh index 0918ff4f4..db4edb045 100644 --- a/libethash-cuda/dagger_shuffled.cuh +++ b/libethash-cuda/dagger_shuffled.cuh @@ -76,7 +76,7 @@ __device__ uint64_t compute_hash_shuffle( uint32_t thread_mix = fnv_reduce(mix); // update mix accross threads - /* + shuffle[0].x = __shfl(thread_mix, start_lane + 0); shuffle[0].y = __shfl(thread_mix, start_lane + 1); shuffle[1].x = __shfl(thread_mix, start_lane + 2); @@ -85,30 +85,15 @@ __device__ uint64_t compute_hash_shuffle( shuffle[2].y = __shfl(thread_mix, start_lane + 5); shuffle[3].x = __shfl(thread_mix, start_lane + 6); shuffle[3].y = __shfl(thread_mix, start_lane + 7); - */ + if (i == thread_id) { //move mix into state: - state[8].x = __shfl(thread_mix, start_lane + 0); - state[8].y = __shfl(thread_mix, start_lane + 1); - state[9].x = __shfl(thread_mix, start_lane + 2); - state[9].y = __shfl(thread_mix, start_lane + 3); - state[10].x = __shfl(thread_mix, start_lane + 4); - state[10].y = __shfl(thread_mix, start_lane + 5); - state[11].x = __shfl(thread_mix, start_lane + 6); - state[11].y = __shfl(thread_mix, start_lane + 7); + state[8] = shuffle[0]; + state[9] = shuffle[1]; + state[10] = shuffle[2]; + state[11] = shuffle[3]; } - /* - else { - shuffle[0].x = __shfl(thread_mix, start_lane + 0); - shuffle[0].y = __shfl(thread_mix, start_lane + 1); - shuffle[1].x = __shfl(thread_mix, start_lane + 2); - shuffle[1].y = __shfl(thread_mix, start_lane + 3); - shuffle[2].x = __shfl(thread_mix, start_lane + 4); - shuffle[2].y = __shfl(thread_mix, start_lane + 5); - shuffle[3].x = __shfl(thread_mix, start_lane + 6); - shuffle[3].y = __shfl(thread_mix, start_lane + 7); - } - */ + } // keccak_256(keccak_512(header..nonce) .. mix);