From 56bb6941ace2fd01d3a70a56b4773537337ba3e7 Mon Sep 17 00:00:00 2001 From: Jan Willem Penterman Date: Wed, 26 Aug 2015 16:22:56 +0200 Subject: [PATCH] upstream merge also likely fixes a bug in device selection many cmd line params changed --- ethminer/CMakeLists.txt | 9 +- ethminer/MinerAux.h | 110 +++++++++++++++------ libethash-cu/ethash_cu_miner.cpp | 158 ++++++++++++++++++++++--------- libethash-cu/ethash_cu_miner.h | 47 +++++---- libethcore/CMakeLists.txt | 2 +- libethcore/EthashCUDAMiner.cpp | 33 ++++--- libethcore/EthashCUDAMiner.h | 16 ++-- 7 files changed, 258 insertions(+), 117 deletions(-) diff --git a/ethminer/CMakeLists.txt b/ethminer/CMakeLists.txt index cda6aa8b8..a6cecdb76 100644 --- a/ethminer/CMakeLists.txt +++ b/ethminer/CMakeLists.txt @@ -5,11 +5,16 @@ aux_source_directory(. SRC_LIST) include_directories(BEFORE ..) include_directories(${Boost_INCLUDE_DIRS}) + if (JSONRPC) -include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS}) -include_directories(${JSON_RPC_CPP_INCLUDE_DIRS}) + include_directories(BEFORE ${JSONCPP_INCLUDE_DIRS}) + include_directories(${JSON_RPC_CPP_INCLUDE_DIRS}) endif() +if (ETHASHCU) + include_directories(${CUDA_INCLUDE_DIRS}) +endif () + set(EXECUTABLE ethminer) file(GLOB HEADERS "*.h") diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index 023d7a6f4..8589f52d4 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -145,7 +145,7 @@ public: BOOST_THROW_EXCEPTION(BadArgument()); } #if ETH_ETHASHCL || ETH_ETHASHCU || !ETH_TRUE - else if (arg == "--gpu-global-work" && i + 1 < argc) + else if ((arg == "--cl-global-work" || arg == "--cuda-grid-size") && i + 1 < argc) try { m_globalWorkSizeMultiplier = stol(argv[++i]); } @@ -154,7 +154,7 @@ public: cerr << "Bad " << arg << " option: " << argv[i] << endl; BOOST_THROW_EXCEPTION(BadArgument()); } - else if (arg == "--gpu-local-work" && i + 1 < argc) + else if ((arg == "--cl-local-work" || arg == "--cuda-block-size") && i + 1 < argc) try { m_localWorkSize = stol(argv[++i]); } @@ -163,6 +163,12 @@ public: cerr << "Bad " << arg << " option: " << argv[i] << endl; BOOST_THROW_EXCEPTION(BadArgument()); } + else if (arg == "--list-devices") + m_shouldListDevices = true; + else if ((arg == "--cl-extragpu-mem" || arg == "--cuda-extragpu-mem") && i + 1 < argc) + m_extraGPUMemory = 1000000 * stol(argv[++i]); +#endif +#if ETH_ETHASHCL || !ETH_TRUE else if (arg == "--cl-ms-per-batch" && i + 1 < argc) try { m_msPerBatch = stol(argv[++i]); @@ -172,13 +178,29 @@ public: cerr << "Bad " << arg << " option: " << argv[i] << endl; BOOST_THROW_EXCEPTION(BadArgument()); } -#endif - else if (arg == "--list-devices") - m_shouldListDevices = true; else if (arg == "--allow-opencl-cpu") m_clAllowCPU = true; - else if (arg == "--cl-extragpu-mem" && i + 1 < argc) - m_extraGPUMemory = 1000000 * stol(argv[++i]); +#endif +#if ETH_ETHASHCU || !ETH_TRUE + else if (arg == "--cuda-devices") { + while (m_cudaDeviceCount < 16 && i + 1 < argc) + { + try { + m_cudaDevices[m_cudaDeviceCount++] = stol(argv[++i]); + } + catch (...) + { + break; + } + } + } + else if (arg == "--cuda-turbo") { + m_cudaHighCPULoad = true; + } + else if (arg == "--cuda-streams" && i + 1 < argc) { + m_numStreams = stol(argv[++i]); + } +#endif else if (arg == "--phone-home" && i + 1 < argc) { string m = argv[++i]; @@ -295,23 +317,6 @@ public: BOOST_THROW_EXCEPTION(BadArgument()); } } -#if ETH_ETHASHCU || !ETH_TRUE - else if (arg == "--cuda-devices") { - while (m_cudaDeviceCount < 16 && i + 1 < argc) - { - try { - m_cudaDevices[m_cudaDeviceCount++] = stol(argv[++i]); - } - catch (...) - { - break; - } - } - } - else if (arg == "--cuda-high-cpu") { - m_cudaHighCPULoad = true; - } -#endif else return false; return true; @@ -322,7 +327,12 @@ public: if (m_shouldListDevices) { #if ETH_ETHASHCL || !ETH_TRUE - EthashGPUMiner::listDevices(); + if (m_minerType == MinerType::GPU) + EthashGPUMiner::listDevices(); +#endif +#if ETH_ETHASHCU || !ETH_TRUE + if (m_minerType == MinerType::CUDA) + EthashCUDAMiner::listDevices(); #endif exit(0); } @@ -347,6 +357,31 @@ public: #else cerr << "Selected GPU mining without having compiled with -DETHASHCL=1" << endl; exit(1); +#endif + } + else if (m_minerType == MinerType::CUDA) + { +#if ETH_ETHASHCU || !ETH_TRUE + if (!EthashCUDAMiner::configureGPU( + m_localWorkSize, + m_globalWorkSizeMultiplier, + m_numStreams, + m_openclDevice, + m_extraGPUMemory, + m_cudaHighCPULoad, + m_currentBlock + )) + exit(1); + + if (m_cudaDeviceCount != 0) { + EthashCUDAMiner::setDevices(m_cudaDevices, m_cudaDeviceCount); + m_miningThreads = m_cudaDeviceCount; + } + EthashCUDAMiner::setNumInstances(m_miningThreads); + +#else + cerr << "Selected CUDA mining without having compiled with -DETHASHCU=1 or -DBUNDLE=cudaminer" << endl; + exit(1); #endif } if (mode == OperationMode::DAGInit) @@ -382,17 +417,26 @@ public: << "Mining configuration:" << endl << " -C,--cpu When mining, use the CPU." << endl << " -G,--opencl When mining use the GPU via OpenCL." << endl + << " -U,--cuda When mining use the GPU via CUDA." << endl << " --opencl-platform When mining using -G/--opencl use OpenCL platform n (default: 0)." << endl << " --opencl-device When mining using -G/--opencl use OpenCL device n (default: 0)." << endl << " -t, --mining-threads Limit number of CPU/GPU miners to n (default: use everything available on selected platform)" << endl << " --allow-opencl-cpu Allows CPU to be considered as an OpenCL device if the OpenCL platform supports it." << endl - << " --list-devices List the detected OpenCL devices and exit." << endl + << " --list-devices List the detected OpenCL/CUDA devices and exit." << endl << " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl #if ETH_ETHASHCL || !ETH_TRUE << " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl << " --cl-local-work Set the OpenCL local work size. Default is " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl << " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier) << " * " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl << " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(ethash_cl_miner::c_defaultMSPerBatch) << ". If 0 is given then no autoadjustment of global work size will happen" << endl +#endif +#if ETH_ETHASHCU || !ETH_TRUE + << " --cuda-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl + << " --cuda-block-size Set the CUDA block work size. Default is " << toString(ethash_cu_miner::c_defaultBlockSize) << endl + << " --cuda-grid-size Set the CUDA grid size. Default is " << toString(ethash_cu_miner::c_defaultGridSize) << endl + << " --cuda-streams Set the number of CUDA streams. Default is " << toString(ethash_cu_miner::c_defaultNumStreams) << endl + << " --cuda-turbo Get a bit of extra hashrate at the cost of high CPU load... Default is false" << endl + << " --cuda-devices <0 1 ..n> Select which GPU's to mine on. Default is to use all" << endl #endif ; } @@ -427,11 +471,14 @@ private: sealers["cpu"] = GenericFarm::SealerDescriptor{&EthashCPUMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashCPUMiner(ci); }}; #if ETH_ETHASHCL sealers["opencl"] = GenericFarm::SealerDescriptor{&EthashGPUMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashGPUMiner(ci); }}; +#endif +#if ETH_ETHASHCU + sealers["cuda"] = GenericFarm::SealerDescriptor{ &EthashGPUMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashCUDAMiner(ci); } }; #endif f.setSealers(sealers); f.onSolutionFound([&](EthashProofOfWork::Solution) { return false; }); - string platformInfo = _m == MinerType::CPU ? "CPU" : "GPU";//EthashProofOfWork::CPUMiner::platformInfo() : _m == MinerType::GPU ? EthashProofOfWork::GPUMiner::platformInfo() : ""; + string platformInfo = _m == MinerType::CPU ? "CPU" : (_m == MinerType::GPU ? "GPU" : "CUDA"); cout << "Benchmarking on platform: " << platformInfo << endl; cout << "Preparing DAG..." << endl; @@ -443,6 +490,8 @@ private: f.start("cpu"); else if (_m == MinerType::GPU) f.start("opencl"); + else if (_m == MinerType::CUDA) + f.start("cuda"); map results; uint64_t mean = 0; @@ -614,15 +663,18 @@ private: bool m_shouldListDevices = false; bool m_clAllowCPU = false; #if ETH_ETHASHCL || !ETH_TRUE +#if !ETH_ETHASHCU || !ETH_TRUE unsigned m_globalWorkSizeMultiplier = ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier; unsigned m_localWorkSize = ethash_cl_miner::c_defaultLocalWorkSize; +#endif unsigned m_msPerBatch = ethash_cl_miner::c_defaultMSPerBatch; #endif #if ETH_ETHASHCU || !ETH_TRUE - unsigned m_globalWorkSizeMultiplier = ethash_cu_miner::c_defaultGlobalWorkSizeMultiplier; - unsigned m_localWorkSize = ethash_cu_miner::c_defaultLocalWorkSize; + unsigned m_globalWorkSizeMultiplier = ethash_cu_miner::c_defaultGridSize; + unsigned m_localWorkSize = ethash_cu_miner::c_defaultBlockSize; unsigned m_cudaDeviceCount = 0; unsigned m_cudaDevices[16]; + unsigned m_numStreams = ethash_cu_miner::c_defaultNumStreams; bool m_cudaHighCPULoad = false; #endif uint64_t m_currentBlock = 0; diff --git a/libethash-cu/ethash_cu_miner.cpp b/libethash-cu/ethash_cu_miner.cpp index 22cad1fd4..ceb594c99 100644 --- a/libethash-cu/ethash_cu_miner.cpp +++ b/libethash-cu/ethash_cu_miner.cpp @@ -28,11 +28,14 @@ #include #include #include +#include +#include #include #include #include #include #include +#include #include #include "ethash_cu_miner.h" #include "ethash_cu_miner_kernel_globals.h" @@ -51,8 +54,36 @@ using namespace std; -unsigned const ethash_cu_miner::c_defaultLocalWorkSize = 128; -unsigned const ethash_cu_miner::c_defaultGlobalWorkSizeMultiplier = 2048; // * CL_DEFAULT_LOCAL_WORK_SIZE +unsigned const ethash_cu_miner::c_defaultBlockSize = 128; +unsigned const ethash_cu_miner::c_defaultGridSize = 2048; // * CL_DEFAULT_LOCAL_WORK_SIZE +unsigned const ethash_cu_miner::c_defaultNumStreams = 2; + +#if defined(_WIN32) +extern "C" __declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString); +static std::atomic_flag s_logSpin = ATOMIC_FLAG_INIT; +#define ETHCU_LOG(_contents) \ + do \ + { \ + std::stringstream ss; \ + ss << _contents; \ + while (s_logSpin.test_and_set(std::memory_order_acquire)) {} \ + OutputDebugStringA(ss.str().c_str()); \ + cerr << ss.str() << endl << flush; \ + s_logSpin.clear(std::memory_order_release); \ + } while (false) +#else +#define ETHCL_LOG(_contents) cout << "[OPENCL]:" << _contents << endl +#endif + +#define CUDA_SAFE_CALL(call) \ +do { \ + cudaError_t err = call; \ + if (cudaSuccess != err) { \ + fprintf(stderr, "Cuda error in func '%s' at line %i : %s.\n", \ + __FUNCTION__, __LINE__, cudaGetErrorString(err) ); \ + exit(EXIT_FAILURE); \ + } \ +} while (0) ethash_cu_miner::search_hook::~search_hook() {} @@ -70,28 +101,19 @@ std::string ethash_cu_miner::platform_info(unsigned _deviceId) if (device_count == 0) return std::string(); - if (cudaRuntimeGetVersion(&runtime_version) == cudaErrorInvalidValue) - { - cout << cudaGetErrorString(cudaErrorInvalidValue) << endl; - return std::string(); - } + CUDA_SAFE_CALL(cudaRuntimeGetVersion(&runtime_version)); // use selected default device int device_num = std::min((int)_deviceId, device_count - 1); - cudaDeviceProp device_props; - if (cudaGetDeviceProperties(&device_props, device_num) == cudaErrorInvalidDevice) - { - cout << cudaGetErrorString(cudaErrorInvalidDevice) << endl; - return std::string(); - } + + CUDA_SAFE_CALL(cudaGetDeviceProperties(&device_props, device_num)); char platform[5]; int version_major = runtime_version / 1000; int version_minor = (runtime_version - (version_major * 1000)) / 10; sprintf(platform, "%d.%d", version_major, version_minor); - - + char compute[5]; sprintf(compute, "%d.%d", device_props.major, device_props.minor); @@ -99,28 +121,85 @@ std::string ethash_cu_miner::platform_info(unsigned _deviceId) } -int ethash_cu_miner::getNumDevices() +unsigned ethash_cu_miner::getNumDevices() { int device_count; + CUDA_SAFE_CALL(cudaGetDeviceCount(&device_count)); + return device_count; +} - if (cudaGetDeviceCount(&device_count) == cudaErrorNoDevice) +bool ethash_cu_miner::configureGPU( + unsigned _blockSize, + unsigned _gridSize, + unsigned _numStreams, + unsigned _extraGPUMemory, + bool _highcpu, + uint64_t _currentBlock + ) +{ + s_blockSize = _blockSize; + s_gridSize = _gridSize; + s_extraRequiredGPUMem = _extraGPUMemory; + s_numStreams = _numStreams; + s_highCPU = _highcpu; + + // by default let's only consider the DAG of the first epoch + uint64_t dagSize = ethash_get_datasize(_currentBlock); + uint64_t requiredSize = dagSize + _extraGPUMemory; + for (unsigned int i = 0; i < getNumDevices(); i++) { - cout << cudaGetErrorString(cudaErrorNoDevice) << endl; - return 0; + cudaDeviceProp props; + CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i)); + if (props.totalGlobalMem >= requiredSize) + { + ETHCU_LOG( + "Found suitable CUDA device [" << props.name + << "] with " << props.totalGlobalMem << " bytes of GPU memory" + ); + return true; + } + + ETHCU_LOG( + "CUDA device " << props.name + << " has insufficient GPU memory." << props.totalGlobalMem << + " bytes of memory found < " << requiredSize << " bytes of memory required" + ); } - return device_count; + return false; +} + +unsigned ethash_cu_miner::s_extraRequiredGPUMem; +unsigned ethash_cu_miner::s_blockSize = ethash_cu_miner::c_defaultBlockSize; +unsigned ethash_cu_miner::s_gridSize = ethash_cu_miner::c_defaultGridSize; +unsigned ethash_cu_miner::s_numStreams = ethash_cu_miner::c_defaultNumStreams; +bool ethash_cu_miner::s_highCPU = false; + +void ethash_cu_miner::listDevices() +{ + string outString = "\nListing CUDA devices.\nFORMAT: [deviceID] deviceName\n"; + unsigned int i = 0; + for (unsigned int i = 0; i < getNumDevices(); i++) + { + cudaDeviceProp props; + CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i)); + + outString += "[" + to_string(i) + "] " + props.name + "\n"; + outString += "\tCompute version: " + to_string(props.major) + "." + to_string(props.minor) + "\n"; + outString += "\tcudaDeviceProp::totalGlobalMem: " + to_string(props.totalGlobalMem) + "\n"; + } + ETHCU_LOG(outString); } void ethash_cu_miner::finish() { - for (unsigned i = 0; i != m_num_buffers; i++) { + for (unsigned i = 0; i != s_numStreams; i++) { cudaStreamDestroy(m_streams[i]); m_streams[i] = 0; } cudaDeviceReset(); } -bool ethash_cu_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned num_buffers, unsigned search_batch_size, unsigned workgroup_size, unsigned _deviceId, bool highcpu) +bool ethash_cu_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned _deviceId) { int device_count = getNumDevices(); @@ -147,19 +226,12 @@ bool ethash_cu_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned num_ return false; } cudaDeviceReset(); - cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte); - - m_num_buffers = num_buffers; - m_search_batch_size = search_batch_size; - - m_hash_buf = new void *[m_num_buffers]; - m_search_buf = new uint32_t *[m_num_buffers]; - m_streams = new cudaStream_t[m_num_buffers]; - - // use requested workgroup size, but we require multiple of 8 - m_workgroup_size = ((workgroup_size + 7) / 8) * 8; + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); - m_highcpu = highcpu; + m_hash_buf = new void *[s_numStreams]; + m_search_buf = new uint32_t *[s_numStreams]; + m_streams = new cudaStream_t[s_numStreams]; // patch source code cudaError result; @@ -179,7 +251,7 @@ bool ethash_cu_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned num_ result = cudaMemcpy(m_dag_ptr, _dag, _dagSize, cudaMemcpyHostToDevice); // create mining buffers - for (unsigned i = 0; i != m_num_buffers; ++i) + for (unsigned i = 0; i != s_numStreams; ++i) { result = cudaMallocHost(&m_hash_buf[i], 32 * c_hash_batch_size); result = cudaMallocHost(&m_search_buf[i], (c_max_search_results + 1) * sizeof(uint32_t)); @@ -219,7 +291,7 @@ void ethash_cu_miner::search(uint8_t const* header, uint64_t target, search_hook // update header constant buffer cudaMemcpy(m_header, header, 32, cudaMemcpyHostToDevice); - for (unsigned i = 0; i != m_num_buffers; ++i) + for (unsigned i = 0; i != s_numStreams; ++i) { cudaMemcpy(m_search_buf[i], &c_zero, 4, cudaMemcpyHostToDevice); } @@ -232,21 +304,21 @@ void ethash_cu_miner::search(uint8_t const* header, uint64_t target, search_hook unsigned buf = 0; std::random_device engine; uint64_t start_nonce = std::uniform_int_distribution()(engine); - for (;; start_nonce += m_search_batch_size) + for (;; start_nonce += s_gridSize) { - run_ethash_search(m_search_batch_size / m_workgroup_size, m_workgroup_size, m_streams[buf], m_search_buf[buf], m_header, m_dag_ptr, start_nonce, target); + run_ethash_search(s_gridSize, s_blockSize, m_streams[buf], m_search_buf[buf], m_header, m_dag_ptr, start_nonce, target); pending.push({ start_nonce, buf }); - buf = (buf + 1) % m_num_buffers; + buf = (buf + 1) % s_numStreams; // read results - if (pending.size() == m_num_buffers) + if (pending.size() == s_numStreams) { pending_batch const& batch = pending.front(); uint32_t results[1 + c_max_search_results]; - if (!m_highcpu) + if (!s_highCPU) waitStream(m_streams[buf]); // 28ms cudaMemcpyAsync(results, m_search_buf[batch.buf], (1 + c_max_search_results) * sizeof(uint32_t), cudaMemcpyHostToHost, m_streams[batch.buf]); @@ -261,11 +333,11 @@ void ethash_cu_miner::search(uint8_t const* header, uint64_t target, search_hook // cout << endl; bool exit = num_found && hook.found(nonces, num_found); - exit |= hook.searched(batch.start_nonce, m_search_batch_size); // always report searched before exit + exit |= hook.searched(batch.start_nonce, s_gridSize * s_blockSize); // always report searched before exit if (exit) break; - start_nonce += m_search_batch_size; + start_nonce += s_gridSize * s_blockSize; // reset search buffer if we're still going if (num_found) cudaMemcpyAsync(m_search_buf[batch.buf], &c_zero, 4, cudaMemcpyHostToDevice, m_streams[batch.buf]); diff --git a/libethash-cu/ethash_cu_miner.h b/libethash-cu/ethash_cu_miner.h index 54d33549c..3594a6a9e 100644 --- a/libethash-cu/ethash_cu_miner.h +++ b/libethash-cu/ethash_cu_miner.h @@ -22,37 +22,35 @@ public: public: ethash_cu_miner(); - bool init(uint8_t const* _dag, uint64_t _dagSize, unsigned num_buffers = 2, unsigned search_batch_size = 262144, unsigned workgroup_size = 64, unsigned _deviceId = 0, bool highcpu = false); static std::string platform_info(unsigned _deviceId = 0); - static int getNumDevices(); + static unsigned getNumDevices(); static void listDevices(); static bool configureGPU( - unsigned _platformId, - unsigned _localWorkSize, - unsigned _globalWorkSize, - unsigned _msPerBatch, - bool _allowCPU, + unsigned _blockSize, + unsigned _gridSize, + unsigned _numStreams, unsigned _extraGPUMemory, + bool _highcpu, uint64_t _currentBlock ); - + bool init( + uint8_t const* _dag, + uint64_t _dagSize, + unsigned _deviceId = 0 + ); void finish(); - void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count); void search(uint8_t const* header, uint64_t target, search_hook& hook); /* -- default values -- */ - /// Default value of the local work size. Also known as workgroup size. - static unsigned const c_defaultLocalWorkSize; - /// Default value of the global work size as a multiplier of the local work size - static unsigned const c_defaultGlobalWorkSizeMultiplier; + /// Default value of the block size. Also known as workgroup size. + static unsigned const c_defaultBlockSize; + /// Default value of the grid size + static unsigned const c_defaultGridSize; + // default number of CUDA streams + static unsigned const c_defaultNumStreams; private: enum { c_max_search_results = 63, c_hash_batch_size = 1024 }; - - bool m_highcpu; - unsigned m_num_buffers; - unsigned m_search_batch_size; - unsigned m_workgroup_size; hash128_t * m_dag_ptr; hash32_t * m_header; @@ -61,5 +59,16 @@ private: uint32_t ** m_search_buf; cudaStream_t * m_streams; - + /// The local work size for the search + static unsigned s_blockSize; + /// The initial global work size for the searches + static unsigned s_gridSize; + /// The number of CUDA streams + static unsigned s_numStreams; + /// Whether or not to let the CPU wait + static bool s_highCPU; + + /// GPU memory required for other things, like window rendering e.t.c. + /// User can set it via the --cl-extragpu-mem argument. + static unsigned s_extraRequiredGPUMem; }; \ No newline at end of file diff --git a/libethcore/CMakeLists.txt b/libethcore/CMakeLists.txt index e5b512fe1..6c93a80c4 100644 --- a/libethcore/CMakeLists.txt +++ b/libethcore/CMakeLists.txt @@ -21,7 +21,7 @@ file(GLOB HEADERS "*.h") add_library(${EXECUTABLE} ${SRC_LIST} ${HEADERS}) target_link_libraries(${EXECUTABLE} ethash) -target_link_libraries(${EXECUTABLE} evmcore) + if (ETHASHCL) target_link_libraries(${EXECUTABLE} ethash-cl) endif () diff --git a/libethcore/EthashCUDAMiner.cpp b/libethcore/EthashCUDAMiner.cpp index d0e3c6ad9..2282e9a77 100644 --- a/libethcore/EthashCUDAMiner.cpp +++ b/libethcore/EthashCUDAMiner.cpp @@ -103,6 +103,8 @@ namespace dev unsigned EthashCUDAMiner::s_platformId = 0; unsigned EthashCUDAMiner::s_deviceId = 0; unsigned EthashCUDAMiner::s_numInstances = 0; +int EthashCUDAMiner::s_devices[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; + EthashCUDAMiner::EthashCUDAMiner(ConstructionInfo const& _ci) : GenericMiner(_ci), @@ -147,7 +149,7 @@ void EthashCUDAMiner::workLoop() delete m_miner; m_miner = new ethash_cu_miner; - unsigned device = instances() > 1 ? index() : s_deviceId; + unsigned device = instances() > 1 ? (s_devices[index()] > -1 ? s_devices[index()] : index()) : s_deviceId; EthashAux::FullType dag; while (true) @@ -164,7 +166,7 @@ void EthashCUDAMiner::workLoop() this_thread::sleep_for(chrono::milliseconds(500)); } bytesConstRef dagData = dag->data(); - m_miner->init(dagData.data(), dagData.size(), s_platformId, device); + m_miner->init(dagData.data(), dagData.size(), device); } uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192); @@ -200,39 +202,36 @@ void EthashCUDAMiner::listDevices() } bool EthashCUDAMiner::configureGPU( - unsigned _localWorkSize, - unsigned _globalWorkSizeMultiplier, - unsigned _msPerBatch, - unsigned _platformId, + unsigned _blockSize, + unsigned _gridSize, + unsigned _numStreams, unsigned _deviceId, - bool _allowCPU, unsigned _extraGPUMemory, + bool _highcpu, uint64_t _currentBlock ) { - s_platformId = _platformId; s_deviceId = _deviceId; - if (_localWorkSize != 32 && _localWorkSize != 64 && _localWorkSize != 128 && _localWorkSize != 256) + if (_blockSize != 32 && _blockSize != 64 && _blockSize != 128) { - cout << "Given localWorkSize of " << toString(_localWorkSize) << "is invalid. Must be either 32,64,128 or 256" << endl; + cout << "Given localWorkSize of " << toString(_blockSize) << "is invalid. Must be either 32,64 or 128" << endl; return false; } if (!ethash_cu_miner::configureGPU( - _platformId, - _localWorkSize, - _globalWorkSizeMultiplier * _localWorkSize, - _msPerBatch, - _allowCPU, + _blockSize, + _gridSize, + _numStreams, _extraGPUMemory, + _highcpu, _currentBlock) ) { - cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl; + cout << "No CUDA device with sufficient memory was found. Can't CUDA mine. Remove the -U argument" << endl; return false; } return true; } -#endif +#endif \ No newline at end of file diff --git a/libethcore/EthashCUDAMiner.h b/libethcore/EthashCUDAMiner.h index 989e3c2a5..6fed08716 100644 --- a/libethcore/EthashCUDAMiner.h +++ b/libethcore/EthashCUDAMiner.h @@ -46,17 +46,20 @@ namespace dev static unsigned getNumDevices(); static void listDevices(); static bool configureGPU( - unsigned _localWorkSize, - unsigned _globalWorkSizeMultiplier, - unsigned _msPerBatch, - unsigned _platformId, + unsigned _blockSize, + unsigned _gridSize, + unsigned _numStreams, unsigned _deviceId, - bool _allowCPU, unsigned _extraGPUMemory, + bool _highcpu, uint64_t _currentBlock ); static void setNumInstances(unsigned _instances) { s_numInstances = std::min(_instances, getNumDevices()); } - + static void setDevices(unsigned * _devices, unsigned _selectedDeviceCount) { + for (unsigned i = 0; i < _selectedDeviceCount; i++) { + s_devices[i] = _devices[i]; + } + } protected: void kickOff() override; void pause() override; @@ -74,6 +77,7 @@ namespace dev static unsigned s_platformId; static unsigned s_deviceId; static unsigned s_numInstances; + static int s_devices[16]; }; }