From f03c12ca240efa0f371f3a26a0cbf3e971b451ec Mon Sep 17 00:00:00 2001 From: Lefteris Karapetsas Date: Fri, 26 Jun 2015 15:06:01 +0200 Subject: [PATCH 1/8] Adjust CL miner work batch size properly We are now propery adjusting the batch size of the OpenCL miner properly depending on the execution time of the last search. --- libethash-cl/ethash_cl_miner.cpp | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp index 3a72810fa..59a4b4f90 100644 --- a/libethash-cl/ethash_cl_miner.cpp +++ b/libethash-cl/ethash_cl_miner.cpp @@ -456,8 +456,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook uint64_t start_nonce = uniform_int_distribution()(engine); for (;; start_nonce += m_batchSize) { -// chrono::high_resolution_clock::time_point t = chrono::high_resolution_clock::now(); - + chrono::high_resolution_clock::time_point t = chrono::high_resolution_clock::now(); // supply output buffer to kernel m_searchKernel.setArg(0, m_searchBuffer[buf]); if (m_dagChunksCount == 1) @@ -497,19 +496,23 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook pending.pop(); } -/* chrono::high_resolution_clock::duration d = chrono::high_resolution_clock::now() - t; - if (d > chrono::milliseconds(_msPerBatch * 10 / 9)) + // adjust batch size depending on last search time + auto d = chrono::duration_cast(chrono::high_resolution_clock::now() - t); + if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual search so adjust nothing { - cerr << "Batch of" << m_batchSize << "took" << chrono::duration_cast(d).count() << "ms, >>" << _msPerBatch << "ms."; - m_batchSize = max(128, m_batchSize * 9 / 10); - cerr << "New batch size" << m_batchSize; + if (d > chrono::milliseconds(_msPerBatch * 10 / 9)) + { + // cerr << "Batch of " << m_batchSize << " took " << chrono::duration_cast(d).count() << " ms, >> " << _msPerBatch << " ms." << endl; + m_batchSize = max(128, m_batchSize * 9 / 10); + // cerr << "New batch size" << m_batchSize << endl; + } + else if (d < chrono::milliseconds(_msPerBatch * 9 / 10)) + { + // cerr << "Batch of " << m_batchSize << " took " << chrono::duration_cast(d).count() << " ms, << " << _msPerBatch << " ms." << endl; + m_batchSize = m_batchSize * 10 / 9; + // cerr << "New batch size" << m_batchSize << endl; + } } - else if (d < chrono::milliseconds(_msPerBatch * 9 / 10)) - { - cerr << "Batch of" << m_batchSize << "took" << chrono::duration_cast(d).count() << "ms, <<" << _msPerBatch << "ms."; - m_batchSize = m_batchSize * 10 / 9; - cerr << "New batch size" << m_batchSize; - }*/ } // not safe to return until this is ready From f51033dc75a766e0e2f71b58f1d690c02f98b3f0 Mon Sep 17 00:00:00 2001 From: Lefteris Karapetsas Date: Mon, 29 Jun 2015 13:05:11 +0200 Subject: [PATCH 2/8] CL Global and local work size adjustments - Giving names to the variables that properly reflect the API - Making sure that the limitations that are stated in clEnqueueNDRangeKernel() documentation are adhered to --- libethash-cl/ethash_cl_miner.cpp | 30 +++++++++++++++++++----------- libethash-cl/ethash_cl_miner.h | 3 ++- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp index 59a4b4f90..893c1be9c 100644 --- a/libethash-cl/ethash_cl_miner.cpp +++ b/libethash-cl/ethash_cl_miner.cpp @@ -301,6 +301,11 @@ bool ethash_cl_miner::init( // use requested workgroup size, but we require multiple of 8 m_workgroupSize = ((_workgroupSize + 7) / 8) * 8; + // make sure that global work size is evenly divisible by the local workgroup size + if (m_globalWorkSize % m_workgroupSize != 0) + m_globalWorkSize = ((m_globalWorkSize / m_workgroupSize) + 1) * m_workgroupSize; + // remember the device's address bits + m_deviceBits = device.getInfo(); // patch source code // note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled @@ -454,7 +459,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook unsigned buf = 0; random_device engine; uint64_t start_nonce = uniform_int_distribution()(engine); - for (;; start_nonce += m_batchSize) + for (;; start_nonce += m_globalWorkSize) { chrono::high_resolution_clock::time_point t = chrono::high_resolution_clock::now(); // supply output buffer to kernel @@ -465,7 +470,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook m_searchKernel.setArg(6, start_nonce); // execute it! - m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_batchSize, m_workgroupSize); + m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, m_workgroupSize); pending.push({ start_nonce, buf }); buf = (buf + 1) % c_bufferCount; @@ -485,7 +490,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook m_queue.enqueueUnmapMemObject(m_searchBuffer[batch.buf], results); bool exit = num_found && hook.found(nonces, num_found); - exit |= hook.searched(batch.start_nonce, m_batchSize); // always report searched before exit + exit |= hook.searched(batch.start_nonce, m_globalWorkSize); // always report searched before exit if (exit) break; @@ -496,21 +501,24 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook pending.pop(); } - // adjust batch size depending on last search time + // adjust global work size depending on last search time + // Global work size must be: + // - less than or equal to 2 ^ DEVICE_BITS - 1 + // - divisible by lobal work size (workgroup size) auto d = chrono::duration_cast(chrono::high_resolution_clock::now() - t); - if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual search so adjust nothing + if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished { if (d > chrono::milliseconds(_msPerBatch * 10 / 9)) { - // cerr << "Batch of " << m_batchSize << " took " << chrono::duration_cast(d).count() << " ms, >> " << _msPerBatch << " ms." << endl; - m_batchSize = max(128, m_batchSize * 9 / 10); - // cerr << "New batch size" << m_batchSize << endl; + // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, >> " << _msPerBatch << " ms." << endl; + m_globalWorkSize = max(128, m_globalWorkSize + m_workgroupSize); + // cerr << "New global work size" << m_globalWorkSize << endl; } else if (d < chrono::milliseconds(_msPerBatch * 9 / 10)) { - // cerr << "Batch of " << m_batchSize << " took " << chrono::duration_cast(d).count() << " ms, << " << _msPerBatch << " ms." << endl; - m_batchSize = m_batchSize * 10 / 9; - // cerr << "New batch size" << m_batchSize << endl; + // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, << " << _msPerBatch << " ms." << endl; + m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize); + // cerr << "New global work size" << m_globalWorkSize << endl; } } } diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h index 73bf7e94a..16d0b52de 100644 --- a/libethash-cl/ethash_cl_miner.h +++ b/libethash-cl/ethash_cl_miner.h @@ -77,8 +77,9 @@ private: cl::Buffer m_hashBuffer[c_bufferCount]; cl::Buffer m_searchBuffer[c_bufferCount]; unsigned m_workgroupSize; - unsigned m_batchSize = c_searchBatchSize; + unsigned m_globalWorkSize = c_searchBatchSize; bool m_openclOnePointOne; + unsigned m_deviceBits; /// Allow CPU to appear as an OpenCL device or not. Default is false static bool s_allowCPU; From 7501191b405e73685d21cb14ba11c8bd1ed43710 Mon Sep 17 00:00:00 2001 From: Lefteris Karapetsas Date: Mon, 29 Jun 2015 15:37:38 +0200 Subject: [PATCH 3/8] GlobalWork size should never be less than local size --- libethash-cl/ethash_cl_miner.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp index 893c1be9c..7132cc04d 100644 --- a/libethash-cl/ethash_cl_miner.cpp +++ b/libethash-cl/ethash_cl_miner.cpp @@ -461,7 +461,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook uint64_t start_nonce = uniform_int_distribution()(engine); for (;; start_nonce += m_globalWorkSize) { - chrono::high_resolution_clock::time_point t = chrono::high_resolution_clock::now(); + auto t = chrono::high_resolution_clock::now(); // supply output buffer to kernel m_searchKernel.setArg(0, m_searchBuffer[buf]); if (m_dagChunksCount == 1) @@ -518,6 +518,8 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook { // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, << " << _msPerBatch << " ms." << endl; m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize); + // Global work size should never be less than the workgroup size + m_globalWorkSize = max(m_workgroupSize, m_globalWorkSize); // cerr << "New global work size" << m_globalWorkSize << endl; } } From 15fc63d6a255de5f8c5a4e93edada90bf2ccccce Mon Sep 17 00:00:00 2001 From: Lefteris Karapetsas Date: Tue, 30 Jun 2015 13:49:12 +0200 Subject: [PATCH 4/8] New OpenCL arguments - Adding an argument to specify OpenCL global work size. - Adding an argument to specify milliseconds per global work size (msPerBatch). If this is 0 then no adjustment of the global work size happens. --- ethminer/MinerAux.h | 25 +++++++++++++++++ libethash-cl/ethash_cl_miner.cpp | 47 +++++++++++++++++++------------- libethash-cl/ethash_cl_miner.h | 13 +++++++-- libethcore/Ethash.cpp | 4 ++- libethcore/Ethash.h | 4 ++- 5 files changed, 70 insertions(+), 23 deletions(-) diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index ec6ee57e7..50ff93070 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #if ETH_JSONRPC || !ETH_TRUE #include @@ -128,6 +129,24 @@ public: cerr << "Bad " << arg << " option: " << argv[i] << endl; BOOST_THROW_EXCEPTION(BadArgument()); } + else if (arg == "--cl-global-work-size" && i + 1 < argc) + try { + m_globalWorkSize = stol(argv[++i]); + } + catch (...) + { + cerr << "Bad " << arg << " option: " << argv[i] << endl; + BOOST_THROW_EXCEPTION(BadArgument()); + } + else if (arg == "--cl-ms-per-batch" && i + 1 < argc) + try { + m_msPerBatch = stol(argv[++i]); + } + catch (...) + { + cerr << "Bad " << arg << " option: " << argv[i] << endl; + BOOST_THROW_EXCEPTION(BadArgument()); + } else if (arg == "--list-devices") m_shouldListDevices = true; else if (arg == "--allow-opencl-cpu") @@ -266,6 +285,8 @@ public: else if (m_minerType == MinerType::GPU) { if (!ProofOfWork::GPUMiner::configureGPU( + m_globalWorkSize, + m_msPerBatch, m_openclPlatform, m_openclDevice, m_clAllowCPU, @@ -318,6 +339,8 @@ public: << " --list-devices List the detected OpenCL devices and exit." << endl << " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl << " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl + << " --cl-global-work Set the OpenCL global work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE) << endl + << " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(CL_DEFAULT_MS_PER_BATCH) << ". If 0 is given then no autoadjustment of global work size will happen" << endl ; } @@ -506,6 +529,8 @@ private: unsigned m_miningThreads = UINT_MAX; bool m_shouldListDevices = false; bool m_clAllowCPU = false; + unsigned m_globalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE; + unsigned m_msPerBatch = CL_DEFAULT_MS_PER_BATCH; boost::optional m_currentBlock; // default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.) unsigned m_extraGPUMemory = 350000000; diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp index 7132cc04d..02c9609fa 100644 --- a/libethash-cl/ethash_cl_miner.cpp +++ b/libethash-cl/ethash_cl_miner.cpp @@ -140,11 +140,15 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId) bool ethash_cl_miner::configureGPU( unsigned _platformId, + unsigned _globalWorkSize, + unsigned _msPerBatch, bool _allowCPU, unsigned _extraGPUMemory, boost::optional _currentBlock ) { + s_initialGlobalWorkSize = _globalWorkSize; + s_msPerBatch = _msPerBatch; s_allowCPU = _allowCPU; s_extraRequiredGPUMem = _extraGPUMemory; // by default let's only consider the DAG of the first epoch @@ -175,6 +179,8 @@ bool ethash_cl_miner::configureGPU( bool ethash_cl_miner::s_allowCPU = false; unsigned ethash_cl_miner::s_extraRequiredGPUMem; +unsigned ethash_cl_miner::s_msPerBatch = CL_DEFAULT_MS_PER_BATCH; +unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE; bool ethash_cl_miner::searchForAllDevices(function _callback) { @@ -302,6 +308,7 @@ bool ethash_cl_miner::init( // use requested workgroup size, but we require multiple of 8 m_workgroupSize = ((_workgroupSize + 7) / 8) * 8; // make sure that global work size is evenly divisible by the local workgroup size + m_globalWorkSize = s_initialGlobalWorkSize; if (m_globalWorkSize % m_workgroupSize != 0) m_globalWorkSize = ((m_globalWorkSize / m_workgroupSize) + 1) * m_workgroupSize; // remember the device's address bits @@ -420,9 +427,8 @@ bool ethash_cl_miner::init( return true; } -void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook, unsigned _msPerBatch) +void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook) { - (void)_msPerBatch; try { struct pending_batch @@ -502,25 +508,28 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook } // adjust global work size depending on last search time - // Global work size must be: - // - less than or equal to 2 ^ DEVICE_BITS - 1 - // - divisible by lobal work size (workgroup size) - auto d = chrono::duration_cast(chrono::high_resolution_clock::now() - t); - if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished + if (s_msPerBatch) { - if (d > chrono::milliseconds(_msPerBatch * 10 / 9)) + // Global work size must be: + // - less than or equal to 2 ^ DEVICE_BITS - 1 + // - divisible by lobal work size (workgroup size) + auto d = chrono::duration_cast(chrono::high_resolution_clock::now() - t); + if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished { - // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, >> " << _msPerBatch << " ms." << endl; - m_globalWorkSize = max(128, m_globalWorkSize + m_workgroupSize); - // cerr << "New global work size" << m_globalWorkSize << endl; - } - else if (d < chrono::milliseconds(_msPerBatch * 9 / 10)) - { - // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, << " << _msPerBatch << " ms." << endl; - m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize); - // Global work size should never be less than the workgroup size - m_globalWorkSize = max(m_workgroupSize, m_globalWorkSize); - // cerr << "New global work size" << m_globalWorkSize << endl; + if (d > chrono::milliseconds(s_msPerBatch * 10 / 9)) + { + // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, >> " << _msPerBatch << " ms." << endl; + m_globalWorkSize = max(128, m_globalWorkSize + m_workgroupSize); + // cerr << "New global work size" << m_globalWorkSize << endl; + } + else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10)) + { + // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, << " << _msPerBatch << " ms." << endl; + m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize); + // Global work size should never be less than the workgroup size + m_globalWorkSize = max(m_workgroupSize, m_globalWorkSize); + // cerr << "New global work size" << m_globalWorkSize << endl; + } } } } diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h index 16d0b52de..e78108288 100644 --- a/libethash-cl/ethash_cl_miner.h +++ b/libethash-cl/ethash_cl_miner.h @@ -17,6 +17,9 @@ #include #include +#define CL_DEFAULT_GLOBAL_WORK_SIZE 1024 * 16 +#define CL_DEFAULT_MS_PER_BATCH 100 + class ethash_cl_miner { private: @@ -45,6 +48,8 @@ public: static void listDevices(); static bool configureGPU( unsigned _platformId, + unsigned _globalWorkSize, + unsigned _msPerBatch, bool _allowCPU, unsigned _extraGPUMemory, boost::optional _currentBlock @@ -58,7 +63,7 @@ public: unsigned _deviceId = 0 ); void finish(); - void search(uint8_t const* _header, uint64_t _target, search_hook& _hook, unsigned _msPerBatch = 100); + void search(uint8_t const* _header, uint64_t _target, search_hook& _hook); void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count); void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook); @@ -77,10 +82,14 @@ private: cl::Buffer m_hashBuffer[c_bufferCount]; cl::Buffer m_searchBuffer[c_bufferCount]; unsigned m_workgroupSize; - unsigned m_globalWorkSize = c_searchBatchSize; + unsigned m_globalWorkSize; bool m_openclOnePointOne; unsigned m_deviceBits; + /// The initial global work size for the searches + static unsigned s_initialGlobalWorkSize; + /// The target milliseconds per batch for the search. If 0, then no adjustment will happen + static unsigned s_msPerBatch; /// Allow CPU to appear as an OpenCL device or not. Default is false static bool s_allowCPU; /// GPU memory required for other things, like window rendering e.t.c. diff --git a/libethcore/Ethash.cpp b/libethcore/Ethash.cpp index 46d19d164..296b4a605 100644 --- a/libethcore/Ethash.cpp +++ b/libethcore/Ethash.cpp @@ -409,6 +409,8 @@ void Ethash::GPUMiner::listDevices() } bool Ethash::GPUMiner::configureGPU( + unsigned _globalWorkSize, + unsigned _msPerBatch, unsigned _platformId, unsigned _deviceId, bool _allowCPU, @@ -418,7 +420,7 @@ bool Ethash::GPUMiner::configureGPU( { s_platformId = _platformId; s_deviceId = _deviceId; - return ethash_cl_miner::configureGPU(_platformId, _allowCPU, _extraGPUMemory, _currentBlock); + return ethash_cl_miner::configureGPU(_globalWorkSize, _msPerBatch, _allowCPU, _extraGPUMemory, _currentBlock); } #endif diff --git a/libethcore/Ethash.h b/libethcore/Ethash.h index 11e012df5..4106229ed 100644 --- a/libethcore/Ethash.h +++ b/libethcore/Ethash.h @@ -88,7 +88,7 @@ public: static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); } static std::string platformInfo(); static void listDevices() {} - static bool configureGPU(unsigned, unsigned, bool, unsigned, boost::optional) { return false; } + static bool configureGPU(unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional) { return false; } static void setNumInstances(unsigned _instances) { s_numInstances = std::min(_instances, std::thread::hardware_concurrency()); } protected: void kickOff() override @@ -118,6 +118,8 @@ public: static unsigned getNumDevices(); static void listDevices(); static bool configureGPU( + unsigned _globalWorkSize, + unsigned _msPerBatch, unsigned _platformId, unsigned _deviceId, bool _allowCPU, From db54ff3b4a9ca7ba7c0d1697d4741c4e3c0511f8 Mon Sep 17 00:00:00 2001 From: Lefteris Karapetsas Date: Tue, 30 Jun 2015 14:56:30 +0200 Subject: [PATCH 5/8] CL Argument for local work size - Now the user can also set the local work size (workgroup size) - In addition the global work size is specified in the command line only as a multiplier of the local work size. --- ethminer/MinerAux.h | 25 +++++++++++++++++-------- libethash-cl/ethash_cl_miner.cpp | 22 +++++++++++----------- libethash-cl/ethash_cl_miner.h | 11 ++++++++--- libethcore/Ethash.cpp | 26 +++++++++++++++++++++++--- libethcore/Ethash.h | 5 +++-- 5 files changed, 62 insertions(+), 27 deletions(-) diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index 50ff93070..9c199cecb 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -129,9 +129,18 @@ public: cerr << "Bad " << arg << " option: " << argv[i] << endl; BOOST_THROW_EXCEPTION(BadArgument()); } - else if (arg == "--cl-global-work-size" && i + 1 < argc) + else if (arg == "--cl-global-work" && i + 1 < argc) try { - m_globalWorkSize = stol(argv[++i]); + m_globalWorkSizeMultiplier = stol(argv[++i]); + } + catch (...) + { + cerr << "Bad " << arg << " option: " << argv[i] << endl; + BOOST_THROW_EXCEPTION(BadArgument()); + } + else if (arg == "--cl-local-work" && i + 1 < argc) + try { + m_localWorkSize = stol(argv[++i]); } catch (...) { @@ -285,7 +294,8 @@ public: else if (m_minerType == MinerType::GPU) { if (!ProofOfWork::GPUMiner::configureGPU( - m_globalWorkSize, + m_localWorkSize, + m_globalWorkSizeMultiplier, m_msPerBatch, m_openclPlatform, m_openclDevice, @@ -293,10 +303,7 @@ public: m_extraGPUMemory, m_currentBlock )) - { - cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl; exit(1); - } ProofOfWork::GPUMiner::setNumInstances(m_miningThreads); } if (mode == OperationMode::DAGInit) @@ -339,7 +346,8 @@ public: << " --list-devices List the detected OpenCL devices and exit." << endl << " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl << " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl - << " --cl-global-work Set the OpenCL global work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE) << endl + << " --cl-local-work Set the OpenCL local work size. Default is " << toString(CL_DEFAULT_LOCAL_WORK_SIZE) << endl + << " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER) << " * " << toString(CL_DEFAULT_LOCAL_WORK_SIZE) << endl << " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(CL_DEFAULT_MS_PER_BATCH) << ". If 0 is given then no autoadjustment of global work size will happen" << endl ; } @@ -529,7 +537,8 @@ private: unsigned m_miningThreads = UINT_MAX; bool m_shouldListDevices = false; bool m_clAllowCPU = false; - unsigned m_globalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE; + unsigned m_globalWorkSizeMultiplier = CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER; + unsigned m_localWorkSize = CL_DEFAULT_LOCAL_WORK_SIZE; unsigned m_msPerBatch = CL_DEFAULT_MS_PER_BATCH; boost::optional m_currentBlock; // default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.) diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp index 02c9609fa..8c22f7f1b 100644 --- a/libethash-cl/ethash_cl_miner.cpp +++ b/libethash-cl/ethash_cl_miner.cpp @@ -140,6 +140,7 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId) bool ethash_cl_miner::configureGPU( unsigned _platformId, + unsigned _localWorkSize, unsigned _globalWorkSize, unsigned _msPerBatch, bool _allowCPU, @@ -147,6 +148,7 @@ bool ethash_cl_miner::configureGPU( boost::optional _currentBlock ) { + s_workgroupSize = _localWorkSize; s_initialGlobalWorkSize = _globalWorkSize; s_msPerBatch = _msPerBatch; s_allowCPU = _allowCPU; @@ -180,7 +182,8 @@ bool ethash_cl_miner::configureGPU( bool ethash_cl_miner::s_allowCPU = false; unsigned ethash_cl_miner::s_extraRequiredGPUMem; unsigned ethash_cl_miner::s_msPerBatch = CL_DEFAULT_MS_PER_BATCH; -unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE; +unsigned ethash_cl_miner::s_workgroupSize = CL_DEFAULT_LOCAL_WORK_SIZE; +unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER * CL_DEFAULT_LOCAL_WORK_SIZE; bool ethash_cl_miner::searchForAllDevices(function _callback) { @@ -260,7 +263,6 @@ void ethash_cl_miner::finish() bool ethash_cl_miner::init( uint8_t const* _dag, uint64_t _dagSize, - unsigned _workgroupSize, unsigned _platformId, unsigned _deviceId ) @@ -305,12 +307,10 @@ bool ethash_cl_miner::init( m_context = cl::Context(vector(&device, &device + 1)); m_queue = cl::CommandQueue(m_context, device); - // use requested workgroup size, but we require multiple of 8 - m_workgroupSize = ((_workgroupSize + 7) / 8) * 8; // make sure that global work size is evenly divisible by the local workgroup size m_globalWorkSize = s_initialGlobalWorkSize; - if (m_globalWorkSize % m_workgroupSize != 0) - m_globalWorkSize = ((m_globalWorkSize / m_workgroupSize) + 1) * m_workgroupSize; + if (m_globalWorkSize % s_workgroupSize != 0) + m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize; // remember the device's address bits m_deviceBits = device.getInfo(); @@ -318,7 +318,7 @@ bool ethash_cl_miner::init( // note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled // into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE); - addDefinition(code, "GROUP_SIZE", m_workgroupSize); + addDefinition(code, "GROUP_SIZE", s_workgroupSize); addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES)); addDefinition(code, "ACCESSES", ETHASH_ACCESSES); addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults); @@ -476,7 +476,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook m_searchKernel.setArg(6, start_nonce); // execute it! - m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, m_workgroupSize); + m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize); pending.push({ start_nonce, buf }); buf = (buf + 1) % c_bufferCount; @@ -519,15 +519,15 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook if (d > chrono::milliseconds(s_msPerBatch * 10 / 9)) { // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, >> " << _msPerBatch << " ms." << endl; - m_globalWorkSize = max(128, m_globalWorkSize + m_workgroupSize); + m_globalWorkSize = max(128, m_globalWorkSize + s_workgroupSize); // cerr << "New global work size" << m_globalWorkSize << endl; } else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10)) { // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, << " << _msPerBatch << " ms." << endl; - m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize); + m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize - s_workgroupSize); // Global work size should never be less than the workgroup size - m_globalWorkSize = max(m_workgroupSize, m_globalWorkSize); + m_globalWorkSize = max(s_workgroupSize, m_globalWorkSize); // cerr << "New global work size" << m_globalWorkSize << endl; } } diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h index e78108288..c60ee1881 100644 --- a/libethash-cl/ethash_cl_miner.h +++ b/libethash-cl/ethash_cl_miner.h @@ -17,7 +17,11 @@ #include #include -#define CL_DEFAULT_GLOBAL_WORK_SIZE 1024 * 16 +/// Default value of the local work size. Also known as workgroup size. +#define CL_DEFAULT_LOCAL_WORK_SIZE 64 +/// Default value of the global work size as a multiplier of the local work size +#define CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER 512 // * CL_DEFAULT_LOCAL_WORK_SIZE +/// Default value of the milliseconds per global work size (per batch) #define CL_DEFAULT_MS_PER_BATCH 100 class ethash_cl_miner @@ -48,6 +52,7 @@ public: static void listDevices(); static bool configureGPU( unsigned _platformId, + unsigned _localWorkSize, unsigned _globalWorkSize, unsigned _msPerBatch, bool _allowCPU, @@ -58,7 +63,6 @@ public: bool init( uint8_t const* _dag, uint64_t _dagSize, - unsigned _workgroupSize = 64, unsigned _platformId = 0, unsigned _deviceId = 0 ); @@ -81,11 +85,12 @@ private: cl::Buffer m_header; cl::Buffer m_hashBuffer[c_bufferCount]; cl::Buffer m_searchBuffer[c_bufferCount]; - unsigned m_workgroupSize; unsigned m_globalWorkSize; bool m_openclOnePointOne; unsigned m_deviceBits; + /// The local work size for the search + static unsigned s_workgroupSize; /// The initial global work size for the searches static unsigned s_initialGlobalWorkSize; /// The target milliseconds per batch for the search. If 0, then no adjustment will happen diff --git a/libethcore/Ethash.cpp b/libethcore/Ethash.cpp index 296b4a605..6c1f7d856 100644 --- a/libethcore/Ethash.cpp +++ b/libethcore/Ethash.cpp @@ -373,7 +373,7 @@ void Ethash::GPUMiner::workLoop() this_thread::sleep_for(chrono::milliseconds(500)); } bytesConstRef dagData = dag->data(); - m_miner->init(dagData.data(), dagData.size(), 32, s_platformId, device); + m_miner->init(dagData.data(), dagData.size(), s_platformId, device); } uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192); @@ -409,7 +409,8 @@ void Ethash::GPUMiner::listDevices() } bool Ethash::GPUMiner::configureGPU( - unsigned _globalWorkSize, + unsigned _localWorkSize, + unsigned _globalWorkSizeMultiplier, unsigned _msPerBatch, unsigned _platformId, unsigned _deviceId, @@ -420,7 +421,26 @@ bool Ethash::GPUMiner::configureGPU( { s_platformId = _platformId; s_deviceId = _deviceId; - return ethash_cl_miner::configureGPU(_globalWorkSize, _msPerBatch, _allowCPU, _extraGPUMemory, _currentBlock); + + if (_localWorkSize != 32 && _localWorkSize != 64 && _localWorkSize != 128) + { + cout << "Given localWorkSize of " << toString(_localWorkSize) << "is invalid. Must be either 32,64, or 128" << endl; + return false; + } + + if (!ethash_cl_miner::configureGPU( + _localWorkSize, + _globalWorkSizeMultiplier * _localWorkSize, + _msPerBatch, + _allowCPU, + _extraGPUMemory, + _currentBlock) + ) + { + cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl; + return false; + } + return true; } #endif diff --git a/libethcore/Ethash.h b/libethcore/Ethash.h index 4106229ed..e9ddf16ca 100644 --- a/libethcore/Ethash.h +++ b/libethcore/Ethash.h @@ -88,7 +88,7 @@ public: static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); } static std::string platformInfo(); static void listDevices() {} - static bool configureGPU(unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional) { return false; } + static bool configureGPU(unsigned, unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional) { return false; } static void setNumInstances(unsigned _instances) { s_numInstances = std::min(_instances, std::thread::hardware_concurrency()); } protected: void kickOff() override @@ -118,7 +118,8 @@ public: static unsigned getNumDevices(); static void listDevices(); static bool configureGPU( - unsigned _globalWorkSize, + unsigned _localWorkSize, + unsigned _globalWorkSizeMultiplier, unsigned _msPerBatch, unsigned _platformId, unsigned _deviceId, From e1391fa187022205180e8e875338b2a639f1246c Mon Sep 17 00:00:00 2001 From: Lefteris Karapetsas Date: Tue, 30 Jun 2015 16:25:15 +0200 Subject: [PATCH 6/8] OpenCL Fixes after rebasing --- libethcore/Ethash.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libethcore/Ethash.cpp b/libethcore/Ethash.cpp index 6c1f7d856..fc0e07f12 100644 --- a/libethcore/Ethash.cpp +++ b/libethcore/Ethash.cpp @@ -429,6 +429,7 @@ bool Ethash::GPUMiner::configureGPU( } if (!ethash_cl_miner::configureGPU( + _platformId, _localWorkSize, _globalWorkSizeMultiplier * _localWorkSize, _msPerBatch, From bc9bdf09af335b6c8e50b522fa1ff57900e32c7d Mon Sep 17 00:00:00 2001 From: Lefteris Karapetsas Date: Mon, 6 Jul 2015 15:58:07 +0200 Subject: [PATCH 7/8] Move defaults so that cl_miner is not included in MinerAux.h --- ethminer/MinerAux.h | 1 - libethash-cl/ethash_cl_miner.h | 7 ------- libethcore/Ethash.h | 7 +++++++ 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index 9c199cecb..eba68a4c3 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -39,7 +39,6 @@ #include #include #include -#include #include #if ETH_JSONRPC || !ETH_TRUE #include diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h index c60ee1881..8f7594be5 100644 --- a/libethash-cl/ethash_cl_miner.h +++ b/libethash-cl/ethash_cl_miner.h @@ -17,13 +17,6 @@ #include #include -/// Default value of the local work size. Also known as workgroup size. -#define CL_DEFAULT_LOCAL_WORK_SIZE 64 -/// Default value of the global work size as a multiplier of the local work size -#define CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER 512 // * CL_DEFAULT_LOCAL_WORK_SIZE -/// Default value of the milliseconds per global work size (per batch) -#define CL_DEFAULT_MS_PER_BATCH 100 - class ethash_cl_miner { private: diff --git a/libethcore/Ethash.h b/libethcore/Ethash.h index e9ddf16ca..8cc392306 100644 --- a/libethcore/Ethash.h +++ b/libethcore/Ethash.h @@ -31,6 +31,13 @@ #include "BlockInfo.h" #include "Miner.h" +/// Default value of the local work size. Also known as workgroup size. +#define CL_DEFAULT_LOCAL_WORK_SIZE 64 +/// Default value of the global work size as a multiplier of the local work size +#define CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER 512 // * CL_DEFAULT_LOCAL_WORK_SIZE +/// Default value of the milliseconds per global work size (per batch) +#define CL_DEFAULT_MS_PER_BATCH 100 + class ethash_cl_miner; namespace dev From 09d091b8d08268500d2d42bc840ec088d0d47699 Mon Sep 17 00:00:00 2001 From: Lefteris Karapetsas Date: Mon, 6 Jul 2015 20:50:00 +0200 Subject: [PATCH 8/8] Default values of args are now class constants Default values of some mining related arguments are no longer C constant macros but are instead C++ Ethash class constants --- ethminer/MinerAux.h | 12 ++++++------ libethash-cl/ethash_cl_miner.cpp | 8 +++++--- libethcore/Ethash.cpp | 3 +++ libethcore/Ethash.h | 13 ++++++------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index eba68a4c3..5e2fdc8f1 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -345,9 +345,9 @@ public: << " --list-devices List the detected OpenCL devices and exit." << endl << " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl << " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl - << " --cl-local-work Set the OpenCL local work size. Default is " << toString(CL_DEFAULT_LOCAL_WORK_SIZE) << endl - << " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER) << " * " << toString(CL_DEFAULT_LOCAL_WORK_SIZE) << endl - << " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(CL_DEFAULT_MS_PER_BATCH) << ". If 0 is given then no autoadjustment of global work size will happen" << endl + << " --cl-local-work Set the OpenCL local work size. Default is " << toString(dev::eth::Ethash::defaultLocalWorkSize) << endl + << " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(dev::eth::Ethash::defaultGlobalWorkSizeMultiplier) << " * " << toString(dev::eth::Ethash::defaultLocalWorkSize) << endl + << " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(dev::eth::Ethash::defaultMSPerBatch) << ". If 0 is given then no autoadjustment of global work size will happen" << endl ; } @@ -536,9 +536,9 @@ private: unsigned m_miningThreads = UINT_MAX; bool m_shouldListDevices = false; bool m_clAllowCPU = false; - unsigned m_globalWorkSizeMultiplier = CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER; - unsigned m_localWorkSize = CL_DEFAULT_LOCAL_WORK_SIZE; - unsigned m_msPerBatch = CL_DEFAULT_MS_PER_BATCH; + unsigned m_globalWorkSizeMultiplier = dev::eth::Ethash::defaultGlobalWorkSizeMultiplier; + unsigned m_localWorkSize = dev::eth::Ethash::defaultLocalWorkSize; + unsigned m_msPerBatch = dev::eth::Ethash::defaultMSPerBatch; boost::optional m_currentBlock; // default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.) unsigned m_extraGPUMemory = 350000000; diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp index 8c22f7f1b..bf4644b98 100644 --- a/libethash-cl/ethash_cl_miner.cpp +++ b/libethash-cl/ethash_cl_miner.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "ethash_cl_miner.h" #include "ethash_cl_miner_kernel.h" @@ -49,6 +50,7 @@ #undef max using namespace std; +using namespace dev::eth; // TODO: If at any point we can use libdevcore in here then we should switch to using a LogChannel #define ETHCL_LOG(_contents) cout << "[OPENCL]:" << _contents << endl @@ -181,9 +183,9 @@ bool ethash_cl_miner::configureGPU( bool ethash_cl_miner::s_allowCPU = false; unsigned ethash_cl_miner::s_extraRequiredGPUMem; -unsigned ethash_cl_miner::s_msPerBatch = CL_DEFAULT_MS_PER_BATCH; -unsigned ethash_cl_miner::s_workgroupSize = CL_DEFAULT_LOCAL_WORK_SIZE; -unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER * CL_DEFAULT_LOCAL_WORK_SIZE; +unsigned ethash_cl_miner::s_msPerBatch = Ethash::defaultMSPerBatch; +unsigned ethash_cl_miner::s_workgroupSize = Ethash::defaultLocalWorkSize; +unsigned ethash_cl_miner::s_initialGlobalWorkSize = Ethash::defaultGlobalWorkSizeMultiplier * Ethash::defaultLocalWorkSize; bool ethash_cl_miner::searchForAllDevices(function _callback) { diff --git a/libethcore/Ethash.cpp b/libethcore/Ethash.cpp index fc0e07f12..3baac3292 100644 --- a/libethcore/Ethash.cpp +++ b/libethcore/Ethash.cpp @@ -54,6 +54,9 @@ namespace dev namespace eth { +const unsigned Ethash::defaultLocalWorkSize = 64; +const unsigned Ethash::defaultGlobalWorkSizeMultiplier = 512; // * CL_DEFAULT_LOCAL_WORK_SIZE +const unsigned Ethash::defaultMSPerBatch = 100; const Ethash::WorkPackage Ethash::NullWorkPackage = Ethash::WorkPackage(); std::string Ethash::name() diff --git a/libethcore/Ethash.h b/libethcore/Ethash.h index 8cc392306..804c92984 100644 --- a/libethcore/Ethash.h +++ b/libethcore/Ethash.h @@ -31,13 +31,6 @@ #include "BlockInfo.h" #include "Miner.h" -/// Default value of the local work size. Also known as workgroup size. -#define CL_DEFAULT_LOCAL_WORK_SIZE 64 -/// Default value of the global work size as a multiplier of the local work size -#define CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER 512 // * CL_DEFAULT_LOCAL_WORK_SIZE -/// Default value of the milliseconds per global work size (per batch) -#define CL_DEFAULT_MS_PER_BATCH 100 - class ethash_cl_miner; namespace dev @@ -157,6 +150,12 @@ public: #else using GPUMiner = CPUMiner; #endif + /// Default value of the local work size. Also known as workgroup size. + static const unsigned defaultLocalWorkSize; + /// Default value of the global work size as a multiplier of the local work size + static const unsigned defaultGlobalWorkSizeMultiplier; + /// Default value of the milliseconds per global work size (per batch) + static const unsigned defaultMSPerBatch; }; }