diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h
index b8305370c..d5145b97c 100644
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@@ -128,6 +128,33 @@ public:
 				cerr << "Bad " << arg << " option: " << argv[i] << endl;
 				BOOST_THROW_EXCEPTION(BadArgument());
 			}
+		else if (arg == "--cl-global-work" && i + 1 < argc)
+			try {
+				m_globalWorkSizeMultiplier = stol(argv[++i]);
+			}
+			catch (...)
+			{
+				cerr << "Bad " << arg << " option: " << argv[i] << endl;
+				BOOST_THROW_EXCEPTION(BadArgument());
+			}
+		else if (arg == "--cl-local-work" && i + 1 < argc)
+			try {
+				m_localWorkSize = stol(argv[++i]);
+			}
+			catch (...)
+			{
+				cerr << "Bad " << arg << " option: " << argv[i] << endl;
+				BOOST_THROW_EXCEPTION(BadArgument());
+			}
+		else if (arg == "--cl-ms-per-batch" && i + 1 < argc)
+			try {
+				m_msPerBatch = stol(argv[++i]);
+			}
+			catch (...)
+			{
+				cerr << "Bad " << arg << " option: " << argv[i] << endl;
+				BOOST_THROW_EXCEPTION(BadArgument());
+			}
 		else if (arg == "--list-devices")
 			m_shouldListDevices = true;
 		else if (arg == "--allow-opencl-cpu")
@@ -266,16 +293,16 @@ public:
 		else if (m_minerType == MinerType::GPU)
 		{
 			if (!ProofOfWork::GPUMiner::configureGPU(
+					m_localWorkSize,
+					m_globalWorkSizeMultiplier,
+					m_msPerBatch,
 					m_openclPlatform,
 					m_openclDevice,
 					m_clAllowCPU,
 					m_extraGPUMemory,
 					m_currentBlock
 				))
-			{
-				cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl;
 				exit(1);
-			}
 			ProofOfWork::GPUMiner::setNumInstances(m_miningThreads);
 		}
 		if (mode == OperationMode::DAGInit)
@@ -318,6 +345,9 @@ public:
 			<< " --list-devices List the detected OpenCL devices and exit." << endl
 			<< " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl
 			<< " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
+			<< " --cl-local-work Set the OpenCL local work size. Default is " << toString(dev::eth::Ethash::defaultLocalWorkSize) << endl
+			<< " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(dev::eth::Ethash::defaultGlobalWorkSizeMultiplier) << " * " << toString(dev::eth::Ethash::defaultLocalWorkSize) << endl
+			<< " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(dev::eth::Ethash::defaultMSPerBatch) << ". If 0 is given then no autoadjustment of global work size will happen" << endl
 			;
 	}
 
@@ -506,6 +536,9 @@ private:
 	unsigned m_miningThreads = UINT_MAX;
 	bool m_shouldListDevices = false;
 	bool m_clAllowCPU = false;
+	unsigned m_globalWorkSizeMultiplier = dev::eth::Ethash::defaultGlobalWorkSizeMultiplier;
+	unsigned m_localWorkSize = dev::eth::Ethash::defaultLocalWorkSize;
+	unsigned m_msPerBatch = dev::eth::Ethash::defaultMSPerBatch;
 	boost::optional m_currentBlock;
 	// default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.)
 	unsigned m_extraGPUMemory = 350000000;
diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp
index 9e2d51286..8b8cb0b51 100644
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include
 #include
 #include "ethash_cl_miner.h"
 #include "ethash_cl_miner_kernel.h"
@@ -49,6 +50,7 @@
 #undef max
 
 using namespace std;
+using namespace dev::eth;
 
 // TODO: If at any point we can use libdevcore in here then we should switch to using a LogChannel
 #define ETHCL_LOG(_contents) cout << "[OPENCL]:" << _contents << endl
@@ -140,11 +142,17 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId)
 
 bool ethash_cl_miner::configureGPU(
 	unsigned _platformId,
+	unsigned _localWorkSize,
+	unsigned _globalWorkSize,
+	unsigned _msPerBatch,
 	bool _allowCPU,
 	unsigned _extraGPUMemory,
 	boost::optional _currentBlock
 )
 {
+	s_workgroupSize = _localWorkSize;
+	s_initialGlobalWorkSize = _globalWorkSize;
+	s_msPerBatch = _msPerBatch;
 	s_allowCPU = _allowCPU;
 	s_extraRequiredGPUMem = _extraGPUMemory;
 	// by default let's only consider the DAG of the first epoch
@@ -175,6 +183,9 @@ bool ethash_cl_miner::configureGPU(
 
 bool ethash_cl_miner::s_allowCPU = false;
 unsigned ethash_cl_miner::s_extraRequiredGPUMem;
+unsigned ethash_cl_miner::s_msPerBatch = Ethash::defaultMSPerBatch;
+unsigned ethash_cl_miner::s_workgroupSize = Ethash::defaultLocalWorkSize;
+unsigned ethash_cl_miner::s_initialGlobalWorkSize = Ethash::defaultGlobalWorkSizeMultiplier * Ethash::defaultLocalWorkSize;
 
 bool ethash_cl_miner::searchForAllDevices(function _callback)
 {
@@ -254,7 +265,6 @@ void ethash_cl_miner::finish()
 bool ethash_cl_miner::init(
 	uint8_t const* _dag,
 	uint64_t _dagSize,
-	unsigned _workgroupSize,
 	unsigned _platformId,
 	unsigned _deviceId
 )
@@ -299,14 +309,18 @@ bool ethash_cl_miner::init(
 		m_context = cl::Context(vector(&device, &device + 1));
 		m_queue = cl::CommandQueue(m_context, device);
 
-		// use requested workgroup size, but we require multiple of 8
-		m_workgroupSize = ((_workgroupSize + 7) / 8) * 8;
+		// make sure that global work size is evenly divisible by the local workgroup size
+		m_globalWorkSize = s_initialGlobalWorkSize;
+		if (m_globalWorkSize % s_workgroupSize != 0)
+			m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize;
+		// remember the device's address bits
+		m_deviceBits = device.getInfo();
 
 		// patch source code
 		// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
 		// into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
 		string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
-		addDefinition(code, "GROUP_SIZE", m_workgroupSize);
+		addDefinition(code, "GROUP_SIZE", s_workgroupSize);
 		addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
 		addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
 		addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
@@ -415,9 +429,8 @@ bool ethash_cl_miner::init(
 	return true;
 }
 
-void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook, unsigned _msPerBatch)
+void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
 {
-	(void)_msPerBatch;
 	try
 	{
 		struct pending_batch
@@ -454,10 +467,9 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 		unsigned buf = 0;
 		random_device engine;
 		uint64_t start_nonce = uniform_int_distribution()(engine);
-		for (;; start_nonce += m_batchSize)
+		for (;; start_nonce += m_globalWorkSize)
 		{
-//			chrono::high_resolution_clock::time_point t = chrono::high_resolution_clock::now();
-
+			auto t = chrono::high_resolution_clock::now();
 			// supply output buffer to kernel
 			m_searchKernel.setArg(0, m_searchBuffer[buf]);
 			if (m_dagChunksCount == 1)
@@ -466,7 +478,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 				m_searchKernel.setArg(6, start_nonce);
 
 			// execute it!
-			m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_batchSize, m_workgroupSize);
+			m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize);
 
 			pending.push({ start_nonce, buf });
 			buf = (buf + 1) % c_bufferCount;
@@ -486,7 +498,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 				m_queue.enqueueUnmapMemObject(m_searchBuffer[batch.buf], results);
 
 				bool exit = num_found && hook.found(nonces, num_found);
-				exit |= hook.searched(batch.start_nonce, m_batchSize); // always report searched before exit
+				exit |= hook.searched(batch.start_nonce, m_globalWorkSize); // always report searched before exit
 				if (exit)
 					break;
 
@@ -497,19 +509,31 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 				pending.pop();
 			}
 
-/*			chrono::high_resolution_clock::duration d = chrono::high_resolution_clock::now() - t;
-			if (d > chrono::milliseconds(_msPerBatch * 10 / 9))
+			// steer global work size towards s_msPerBatch: shrink after a slow batch, grow after a fast one
+			if (s_msPerBatch)
 			{
-				cerr << "Batch of" << m_batchSize << "took" << chrono::duration_cast(d).count() << "ms, >>" << _msPerBatch << "ms.";
-				m_batchSize = max(128, m_batchSize * 9 / 10);
-				cerr << "New batch size" << m_batchSize;
+				// Global work size must be:
+				// - less than or equal to 2 ^ DEVICE_BITS - 1
+				// - divisible by local work size (workgroup size)
+				auto d = chrono::duration_cast(chrono::high_resolution_clock::now() - t);
+				if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual search/or search not finished
+				{
+					if (d > chrono::milliseconds(s_msPerBatch * 10 / 9))
+					{
+						// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, >> " << s_msPerBatch << " ms." << endl;
+						m_globalWorkSize = max(128, m_globalWorkSize - s_workgroupSize);
+						// cerr << "New global work size" << m_globalWorkSize << endl;
+					}
+					else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10))
+					{
+						// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, << " << s_msPerBatch << " ms." << endl;
+						m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize + s_workgroupSize);
+						// Global work size should never be less than the workgroup size
+						m_globalWorkSize = max(s_workgroupSize, m_globalWorkSize);
+						// cerr << "New global work size" << m_globalWorkSize << endl;
+					}
+				}
 			}
-			else if (d < chrono::milliseconds(_msPerBatch * 9 / 10))
-			{
-				cerr << "Batch of" << m_batchSize << "took" << chrono::duration_cast(d).count() << "ms, <<" << _msPerBatch << "ms.";
-				m_batchSize = m_batchSize * 10 / 9;
-				cerr << "New batch size" << m_batchSize;
-			}*/
 		}
 
 		// not safe to return until this is ready
diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h
index 73bf7e94a..8f7594be5 100644
--- a/libethash-cl/ethash_cl_miner.h
+++ b/libethash-cl/ethash_cl_miner.h
@@ -45,6 +45,9 @@ public:
 	static void listDevices();
 	static bool configureGPU(
 		unsigned _platformId,
+		unsigned _localWorkSize,
+		unsigned _globalWorkSize,
+		unsigned _msPerBatch,
 		bool _allowCPU,
 		unsigned _extraGPUMemory,
 		boost::optional _currentBlock
@@ -53,12 +56,11 @@ public:
 	bool init(
 		uint8_t const* _dag,
 		uint64_t _dagSize,
-		unsigned _workgroupSize = 64,
 		unsigned _platformId = 0,
 		unsigned _deviceId = 0
 	);
 	void finish();
-	void search(uint8_t const* _header, uint64_t _target, search_hook& _hook, unsigned _msPerBatch = 100);
+	void search(uint8_t const* _header, uint64_t _target, search_hook& _hook);
 	void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count);
 	void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook);
 
@@ -76,10 +78,16 @@ private:
 	cl::Buffer m_header;
 	cl::Buffer m_hashBuffer[c_bufferCount];
 	cl::Buffer m_searchBuffer[c_bufferCount];
-	unsigned m_workgroupSize;
-	unsigned m_batchSize = c_searchBatchSize;
+	unsigned m_globalWorkSize;
 	bool m_openclOnePointOne;
+	unsigned m_deviceBits;
 
+	/// The local work size for the search
+	static unsigned s_workgroupSize;
+	/// The initial global work size for the searches
+	static unsigned s_initialGlobalWorkSize;
+	/// The target milliseconds per batch for the search. If 0, then no adjustment will happen
+	static unsigned s_msPerBatch;
 	/// Allow CPU to appear as an OpenCL device or not. Default is false
 	static bool s_allowCPU;
 	/// GPU memory required for other things, like window rendering e.t.c.
diff --git a/libethcore/Ethash.cpp b/libethcore/Ethash.cpp
index 46d19d164..3baac3292 100644
--- a/libethcore/Ethash.cpp
+++ b/libethcore/Ethash.cpp
@@ -54,6 +54,9 @@ namespace dev
 namespace eth
 {
 
+const unsigned Ethash::defaultLocalWorkSize = 64;
+const unsigned Ethash::defaultGlobalWorkSizeMultiplier = 512; // * CL_DEFAULT_LOCAL_WORK_SIZE
+const unsigned Ethash::defaultMSPerBatch = 100;
 const Ethash::WorkPackage Ethash::NullWorkPackage = Ethash::WorkPackage();
 
 std::string Ethash::name()
@@ -373,7 +376,7 @@ void Ethash::GPUMiner::workLoop()
 					this_thread::sleep_for(chrono::milliseconds(500));
 				}
 				bytesConstRef dagData = dag->data();
-				m_miner->init(dagData.data(), dagData.size(), 32, s_platformId, device);
+				m_miner->init(dagData.data(), dagData.size(), s_platformId, device);
 			}
 
 			uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192);
@@ -409,6 +412,9 @@ void Ethash::GPUMiner::listDevices()
 }
 
 bool Ethash::GPUMiner::configureGPU(
+	unsigned _localWorkSize,
+	unsigned _globalWorkSizeMultiplier,
+	unsigned _msPerBatch,
 	unsigned _platformId,
 	unsigned _deviceId,
 	bool _allowCPU,
@@ -418,7 +424,27 @@ bool Ethash::GPUMiner::configureGPU(
 {
 	s_platformId = _platformId;
 	s_deviceId = _deviceId;
-	return ethash_cl_miner::configureGPU(_platformId, _allowCPU, _extraGPUMemory, _currentBlock);
+
+	if (_localWorkSize != 32 && _localWorkSize != 64 && _localWorkSize != 128)
+	{
+		cout << "Given localWorkSize of " << toString(_localWorkSize) << " is invalid. Must be either 32, 64, or 128" << endl;
+		return false;
+	}
+
+	if (!ethash_cl_miner::configureGPU(
+			_platformId,
+			_localWorkSize,
+			_globalWorkSizeMultiplier * _localWorkSize,
+			_msPerBatch,
+			_allowCPU,
+			_extraGPUMemory,
+			_currentBlock)
+	)
+	{
+		cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl;
+		return false;
+	}
+	return true;
 }
 
 #endif
diff --git a/libethcore/Ethash.h b/libethcore/Ethash.h
index 11e012df5..804c92984 100644
--- a/libethcore/Ethash.h
+++ b/libethcore/Ethash.h
@@ -88,7 +88,7 @@ public:
 		static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); }
 		static std::string platformInfo();
 		static void listDevices() {}
-		static bool configureGPU(unsigned, unsigned, bool, unsigned, boost::optional) { return false; }
+		static bool configureGPU(unsigned, unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional) { return false; }
 		static void setNumInstances(unsigned _instances) { s_numInstances = std::min(_instances, std::thread::hardware_concurrency()); }
 	protected:
 		void kickOff() override
@@ -118,6 +118,9 @@ public:
 		static unsigned getNumDevices();
 		static void listDevices();
 		static bool configureGPU(
+			unsigned _localWorkSize,
+			unsigned _globalWorkSizeMultiplier,
+			unsigned _msPerBatch,
 			unsigned _platformId,
 			unsigned _deviceId,
 			bool _allowCPU,
@@ -147,6 +150,12 @@ public:
 #else
 	using GPUMiner = CPUMiner;
 #endif
+	/// Default value of the local work size. Also known as workgroup size.
+	static const unsigned defaultLocalWorkSize;
+	/// Default value of the global work size as a multiplier of the local work size
+	static const unsigned defaultGlobalWorkSizeMultiplier;
+	/// Default value of the milliseconds per global work size (per batch)
+	static const unsigned defaultMSPerBatch;
 };
 
 }