diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index 50ff93070..9c199cecb 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -129,9 +129,18 @@ public: cerr << "Bad " << arg << " option: " << argv[i] << endl; BOOST_THROW_EXCEPTION(BadArgument()); } - else if (arg == "--cl-global-work-size" && i + 1 < argc) + else if (arg == "--cl-global-work" && i + 1 < argc) try { - m_globalWorkSize = stol(argv[++i]); + m_globalWorkSizeMultiplier = stol(argv[++i]); + } + catch (...) + { + cerr << "Bad " << arg << " option: " << argv[i] << endl; + BOOST_THROW_EXCEPTION(BadArgument()); + } + else if (arg == "--cl-local-work" && i + 1 < argc) + try { + m_localWorkSize = stol(argv[++i]); } catch (...) { @@ -285,7 +294,8 @@ public: else if (m_minerType == MinerType::GPU) { if (!ProofOfWork::GPUMiner::configureGPU( - m_globalWorkSize, + m_localWorkSize, + m_globalWorkSizeMultiplier, m_msPerBatch, m_openclPlatform, m_openclDevice, @@ -293,10 +303,7 @@ public: m_extraGPUMemory, m_currentBlock )) - { - cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl; exit(1); - } ProofOfWork::GPUMiner::setNumInstances(m_miningThreads); } if (mode == OperationMode::DAGInit) @@ -339,7 +346,8 @@ public: << " --list-devices List the detected OpenCL devices and exit." << endl << " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl << " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl - << " --cl-global-work Set the OpenCL global work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE) << endl + << " --cl-local-work Set the OpenCL local work size. Default is " << toString(CL_DEFAULT_LOCAL_WORK_SIZE) << endl + << " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER) << " * " << toString(CL_DEFAULT_LOCAL_WORK_SIZE) << endl << " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(CL_DEFAULT_MS_PER_BATCH) << ". If 0 is given then no autoadjustment of global work size will happen" << endl ; } @@ -529,7 +537,8 @@ private: unsigned m_miningThreads = UINT_MAX; bool m_shouldListDevices = false; bool m_clAllowCPU = false; - unsigned m_globalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE; + unsigned m_globalWorkSizeMultiplier = CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER; + unsigned m_localWorkSize = CL_DEFAULT_LOCAL_WORK_SIZE; unsigned m_msPerBatch = CL_DEFAULT_MS_PER_BATCH; boost::optional m_currentBlock; // default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.) diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp index 02c9609fa..8c22f7f1b 100644 --- a/libethash-cl/ethash_cl_miner.cpp +++ b/libethash-cl/ethash_cl_miner.cpp @@ -140,6 +140,7 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId) bool ethash_cl_miner::configureGPU( unsigned _platformId, + unsigned _localWorkSize, unsigned _globalWorkSize, unsigned _msPerBatch, bool _allowCPU, @@ -147,6 +148,7 @@ bool ethash_cl_miner::configureGPU( boost::optional _currentBlock ) { + s_workgroupSize = _localWorkSize; s_initialGlobalWorkSize = _globalWorkSize; s_msPerBatch = _msPerBatch; s_allowCPU = _allowCPU; @@ -180,7 +182,8 @@ bool ethash_cl_miner::configureGPU( bool ethash_cl_miner::s_allowCPU = false; unsigned ethash_cl_miner::s_extraRequiredGPUMem; unsigned ethash_cl_miner::s_msPerBatch = CL_DEFAULT_MS_PER_BATCH; -unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE; +unsigned ethash_cl_miner::s_workgroupSize = CL_DEFAULT_LOCAL_WORK_SIZE; +unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER * CL_DEFAULT_LOCAL_WORK_SIZE; bool ethash_cl_miner::searchForAllDevices(function _callback) { @@ -260,7 +263,6 @@ void ethash_cl_miner::finish() bool ethash_cl_miner::init( uint8_t const* _dag, uint64_t _dagSize, - unsigned _workgroupSize, unsigned _platformId, unsigned _deviceId ) @@ -305,12 +307,10 @@ bool ethash_cl_miner::init( m_context = cl::Context(vector(&device, &device + 1)); m_queue = cl::CommandQueue(m_context, device); - // use requested workgroup size, but we require multiple of 8 - m_workgroupSize = ((_workgroupSize + 7) / 8) * 8; // make sure that global work size is evenly divisible by the local workgroup size m_globalWorkSize = s_initialGlobalWorkSize; - if (m_globalWorkSize % m_workgroupSize != 0) - m_globalWorkSize = ((m_globalWorkSize / m_workgroupSize) + 1) * m_workgroupSize; + if (m_globalWorkSize % s_workgroupSize != 0) + m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize; // remember the device's address bits m_deviceBits = device.getInfo(); @@ -318,7 +318,7 @@ bool ethash_cl_miner::init( // note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled // into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE); - addDefinition(code, "GROUP_SIZE", m_workgroupSize); + addDefinition(code, "GROUP_SIZE", s_workgroupSize); addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES)); addDefinition(code, "ACCESSES", ETHASH_ACCESSES); addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults); @@ -476,7 +476,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook m_searchKernel.setArg(6, start_nonce); // execute it! - m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, m_workgroupSize); + m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize); pending.push({ start_nonce, buf }); buf = (buf + 1) % c_bufferCount; @@ -519,15 +519,15 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook if (d > chrono::milliseconds(s_msPerBatch * 10 / 9)) { // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, >> " << _msPerBatch << " ms." << endl; - m_globalWorkSize = max(128, m_globalWorkSize + m_workgroupSize); + m_globalWorkSize = max(128, m_globalWorkSize + s_workgroupSize); // cerr << "New global work size" << m_globalWorkSize << endl; } else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10)) { // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast(d).count() << " ms, << " << _msPerBatch << " ms." << endl; - m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize); + m_globalWorkSize = min(pow(2, m_deviceBits) - 1, m_globalWorkSize - s_workgroupSize); // Global work size should never be less than the workgroup size - m_globalWorkSize = max(m_workgroupSize, m_globalWorkSize); + m_globalWorkSize = max(s_workgroupSize, m_globalWorkSize); // cerr << "New global work size" << m_globalWorkSize << endl; } } diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h index e78108288..c60ee1881 100644 --- a/libethash-cl/ethash_cl_miner.h +++ b/libethash-cl/ethash_cl_miner.h @@ -17,7 +17,11 @@ #include #include -#define CL_DEFAULT_GLOBAL_WORK_SIZE 1024 * 16 +/// Default value of the local work size. Also known as workgroup size. +#define CL_DEFAULT_LOCAL_WORK_SIZE 64 +/// Default value of the global work size as a multiplier of the local work size +#define CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER 512 // * CL_DEFAULT_LOCAL_WORK_SIZE +/// Default value of the milliseconds per global work size (per batch) #define CL_DEFAULT_MS_PER_BATCH 100 class ethash_cl_miner @@ -48,6 +52,7 @@ public: static void listDevices(); static bool configureGPU( unsigned _platformId, + unsigned _localWorkSize, unsigned _globalWorkSize, unsigned _msPerBatch, bool _allowCPU, @@ -58,7 +63,6 @@ public: bool init( uint8_t const* _dag, uint64_t _dagSize, - unsigned _workgroupSize = 64, unsigned _platformId = 0, unsigned _deviceId = 0 ); @@ -81,11 +85,12 @@ private: cl::Buffer m_header; cl::Buffer m_hashBuffer[c_bufferCount]; cl::Buffer m_searchBuffer[c_bufferCount]; - unsigned m_workgroupSize; unsigned m_globalWorkSize; bool m_openclOnePointOne; unsigned m_deviceBits; + /// The local work size for the search + static unsigned s_workgroupSize; /// The initial global work size for the searches static unsigned s_initialGlobalWorkSize; /// The target milliseconds per batch for the search. If 0, then no adjustment will happen diff --git a/libethcore/Ethash.cpp b/libethcore/Ethash.cpp index 296b4a605..6c1f7d856 100644 --- a/libethcore/Ethash.cpp +++ b/libethcore/Ethash.cpp @@ -373,7 +373,7 @@ void Ethash::GPUMiner::workLoop() this_thread::sleep_for(chrono::milliseconds(500)); } bytesConstRef dagData = dag->data(); - m_miner->init(dagData.data(), dagData.size(), 32, s_platformId, device); + m_miner->init(dagData.data(), dagData.size(), s_platformId, device); } uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192); @@ -409,7 +409,8 @@ void Ethash::GPUMiner::listDevices() } bool Ethash::GPUMiner::configureGPU( - unsigned _globalWorkSize, + unsigned _localWorkSize, + unsigned _globalWorkSizeMultiplier, unsigned _msPerBatch, unsigned _platformId, unsigned _deviceId, @@ -420,7 +421,26 @@ bool Ethash::GPUMiner::configureGPU( { s_platformId = _platformId; s_deviceId = _deviceId; - return ethash_cl_miner::configureGPU(_globalWorkSize, _msPerBatch, _allowCPU, _extraGPUMemory, _currentBlock); + + if (_localWorkSize != 32 && _localWorkSize != 64 && _localWorkSize != 128) + { + cout << "Given localWorkSize of " << toString(_localWorkSize) << "is invalid. Must be either 32,64, or 128" << endl; + return false; + } + + if (!ethash_cl_miner::configureGPU( + _localWorkSize, + _globalWorkSizeMultiplier * _localWorkSize, + _msPerBatch, + _allowCPU, + _extraGPUMemory, + _currentBlock) + ) + { + cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl; + return false; + } + return true; } #endif diff --git a/libethcore/Ethash.h b/libethcore/Ethash.h index 4106229ed..e9ddf16ca 100644 --- a/libethcore/Ethash.h +++ b/libethcore/Ethash.h @@ -88,7 +88,7 @@ public: static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); } static std::string platformInfo(); static void listDevices() {} - static bool configureGPU(unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional) { return false; } + static bool configureGPU(unsigned, unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional) { return false; } static void setNumInstances(unsigned _instances) { s_numInstances = std::min(_instances, std::thread::hardware_concurrency()); } protected: void kickOff() override @@ -118,7 +118,8 @@ public: static unsigned getNumDevices(); static void listDevices(); static bool configureGPU( - unsigned _globalWorkSize, + unsigned _localWorkSize, + unsigned _globalWorkSizeMultiplier, unsigned _msPerBatch, unsigned _platformId, unsigned _deviceId,