New OpenCL arguments

- Add an argument to specify the initial OpenCL global work size. - Add an argument to specify the target milliseconds per batch (msPerBatch), where one batch is one global work size worth of work. If this is 0, no automatic adjustment of the global work size happens.
10 years ago · 15fc63d6a2
5 changed files with 70 additions and 23 deletions
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@ -39,6 +39,7 @@
 #include <libdevcore/SHA3.h>
 #include <libethcore/ProofOfWork.h>
 #include <libethcore/EthashAux.h>
 #include <libethash-cl/ethash_cl_miner.h>
 #include <libethcore/Farm.h>
 #if ETH_JSONRPC || !ETH_TRUE
 #include <libweb3jsonrpc/WebThreeStubServer.h>
@ -128,6 +129,24 @@ public:
 				cerr << "Bad " << arg << " option: " << argv[i] << endl;
 				BOOST_THROW_EXCEPTION(BadArgument());
 			}
 		else if (arg == "--cl-global-work-size" && i + 1 < argc)
 			try {
 				m_globalWorkSize = stol(argv[++i]);
 			}
 			catch (...)
 			{
 				cerr << "Bad " << arg << " option: " << argv[i] << endl;
 				BOOST_THROW_EXCEPTION(BadArgument());
 			}
 		else if (arg == "--cl-ms-per-batch" && i + 1 < argc)
 			try {
 				m_msPerBatch = stol(argv[++i]);
 			}
 			catch (...)
 			{
 				cerr << "Bad " << arg << " option: " << argv[i] << endl;
 				BOOST_THROW_EXCEPTION(BadArgument());
 			}
 		else if (arg == "--list-devices")
 			m_shouldListDevices = true;
 		else if (arg == "--allow-opencl-cpu")
@ -266,6 +285,8 @@ public:
 		else if (m_minerType == MinerType::GPU)
 		{
 			if (!ProofOfWork::GPUMiner::configureGPU(
 					m_globalWorkSize,
 					m_msPerBatch,
 					m_openclPlatform,
 					m_openclDevice,
 					m_clAllowCPU,
@ -318,6 +339,8 @@ public:
 			<< "    --list-devices List the detected OpenCL devices and exit." << endl
 			<< "    --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl
 			<< "    --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
 			<< "    --cl-global-work-size Set the OpenCL global work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE) << endl // flag name must match the "--cl-global-work-size" string tested by the argument parser
 			<< "    --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(CL_DEFAULT_MS_PER_BATCH) << ". If 0 is given then no autoadjustment of global work size will happen" << endl
 			;
 	}
@ -506,6 +529,8 @@ private:
 	unsigned m_miningThreads = UINT_MAX;
 	bool m_shouldListDevices = false;
 	bool m_clAllowCPU = false;
 	unsigned m_globalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE;
 	unsigned m_msPerBatch = CL_DEFAULT_MS_PER_BATCH;
 	boost::optional<uint64_t> m_currentBlock;
 	// default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.)
 	unsigned m_extraGPUMemory = 350000000;
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@ -140,11 +140,15 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId)
 bool ethash_cl_miner::configureGPU(
 	unsigned _platformId,
 	unsigned _globalWorkSize,
 	unsigned _msPerBatch,
 	bool _allowCPU,
 	unsigned _extraGPUMemory,
 	boost::optional<uint64_t> _currentBlock
 )
 {
 	s_initialGlobalWorkSize = _globalWorkSize;
 	s_msPerBatch = _msPerBatch;
 	s_allowCPU = _allowCPU;
 	s_extraRequiredGPUMem = _extraGPUMemory;
 	// by default let's only consider the DAG of the first epoch
@ -175,6 +179,8 @@ bool ethash_cl_miner::configureGPU(
 bool ethash_cl_miner::s_allowCPU = false;
 unsigned ethash_cl_miner::s_extraRequiredGPUMem;
 unsigned ethash_cl_miner::s_msPerBatch = CL_DEFAULT_MS_PER_BATCH;
 unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE;
 bool ethash_cl_miner::searchForAllDevices(function<bool(cl::Device const&)> _callback)
 {
@ -302,6 +308,7 @@ bool ethash_cl_miner::init(
 		// use requested workgroup size, but we require multiple of 8
 		m_workgroupSize = ((_workgroupSize + 7) / 8) * 8;
 		// make sure that global work size is evenly divisible by the local workgroup size
 		m_globalWorkSize = s_initialGlobalWorkSize;
 		if (m_globalWorkSize % m_workgroupSize != 0)
 			m_globalWorkSize = ((m_globalWorkSize / m_workgroupSize) + 1) * m_workgroupSize;
 		// remember the device's address bits
@ -420,9 +427,8 @@ bool ethash_cl_miner::init(
 	return true;
 }
-void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook, unsigned _msPerBatch)
+void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
 {
 	(void)_msPerBatch;
 	try
 	{
 		struct pending_batch
@ -502,25 +508,28 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 			}
 			// adjust global work size depending on last search time
-			// Global work size must be:
+			if (s_msPerBatch)
 			//  - less than or equal to 2 ^ DEVICE_BITS - 1
 			//  - divisible by lobal work size (workgroup size)
 			auto d = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - t);
 			if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished
 			{
-				if (d > chrono::milliseconds(_msPerBatch * 10 / 9))
+				// Global work size must be:
 				//  - less than or equal to 2 ^ DEVICE_BITS - 1
 				//  - divisible by lobal work size (workgroup size)
 				auto d = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - t);
 				if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished
 				{
-					// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << _msPerBatch << " ms." << endl;
+					if (d > chrono::milliseconds(s_msPerBatch * 10 / 9))
-					m_globalWorkSize = max<unsigned>(128, m_globalWorkSize + m_workgroupSize);
+					{
-					// cerr << "New global work size" << m_globalWorkSize << endl;
+						// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << _msPerBatch << " ms." << endl;
-				}
+						m_globalWorkSize = max<unsigned>(128, m_globalWorkSize + m_workgroupSize);
-				else if (d < chrono::milliseconds(_msPerBatch * 9 / 10))
+						// cerr << "New global work size" << m_globalWorkSize << endl;
-				{
+					}
-					// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << _msPerBatch << " ms." << endl;
+					else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10))
-					m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize);
+					{
-					// Global work size should never be less than the workgroup size
+						// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << _msPerBatch << " ms." << endl;
-					m_globalWorkSize = max<unsigned>(m_workgroupSize,  m_globalWorkSize);
+						m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize);
-					// cerr << "New global work size" << m_globalWorkSize << endl;
+						// Global work size should never be less than the workgroup size
 						m_globalWorkSize = max<unsigned>(m_workgroupSize,  m_globalWorkSize);
 						// cerr << "New global work size" << m_globalWorkSize << endl;
 					}
 				}
 			}
 		}
--- a/libethash-cl/ethash_cl_miner.h
+++ b/libethash-cl/ethash_cl_miner.h
@ -17,6 +17,9 @@
 #include <functional>
 #include <libethash/ethash.h>
 #define CL_DEFAULT_GLOBAL_WORK_SIZE (1024 * 16) // default OpenCL global work size; parenthesized so the macro always expands as a single operand
 #define CL_DEFAULT_MS_PER_BATCH 100 // default target milliseconds per batch; a value of 0 disables adjustment of the global work size
 class ethash_cl_miner
 {
 private:
@ -45,6 +48,8 @@ public:
 	static void listDevices();
 	static bool configureGPU(
 		unsigned _platformId,
 		unsigned _globalWorkSize,
 		unsigned _msPerBatch,
 		bool _allowCPU,
 		unsigned _extraGPUMemory,
 		boost::optional<uint64_t> _currentBlock
@ -58,7 +63,7 @@ public:
 		unsigned _deviceId = 0
 	);
 	void finish();
-	void search(uint8_t const* _header, uint64_t _target, search_hook& _hook, unsigned _msPerBatch = 100);
+	void search(uint8_t const* _header, uint64_t _target, search_hook& _hook);
 	void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count);
 	void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook);
@ -77,10 +82,14 @@ private:
 	cl::Buffer m_hashBuffer[c_bufferCount];
 	cl::Buffer m_searchBuffer[c_bufferCount];
 	unsigned m_workgroupSize;
-	unsigned m_globalWorkSize = c_searchBatchSize;
+	unsigned m_globalWorkSize;
 	bool m_openclOnePointOne;
 	unsigned m_deviceBits;
 	/// The initial global work size for the searches
 	static unsigned s_initialGlobalWorkSize;
 	/// The target milliseconds per batch for the search. If 0, then no adjustment will happen
 	static unsigned s_msPerBatch;
 	/// Allow CPU to appear as an OpenCL device or not. Default is false
 	static bool s_allowCPU;
 	/// GPU memory required for other things, like window rendering e.t.c.
--- a/libethcore/Ethash.cpp
+++ b/libethcore/Ethash.cpp
@ -409,6 +409,8 @@ void Ethash::GPUMiner::listDevices()
 }
 bool Ethash::GPUMiner::configureGPU(
 	unsigned _globalWorkSize,
 	unsigned _msPerBatch,
 	unsigned _platformId,
 	unsigned _deviceId,
 	bool _allowCPU,
@ -418,7 +420,7 @@ bool Ethash::GPUMiner::configureGPU(
 {
 	s_platformId = _platformId;
 	s_deviceId = _deviceId;
-	return ethash_cl_miner::configureGPU(_platformId, _allowCPU, _extraGPUMemory, _currentBlock);
+	return ethash_cl_miner::configureGPU(_platformId, _globalWorkSize, _msPerBatch, _allowCPU, _extraGPUMemory, _currentBlock); // _platformId comes first in ethash_cl_miner::configureGPU's parameter list, ahead of the work size and ms-per-batch values
 }
 #endif
--- a/libethcore/Ethash.h
+++ b/libethcore/Ethash.h
@ -88,7 +88,7 @@ public:
 		static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); }
 		static std::string platformInfo();
 		static void listDevices() {}
-		static bool configureGPU(unsigned, unsigned, bool, unsigned,  boost::optional<uint64_t>) { return false; }
+		static bool configureGPU(unsigned, unsigned, unsigned, unsigned, bool, unsigned,  boost::optional<uint64_t>) { return false; }
 		static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, std::thread::hardware_concurrency()); }
 	protected:
 		void kickOff() override
@ -118,6 +118,8 @@ public:
 		static unsigned getNumDevices();
 		static void listDevices();
 		static bool configureGPU(
 			unsigned _globalWorkSize,
 			unsigned _msPerBatch,
 			unsigned _platformId,
 			unsigned _deviceId,
 			bool _allowCPU,