cleanup of chunks and msperbatch host code

9 years ago · 322534d26e
5 changed files with 8 additions and 88 deletions
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@ -211,16 +211,6 @@ public:
 			m_extraGPUMemory = 1000000 * stol(argv[++i]);
 #endif
 #if ETH_ETHASHCL || !ETH_TRUE
 		else if (arg == "--cl-ms-per-batch" && i + 1 < argc)
 			try
 			{
 				m_msPerBatch = stol(argv[++i]);
 			}
 			catch (...)
 			{
 				cerr << "Bad " << arg << " option: " << argv[i] << endl;
 				BOOST_THROW_EXCEPTION(BadArgument());
 			}
 		else if (arg == "--allow-opencl-cpu")
 			m_clAllowCPU = true;
 #endif
@ -447,7 +437,6 @@ public:
 			if (!EthashGPUMiner::configureGPU(
 					m_localWorkSize,
 					m_globalWorkSizeMultiplier,
 					m_msPerBatch,
 					m_openclPlatform,
 					m_openclDevice,
 					m_clAllowCPU,
@ -540,7 +529,6 @@ public:
 			<< "    --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
 			<< "    --cl-local-work Set the OpenCL local work size. Default is " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl
 			<< "    --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier) << " * " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl
 			<< "    --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(ethash_cl_miner::c_defaultMSPerBatch) << ". If 0 is given then no autoadjustment of global work size will happen" << endl
 #endif
 #if ETH_ETHASHCUDA || !ETH_TRUE
 			<< "    --cuda-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
@ -914,7 +902,6 @@ private:
 	unsigned m_globalWorkSizeMultiplier = ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier;
 	unsigned m_localWorkSize = ethash_cl_miner::c_defaultLocalWorkSize;
 #endif
 	unsigned m_msPerBatch = ethash_cl_miner::c_defaultMSPerBatch;
 #endif
 #if ETH_ETHASHCUDA || !ETH_TRUE
 	unsigned m_globalWorkSizeMultiplier = ethash_cuda_miner::c_defaultGridSize;
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@ -55,7 +55,6 @@ using namespace std;
 unsigned const ethash_cl_miner::c_defaultLocalWorkSize = 64;
 unsigned const ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier = 4096; // * CL_DEFAULT_LOCAL_WORK_SIZE
 unsigned const ethash_cl_miner::c_defaultMSPerBatch = 0;
 // TODO: If at any point we can use libdevcore in here then we should switch to using a LogChannel
 #if defined(_WIN32)
@ -184,7 +183,6 @@ bool ethash_cl_miner::configureGPU(
 	unsigned _platformId,
 	unsigned _localWorkSize,
 	unsigned _globalWorkSize,
 	unsigned _msPerBatch,
 	bool _allowCPU,
 	unsigned _extraGPUMemory,
 	uint64_t _currentBlock
@ -192,7 +190,6 @@ bool ethash_cl_miner::configureGPU(
 {
 	s_workgroupSize = _localWorkSize;
 	s_initialGlobalWorkSize = _globalWorkSize;
 	s_msPerBatch = _msPerBatch;
 	s_allowCPU = _allowCPU;
 	s_extraRequiredGPUMem = _extraGPUMemory;
 	// by default let's only consider the DAG of the first epoch
@ -223,7 +220,6 @@ bool ethash_cl_miner::configureGPU(
 bool ethash_cl_miner::s_allowCPU = false;
 unsigned ethash_cl_miner::s_extraRequiredGPUMem;
 unsigned ethash_cl_miner::s_msPerBatch = ethash_cl_miner::c_defaultMSPerBatch;
 unsigned ethash_cl_miner::s_workgroupSize = ethash_cl_miner::c_defaultLocalWorkSize;
 unsigned ethash_cl_miner::s_initialGlobalWorkSize = ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier * ethash_cl_miner::c_defaultLocalWorkSize;
@ -362,10 +358,6 @@ bool ethash_cl_miner::init(
 		m_globalWorkSize = s_initialGlobalWorkSize;
 		if (m_globalWorkSize % s_workgroupSize != 0)
 			m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize;
 		// remember the device's address bits
 		m_deviceBits = device.getInfo<CL_DEVICE_ADDRESS_BITS>();
 		// make sure first step of global work size adjustment is large enough
 		m_stepWorkSizeAdjust = pow(2, m_deviceBits / 2 + 1);
 		// patch source code
 		// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
@ -397,13 +389,12 @@ bool ethash_cl_miner::init(
 		// create buffer for dag
 		try
 		{
 			m_dagChunksCount = 1;
 			ETHCL_LOG("Creating one big buffer for the DAG");
-			m_dagChunks.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize));
+			m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);
 			ETHCL_LOG("Loading single big chunk kernels");
 			m_searchKernel = cl::Kernel(program, "ethash_search");
 			ETHCL_LOG("Mapping one big chunk.");
-			m_queue.enqueueWriteBuffer(m_dagChunks[0], CL_TRUE, 0, _dagSize, _dag);
+			m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
 		}
 		catch (cl::Error const& err)
 		{
@ -456,13 +447,12 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 #endif
 			m_queue.finish();
-		unsigned argPos = 2;
+		
 		m_searchKernel.setArg(1, m_header);
-		for (unsigned i = 0; i < m_dagChunksCount; ++i, ++argPos)
+		m_searchKernel.setArg(2, m_dag );
 			m_searchKernel.setArg(argPos, m_dagChunks[i]);
 		// pass these to stop the compiler unrolling the loops
-		m_searchKernel.setArg(argPos + 1, target);
+		m_searchKernel.setArg(4, target);
-		m_searchKernel.setArg(argPos + 2, ~0u);
+		m_searchKernel.setArg(5, ~0u);
 		unsigned buf = 0;
 		random_device engine;
@ -472,10 +462,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 			auto t = chrono::high_resolution_clock::now();
 			// supply output buffer to kernel
 			m_searchKernel.setArg(0, m_searchBuffer[buf]);
 			if (m_dagChunksCount == 1)
 			m_searchKernel.setArg(3, start_nonce);
 			else
 				m_searchKernel.setArg(6, start_nonce);
 			// execute it!
 			m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize);
@ -508,44 +495,6 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 				pending.pop();
 			}
 			// adjust global work size depending on last search time
 			if (s_msPerBatch)
 			{
 				// Global work size must be:
 				//  - less than or equal to 2 ^ DEVICE_BITS - 1
 				//  - divisible by local work size (workgroup size)
 				auto d = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - t);
 				if (d != chrono::milliseconds(0)) // if duration is zero, we did not get into the actual search, or the search has not finished
 				{
 					if (d > chrono::milliseconds(s_msPerBatch * 10 / 9))
 					{
 						// Divide the step by 2 when the adjustment direction changes
 						if (m_wayWorkSizeAdjust > -1)
 							m_stepWorkSizeAdjust = max<unsigned>(1, m_stepWorkSizeAdjust / 2);
 						m_wayWorkSizeAdjust = -1;
 						// cerr << "m_stepWorkSizeAdjust: " << m_stepWorkSizeAdjust << ", m_wayWorkSizeAdjust: " << m_wayWorkSizeAdjust << endl;
 						// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << s_msPerBatch << " ms." << endl;
 						m_globalWorkSize = max<unsigned>(128, m_globalWorkSize - m_stepWorkSizeAdjust);
 						// cerr << "New global work size" << m_globalWorkSize << endl;
 					}
 					else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10))
 					{
 						// Divide the step by 2 when the adjustment direction changes
 						if (m_wayWorkSizeAdjust < 1)
 							m_stepWorkSizeAdjust = max<unsigned>(1, m_stepWorkSizeAdjust / 2);
 						m_wayWorkSizeAdjust = 1;
 						// cerr << "m_stepWorkSizeAdjust: " << m_stepWorkSizeAdjust << ", m_wayWorkSizeAdjust: " << m_wayWorkSizeAdjust << endl;
 						// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << s_msPerBatch << " ms." << endl;
 						m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize + m_stepWorkSizeAdjust);
 						// Global work size should never be less than the workgroup size
 						m_globalWorkSize = max<unsigned>(s_workgroupSize,  m_globalWorkSize);
 						// cerr << "New global work size" << m_globalWorkSize << endl;
 					}
 				}
 			}
 		}
 		// not safe to return until this is ready
--- a/libethash-cl/ethash_cl_miner.h
+++ b/libethash-cl/ethash_cl_miner.h
@ -46,7 +46,6 @@ public:
 		unsigned _platformId,
 		unsigned _localWorkSize,
 		unsigned _globalWorkSize,
 		unsigned _msPerBatch,
 		bool _allowCPU,
 		unsigned _extraGPUMemory,
 		uint64_t _currentBlock
@ -61,16 +60,11 @@ public:
 	void finish();
 	void search(uint8_t const* _header, uint64_t _target, search_hook& _hook);
 	void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count);
 	void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook);
 	/* -- default values -- */
 	/// Default value of the local work size. Also known as workgroup size.
 	static unsigned const c_defaultLocalWorkSize;
 	/// Default value of the global work size as a multiplier of the local work size
 	static unsigned const c_defaultGlobalWorkSizeMultiplier;
 	/// Default value of the milliseconds per global work size (per batch)
 	static unsigned const c_defaultMSPerBatch;
 private:
@ -80,18 +74,11 @@ private:
 	cl::Context m_context;
 	cl::CommandQueue m_queue;
 	cl::Kernel m_searchKernel;
-	unsigned int m_dagChunksCount;
+	cl::Buffer m_dag;
 	std::vector<cl::Buffer> m_dagChunks;
 	cl::Buffer m_header;
 	cl::Buffer m_searchBuffer[c_bufferCount];
 	unsigned m_globalWorkSize;
 	bool m_openclOnePointOne;
 	unsigned m_deviceBits;
 	/// The step used in the work size adjustment
 	unsigned int m_stepWorkSizeAdjust;
 	/// Direction of the last work size adjustment: > 0 when previously increased, < 0 when previously decreased
 	int m_wayWorkSizeAdjust = 0;
 	/// The local work size for the search
 	static unsigned s_workgroupSize;
--- a/libethcore/EthashGPUMiner.cpp
+++ b/libethcore/EthashGPUMiner.cpp
@ -202,7 +202,6 @@ void EthashGPUMiner::listDevices()
 bool EthashGPUMiner::configureGPU(
 	unsigned _localWorkSize,
 	unsigned _globalWorkSizeMultiplier,
 	unsigned _msPerBatch,
 	unsigned _platformId,
 	unsigned _deviceId,
 	bool _allowCPU,
@ -223,7 +222,6 @@ bool EthashGPUMiner::configureGPU(
 			_platformId,
 			_localWorkSize,
 			_globalWorkSizeMultiplier * _localWorkSize,
 			_msPerBatch,
 			_allowCPU,
 			_extraGPUMemory,
 			_currentBlock)
--- a/libethcore/EthashGPUMiner.h
+++ b/libethcore/EthashGPUMiner.h
@ -48,7 +48,6 @@ public:
 	static bool configureGPU(
 		unsigned _localWorkSize,
 		unsigned _globalWorkSizeMultiplier,
 		unsigned _msPerBatch,
 		unsigned _platformId,
 		unsigned _deviceId,
 		bool _allowCPU,