From a8eb96755cdffdc7e2c6f59003bc8cb9cc971166 Mon Sep 17 00:00:00 2001
From: Gav Wood <i@gavwood.com>
Date: Tue, 23 Jun 2015 12:21:21 +0200
Subject: [PATCH] Miner targets a restart time of 100ms by default, reducing
 inter-block "pauses".

---
 libethash-cl/ethash_cl_miner.cpp | 93 +++++++++++++++++---------------
 libethash-cl/ethash_cl_miner.h   | 29 +++++-----
 libethcore/Ethash.cpp            | 48 +++++++++++------
 3 files changed, 98 insertions(+), 72 deletions(-)
diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp
index 315f29685..fbb74f5b2 100644
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@@ -30,6 +30,7 @@
 #include <queue>
 #include <random>
 #include <vector>
+#include <boost/timer.hpp>
 #include <libethash/util.h>
 #include <libethash/ethash.h>
 #include <libethash/internal.h>
@@ -64,7 +65,7 @@ static void addDefinition(string& _source, char const* _id, unsigned _value)
 ethash_cl_miner::search_hook::~search_hook() {}
 
 ethash_cl_miner::ethash_cl_miner()
-:	m_opencl_1_1()
+:	m_openclOnePointOne()
 {
 }
 
@@ -252,7 +253,7 @@ void ethash_cl_miner::finish()
 bool ethash_cl_miner::init(
 	uint8_t const* _dag,
 	uint64_t _dagSize,
-	unsigned workgroup_size,
+	unsigned _workgroupSize,
 	unsigned _platformId,
 	unsigned _deviceId
 )
@@ -291,23 +292,23 @@ bool ethash_cl_miner::init(
 			return false;
 		}
 		if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0)
-			m_opencl_1_1 = true;
+			m_openclOnePointOne = true;
 
 		// create context
 		m_context = cl::Context(vector<cl::Device>(&device, &device + 1));
 		m_queue = cl::CommandQueue(m_context, device);
 
 		// use requested workgroup size, but we require multiple of 8
-		m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
+		m_workgroupSize = ((_workgroupSize + 7) / 8) * 8;
 
 		// patch source code
 		// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
 		// into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
 		string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
-		addDefinition(code, "GROUP_SIZE", m_workgroup_size);
+		addDefinition(code, "GROUP_SIZE", m_workgroupSize);
 		addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
 		addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
-		addDefinition(code, "MAX_OUTPUTS", c_max_search_results);
+		addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
 		//debugf("%s", code.c_str());
 
 		// create miner OpenCL program
@@ -330,7 +331,7 @@ bool ethash_cl_miner::init(
 		// create buffer for dag
 		try
 		{
-			m_dagChunksNum = 1;
+			m_dagChunksCount = 1;
 			m_dagChunks.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize));
 			ETHCL_LOG("Created one big buffer for the DAG");
 		}
@@ -346,8 +347,8 @@ bool ethash_cl_miner::init(
 				<< result << ". Trying to allocate 4 chunks."
 			);
 			// The OpenCL kernel has a hard coded number of 4 chunks at the moment
-			m_dagChunksNum = 4;
-			for (unsigned i = 0; i < m_dagChunksNum; i++)
+			m_dagChunksCount = 4;
+			for (unsigned i = 0; i < m_dagChunksCount; i++)
 			{
 				// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
 				ETHCL_LOG("Creating buffer for chunk " << i);
@@ -359,24 +360,24 @@ bool ethash_cl_miner::init(
 			}
 		}
 
-		if (m_dagChunksNum == 1)
+		if (m_dagChunksCount == 1)
 		{
 			ETHCL_LOG("Loading single big chunk kernels");
-			m_hash_kernel = cl::Kernel(program, "ethash_hash");
-			m_search_kernel = cl::Kernel(program, "ethash_search");
+			m_hashKernel = cl::Kernel(program, "ethash_hash");
+			m_searchKernel = cl::Kernel(program, "ethash_search");
 		}
 		else
 		{
 			ETHCL_LOG("Loading chunk kernels");
-			m_hash_kernel = cl::Kernel(program, "ethash_hash_chunks");
-			m_search_kernel = cl::Kernel(program, "ethash_search_chunks");
+			m_hashKernel = cl::Kernel(program, "ethash_hash_chunks");
+			m_searchKernel = cl::Kernel(program, "ethash_search_chunks");
 		}
 
 		// create buffer for header
 		ETHCL_LOG("Creating buffer for header.");
 		m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
 
-		if (m_dagChunksNum == 1)
+		if (m_dagChunksCount == 1)
 		{
 			ETHCL_LOG("Mapping one big chunk.");
 			m_queue.enqueueWriteBuffer(m_dagChunks[0], CL_TRUE, 0, _dagSize, _dag);
@@ -385,12 +386,12 @@ bool ethash_cl_miner::init(
 		{
 			// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
 			void* dag_ptr[4];
-			for (unsigned i = 0; i < m_dagChunksNum; i++)
+			for (unsigned i = 0; i < m_dagChunksCount; i++)
 			{
 				ETHCL_LOG("Mapping chunk " << i);
-				dag_ptr[i] = m_queue.enqueueMapBuffer(m_dagChunks[i], true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
+				dag_ptr[i] = m_queue.enqueueMapBuffer(m_dagChunks[i], true, m_openclOnePointOne ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
 			}
-			for (unsigned i = 0; i < m_dagChunksNum; i++)
+			for (unsigned i = 0; i < m_dagChunksCount; i++)
 			{
 				memcpy(dag_ptr[i], (char *)_dag + i*((_dagSize >> 9) << 7), (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
 				m_queue.enqueueUnmapMemObject(m_dagChunks[i], dag_ptr[i]);
@@ -398,11 +399,11 @@ bool ethash_cl_miner::init(
 		}
 
 		// create mining buffers
-		for (unsigned i = 0; i != c_num_buffers; ++i)
+		for (unsigned i = 0; i != c_bufferCount; ++i)
 		{
 			ETHCL_LOG("Creating mining buffer " << i);
-			m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32 * c_hash_batch_size);
-			m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
+			m_hashBuffer[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_openclOnePointOne ? CL_MEM_HOST_READ_ONLY : 0), 32 * c_hashBatchSize);
+			m_searchBuffer[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_maxSearchResults + 1) * sizeof(uint32_t));
 		}
 	}
 	catch (cl::Error const& err)
@@ -413,7 +414,7 @@ bool ethash_cl_miner::init(
 	return true;
 }
 
-void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
+void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook, unsigned _msPerBatch)
 {
 	try
 	{
@@ -429,8 +430,8 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 
 		// update header constant buffer
 		m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
-		for (unsigned i = 0; i != c_num_buffers; ++i)
-			m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
+		for (unsigned i = 0; i != c_bufferCount; ++i)
+			m_queue.enqueueWriteBuffer(m_searchBuffer[i], false, 0, 4, &c_zero);
 
 #if CL_VERSION_1_2 && 0
 		cl::Event pre_return_event;
@@ -441,53 +442,59 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 			m_queue.finish();
 
 		unsigned argPos = 2;
-		m_search_kernel.setArg(1, m_header);
-		for (unsigned i = 0; i < m_dagChunksNum; ++i, ++argPos)
-			m_search_kernel.setArg(argPos, m_dagChunks[i]);
+		m_searchKernel.setArg(1, m_header);
+		for (unsigned i = 0; i < m_dagChunksCount; ++i, ++argPos)
+			m_searchKernel.setArg(argPos, m_dagChunks[i]);
 		// pass these to stop the compiler unrolling the loops
-		m_search_kernel.setArg(argPos + 1, target);
-		m_search_kernel.setArg(argPos + 2, ~0u);
+		m_searchKernel.setArg(argPos + 1, target);
+		m_searchKernel.setArg(argPos + 2, ~0u);
 
 		unsigned buf = 0;
 		random_device engine;
 		uint64_t start_nonce = uniform_int_distribution<uint64_t>()(engine);
-		for (;; start_nonce += c_search_batch_size)
+		for (;; start_nonce += m_batchSize)
 		{
 			// supply output buffer to kernel
-			m_search_kernel.setArg(0, m_search_buf[buf]);
-			if (m_dagChunksNum == 1)
-				m_search_kernel.setArg(3, start_nonce);
+			m_searchKernel.setArg(0, m_searchBuffer[buf]);
+			if (m_dagChunksCount == 1)
+				m_searchKernel.setArg(3, start_nonce);
 			else
-				m_search_kernel.setArg(6, start_nonce);
+				m_searchKernel.setArg(6, start_nonce);
 
 			// execute it!
-			m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
+			boost::timer t;
+			m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_batchSize, m_workgroupSize);
+			unsigned ms = t.elapsed() * 1000;
+			if (ms > _msPerBatch * 1.1)
+				m_batchSize = max(128, m_batchSize * 9 / 10);
+			else if (ms > _msPerBatch * 0.9)
+				m_batchSize = m_batchSize * 10 / 9;
 
 			pending.push({ start_nonce, buf });
-			buf = (buf + 1) % c_num_buffers;
+			buf = (buf + 1) % c_bufferCount;
 
 			// read results
-			if (pending.size() == c_num_buffers)
+			if (pending.size() == c_bufferCount)
 			{
 				pending_batch const& batch = pending.front();
 
 				// could use pinned host pointer instead
-				uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1 + c_max_search_results) * sizeof(uint32_t));
-				unsigned num_found = min<unsigned>(results[0], c_max_search_results);
+				uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_searchBuffer[batch.buf], true, CL_MAP_READ, 0, (1 + c_maxSearchResults) * sizeof(uint32_t));
+				unsigned num_found = min<unsigned>(results[0], c_maxSearchResults);
 
-				uint64_t nonces[c_max_search_results];
+				uint64_t nonces[c_maxSearchResults];
 				for (unsigned i = 0; i != num_found; ++i)
 					nonces[i] = batch.start_nonce + results[i + 1];
 
-				m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
+				m_queue.enqueueUnmapMemObject(m_searchBuffer[batch.buf], results);
 				bool exit = num_found && hook.found(nonces, num_found);
-				exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
+				exit |= hook.searched(batch.start_nonce, m_batchSize); // always report searched before exit
 				if (exit)
 					break;
 
 				// reset search buffer if we're still going
 				if (num_found)
-					m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero);
+					m_queue.enqueueWriteBuffer(m_searchBuffer[batch.buf], true, 0, 4, &c_zero);
 
 				pending.pop();
 			}
diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h
index f36082a5a..996453c00 100644
--- a/libethash-cl/ethash_cl_miner.h
+++ b/libethash-cl/ethash_cl_miner.h
@@ -19,6 +19,9 @@
 
 class ethash_cl_miner
 {
+private:
+	enum { c_maxSearchResults = 63, c_bufferCount = 2, c_hashBatchSize = 1024, c_searchBatchSize = 1024 * 16 };
+
 public:
 	struct search_hook
 	{
@@ -29,7 +32,6 @@ public:
 		virtual bool searched(uint64_t start_nonce, uint32_t count) = 0;
 	};
 
-public:
 	ethash_cl_miner();
 	~ethash_cl_miner();
 
@@ -50,33 +52,32 @@ public:
 	bool init(
 		uint8_t const* _dag,
 		uint64_t _dagSize,
-		unsigned workgroup_size = 64,
+		unsigned _workgroupSize = 64,
 		unsigned _platformId = 0,
 		unsigned _deviceId = 0
 	);
 	void finish();
-	void search(uint8_t const* header, uint64_t target, search_hook& hook);
+	void search(uint8_t const* _header, uint64_t _target, search_hook& _hook, unsigned _msPerBatch = 100);
 
-	void hash_chunk(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count);
-	void search_chunk(uint8_t const* header, uint64_t target, search_hook& hook);
+	void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count);
+	void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook);
 
 private:
 
 	static std::vector<cl::Device> getDevices(std::vector<cl::Platform> const& _platforms, unsigned _platformId);
-	
-	enum { c_max_search_results = 63, c_num_buffers = 2, c_hash_batch_size = 1024, c_search_batch_size = 1024*256 };
 
 	cl::Context m_context;
 	cl::CommandQueue m_queue;
-	cl::Kernel m_hash_kernel;
-	cl::Kernel m_search_kernel;
-	unsigned int m_dagChunksNum;
+	cl::Kernel m_hashKernel;
+	cl::Kernel m_searchKernel;
+	unsigned int m_dagChunksCount;
 	std::vector<cl::Buffer> m_dagChunks;
 	cl::Buffer m_header;
-	cl::Buffer m_hash_buf[c_num_buffers];
-	cl::Buffer m_search_buf[c_num_buffers];
-	unsigned m_workgroup_size;
-	bool m_opencl_1_1;
+	cl::Buffer m_hashBuffer[c_bufferCount];
+	cl::Buffer m_searchBuffer[c_bufferCount];
+	unsigned m_workgroupSize;
+	unsigned m_batchSize = c_searchBatchSize;
+	bool m_openclOnePointOne;
 
 	/// Allow CPU to appear as an OpenCL device or not. Default is false
 	static bool s_allowCPU;
diff --git a/libethcore/Ethash.cpp b/libethcore/Ethash.cpp
index b277e3c1c..fbc7d4dce 100644
--- a/libethcore/Ethash.cpp
+++ b/libethcore/Ethash.cpp
@@ -225,6 +225,26 @@ std::string Ethash::CPUMiner::platformInfo()
 
 #if ETH_ETHASHCL || !ETH_TRUE
 
+template <class N>
+class Notified
+{
+public:
+	Notified() {}
+	Notified(N const& _v): m_value(_v) {}
+	Notified& operator=(N const& _v) { std::unique_lock l(m_mutex); m_value = _v; m_cv.notify_all(); return *this; }
+
+	operator N() const { std::unique_lock l(m_mutex); return m_value; }
+
+	void wait() const { std::unique_lock l(m_mutex); m_cv.wait(l); }
+	void wait(N const& _v) const { std::unique_lock l(m_mutex); m_cv.wait(l, [&](){return m_value == _v;}); }
+	template <class F> void wait(F const& _f) const { std::unique_lock l(m_mutex); m_cv.wait(l, _f); }
+
+private:
+	Mutex m_mutex;
+	std::condition_variable m_cv;
+	N m_value;
+};
+
 class EthashCLHook: public ethash_cl_miner::search_hook
 {
 public:
@@ -232,19 +252,25 @@ public:
 
 	void abort()
 	{
-		Guard l(x_all);
+		std::unique_lock l(x_all);
 		if (m_aborted)
 			return;
 //		cdebug << "Attempting to abort";
+
 		m_abort = true;
-		for (unsigned timeout = 0; timeout < 100 && !m_aborted; ++timeout)
-			std::this_thread::sleep_for(chrono::milliseconds(30));
+		// m_abort is true so now searched()/found() will return true to abort the search.
+		// we hang around on this thread waiting for them to point out that they have aborted since
+		// otherwise we may end up deleting this object prior to searched()/found() being called.
+		m_aborted.wait(true);
+//		for (unsigned timeout = 0; timeout < 100 && !m_aborted; ++timeout)
+//			std::this_thread::sleep_for(chrono::milliseconds(30));
 //		if (!m_aborted)
 //			cwarn << "Couldn't abort. Abandoning OpenCL process.";
 	}
 
 	void reset()
 	{
+		Mutex l(x_all);
 		m_aborted = m_abort = false;
 	}
 
@@ -253,27 +279,19 @@ protected:
 	{
 //		dev::operator <<(std::cerr << "Found nonces: ", vector<uint64_t>(_nonces, _nonces + _count)) << std::endl;
 		for (uint32_t i = 0; i < _count; ++i)
-		{
 			if (m_owner->report(_nonces[i]))
-			{
-				m_aborted = true;
-				return true;
-			}
-		}
+				return (m_aborted = true);
 		return m_owner->shouldStop();
 	}
 
 	virtual bool searched(uint64_t _startNonce, uint32_t _count) override
 	{
-		Guard l(x_all);
+		Mutex l(x_all);
 //		std::cerr << "Searched " << _count << " from " << _startNonce << std::endl;
 		m_owner->accumulateHashes(_count);
 		m_last = _startNonce + _count;
 		if (m_abort || m_owner->shouldStop())
-		{
-			m_aborted = true;
-			return true;
-		}
+			return (m_aborted = true);
 		return false;
 	}
 
@@ -281,7 +299,7 @@ private:
 	Mutex x_all;
 	uint64_t m_last;
 	bool m_abort = false;
-	bool m_aborted = true;
+	Notified<bool> m_aborted = {true};
 	Ethash::GPUMiner* m_owner = nullptr;
 };