From 96bf8ac00921f8b321e2bedefe7e7a30fd6a08bf Mon Sep 17 00:00:00 2001
From: Kristoffer Josefsson <kejace@gmail.com>
Date: Fri, 29 May 2015 08:44:06 -0700
Subject: [PATCH 01/11] Added chunked upload to older cards by @sontol.

---
 libethash-cl/ethash_cl_miner.cpp       | 386 +++++++++++++++----------
 libethash-cl/ethash_cl_miner.h         |   6 +-
 libethash-cl/ethash_cl_miner_kernel.cl | 142 ++++++++-
 3 files changed, 378 insertions(+), 156 deletions(-)
diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp
index be17ba449..be4e7532c 100644
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@@ -24,6 +24,7 @@
 
 #include <cstdio>
 #include <cstdlib>
+#include <fstream>
 #include <iostream>
 #include <assert.h>
 #include <queue>
@@ -42,9 +43,13 @@
 #define CL_MEM_HOST_READ_ONLY 0
 #endif
 
+//#define CHUNKS
+
 #undef min
 #undef max
 
+//#define CHUNKS
+
 using namespace std;
 
 static void add_definition(std::string& source, char const* id, unsigned value)
@@ -131,99 +136,147 @@ void ethash_cl_miner::finish()
 bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned workgroup_size, unsigned _platformId, unsigned _deviceId)
 {
 	// get all platforms
-	std::vector<cl::Platform> platforms;
-	cl::Platform::get(&platforms);
-	if (platforms.empty())
-	{
-		cout << "No OpenCL platforms found." << endl;
-		return false;
-	}
+	try {
+		std::vector<cl::Platform> platforms;
+		cl::Platform::get(&platforms);
+		if (platforms.empty())
+		{
+			cout << "No OpenCL platforms found." << endl;
+			return false;
+		}
 
-	// use selected platform
+		// use selected platform
 
-	_platformId = std::min<unsigned>(_platformId, platforms.size() - 1);
+		_platformId = std::min<unsigned>(_platformId, platforms.size() - 1);
 
-	cout << "Using platform: " << platforms[_platformId].getInfo<CL_PLATFORM_NAME>().c_str() << endl;
+		cout << "Using platform: " << platforms[_platformId].getInfo<CL_PLATFORM_NAME>().c_str() << endl;
 
-	// get GPU device of the default platform
-	std::vector<cl::Device> devices;
-	platforms[_platformId].getDevices(CL_DEVICE_TYPE_ALL, &devices);
-	if (devices.empty())
-	{
-		cout << "No OpenCL devices found." << endl;
-		return false;
-	}
+		// get GPU device of the default platform
+		std::vector<cl::Device> devices;
+		platforms[_platformId].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+		if (devices.empty())
+		{
+			cout << "No OpenCL devices found." << endl;
+			return false;
+		}
 
-	// use selected device
-	cl::Device& device = devices[std::min<unsigned>(_deviceId, devices.size() - 1)];
-	std::string device_version = device.getInfo<CL_DEVICE_VERSION>();
-	cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>().c_str() << "(" << device_version.c_str() << ")" << endl;
+		// use selected device
+		cl::Device& device = devices[std::min<unsigned>(_deviceId, devices.size() - 1)];
+		std::string device_version = device.getInfo<CL_DEVICE_VERSION>();
+		cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>().c_str() << "(" << device_version.c_str() << ")" << endl;
 
-	if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0)
-	{
-		cout << "OpenCL 1.0 is not supported." << endl;
-		return false;
-	}
-	if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0)
-		m_opencl_1_1 = true;
-
-	// create context
-	m_context = cl::Context(std::vector<cl::Device>(&device, &device + 1));
-	m_queue = cl::CommandQueue(m_context, device);
-
-	// use requested workgroup size, but we require multiple of 8
-	m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
-
-	// patch source code
-	std::string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
-	add_definition(code, "GROUP_SIZE", m_workgroup_size);
-	add_definition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
-	add_definition(code, "ACCESSES", ETHASH_ACCESSES);
-	add_definition(code, "MAX_OUTPUTS", c_max_search_results);
-	//debugf("%s", code.c_str());
-
-	// create miner OpenCL program
-	cl::Program::Sources sources;
-	sources.push_back({code.c_str(), code.size()});
-
-	cl::Program program(m_context, sources);
-	try
-	{
-		program.build({device});
+		if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0)
+		{
+			cout << "OpenCL 1.0 is not supported." << endl;
+			return false;
+		}
+		if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0)
+			m_opencl_1_1 = true;
+
+		// create context
+		m_context = cl::Context(std::vector<cl::Device>(&device, &device + 1));
+		m_queue = cl::CommandQueue(m_context, device);
+
+		// use requested workgroup size, but we require multiple of 8
+		m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
+
+		// patch source code
+		std::ifstream t("ethash_cl_miner_kernel.cl");
+		std::string code((std::istreambuf_iterator<char>(t)),
+			std::istreambuf_iterator<char>());
+		add_definition(code, "GROUP_SIZE", m_workgroup_size);
+		add_definition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
+		add_definition(code, "ACCESSES", ETHASH_ACCESSES);
+		add_definition(code, "MAX_OUTPUTS", c_max_search_results);
+		//debugf("%s", code.c_str());
+
+		// create miner OpenCL program
+		cl::Program::Sources sources;
+		sources.push_back({ code.c_str(), code.size() });
+
+		cl::Program program(m_context, sources);
+		try
+		{
+			program.build({ device });
+			cout << "Printing program log" << endl;
+			cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str();
+		}
+		catch (cl::Error err)
+		{
+			cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str();
+			return false;
+		}
+		#ifdef CHUNKS
+		cout << "loading ethash_hash_chunks" << endl;
+		m_hash_kernel = cl::Kernel(program, "ethash_hash_chunks");
+		cout << "loading ethash_search_chunks" << endl;
+		m_search_kernel = cl::Kernel(program, "ethash_search_chunks");
+
+		#else
+		cout << "loading ethash_hash" << endl;
+		m_hash_kernel = cl::Kernel(program, "ethash_hash");
+		cout << "loading ethash_search" << endl;
+		m_search_kernel = cl::Kernel(program, "ethash_search");
+		#endif
+
+		// create buffer for dag
+		#ifdef CHUNKS
+		for (unsigned i = 0; i < 4; i++){
+            
+            cout << "Creating chunky buffer: " << i << endl;
+			m_dags[i] = cl::Buffer(m_context, CL_MEM_READ_ONLY, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
+		}
+		#else
+		cout << "Creating one big buffer." << endl;
+		m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);
+		#endif
+
+		// create buffer for header
+		cout << "Creating buffer for header." << endl;
+		m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
+
+		#ifdef CHUNKS
+		void* dag_ptr[4];
+		for (unsigned i = 0; i < 4; i++)
+		{
+			cout << "Mapping chunk " << i << endl;
+			dag_ptr[i] = m_queue.enqueueMapBuffer(m_dags[i], true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
+		}
+		for (unsigned i = 0; i < 4; i++)
+		{
+			memcpy(dag_ptr[i], (char *)_dag + i*((_dagSize >> 9) << 7), (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
+			m_queue.enqueueUnmapMemObject(m_dags[i], dag_ptr[i]);
+		}
+        #else
+        cout << "Mapping chunk." << endl;
+        m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
+        #endif
+		// compute dag on CPU
+		/*{
+			m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
+
+			// if this throws then it's because we probably need to subdivide the dag uploads for compatibility
+			//		void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, _dagSize);
+			// memcpying 1GB: horrible... really. horrible. but necessary since we can't mmap *and* gpumap.
+			//		_fillDAG(dag_ptr);
+			//		m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
+			}*/
+
+		// create mining buffers
+		for (unsigned i = 0; i != c_num_buffers; ++i)
+		{
+			cout << "Creating minig buffer " << i <<endl;
+			m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32 * c_hash_batch_size);
+			m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
+		}
+		
 	}
 	catch (cl::Error err)
 	{
-		cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str();
-		return false;
+		std::cout << err.what() << "(" << err.err() << ")" << std::endl;
 	}
-	m_hash_kernel = cl::Kernel(program, "ethash_hash");
-	m_search_kernel = cl::Kernel(program, "ethash_search");
-
-	// create buffer for dag
-	m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);
-	
-	// create buffer for header
-	m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
 
-	// compute dag on CPU
-	try {
-		m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
-	}
-	catch (...)
-	{
-		// didn't work. shitty driver. try allocating in CPU RAM and manually memcpying it.
-		void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, _dagSize);
-		memcpy(dag_ptr, _dag, _dagSize);
-		m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
-	}
-
-	// create mining buffers
-	for (unsigned i = 0; i != c_num_buffers; ++i)
-	{
-		m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32*c_hash_batch_size);
-		m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
-	}
-	return true;
+	return true;	
 }
 
 void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count)
@@ -248,10 +301,22 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 		uint isolate
 		)
 	*/
+    #ifdef CHUNKS
+    cout << "Setting chunk hash arguments." << endl;
+	m_hash_kernel.setArg(1, m_header);
+	m_hash_kernel.setArg(2, m_dags[0]);
+	m_hash_kernel.setArg(3, m_dags[1]);
+	m_hash_kernel.setArg(4, m_dags[2]);
+	m_hash_kernel.setArg(5, m_dags[3]);
+	m_hash_kernel.setArg(6, nonce);
+	m_hash_kernel.setArg(7, ~0u); // have to pass this to stop the compile unrolling the loop
+	#else
+	cout << "Setting hash arguments." << endl;
 	m_hash_kernel.setArg(1, m_header);
 	m_hash_kernel.setArg(2, m_dag);
 	m_hash_kernel.setArg(3, nonce);
 	m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
+    #endif
 
 	unsigned buf = 0;
 	for (unsigned i = 0; i < count || !pending.empty(); )
@@ -297,95 +362,120 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 
 void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
 {
-	struct pending_batch
-	{
-		uint64_t start_nonce;
-		unsigned buf;
-	};
-	std::queue<pending_batch> pending;
+	try {
+		struct pending_batch
+		{
+			uint64_t start_nonce;
+			unsigned buf;
+		};
+		std::queue<pending_batch> pending;
 
-	static uint32_t const c_zero = 0;
+		static uint32_t const c_zero = 0;
 
-	// update header constant buffer
-	m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
-	for (unsigned i = 0; i != c_num_buffers; ++i)
-		m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
+		// update header constant buffer
+		m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
+		for (unsigned i = 0; i != c_num_buffers; ++i)
+			m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
 
 #if CL_VERSION_1_2 && 0
-	cl::Event pre_return_event;
-	if (!m_opencl_1_1)
-		m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
-	else
+		cl::Event pre_return_event;
+		if (!m_opencl_1_1)
+			m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
+		else
 #endif
-		m_queue.finish();
+			m_queue.finish();
 
-	/*
-	__kernel void ethash_combined_search(
+		/*
+		__kernel void ethash_combined_search(
 		__global hash32_t* g_hashes,			// 0
 		__constant hash32_t const* g_header,	// 1
 		__global hash128_t const* g_dag,		// 2
 		ulong start_nonce,						// 3
 		ulong target,							// 4
 		uint isolate							// 5
-	)
-	*/
-	m_search_kernel.setArg(1, m_header);
-	m_search_kernel.setArg(2, m_dag);
-
-	// pass these to stop the compiler unrolling the loops
-	m_search_kernel.setArg(4, target);
-	m_search_kernel.setArg(5, ~0u);
+		)
+		*/
+		#ifdef CHUNKS
+		cout << "Setting chunk search arguments." << endl;
+		m_search_kernel.setArg(1, m_header);
+		m_search_kernel.setArg(2, m_dags[0]);
+		m_search_kernel.setArg(3, m_dags[1]);
+		m_search_kernel.setArg(4, m_dags[2]);
+		m_search_kernel.setArg(5, m_dags[3]);
+
+		// pass these to stop the compiler unrolling the loops
+		m_search_kernel.setArg(7, target);
+		m_search_kernel.setArg(8, ~0u);
+
+        #else
+        cout << "Setting search arguments." << endl;
+		m_search_kernel.setArg(1, m_header);
+		m_search_kernel.setArg(2, m_dag);
+
+		// pass these to stop the compiler unrolling the loops
+		m_search_kernel.setArg(4, target);
+		m_search_kernel.setArg(5, ~0u);
+		#endif
+
+
+
+		unsigned buf = 0;
+		std::random_device engine;
+		uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine);
+		for (;; start_nonce += c_search_batch_size)
+		{
+			// supply output buffer to kernel
+			m_search_kernel.setArg(0, m_search_buf[buf]);
+			#ifdef CHUNKS
+			m_search_kernel.setArg(6, start_nonce);
+			#else
+			m_search_kernel.setArg(3, start_nonce);
+			#endif
 
+			// execute it!
+			m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
 
-	unsigned buf = 0;
-	std::random_device engine;
-	uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine);
-	for (; ; start_nonce += c_search_batch_size)
-	{
-		// supply output buffer to kernel
-		m_search_kernel.setArg(0, m_search_buf[buf]);
-		m_search_kernel.setArg(3, start_nonce);
+			pending.push({ start_nonce, buf });
+			buf = (buf + 1) % c_num_buffers;
 
-		// execute it!
-		m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
-		
-		pending.push({start_nonce, buf});
-		buf = (buf + 1) % c_num_buffers;
+			// read results
+			if (pending.size() == c_num_buffers)
+			{
+				pending_batch const& batch = pending.front();
 
-		// read results
-		if (pending.size() == c_num_buffers)
-		{
-			pending_batch const& batch = pending.front();
+				// could use pinned host pointer instead
+				uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1 + c_max_search_results) * sizeof(uint32_t));
+				unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);
 
-			// could use pinned host pointer instead
-			uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t));
-			unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);
+				uint64_t nonces[c_max_search_results];
+				for (unsigned i = 0; i != num_found; ++i)
+				{
+					nonces[i] = batch.start_nonce + results[i + 1];
+				}
 
-			uint64_t nonces[c_max_search_results];
-			for (unsigned i = 0; i != num_found; ++i)
-			{
-				nonces[i] = batch.start_nonce + results[i+1];
-			}
+				m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
 
-			m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
-			
-			bool exit = num_found && hook.found(nonces, num_found);
-			exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
-			if (exit)
-				break;
+				bool exit = num_found && hook.found(nonces, num_found);
+				exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
+				if (exit)
+					break;
 
-			// reset search buffer if we're still going
-			if (num_found)
-				m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero);
+				// reset search buffer if we're still going
+				if (num_found)
+					m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero);
 
-			pending.pop();
+				pending.pop();
+			}
 		}
-	}
 
-	// not safe to return until this is ready
+		// not safe to return until this is ready
 #if CL_VERSION_1_2 && 0
-	if (!m_opencl_1_1)
-		pre_return_event.wait();
+		if (!m_opencl_1_1)
+			pre_return_event.wait();
 #endif
+	}
+	catch (cl::Error err)
+	{
+		std::cout << err.what() << "(" << err.err() << ")" << std::endl;
+	}
 }
-
diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h
index 43bfa2336..9c97f2aa4 100644
--- a/libethash-cl/ethash_cl_miner.h
+++ b/libethash-cl/ethash_cl_miner.h
@@ -41,6 +41,9 @@ public:
 	void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count);
 	void search(uint8_t const* header, uint64_t target, search_hook& hook);
 
+	void hash_chunk(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count);
+	void search_chunk(uint8_t const* header, uint64_t target, search_hook& hook);
+
 private:
 	enum { c_max_search_results = 63, c_num_buffers = 2, c_hash_batch_size = 1024, c_search_batch_size = 1024*256 };
 
@@ -49,9 +52,10 @@ private:
 	cl::Kernel m_hash_kernel;
 	cl::Kernel m_search_kernel;
 	cl::Buffer m_dag;
+	cl::Buffer m_dags[4];
 	cl::Buffer m_header;
 	cl::Buffer m_hash_buf[c_num_buffers];
 	cl::Buffer m_search_buf[c_num_buffers];
 	unsigned m_workgroup_size;
 	bool m_opencl_1_1;
-};
+};
\ No newline at end of file
diff --git a/libethash-cl/ethash_cl_miner_kernel.cl b/libethash-cl/ethash_cl_miner_kernel.cl
index 3c8b9dc92..8567bb164 100644
--- a/libethash-cl/ethash_cl_miner_kernel.cl
+++ b/libethash-cl/ethash_cl_miner_kernel.cl
@@ -179,13 +179,13 @@ void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
 		// much we try and help the compiler save VGPRs because it seems to throw
 		// that information away, hence the implementation of keccak here
 		// doesn't bother.
-		if (isolate) 
+		if (isolate)
 		{
 			keccak_f1600_round((uint2*)a, r++, 25);
 		}
 	}
 	while (r < 23);
-	
+
 	// final round optimised for digest size
 	keccak_f1600_round((uint2*)a, r++, out_size);
 }
@@ -232,7 +232,7 @@ hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
 	hash64_t init;
 	uint const init_size = countof(init.ulongs);
 	uint const hash_size = countof(header->ulongs);
-	
+
 	// sha3_512(header .. nonce)
 	ulong state[25];
 	copy(state, header->ulongs, hash_size);
@@ -243,6 +243,40 @@ hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
 	return init;
 }
 
+uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, __global hash128_t const* g_dag1, __global hash128_t const* g_dag2, __global hash128_t const* g_dag3, uint isolate)
+{
+	uint4 mix = init;
+
+	// share init0
+	if (thread_id == 0)
+		*share = mix.x;
+	barrier(CLK_LOCAL_MEM_FENCE);
+	uint init0 = *share;
+
+	uint a = 0;
+	do
+	{
+		bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
+
+		#pragma unroll
+		for (uint i = 0; i != 4; ++i)
+		{
+			if (update_share)
+			{
+				uint m[4] = { mix.x, mix.y, mix.z, mix.w };
+				*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
+			}
+			barrier(CLK_LOCAL_MEM_FENCE);
+
+			mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]);
+		}
+	} while ((a += 4) != (ACCESSES & isolate));
+
+	return fnv_reduce(mix);
+}
+
+
+
 uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
 {
 	uint4 mix = init;
@@ -276,6 +310,7 @@ uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash12
 	return fnv_reduce(mix);
 }
 
+
 hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
 {
 	ulong state[25];
@@ -309,7 +344,7 @@ hash32_t compute_hash_simple(
 	{
 		mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
 	}
-	
+
 	uint mix_val = mix.uints[0];
 	uint init0 = mix.uints[0];
 	uint a = 0;
@@ -333,7 +368,7 @@ hash32_t compute_hash_simple(
 	{
 		fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
 	}
-	
+
 	return final_hash(&init, &fnv_mix, isolate);
 }
 
@@ -347,6 +382,7 @@ typedef union
 	hash32_t mix;
 } compute_hash_share;
 
+
 hash32_t compute_hash(
 	__local compute_hash_share* share,
 	__constant hash32_t const* g_header,
@@ -390,6 +426,53 @@ hash32_t compute_hash(
 	return final_hash(&init, &mix, isolate);
 }
 
+
+hash32_t compute_hash_chunks(
+	__local compute_hash_share* share,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	__global hash128_t const* g_dag1,
+	__global hash128_t const* g_dag2,
+	__global hash128_t const* g_dag3,
+	ulong nonce,
+	uint isolate
+	)
+{
+	uint const gid = get_global_id(0);
+
+	// Compute one init hash per work item.
+	hash64_t init = init_hash(g_header, nonce, isolate);
+
+	// Threads work together in this phase in groups of 8.
+	uint const thread_id = gid % THREADS_PER_HASH;
+	uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
+
+	hash32_t mix;
+	uint i = 0;
+	do
+	{
+		// share init with other threads
+		if (i == thread_id)
+			share[hash_id].init = init;
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		uint thread_mix = inner_loop_chunks(thread_init, thread_id, share[hash_id].mix.uints, g_dag, g_dag1, g_dag2, g_dag3, isolate);
+
+		share[hash_id].mix.uints[thread_id] = thread_mix;
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		if (i == thread_id)
+			mix = share[hash_id].mix;
+		barrier(CLK_LOCAL_MEM_FENCE);
+	}
+	while (++i != (THREADS_PER_HASH & isolate));
+
+	return final_hash(&init, &mix, isolate);
+}
+
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 __kernel void ethash_hash_simple(
 	__global hash32_t* g_hashes,
@@ -415,13 +498,15 @@ __kernel void ethash_search_simple(
 {
 	uint const gid = get_global_id(0);
 	hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
-	if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
+
+	if (hash.ulongs[countof(hash.ulongs)-1] < target)
 	{
-		uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+		uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
 		g_output[slot] = gid;
 	}
 }
 
+
 __attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
 __kernel void ethash_hash(
 	__global hash32_t* g_hashes,
@@ -458,3 +543,46 @@ __kernel void ethash_search(
 		g_output[slot] = gid;
 	}
 }
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash_chunks(
+	__global hash32_t* g_hashes,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	__global hash128_t const* g_dag1,
+	__global hash128_t const* g_dag2,
+	__global hash128_t const* g_dag3,
+	ulong start_nonce,
+	uint isolate
+	)
+{
+	__local compute_hash_share share[HASHES_PER_LOOP];
+
+	uint const gid = get_global_id(0);
+	g_hashes[gid] = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3,start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search_chunks(
+	__global volatile uint* restrict g_output,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	__global hash128_t const* g_dag1,
+	__global hash128_t const* g_dag2,
+	__global hash128_t const* g_dag3,
+	ulong start_nonce,
+	ulong target,
+	uint isolate
+	)
+{
+	__local compute_hash_share share[HASHES_PER_LOOP];
+
+	uint const gid = get_global_id(0);
+	hash32_t hash = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3, start_nonce + gid, isolate);
+
+	if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
+	{
+		uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
+		g_output[slot] = gid;
+	}
+}
\ No newline at end of file

From aeb49b809135e5db9a28ad20443f23abf25114d9 Mon Sep 17 00:00:00 2001
From: Lefteris Karapetsas <lefteris@refu.co>
Date: Mon, 1 Jun 2015 13:03:02 +0200
Subject: [PATCH 02/11] Style changes

---
 libethash-cl/ethash_cl_miner.cpp | 75 ++++++++++++--------------------
 1 file changed, 28 insertions(+), 47 deletions(-)

diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp
index be4e7532c..520a13180 100644
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@@ -43,13 +43,12 @@
 #define CL_MEM_HOST_READ_ONLY 0
 #endif
 
-//#define CHUNKS
+// maybe move to CMakeLists.txt ?
+// #define ETHASH_CL_CHUNK_UPLOAD
 
 #undef min
 #undef max
 
-//#define CHUNKS
-
 using namespace std;
 
 static void add_definition(std::string& source, char const* id, unsigned value)
@@ -136,7 +135,8 @@ void ethash_cl_miner::finish()
 bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned workgroup_size, unsigned _platformId, unsigned _deviceId)
 {
 	// get all platforms
-	try {
+	try
+	{
 		std::vector<cl::Platform> platforms;
 		cl::Platform::get(&platforms);
 		if (platforms.empty())
@@ -146,7 +146,6 @@ bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned work
 		}
 
 		// use selected platform
-
 		_platformId = std::min<unsigned>(_platformId, platforms.size() - 1);
 
 		cout << "Using platform: " << platforms[_platformId].getInfo<CL_PLATFORM_NAME>().c_str() << endl;
@@ -206,12 +205,11 @@ bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned work
 			cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str();
 			return false;
 		}
-		#ifdef CHUNKS
+		#ifdef ETHASH_CL_CHUNK_UPLOAD
 		cout << "loading ethash_hash_chunks" << endl;
 		m_hash_kernel = cl::Kernel(program, "ethash_hash_chunks");
 		cout << "loading ethash_search_chunks" << endl;
 		m_search_kernel = cl::Kernel(program, "ethash_search_chunks");
-
 		#else
 		cout << "loading ethash_hash" << endl;
 		m_hash_kernel = cl::Kernel(program, "ethash_hash");
@@ -220,10 +218,10 @@ bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned work
 		#endif
 
 		// create buffer for dag
-		#ifdef CHUNKS
-		for (unsigned i = 0; i < 4; i++){
-            
-            cout << "Creating chunky buffer: " << i << endl;
+		#ifdef ETHASH_CL_CHUNK_UPLOAD
+		for (unsigned i = 0; i < 4; i++)
+		{
+			cout << "Creating chunky buffer: " << i << endl;
 			m_dags[i] = cl::Buffer(m_context, CL_MEM_READ_ONLY, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
 		}
 		#else
@@ -235,7 +233,7 @@ bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned work
 		cout << "Creating buffer for header." << endl;
 		m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
 
-		#ifdef CHUNKS
+		#ifdef ETHASH_CL_CHUNK_UPLOAD
 		void* dag_ptr[4];
 		for (unsigned i = 0; i < 4; i++)
 		{
@@ -247,36 +245,25 @@ bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned work
 			memcpy(dag_ptr[i], (char *)_dag + i*((_dagSize >> 9) << 7), (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
 			m_queue.enqueueUnmapMemObject(m_dags[i], dag_ptr[i]);
 		}
-        #else
-        cout << "Mapping chunk." << endl;
-        m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
-        #endif
-		// compute dag on CPU
-		/*{
-			m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
-
-			// if this throws then it's because we probably need to subdivide the dag uploads for compatibility
-			//		void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, _dagSize);
-			// memcpying 1GB: horrible... really. horrible. but necessary since we can't mmap *and* gpumap.
-			//		_fillDAG(dag_ptr);
-			//		m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
-			}*/
+		#else
+		cout << "Mapping chunk." << endl;
+		m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
+		#endif
 
 		// create mining buffers
 		for (unsigned i = 0; i != c_num_buffers; ++i)
 		{
-			cout << "Creating minig buffer " << i <<endl;
+			cout << "Creating mining buffer " << i <<endl;
 			m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32 * c_hash_batch_size);
 			m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
 		}
-		
 	}
 	catch (cl::Error err)
 	{
 		std::cout << err.what() << "(" << err.err() << ")" << std::endl;
 	}
 
-	return true;	
+	return true;
 }
 
 void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count)
@@ -288,7 +275,7 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 		unsigned buf;
 	};
 	std::queue<pending_batch> pending;
-	
+
 	// update header constant buffer
 	m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header);
 
@@ -301,8 +288,8 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 		uint isolate
 		)
 	*/
-    #ifdef CHUNKS
-    cout << "Setting chunk hash arguments." << endl;
+	#ifdef ETHASH_CL_CHUNK_UPLOAD
+	cout << "Setting chunk hash arguments." << endl;
 	m_hash_kernel.setArg(1, m_header);
 	m_hash_kernel.setArg(2, m_dags[0]);
 	m_hash_kernel.setArg(3, m_dags[1]);
@@ -316,7 +303,7 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 	m_hash_kernel.setArg(2, m_dag);
 	m_hash_kernel.setArg(3, nonce);
 	m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
-    #endif
+	#endif
 
 	unsigned buf = 0;
 	for (unsigned i = 0; i < count || !pending.empty(); )
@@ -336,9 +323,9 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 				cl::NullRange,
 				cl::NDRange(batch_count),
 				cl::NDRange(m_workgroup_size)
-				);
+			);
 			m_queue.flush();
-		
+
 			pending.push({i, this_count, buf});
 			i += this_count;
 			buf = (buf + 1) % c_num_buffers;
@@ -348,12 +335,10 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 		if (i == count || pending.size() == c_num_buffers)
 		{
 			pending_batch const& batch = pending.front();
-
 			// could use pinned host pointer instead, but this path isn't that important.
 			uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * ETHASH_BYTES);
 			memcpy(ret + batch.base*ETHASH_BYTES, hashes, batch.count*ETHASH_BYTES);
 			m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes);
-
 			pending.pop();
 		}
 	}
@@ -362,7 +347,8 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 
 void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
 {
-	try {
+	try
+	{
 		struct pending_batch
 		{
 			uint64_t start_nonce;
@@ -395,7 +381,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 		uint isolate							// 5
 		)
 		*/
-		#ifdef CHUNKS
+		#ifdef ETHASH_CL_CHUNK_UPLOAD
 		cout << "Setting chunk search arguments." << endl;
 		m_search_kernel.setArg(1, m_header);
 		m_search_kernel.setArg(2, m_dags[0]);
@@ -407,8 +393,8 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 		m_search_kernel.setArg(7, target);
 		m_search_kernel.setArg(8, ~0u);
 
-        #else
-        cout << "Setting search arguments." << endl;
+		#else
+		cout << "Setting search arguments." << endl;
 		m_search_kernel.setArg(1, m_header);
 		m_search_kernel.setArg(2, m_dag);
 
@@ -417,8 +403,6 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 		m_search_kernel.setArg(5, ~0u);
 		#endif
 
-
-
 		unsigned buf = 0;
 		std::random_device engine;
 		uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine);
@@ -426,7 +410,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 		{
 			// supply output buffer to kernel
 			m_search_kernel.setArg(0, m_search_buf[buf]);
-			#ifdef CHUNKS
+			#ifdef ETHASH_CL_CHUNK_UPLOAD
 			m_search_kernel.setArg(6, start_nonce);
 			#else
 			m_search_kernel.setArg(3, start_nonce);
@@ -449,12 +433,9 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 
 				uint64_t nonces[c_max_search_results];
 				for (unsigned i = 0; i != num_found; ++i)
-				{
 					nonces[i] = batch.start_nonce + results[i + 1];
-				}
 
 				m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
-
 				bool exit = num_found && hook.found(nonces, num_found);
 				exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
 				if (exit)

From 587209cf5dd86702de000696940b7075d393bfb0 Mon Sep 17 00:00:00 2001
From: Lefteris Karapetsas <lefteris@refu.co>
Date: Mon, 1 Jun 2015 14:49:58 +0200
Subject: [PATCH 03/11] GPU DAG Chunks is now dynamic argument

By providing the --use-chunks argument dagChunks is set to 4. Default is
1 big chunk. Future improvement could be to provide arbitrary number of chunks.
---
 ethminer/MinerAux.h              |   6 ++
 libethash-cl/ethash_cl_miner.cpp | 150 +++++++++++++++----------------
 libethash-cl/ethash_cl_miner.h   |  15 +++-
 libethcore/Ethash.cpp            |   3 +-
 libethcore/Ethash.h              |   2 +
 5 files changed, 96 insertions(+), 80 deletions(-)

diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h
index 47fd2e2ae..b6d87e181 100644
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@@ -127,6 +127,10 @@ public:
 				cerr << "Bad " << arg << " option: " << argv[i] << endl;
 				throw BadArgument();
 			}
+		else if (arg == "--use-chunks")
+		{
+			dagChunks = 4;
+		}
 		else if (arg == "--phone-home" && i + 1 < argc)
 		{
 			string m = argv[++i];
@@ -293,6 +297,7 @@ public:
 			<< "    --opencl-platform <n>  When mining using -G/--opencl use OpenCL platform n (default: 0)." << endl
 			<< "    --opencl-device <n>  When mining using -G/--opencl use OpenCL device n (default: 0)." << endl
 			<< "    -t, --mining-threads <n> Limit number of CPU/GPU miners to n (default: use everything available on selected platform)" << endl
+			<< "    --use-chunks When using GPU mining upload the DAG to the GPU in 4 chunks. " << endl
 			;
 	}
 
@@ -480,6 +485,7 @@ private:
 	unsigned openclPlatform = 0;
 	unsigned openclDevice = 0;
 	unsigned miningThreads = UINT_MAX;
+	unsigned dagChunks = 1;
 
 	/// DAG initialisation param.
 	unsigned initDAG = 0;
diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp
index 520a13180..5e69df1ee 100644
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@@ -43,9 +43,6 @@
 #define CL_MEM_HOST_READ_ONLY 0
 #endif
 
-// maybe move to CMakeLists.txt ?
-// #define ETHASH_CL_CHUNK_UPLOAD
-
 #undef min
 #undef max
 
@@ -61,7 +58,7 @@ static void add_definition(std::string& source, char const* id, unsigned value)
 ethash_cl_miner::search_hook::~search_hook() {}
 
 ethash_cl_miner::ethash_cl_miner()
-:	m_opencl_1_1()
+:	m_dagChunks(nullptr), m_opencl_1_1()
 {
 }
 
@@ -130,10 +127,26 @@ void ethash_cl_miner::finish()
 {
 	if (m_queue())
 		m_queue.finish();
+
+	if (m_dagChunks)
+		delete [] m_dagChunks;
 }
 
-bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned workgroup_size, unsigned _platformId, unsigned _deviceId)
+bool ethash_cl_miner::init(
+	uint8_t const* _dag,
+	uint64_t _dagSize,
+	unsigned workgroup_size,
+	unsigned _platformId,
+	unsigned _deviceId,
+	unsigned _dagChunksNum
+)
 {
+	// for now due to the .cl kernels we can only have either 1 big chunk or 4 chunks
+	assert(_dagChunksNum == 1 || _dagChunksNum == 4);
+	// now create the number of chunk buffers
+	m_dagChunks = new cl::Buffer[_dagChunksNum];
+	m_dagChunksNum = _dagChunksNum;
+
 	// get all platforms
 	try
 	{
@@ -205,50 +218,61 @@ bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned work
 			cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str();
 			return false;
 		}
-		#ifdef ETHASH_CL_CHUNK_UPLOAD
-		cout << "loading ethash_hash_chunks" << endl;
-		m_hash_kernel = cl::Kernel(program, "ethash_hash_chunks");
-		cout << "loading ethash_search_chunks" << endl;
-		m_search_kernel = cl::Kernel(program, "ethash_search_chunks");
-		#else
-		cout << "loading ethash_hash" << endl;
-		m_hash_kernel = cl::Kernel(program, "ethash_hash");
-		cout << "loading ethash_search" << endl;
-		m_search_kernel = cl::Kernel(program, "ethash_search");
-		#endif
-
-		// create buffer for dag
-		#ifdef ETHASH_CL_CHUNK_UPLOAD
-		for (unsigned i = 0; i < 4; i++)
+		if (_dagChunksNum == 1)
+		{
+			cout << "loading ethash_hash" << endl;
+			m_hash_kernel = cl::Kernel(program, "ethash_hash");
+			cout << "loading ethash_search" << endl;
+			m_search_kernel = cl::Kernel(program, "ethash_search");
+		}
+		else
 		{
-			cout << "Creating chunky buffer: " << i << endl;
-			m_dags[i] = cl::Buffer(m_context, CL_MEM_READ_ONLY, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
+			cout << "loading ethash_hash_chunks" << endl;
+			m_hash_kernel = cl::Kernel(program, "ethash_hash_chunks");
+			cout << "loading ethash_search_chunks" << endl;
+			m_search_kernel = cl::Kernel(program, "ethash_search_chunks");
 		}
-		#else
+
+		// create buffer for dag
+		if (_dagChunksNum == 1)
+			m_dagChunks[0] = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);
+		else
+			for (unsigned i = 0; i < _dagChunksNum; i++)
+			{
+				// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
+				cout << "Creating buffer for chunk " << i << endl;
+				m_dagChunks[i] = cl::Buffer(
+					m_context,
+					CL_MEM_READ_ONLY,
+					(i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7
+				);
+			}
 		cout << "Creating one big buffer." << endl;
-		m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);
-		#endif
 
 		// create buffer for header
 		cout << "Creating buffer for header." << endl;
 		m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
 
-		#ifdef ETHASH_CL_CHUNK_UPLOAD
-		void* dag_ptr[4];
-		for (unsigned i = 0; i < 4; i++)
+		if (_dagChunksNum == 1)
 		{
-			cout << "Mapping chunk " << i << endl;
-			dag_ptr[i] = m_queue.enqueueMapBuffer(m_dags[i], true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
+			cout << "Mapping one big chunk." << endl;
+			m_queue.enqueueWriteBuffer(m_dagChunks[0], CL_TRUE, 0, _dagSize, _dag);
 		}
-		for (unsigned i = 0; i < 4; i++)
+		else
 		{
-			memcpy(dag_ptr[i], (char *)_dag + i*((_dagSize >> 9) << 7), (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
-			m_queue.enqueueUnmapMemObject(m_dags[i], dag_ptr[i]);
+			// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
+			void* dag_ptr[4];
+			for (unsigned i = 0; i < _dagChunksNum; i++)
+			{
+				cout << "Mapping chunk " << i << endl;
+				dag_ptr[i] = m_queue.enqueueMapBuffer(m_dagChunks[i], true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
+			}
+			for (unsigned i = 0; i < _dagChunksNum; i++)
+			{
+				memcpy(dag_ptr[i], (char *)_dag + i*((_dagSize >> 9) << 7), (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
+				m_queue.enqueueUnmapMemObject(m_dagChunks[i], dag_ptr[i]);
+			}
 		}
-		#else
-		cout << "Mapping chunk." << endl;
-		m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
-		#endif
 
 		// create mining buffers
 		for (unsigned i = 0; i != c_num_buffers; ++i)
@@ -288,22 +312,13 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 		uint isolate
 		)
 	*/
-	#ifdef ETHASH_CL_CHUNK_UPLOAD
 	cout << "Setting chunk hash arguments." << endl;
+	unsigned argPos = 2;
 	m_hash_kernel.setArg(1, m_header);
-	m_hash_kernel.setArg(2, m_dags[0]);
-	m_hash_kernel.setArg(3, m_dags[1]);
-	m_hash_kernel.setArg(4, m_dags[2]);
-	m_hash_kernel.setArg(5, m_dags[3]);
-	m_hash_kernel.setArg(6, nonce);
-	m_hash_kernel.setArg(7, ~0u); // have to pass this to stop the compile unrolling the loop
-	#else
-	cout << "Setting hash arguments." << endl;
-	m_hash_kernel.setArg(1, m_header);
-	m_hash_kernel.setArg(2, m_dag);
-	m_hash_kernel.setArg(3, nonce);
-	m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
-	#endif
+	for (unsigned i = 0 ; i < m_dagChunksNum; ++i, ++argPos)
+		m_hash_kernel.setArg(argPos, m_dagChunks[i]);
+	m_hash_kernel.setArg(argPos + 1, nonce);
+	m_hash_kernel.setArg(argPos + 2, ~0u); // have to pass this to stop the compiler unrolling the loop
 
 	unsigned buf = 0;
 	for (unsigned i = 0; i < count || !pending.empty(); )
@@ -381,27 +396,13 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 		uint isolate							// 5
 		)
 		*/
-		#ifdef ETHASH_CL_CHUNK_UPLOAD
-		cout << "Setting chunk search arguments." << endl;
-		m_search_kernel.setArg(1, m_header);
-		m_search_kernel.setArg(2, m_dags[0]);
-		m_search_kernel.setArg(3, m_dags[1]);
-		m_search_kernel.setArg(4, m_dags[2]);
-		m_search_kernel.setArg(5, m_dags[3]);
-
-		// pass these to stop the compiler unrolling the loops
-		m_search_kernel.setArg(7, target);
-		m_search_kernel.setArg(8, ~0u);
-
-		#else
-		cout << "Setting search arguments." << endl;
+		unsigned argPos = 2;
 		m_search_kernel.setArg(1, m_header);
-		m_search_kernel.setArg(2, m_dag);
-
+		for (unsigned i = 0; i < m_dagChunksNum; ++i, ++argPos)
+			m_search_kernel.setArg(argPos, m_dagChunks[i]);
 		// pass these to stop the compiler unrolling the loops
-		m_search_kernel.setArg(4, target);
-		m_search_kernel.setArg(5, ~0u);
-		#endif
+		m_search_kernel.setArg(argPos + 1, target);
+		m_search_kernel.setArg(argPos + 2, ~0u);
 
 		unsigned buf = 0;
 		std::random_device engine;
@@ -410,11 +411,10 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 		{
 			// supply output buffer to kernel
 			m_search_kernel.setArg(0, m_search_buf[buf]);
-			#ifdef ETHASH_CL_CHUNK_UPLOAD
-			m_search_kernel.setArg(6, start_nonce);
-			#else
-			m_search_kernel.setArg(3, start_nonce);
-			#endif
+			if (m_dagChunksNum == 1)
+				m_search_kernel.setArg(3, start_nonce);
+			else
+				m_search_kernel.setArg(6, start_nonce);
 
 			// execute it!
 			m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h
index 9c97f2aa4..0f83f8565 100644
--- a/libethash-cl/ethash_cl_miner.h
+++ b/libethash-cl/ethash_cl_miner.h
@@ -36,7 +36,14 @@ public:
 	static unsigned get_num_devices(unsigned _platformId = 0);
 	static std::string platform_info(unsigned _platformId = 0, unsigned _deviceId = 0);
 
-	bool init(uint8_t const* _dag, uint64_t _dagSize, unsigned workgroup_size = 64, unsigned _platformId = 0, unsigned _deviceId = 0);
+	bool init(
+		uint8_t const* _dag,
+		uint64_t _dagSize,
+		unsigned workgroup_size = 64,
+		unsigned _platformId = 0,
+		unsigned _deviceId = 0,
+		unsigned _dagChunksNum = 1
+	);
 	void finish();
 	void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count);
 	void search(uint8_t const* header, uint64_t target, search_hook& hook);
@@ -51,11 +58,11 @@ private:
 	cl::CommandQueue m_queue;
 	cl::Kernel m_hash_kernel;
 	cl::Kernel m_search_kernel;
-	cl::Buffer m_dag;
-	cl::Buffer m_dags[4];
+	unsigned m_dagChunksNum;
+	cl::Buffer* m_dagChunks;
 	cl::Buffer m_header;
 	cl::Buffer m_hash_buf[c_num_buffers];
 	cl::Buffer m_search_buf[c_num_buffers];
 	unsigned m_workgroup_size;
 	bool m_opencl_1_1;
-};
\ No newline at end of file
+};
diff --git a/libethcore/Ethash.cpp b/libethcore/Ethash.cpp
index f62c1f9cd..158f40981 100644
--- a/libethcore/Ethash.cpp
+++ b/libethcore/Ethash.cpp
@@ -285,6 +285,7 @@ private:
 unsigned Ethash::GPUMiner::s_platformId = 0;
 unsigned Ethash::GPUMiner::s_deviceId = 0;
 unsigned Ethash::GPUMiner::s_numInstances = 0;
+unsigned Ethash::GPUMiner::s_dagChunks = 1;
 
 Ethash::GPUMiner::GPUMiner(ConstructionInfo const& _ci):
 	Miner(_ci),
@@ -345,7 +346,7 @@ void Ethash::GPUMiner::workLoop()
 				this_thread::sleep_for(chrono::milliseconds(500));
 			}
 			bytesConstRef dagData = dag->data();
-			m_miner->init(dagData.data(), dagData.size(), 32, s_platformId, device);
+			m_miner->init(dagData.data(), dagData.size(), 32, s_platformId, device, s_dagChunks);
 		}
 
 		uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192);
diff --git a/libethcore/Ethash.h b/libethcore/Ethash.h
index 86540678f..868c27916 100644
--- a/libethcore/Ethash.h
+++ b/libethcore/Ethash.h
@@ -119,6 +119,7 @@ public:
 		static void setDefaultPlatform(unsigned _id) { s_platformId = _id; }
 		static void setDefaultDevice(unsigned _id) { s_deviceId = _id; }
 		static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, getNumDevices()); }
+		static void setDagChunks(unsigned _dagChunks) { s_dagChunks = _dagChunks; }
 
 	protected:
 		void kickOff() override;
@@ -137,6 +138,7 @@ public:
 		static unsigned s_platformId;
 		static unsigned s_deviceId;
 		static unsigned s_numInstances;
+		static unsigned s_dagChunks;
 	};
 #else
 	using GPUMiner = CPUMiner;

From 65ebc5f17b061aa5976a82d3c42c970fa988034a Mon Sep 17 00:00:00 2001
From: Lefteris Karapetsas <lefteris@refu.co>
Date: Thu, 4 Jun 2015 12:22:27 +0200
Subject: [PATCH 04/11] Tweaks after merging latest develop changes

- Use a vector for dag chunks.

- Use ETHCL_LOG for outputing to stdout.
---
 libethash-cl/ethash_cl_miner.cpp | 61 ++++++++++----------------------
 libethash-cl/ethash_cl_miner.h   |  2 +-
 2 files changed, 20 insertions(+), 43 deletions(-)

diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp
index b2b7c49c2..aa90cb4c3 100644
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@@ -62,7 +62,7 @@ static void add_definition(std::string& source, char const* id, unsigned value)
 ethash_cl_miner::search_hook::~search_hook() {}
 
 ethash_cl_miner::ethash_cl_miner()
-:	m_dagChunks(nullptr), m_opencl_1_1()
+:	m_opencl_1_1()
 {
 }
 
@@ -172,9 +172,6 @@ void ethash_cl_miner::finish()
 {
 	if (m_queue())
 		m_queue.finish();
-
-	if (m_dagChunks)
-		delete [] m_dagChunks;
 }
 
 bool ethash_cl_miner::init(
@@ -189,7 +186,6 @@ bool ethash_cl_miner::init(
 	// for now due to the .cl kernels we can only have either 1 big chunk or 4 chunks
 	assert(_dagChunksNum == 1 || _dagChunksNum == 4);
 	// now create the number of chunk buffers
-	m_dagChunks = new cl::Buffer[_dagChunksNum];
 	m_dagChunksNum = _dagChunksNum;
 
 	// get all platforms
@@ -254,52 +250,52 @@ bool ethash_cl_miner::init(
 		try
 		{
 			program.build({ device });
-			cout << "Printing program log" << endl;
-			cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str();
+			ETHCL_LOG("Printing program log");
+			ETHCL_LOG(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
 		}
 		catch (cl::Error err)
 		{
-			cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str();
+			ETHCL_LOG(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
 			return false;
 		}
 		if (_dagChunksNum == 1)
 		{
-			cout << "loading ethash_hash" << endl;
+			ETHCL_LOG("Loading single big chunk kernels");
 			m_hash_kernel = cl::Kernel(program, "ethash_hash");
-			cout << "loading ethash_search" << endl;
 			m_search_kernel = cl::Kernel(program, "ethash_search");
 		}
 		else
 		{
-			cout << "loading ethash_hash_chunks" << endl;
+			ETHCL_LOG("Loading chunk kernels");
 			m_hash_kernel = cl::Kernel(program, "ethash_hash_chunks");
-			cout << "loading ethash_search_chunks" << endl;
 			m_search_kernel = cl::Kernel(program, "ethash_search_chunks");
 		}
 
 		// create buffer for dag
 		if (_dagChunksNum == 1)
-			m_dagChunks[0] = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);
+		{
+			ETHCL_LOG("Creating one big buffer");
+			m_dagChunks.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize));
+		}
 		else
 			for (unsigned i = 0; i < _dagChunksNum; i++)
 			{
 				// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
-				cout << "Creating buffer for chunk " << i << endl;
-				m_dagChunks[i] = cl::Buffer(
+				ETHCL_LOG("Creating buffer for chunk " << i);
+				m_dagChunks.push_back(cl::Buffer(
 					m_context,
 					CL_MEM_READ_ONLY,
 					(i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7
-				);
+				));
 			}
-		cout << "Creating one big buffer." << endl;
 
 		// create buffer for header
-		cout << "Creating buffer for header." << endl;
+		ETHCL_LOG("Creating buffer for header.");
 		m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
 
 		if (_dagChunksNum == 1)
 		{
-			cout << "Mapping one big chunk." << endl;
+			ETHCL_LOG("Mapping one big chunk.");
 			m_queue.enqueueWriteBuffer(m_dagChunks[0], CL_TRUE, 0, _dagSize, _dag);
 		}
 		else
@@ -308,7 +304,7 @@ bool ethash_cl_miner::init(
 			void* dag_ptr[4];
 			for (unsigned i = 0; i < _dagChunksNum; i++)
 			{
-				cout << "Mapping chunk " << i << endl;
+				ETHCL_LOG("Mapping chunk " << i);
 				dag_ptr[i] = m_queue.enqueueMapBuffer(m_dagChunks[i], true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7);
 			}
 			for (unsigned i = 0; i < _dagChunksNum; i++)
@@ -321,7 +317,7 @@ bool ethash_cl_miner::init(
 		// create mining buffers
 		for (unsigned i = 0; i != c_num_buffers; ++i)
 		{
-			cout << "Creating mining buffer " << i <<endl;
+			ETHCL_LOG("Creating mining buffer " << i);
 			m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32 * c_hash_batch_size);
 			m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
 		}
@@ -347,16 +343,7 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce,
 	// update header constant buffer
 	m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header);
 
-	/*
-	__kernel void ethash_combined_hash(
-		__global hash32_t* g_hashes,
-		__constant hash32_t const* g_header,
-		__global hash128_t const* g_dag,
-		ulong start_nonce,
-		uint isolate
-		)
-	*/
-	cout << "Setting chunk hash arguments." << endl;
+	ETHCL_LOG("Setting chunk hash arguments.");
 	unsigned argPos = 2;
 	m_hash_kernel.setArg(1, m_header);
 	for (unsigned i = 0 ; i < m_dagChunksNum; ++i, ++argPos)
@@ -430,16 +417,6 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 #endif
 			m_queue.finish();
 
-		/*
-		__kernel void ethash_combined_search(
-		__global hash32_t* g_hashes,			// 0
-		__constant hash32_t const* g_header,	// 1
-		__global hash128_t const* g_dag,		// 2
-		ulong start_nonce,						// 3
-		ulong target,							// 4
-		uint isolate							// 5
-		)
-		*/
 		unsigned argPos = 2;
 		m_search_kernel.setArg(1, m_header);
 		for (unsigned i = 0; i < m_dagChunksNum; ++i, ++argPos)
@@ -501,6 +478,6 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
 	}
 	catch (cl::Error err)
 	{
-		std::cout << err.what() << "(" << err.err() << ")" << std::endl;
+		ETHCL_LOG(err.what() << "(" << err.err() << ")");
 	}
 }
diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h
index ba328e1ff..5bf092f42 100644
--- a/libethash-cl/ethash_cl_miner.h
+++ b/libethash-cl/ethash_cl_miner.h
@@ -60,7 +60,7 @@ private:
 	cl::Kernel m_hash_kernel;
 	cl::Kernel m_search_kernel;
 	unsigned m_dagChunksNum;
-	cl::Buffer* m_dagChunks;
+	std::vector<cl::Buffer> m_dagChunks;
 	cl::Buffer m_header;
 	cl::Buffer m_hash_buf[c_num_buffers];
 	cl::Buffer m_search_buf[c_num_buffers];

From 46666c845089a109729e705d8cc8bfdca5a184a4 Mon Sep 17 00:00:00 2001
From: Gav Wood <i@gavwood.com>
Date: Sun, 7 Jun 2015 16:15:50 +0900
Subject: [PATCH 05/11] Accept alternative nonce.

---
 eth/main.cpp                    | 13 +++++++++++++
 libethereum/CanonBlockChain.cpp | 26 ++++++++++++++++++++++++--
 libethereum/CanonBlockChain.h   | 28 +++++++++++++++++-----------
 libethereum/Client.cpp          |  7 +++++--
 4 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/eth/main.cpp b/eth/main.cpp
index 27e5f4ca0..89b3f058b 100644
--- a/eth/main.cpp
+++ b/eth/main.cpp
@@ -116,6 +116,7 @@ void help()
 #endif
 		<< "    -K,--kill  First kill the blockchain." << endl
 		<< "    -R,--rebuild  Rebuild the blockchain from the existing database." << endl
+		<< "    --genesis-nonce <nonce>  Set the Genesis Nonce to the given hex nonce." << endl
 		<< "    -s,--import-secret <secret>  Import a secret key into the key store and use as the default." << endl
 		<< "    -S,--import-session-secret <secret>  Import a secret key into the key store and use as the default for this session only." << endl
 		<< "    --sign-key <address>  Sign all transactions with the key of the given address." << endl
@@ -468,6 +469,18 @@ int main(int argc, char** argv)
 		}
 		else if ((arg == "-d" || arg == "--path" || arg == "--db-path") && i + 1 < argc)
 			dbPath = argv[++i];
+		else if (arg == "--genesis-nonce" && i + 1 < argc)
+		{
+			try
+			{
+				CanonBlockChain::setGenesisNonce(Nonce(argv[++i]));
+			}
+			catch (...)
+			{
+				cerr << "Bad " << arg << " option: " << argv[i] << endl;
+				return -1;
+			}
+		}
 /*		else if ((arg == "-B" || arg == "--block-fees") && i + 1 < argc)
 		{
 			try
diff --git a/libethereum/CanonBlockChain.cpp b/libethereum/CanonBlockChain.cpp
index 5dd7dc2ce..4e6d89243 100644
--- a/libethereum/CanonBlockChain.cpp
+++ b/libethereum/CanonBlockChain.cpp
@@ -72,6 +72,7 @@ std::unordered_map<Address, Account> const& dev::eth::genesisState()
 
 std::unique_ptr<BlockInfo> CanonBlockChain::s_genesis;
 boost::shared_mutex CanonBlockChain::x_genesis;
+Nonce CanonBlockChain::s_nonce(u64(42));
 
 bytes CanonBlockChain::createGenesisBlock()
 {
@@ -87,12 +88,33 @@ bytes CanonBlockChain::createGenesisBlock()
 	}
 
 	block.appendList(15)
-			<< h256() << EmptyListSHA3 << h160() << stateRoot << EmptyTrie << EmptyTrie << LogBloom() << c_genesisDifficulty << 0 << c_genesisGasLimit << 0 << (unsigned)0 << string() << h256() << Nonce(u64(42));
+			<< h256() << EmptyListSHA3 << h160() << stateRoot << EmptyTrie << EmptyTrie << LogBloom() << c_genesisDifficulty << 0 << c_genesisGasLimit << 0 << (unsigned)0 << string() << h256() << s_nonce;
 	block.appendRaw(RLPEmptyList);
 	block.appendRaw(RLPEmptyList);
 	return block.out();
 }
 
-CanonBlockChain::CanonBlockChain(std::string const& _path, WithExisting _we, ProgressCallback const& _pc): BlockChain(CanonBlockChain::createGenesisBlock(), _path, _we, _pc)
+CanonBlockChain::CanonBlockChain(std::string const& _path, WithExisting _we, ProgressCallback const& _pc):
+	BlockChain(createGenesisBlock(), _path, _we, _pc)
 {
 }
+
+void CanonBlockChain::setGenesisNonce(Nonce const& _n)
+{
+	WriteGuard l(x_genesis);
+	s_nonce = _n;
+	s_genesis.reset();
+}
+
+BlockInfo const& CanonBlockChain::genesis()
+{
+	UpgradableGuard l(x_genesis);
+	if (!s_genesis)
+	{
+		auto gb = createGenesisBlock();
+		UpgradeGuard ul(l);
+		s_genesis.reset(new BlockInfo);
+		s_genesis->populate(&gb);
+	}
+	return *s_genesis;
+}
diff --git a/libethereum/CanonBlockChain.h b/libethereum/CanonBlockChain.h
index d1d47cd14..d4494c957 100644
--- a/libethereum/CanonBlockChain.h
+++ b/libethereum/CanonBlockChain.h
@@ -54,21 +54,27 @@ std::unordered_map<Address, Account> const& genesisState();
 class CanonBlockChain: public BlockChain
 {
 public:
-		CanonBlockChain(WithExisting _we = WithExisting::Trust, ProgressCallback const& _pc = ProgressCallback()): CanonBlockChain(std::string(), _we, _pc) {}
-		CanonBlockChain(std::string const& _path, WithExisting _we = WithExisting::Trust, ProgressCallback const& _pc = ProgressCallback());
-		~CanonBlockChain() {}
+	CanonBlockChain(WithExisting _we = WithExisting::Trust, ProgressCallback const& _pc = ProgressCallback()): CanonBlockChain(std::string(), _we, _pc) {}
+	CanonBlockChain(std::string const& _path, WithExisting _we = WithExisting::Trust, ProgressCallback const& _pc = ProgressCallback());
+	~CanonBlockChain() {}
 
-		/// @returns the genesis block header.
-		static BlockInfo const& genesis() { UpgradableGuard l(x_genesis); if (!s_genesis) { auto gb = createGenesisBlock(); UpgradeGuard ul(l); s_genesis.reset(new BlockInfo); s_genesis->populate(&gb); } return *s_genesis; }
+	/// @returns the genesis block header.
+	static BlockInfo const& genesis();
 
-		/// @returns the genesis block as its RLP-encoded byte array.
-		/// @note This is slow as it's constructed anew each call. Consider genesis() instead.
-		static bytes createGenesisBlock();
+	/// @returns the genesis block as its RLP-encoded byte array.
+	/// @note This is slow as it's constructed anew each call. Consider genesis() instead.
+	static bytes createGenesisBlock();
+
+	/// Alter the value of the genesis block's nonce.
+	/// @warning Unless you're very careful, make sure you call this right at the start of the
+	/// program, before anything has had the chance to use this class at all.
+	static void setGenesisNonce(Nonce const& _n);
 
 private:
-		/// Static genesis info and its lock.
-		static boost::shared_mutex x_genesis;
-		static std::unique_ptr<BlockInfo> s_genesis;
+	/// Static genesis info and its lock.
+	static boost::shared_mutex x_genesis;
+	static std::unique_ptr<BlockInfo> s_genesis;
+	static Nonce s_nonce;
 };
 
 }
diff --git a/libethereum/Client.cpp b/libethereum/Client.cpp
index 86b745141..b737f53c9 100644
--- a/libethereum/Client.cpp
+++ b/libethereum/Client.cpp
@@ -47,8 +47,11 @@ VersionChecker::VersionChecker(string const& _dbPath):
 		(void)protocolVersion;
 		auto minorProtocolVersion = (unsigned)status[1];
 		auto databaseVersion = (unsigned)status[2];
+		h256 ourGenesisHash = CanonBlockChain::genesis().hash();
+		auto genesisHash = status.itemCount() > 3 ? (h256)status[3] : ourGenesisHash;
+
 		m_action =
-			databaseVersion != c_databaseVersion ?
+			databaseVersion != c_databaseVersion || genesisHash != ourGenesisHash ?
 				WithExisting::Kill
 			: minorProtocolVersion != eth::c_minorProtocolVersion ?
 				WithExisting::Verify
@@ -73,7 +76,7 @@ void VersionChecker::setOk()
 		{
 			cwarn << "Unhandled exception! Failed to create directory: " << m_path << "\n" << boost::current_exception_diagnostic_information();
 		}
-		writeFile(m_path + "/status", rlpList(eth::c_protocolVersion, eth::c_minorProtocolVersion, c_databaseVersion));
+		writeFile(m_path + "/status", rlpList(eth::c_protocolVersion, eth::c_minorProtocolVersion, c_databaseVersion, CanonBlockChain::genesis().hash()));
 	}
 }
 

From 4894766804c3ad2cbe621899cc9892e123ae8ca3 Mon Sep 17 00:00:00 2001
From: Gav Wood <i@gavwood.com>
Date: Sun, 7 Jun 2015 16:37:23 +0900
Subject: [PATCH 06/11] Protect g_logOverride - don't use it directly. Fixed
 #2056

---
 libdevcore/Log.cpp    | 29 ++++++++++++++++++++++++++---
 libdevcore/Log.h      | 22 ++++++++++++++++++----
 libethereum/State.cpp | 18 ------------------
 3 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/libdevcore/Log.cpp b/libdevcore/Log.cpp
index 3dd2b3879..f28a2c6b9 100644
--- a/libdevcore/Log.cpp
+++ b/libdevcore/Log.cpp
@@ -33,7 +33,29 @@ using namespace dev;
 
 // Logging
 int dev::g_logVerbosity = 5;
-map<type_info const*, bool> dev::g_logOverride;
+mutex x_logOverride;
+
+/// Map of Log Channel types to bool, false forces the channel to be disabled, true forces it to be enabled.
+/// If a channel has no entry, then it will output as long as its verbosity (LogChannel::verbosity) is less than
+/// or equal to the currently output verbosity (g_logVerbosity).
+static map<type_info const*, bool> s_logOverride;
+
+LogOverrideAux::LogOverrideAux(std::type_info const* _ch, bool _value):
+	m_ch(_ch)
+{
+	Guard l(x_logOverride);
+	m_old = s_logOverride.count(_ch) ? (int)s_logOverride[_ch] : c_null;
+	s_logOverride[m_ch] = _value;
+}
+
+LogOverrideAux::~LogOverrideAux()
+{
+	Guard l(x_logOverride);
+	if (m_old == c_null)
+		s_logOverride.erase(m_ch);
+	else
+		s_logOverride[m_ch] = (bool)m_old;
+}
 
 #ifdef _WIN32
 const char* LogChannel::name() { return EthGray "..."; }
@@ -55,8 +77,9 @@ LogOutputStreamBase::LogOutputStreamBase(char const* _id, std::type_info const*
 	m_autospacing(_autospacing),
 	m_verbosity(_v)
 {
-	auto it = g_logOverride.find(_info);
-	if ((it != g_logOverride.end() && it->second == true) || (it == g_logOverride.end() && (int)_v <= g_logVerbosity))
+	Guard l(x_logOverride);
+	auto it = s_logOverride.find(_info);
+	if ((it != s_logOverride.end() && it->second == true) || (it == s_logOverride.end() && (int)_v <= g_logVerbosity))
 	{
 		time_t rawTime = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
 		char buf[24];
diff --git a/libdevcore/Log.h b/libdevcore/Log.h
index 57d8cd349..ce0db17fe 100644
--- a/libdevcore/Log.h
+++ b/libdevcore/Log.h
@@ -54,10 +54,24 @@ extern int g_logVerbosity;
 /// The current method that the logging system uses to output the log messages. Defaults to simpleDebugOut().
 extern std::function<void(std::string const&, char const*)> g_logPost;
 
-/// Map of Log Channel types to bool, false forces the channel to be disabled, true forces it to be enabled.
-/// If a channel has no entry, then it will output as long as its verbosity (LogChannel::verbosity) is less than
-/// or equal to the currently output verbosity (g_logVerbosity).
-extern std::map<std::type_info const*, bool> g_logOverride;
+class LogOverrideAux
+{
+protected:
+	LogOverrideAux(std::type_info const* _ch, bool _value);
+	~LogOverrideAux();
+
+private:
+	std::type_info const* m_ch;
+	static const int c_null = -1;
+	int m_old;
+};
+
+template <class Channel>
+class LogOverride: LogOverrideAux
+{
+public:
+	LogOverride(bool _value): LogOverrideAux(&typeid(Channel), _value) {}
+};
 
 /// Temporary changes system's verbosity for specific function. Restores the old verbosity when function returns.
 /// Not thread-safe, use with caution!
diff --git a/libethereum/State.cpp b/libethereum/State.cpp
index 09765d9ee..f84ee819f 100644
--- a/libethereum/State.cpp
+++ b/libethereum/State.cpp
@@ -592,24 +592,6 @@ string State::vmTrace(bytesConstRef _block, BlockChain const& _bc, ImportRequire
 	return ss.str();
 }
 
-template <class Channel>
-class LogOverride
-{
-public:
-	LogOverride(bool _value): m_old(g_logOverride.count(&typeid(Channel)) ? (int)g_logOverride[&typeid(Channel)] : c_null) { g_logOverride[&typeid(Channel)] = _value; }
-	~LogOverride()
-	{
-		if (m_old == c_null)
-			g_logOverride.erase(&typeid(Channel));
-		else
-			g_logOverride[&typeid(Channel)] = (bool)m_old;
-	}
-
-private:
-	static const int c_null = -1;
-	int m_old;
-};
-
 u256 State::enact(bytesConstRef _block, BlockChain const& _bc, ImportRequirements::value _ir)
 {
 	// m_currentBlock is assumed to be prepopulated and reset.

From c7c1d79176c0611ebd22a220fdac9495c5b36993 Mon Sep 17 00:00:00 2001
From: Gav Wood <i@gavwood.com>
Date: Sun, 7 Jun 2015 22:44:24 +0900
Subject: [PATCH 07/11] Frontier parameters in.

---
 libethcore/Common.cpp      |  6 ++++++
 libethcore/Common.h        |  8 ++++++++
 libethcore/Params.cpp      |  3 ++-
 libethereum/BlockQueue.cpp |  2 +-
 libethereum/Client.cpp     |  2 +-
 libethereum/State.cpp      |  2 +-
 libp2p/RLPxHandshake.cpp   | 15 ++++++++-------
 7 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/libethcore/Common.cpp b/libethcore/Common.cpp
index ed2f8a3d3..63f4a19f9 100644
--- a/libethcore/Common.cpp
+++ b/libethcore/Common.cpp
@@ -46,6 +46,12 @@ const unsigned c_databaseBaseVersion = 9;
 const unsigned c_databaseVersionModifier = 0;
 #endif
 
+#if ETH_FRONTIER
+Network const c_network = Network::Frontier;
+#else
+Network const c_network = Network::Olympic;
+#endif
+
 const unsigned c_databaseVersion = c_databaseBaseVersion + (c_databaseVersionModifier << 8) + (ProofOfWork::revision() << 9);
 
 vector<pair<u256, string>> const& units()
diff --git a/libethcore/Common.h b/libethcore/Common.h
index 1d48803cb..87ebffab7 100644
--- a/libethcore/Common.h
+++ b/libethcore/Common.h
@@ -43,6 +43,14 @@ extern const unsigned c_minorProtocolVersion;
 /// Current database version.
 extern const unsigned c_databaseVersion;
 
+/// The network id.
+enum class Network
+{
+	Olympic = 0,
+	Frontier = 1
+};
+extern const Network c_network;
+
 /// User-friendly string representation of the amount _b in wei.
 std::string formatBalance(bigint const& _b);
 
diff --git a/libethcore/Params.cpp b/libethcore/Params.cpp
index a6107e62b..916adf6ca 100644
--- a/libethcore/Params.cpp
+++ b/libethcore/Params.cpp
@@ -20,6 +20,7 @@
  */
 
 #include "Params.h"
+#include "Common.h"
 
 using namespace std;
 namespace dev
@@ -35,7 +36,7 @@ u256 const c_minGasLimit = 125000;
 u256 const c_gasLimitBoundDivisor = 1024;
 u256 const c_minimumDifficulty = 131072;
 u256 const c_difficultyBoundDivisor = 2048;
-u256 const c_durationLimit = 8;
+u256 const c_durationLimit = c_network == Network::Olympic ? 8 : 12;
 //--- END: AUTOGENERATED FROM /feeStructure.json
 
 }
diff --git a/libethereum/BlockQueue.cpp b/libethereum/BlockQueue.cpp
index 013d8a000..360bf915e 100644
--- a/libethereum/BlockQueue.cpp
+++ b/libethereum/BlockQueue.cpp
@@ -81,7 +81,7 @@ void BlockQueue::verifierBody()
 				res.first.populate(res.second, CheckEverything, work.first);
 				res.first.verifyInternals(&res.second);
 			}
-			catch (InvalidNonce&)
+			catch (InvalidBlockNonce&)
 			{
 				badBlock(res.second, "Invalid block nonce");
 				cwarn << "  Nonce:" << res.first.nonce.hex();
diff --git a/libethereum/Client.cpp b/libethereum/Client.cpp
index b737f53c9..46fbbdfb1 100644
--- a/libethereum/Client.cpp
+++ b/libethereum/Client.cpp
@@ -665,7 +665,7 @@ void Client::doWork()
 		syncBlockQueue();
 
 	t = true;
-	if (m_syncTransactionQueue.compare_exchange_strong(t, false) && !m_remoteWorking)
+	if (m_syncTransactionQueue.compare_exchange_strong(t, false) && !m_remoteWorking && !isSyncing())
 		syncTransactionQueue();
 
 	tick();
diff --git a/libethereum/State.cpp b/libethereum/State.cpp
index f84ee819f..92c84c9b3 100644
--- a/libethereum/State.cpp
+++ b/libethereum/State.cpp
@@ -46,7 +46,7 @@ using namespace dev::eth;
 #define ctrace clog(StateTrace)
 #define ETH_TIMED_ENACTMENTS 0
 
-static const u256 c_blockReward = 1500 * finney;
+static const u256 c_blockReward = c_network == Network::Olympic ? (1500 * finney) : (5 * ether);
 
 const char* StateSafeExceptions::name() { return EthViolet "⚙" EthBlue " ℹ"; }
 const char* StateDetail::name() { return EthViolet "⚙" EthWhite " ◌"; }
diff --git a/libp2p/RLPxHandshake.cpp b/libp2p/RLPxHandshake.cpp
index d7c2e5e3b..b8faf0e3e 100644
--- a/libp2p/RLPxHandshake.cpp
+++ b/libp2p/RLPxHandshake.cpp
@@ -184,7 +184,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech)
 		// old packet format
 		// 5 arguments, HelloPacket
 		RLPStream s;
-		s.append((unsigned)0).appendList(5)
+		s.append((unsigned)HelloPacket).appendList(5)
 		<< dev::p2p::c_protocolVersion
 		<< m_host->m_clientVersion
 		<< m_host->caps()
@@ -205,15 +205,16 @@ void RLPXHandshake::transition(boost::system::error_code _ech)
 		m_nextState = StartSession;
 		
 		// read frame header
-		m_handshakeInBuffer.resize(h256::size);
-		ba::async_read(m_socket->ref(), boost::asio::buffer(m_handshakeInBuffer, h256::size), [this, self](boost::system::error_code ec, std::size_t)
+		unsigned const handshakeSize = 32;
+		m_handshakeInBuffer.resize(handshakeSize);
+		ba::async_read(m_socket->ref(), boost::asio::buffer(m_handshakeInBuffer, handshakeSize), [this, self](boost::system::error_code ec, std::size_t)
 		{
 			if (ec)
 				transition(ec);
 			else
 			{
 				/// authenticate and decrypt header
-				if (!m_io->authAndDecryptHeader(bytesRef(m_handshakeInBuffer.data(), h256::size)))
+				if (!m_io->authAndDecryptHeader(bytesRef(m_handshakeInBuffer.data(), handshakeSize)))
 				{
 					m_nextState = Error;
 					transition();
@@ -235,7 +236,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech)
 				}
 				
 				/// rlp of header has protocol-type, sequence-id[, total-packet-size]
-				bytes headerRLP(header.size() - 3 - h128::size);
+				bytes headerRLP(header.size() - 3 - h128::size);	// this is always 32 - 3 - 16 = 13. wtf?
 				bytesConstRef(&header).cropped(3).copyTo(&headerRLP);
 				
 				/// read padded frame and mac
@@ -255,8 +256,8 @@ void RLPXHandshake::transition(boost::system::error_code _ech)
 							return;
 						}
 						
-						PacketType packetType = (PacketType)(frame[0] == 0x80 ? 0x0 : frame[0]);
-						if (packetType != 0)
+						PacketType packetType = frame[0] == 0x80 ? HelloPacket : (PacketType)frame[0];
+						if (packetType != HelloPacket)
 						{
 							clog(NetTriviaSummary) << (m_originated ? "p2p.connect.egress" : "p2p.connect.ingress") << "hello frame: invalid packet type";
 							m_nextState = Error;

From 5ee6f9b9784289c5c4f665c90eff4a138f4d194b Mon Sep 17 00:00:00 2001
From: Gav Wood <i@gavwood.com>
Date: Sun, 7 Jun 2015 23:48:33 +0900
Subject: [PATCH 08/11] Windows build fix.

---
 libp2p/RLPxHandshake.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libp2p/RLPxHandshake.cpp b/libp2p/RLPxHandshake.cpp
index b8faf0e3e..8caf6e4f3 100644
--- a/libp2p/RLPxHandshake.cpp
+++ b/libp2p/RLPxHandshake.cpp
@@ -214,7 +214,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech)
 			else
 			{
 				/// authenticate and decrypt header
-				if (!m_io->authAndDecryptHeader(bytesRef(m_handshakeInBuffer.data(), handshakeSize)))
+				if (!m_io->authAndDecryptHeader(bytesRef(m_handshakeInBuffer.data(), m_handshakeInBuffer.size())))
 				{
 					m_nextState = Error;
 					transition();

From df8ca0d1a1ac749f76d34885e0c60a5c797f6611 Mon Sep 17 00:00:00 2001
From: Lefteris Karapetsas <lefteris@refu.co>
Date: Mon, 8 Jun 2015 00:56:13 +0200
Subject: [PATCH 09/11] Reclaim chunksNum set in MinerAux.h after merge

---
 ethminer/MinerAux.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h
index 515dc2389..6a42dd774 100644
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@@ -268,6 +268,7 @@ public:
 			ProofOfWork::GPUMiner::setDefaultPlatform(openclPlatform);
 			ProofOfWork::GPUMiner::setDefaultDevice(openclDevice);
 			ProofOfWork::GPUMiner::setNumInstances(miningThreads);
+			ProofOfWork::GPUMiner::setDagChunks(dagChunks);
 		}
 		if (mode == OperationMode::DAGInit)
 			doInitDAG(initDAG);

From c6020f6625fb98c0e96dd4f1946a7cd6b06877e9 Mon Sep 17 00:00:00 2001
From: Lefteris Karapetsas <lefteris@refu.co>
Date: Mon, 8 Jun 2015 01:22:17 +0200
Subject: [PATCH 10/11] Don't read kernel file as string during runtime

---
 libethash-cl/ethash_cl_miner.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp
index 3d20ecc1b..f501d9642 100644
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@@ -233,9 +233,9 @@ bool ethash_cl_miner::init(
 		m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
 
 		// patch source code
-		std::ifstream t("ethash_cl_miner_kernel.cl");
-		std::string code((std::istreambuf_iterator<char>(t)),
-			std::istreambuf_iterator<char>());
+		// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
+		// into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
+		std::string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
 		add_definition(code, "GROUP_SIZE", m_workgroup_size);
 		add_definition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
 		add_definition(code, "ACCESSES", ETHASH_ACCESSES);

From f07ce95f78a369e2122d63d0f94b747f48b887fc Mon Sep 17 00:00:00 2001
From: Lefteris Karapetsas <lefteris@refu.co>
Date: Mon, 8 Jun 2015 01:33:17 +0200
Subject: [PATCH 11/11] setDagChunks is a part of the Interface

---
 libethcore/Ethash.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libethcore/Ethash.h b/libethcore/Ethash.h
index 48bf765d6..68c21c609 100644
--- a/libethcore/Ethash.h
+++ b/libethcore/Ethash.h
@@ -89,6 +89,7 @@ public:
 		static std::string platformInfo();
 		static bool haveSufficientGPUMemory() { return false; }
 		static void setDefaultPlatform(unsigned) {}
+		static void setDagChunks(unsigned) {}
 		static void setDefaultDevice(unsigned) {}
 		static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, std::thread::hardware_concurrency()); }
 	protected: