ethminer/libethash-cl/ethash_cl_miner.cpp


								/*

								  This file is part of c-ethash.


								  c-ethash is free software: you can redistribute it and/or modify

								  it under the terms of the GNU General Public License as published by

								  the Free Software Foundation, either version 3 of the License, or

								  (at your option) any later version.


								  c-ethash is distributed in the hope that it will be useful,

								  but WITHOUT ANY WARRANTY; without even the implied warranty of

								  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								  GNU General Public License for more details.


								  You should have received a copy of the GNU General Public License

								  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.

								*/

								/** @file ethash_cl_miner.cpp

								* @author Tim Hughes <tim@twistedfury.com>

								* @date 2015

								*/


								#define _CRT_SECURE_NO_WARNINGS


								#include <cstdio>

								#include <cstdlib>

								#include <iostream>

								#include <assert.h>

								#include <queue>

								#include <random>

								#include <vector>

								#include <libethash/util.h>

								#include <libethash/ethash.h>

								#include "ethash_cl_miner.h"

								#include "ethash_cl_miner_kernel.h"


								#define ETHASH_BYTES 32


								// workaround lame platforms

								#if !CL_VERSION_1_2

								#define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE

								#define CL_MEM_HOST_READ_ONLY 0

								#endif


								#undef min

								#undef max


								using namespace std;


								static void add_definition(std::string& source, char const* id, unsigned value)

								{

									char buf[256];

									sprintf(buf, "#define %s %uu\n", id, value);

									source.insert(source.begin(), buf, buf + strlen(buf));

								}


								ethash_cl_miner::search_hook::~search_hook() {}


								ethash_cl_miner::ethash_cl_miner()

								:	m_opencl_1_1()

								{

								}


								std::string ethash_cl_miner::platform_info(unsigned _platformId, unsigned _deviceId)

								{

									std::vector<cl::Platform> platforms;

									cl::Platform::get(&platforms);

									if (platforms.empty())

									{

										cout << "No OpenCL platforms found." << endl;

										return std::string();

									}


									// get GPU device of the selected platform

									std::vector<cl::Device> devices;

									unsigned platform_num = std::min<unsigned>(_platformId, platforms.size() - 1);

									platforms[platform_num].getDevices(CL_DEVICE_TYPE_ALL, &devices);

									if (devices.empty())

									{

										cout << "No OpenCL devices found." << endl;

										return std::string();

									}


									// use selected default device

									unsigned device_num = std::min<unsigned>(_deviceId, devices.size() - 1);

									cl::Device& device = devices[device_num];

									std::string device_version = device.getInfo<CL_DEVICE_VERSION>();


									return "{ \"platform\": \"" + platforms[platform_num].getInfo<CL_PLATFORM_NAME>() + "\", \"device\": \"" + device.getInfo<CL_DEVICE_NAME>() + "\", \"version\": \"" + device_version + "\" }";

								}


								unsigned ethash_cl_miner::get_num_platforms()

								{

									std::vector<cl::Platform> platforms;

									cl::Platform::get(&platforms);

									return platforms.size();

								}


								unsigned ethash_cl_miner::get_num_devices(unsigned _platformId)

								{

									std::vector<cl::Platform> platforms;

									cl::Platform::get(&platforms);

									if (platforms.empty())

									{

										cout << "No OpenCL platforms found." << endl;

										return 0;

									}


									std::vector<cl::Device> devices;

									unsigned platform_num = std::min<unsigned>(_platformId, platforms.size() - 1);

									platforms[platform_num].getDevices(CL_DEVICE_TYPE_ALL, &devices);

									if (devices.empty())

									{

										cout << "No OpenCL devices found." << endl;

										return 0;

									}

									return devices.size();

								}


								void ethash_cl_miner::finish()

								{

									if (m_queue())

										m_queue.finish();

								}


								bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned workgroup_size, unsigned _platformId, unsigned _deviceId)

								{

									// get all platforms

									std::vector<cl::Platform> platforms;

									cl::Platform::get(&platforms);

									if (platforms.empty())

									{

										cout << "No OpenCL platforms found." << endl;

										return false;

									}


									// use selected platform


									_platformId = std::min<unsigned>(_platformId, platforms.size() - 1);


									cout << "Using platform: " << platforms[_platformId].getInfo<CL_PLATFORM_NAME>().c_str() << endl;


									// get GPU device of the default platform

									std::vector<cl::Device> devices;

									platforms[_platformId].getDevices(CL_DEVICE_TYPE_ALL, &devices);

									if (devices.empty())

									{

										cout << "No OpenCL devices found." << endl;

										return false;

									}


									// use selected device

									cl::Device& device = devices[std::min<unsigned>(_deviceId, devices.size() - 1)];

									std::string device_version = device.getInfo<CL_DEVICE_VERSION>();

									cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>().c_str() << "(" << device_version.c_str() << ")" << endl;


									if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0)

									{

										cout << "OpenCL 1.0 is not supported." << endl;

										return false;

									}

									if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0)

										m_opencl_1_1 = true;


									// create context

									m_context = cl::Context(std::vector<cl::Device>(&device, &device + 1));

									m_queue = cl::CommandQueue(m_context, device);


									// use requested workgroup size, but we require multiple of 8

									m_workgroup_size = ((workgroup_size + 7) / 8) * 8;


									// patch source code

									std::string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);

									add_definition(code, "GROUP_SIZE", m_workgroup_size);

									add_definition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));

									add_definition(code, "ACCESSES", ETHASH_ACCESSES);

									add_definition(code, "MAX_OUTPUTS", c_max_search_results);

									//debugf("%s", code.c_str());


									// create miner OpenCL program

									cl::Program::Sources sources;

									sources.push_back({code.c_str(), code.size()});


									cl::Program program(m_context, sources);

									try

									{

										program.build({device});

									}

									catch (cl::Error err)

									{

										cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str();

										return false;

									}

									m_hash_kernel = cl::Kernel(program, "ethash_hash");

									m_search_kernel = cl::Kernel(program, "ethash_search");


									// create buffer for dag

									m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);


									// create buffer for header

									m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);


									// compute dag on CPU

									{

										m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);


										// if this throws then it's because we probably need to subdivide the dag uploads for compatibility

								//		void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, _dagSize);

										// memcpying 1GB: horrible... really. horrible. but necessary since we can't mmap *and* gpumap.

								//		_fillDAG(dag_ptr);

								//		m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);

									}


									// create mining buffers

									for (unsigned i = 0; i != c_num_buffers; ++i)

									{

										m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32*c_hash_batch_size);

										m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));

									}

									return true;

								}


								void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count)

								{

									struct pending_batch

									{

										unsigned base;

										unsigned count;

										unsigned buf;

									};

									std::queue<pending_batch> pending;


									// update header constant buffer

									m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header);


									/*

									__kernel void ethash_combined_hash(

										__global hash32_t* g_hashes,

										__constant hash32_t const* g_header,

										__global hash128_t const* g_dag,

										ulong start_nonce,

										uint isolate

										)

									*/

									m_hash_kernel.setArg(1, m_header);

									m_hash_kernel.setArg(2, m_dag);

									m_hash_kernel.setArg(3, nonce);

									m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop


									unsigned buf = 0;

									for (unsigned i = 0; i < count || !pending.empty(); )

									{

										// how many this batch

										if (i < count)

										{

											unsigned const this_count = std::min<unsigned>(count - i, c_hash_batch_size);

											unsigned const batch_count = std::max<unsigned>(this_count, m_workgroup_size);


											// supply output hash buffer to kernel

											m_hash_kernel.setArg(0, m_hash_buf[buf]);


											// execute it!

											m_queue.enqueueNDRangeKernel(

												m_hash_kernel,

												cl::NullRange,

												cl::NDRange(batch_count),

												cl::NDRange(m_workgroup_size)

												);

											m_queue.flush();


											pending.push({i, this_count, buf});

											i += this_count;

											buf = (buf + 1) % c_num_buffers;

										}


										// read results

										if (i == count || pending.size() == c_num_buffers)

										{

											pending_batch const& batch = pending.front();


											// could use pinned host pointer instead, but this path isn't that important.

											uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * ETHASH_BYTES);

											memcpy(ret + batch.base*ETHASH_BYTES, hashes, batch.count*ETHASH_BYTES);

											m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes);


											pending.pop();

										}

									}

								}


								void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)

								{

									struct pending_batch

									{

										uint64_t start_nonce;

										unsigned buf;

									};

									std::queue<pending_batch> pending;


									static uint32_t const c_zero = 0;


									// update header constant buffer

									m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);

									for (unsigned i = 0; i != c_num_buffers; ++i)

										m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);


								#if CL_VERSION_1_2 && 0

									cl::Event pre_return_event;

									if (!m_opencl_1_1)

										m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);

									else

								#endif

										m_queue.finish();


									/*

									__kernel void ethash_combined_search(

										__global hash32_t* g_hashes,			// 0

										__constant hash32_t const* g_header,	// 1

										__global hash128_t const* g_dag,		// 2

										ulong start_nonce,						// 3

										ulong target,							// 4

										uint isolate							// 5

									)

									*/

									m_search_kernel.setArg(1, m_header);

									m_search_kernel.setArg(2, m_dag);


									// pass these to stop the compiler unrolling the loops

									m_search_kernel.setArg(4, target);

									m_search_kernel.setArg(5, ~0u);


									unsigned buf = 0;

									std::random_device engine;

									uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine);

									for (; ; start_nonce += c_search_batch_size)

									{

										// supply output buffer to kernel

										m_search_kernel.setArg(0, m_search_buf[buf]);

										m_search_kernel.setArg(3, start_nonce);


										// execute it!

										m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);


										pending.push({start_nonce, buf});

										buf = (buf + 1) % c_num_buffers;


										// read results

										if (pending.size() == c_num_buffers)

										{

											pending_batch const& batch = pending.front();


											// could use pinned host pointer instead

											uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t));

											unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);


											uint64_t nonces[c_max_search_results];

											for (unsigned i = 0; i != num_found; ++i)

											{

												nonces[i] = batch.start_nonce + results[i+1];

											}


											m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);


											bool exit = num_found && hook.found(nonces, num_found);

											exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit

											if (exit)

												break;


											// reset search buffer if we're still going

											if (num_found)

												m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero);


											pending.pop();

										}

									}


									// not safe to return until this is ready

								#if CL_VERSION_1_2 && 0

									if (!m_opencl_1_1)

										pre_return_event.wait();

								#endif

								}