|
@ -24,6 +24,7 @@ |
|
|
|
|
|
|
|
|
#include <cstdio> |
|
|
#include <cstdio> |
|
|
#include <cstdlib> |
|
|
#include <cstdlib> |
|
|
|
|
|
#include <fstream> |
|
|
#include <iostream> |
|
|
#include <iostream> |
|
|
#include <assert.h> |
|
|
#include <assert.h> |
|
|
#include <queue> |
|
|
#include <queue> |
|
@ -42,9 +43,13 @@ |
|
|
#define CL_MEM_HOST_READ_ONLY 0 |
|
|
#define CL_MEM_HOST_READ_ONLY 0 |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
//#define CHUNKS
|
|
|
|
|
|
|
|
|
#undef min |
|
|
#undef min |
|
|
#undef max |
|
|
#undef max |
|
|
|
|
|
|
|
|
|
|
|
//#define CHUNKS
|
|
|
|
|
|
|
|
|
using namespace std; |
|
|
using namespace std; |
|
|
|
|
|
|
|
|
static void add_definition(std::string& source, char const* id, unsigned value) |
|
|
static void add_definition(std::string& source, char const* id, unsigned value) |
|
@ -131,98 +136,146 @@ void ethash_cl_miner::finish() |
|
|
bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned workgroup_size, unsigned _platformId, unsigned _deviceId) |
|
|
bool ethash_cl_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned workgroup_size, unsigned _platformId, unsigned _deviceId) |
|
|
{ |
|
|
{ |
|
|
// get all platforms
|
|
|
// get all platforms
|
|
|
std::vector<cl::Platform> platforms; |
|
|
try { |
|
|
cl::Platform::get(&platforms); |
|
|
std::vector<cl::Platform> platforms; |
|
|
if (platforms.empty()) |
|
|
cl::Platform::get(&platforms); |
|
|
{ |
|
|
if (platforms.empty()) |
|
|
cout << "No OpenCL platforms found." << endl; |
|
|
{ |
|
|
return false; |
|
|
cout << "No OpenCL platforms found." << endl; |
|
|
} |
|
|
return false; |
|
|
|
|
|
} |
|
|
// use selected platform
|
|
|
|
|
|
|
|
|
|
|
|
_platformId = std::min<unsigned>(_platformId, platforms.size() - 1); |
|
|
|
|
|
|
|
|
|
|
|
cout << "Using platform: " << platforms[_platformId].getInfo<CL_PLATFORM_NAME>().c_str() << endl; |
|
|
// use selected platform
|
|
|
|
|
|
|
|
|
// get GPU device of the default platform
|
|
|
_platformId = std::min<unsigned>(_platformId, platforms.size() - 1); |
|
|
std::vector<cl::Device> devices; |
|
|
|
|
|
platforms[_platformId].getDevices(CL_DEVICE_TYPE_ALL, &devices); |
|
|
|
|
|
if (devices.empty()) |
|
|
|
|
|
{ |
|
|
|
|
|
cout << "No OpenCL devices found." << endl; |
|
|
|
|
|
return false; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// use selected device
|
|
|
cout << "Using platform: " << platforms[_platformId].getInfo<CL_PLATFORM_NAME>().c_str() << endl; |
|
|
cl::Device& device = devices[std::min<unsigned>(_deviceId, devices.size() - 1)]; |
|
|
|
|
|
std::string device_version = device.getInfo<CL_DEVICE_VERSION>(); |
|
|
|
|
|
cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>().c_str() << "(" << device_version.c_str() << ")" << endl; |
|
|
|
|
|
|
|
|
|
|
|
if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0) |
|
|
// get GPU device of the default platform
|
|
|
{ |
|
|
std::vector<cl::Device> devices; |
|
|
cout << "OpenCL 1.0 is not supported." << endl; |
|
|
platforms[_platformId].getDevices(CL_DEVICE_TYPE_ALL, &devices); |
|
|
return false; |
|
|
if (devices.empty()) |
|
|
} |
|
|
{ |
|
|
if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0) |
|
|
cout << "No OpenCL devices found." << endl; |
|
|
m_opencl_1_1 = true; |
|
|
return false; |
|
|
|
|
|
} |
|
|
// create context
|
|
|
|
|
|
m_context = cl::Context(std::vector<cl::Device>(&device, &device + 1)); |
|
|
|
|
|
m_queue = cl::CommandQueue(m_context, device); |
|
|
|
|
|
|
|
|
|
|
|
// use requested workgroup size, but we require multiple of 8
|
|
|
|
|
|
m_workgroup_size = ((workgroup_size + 7) / 8) * 8; |
|
|
|
|
|
|
|
|
|
|
|
// patch source code
|
|
|
|
|
|
std::string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE); |
|
|
|
|
|
add_definition(code, "GROUP_SIZE", m_workgroup_size); |
|
|
|
|
|
add_definition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES)); |
|
|
|
|
|
add_definition(code, "ACCESSES", ETHASH_ACCESSES); |
|
|
|
|
|
add_definition(code, "MAX_OUTPUTS", c_max_search_results); |
|
|
|
|
|
//debugf("%s", code.c_str());
|
|
|
|
|
|
|
|
|
|
|
|
// create miner OpenCL program
|
|
|
|
|
|
cl::Program::Sources sources; |
|
|
|
|
|
sources.push_back({code.c_str(), code.size()}); |
|
|
|
|
|
|
|
|
|
|
|
cl::Program program(m_context, sources); |
|
|
|
|
|
try |
|
|
|
|
|
{ |
|
|
|
|
|
program.build({device}); |
|
|
|
|
|
} |
|
|
|
|
|
catch (cl::Error err) |
|
|
|
|
|
{ |
|
|
|
|
|
cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str(); |
|
|
|
|
|
return false; |
|
|
|
|
|
} |
|
|
|
|
|
m_hash_kernel = cl::Kernel(program, "ethash_hash"); |
|
|
|
|
|
m_search_kernel = cl::Kernel(program, "ethash_search"); |
|
|
|
|
|
|
|
|
|
|
|
// create buffer for dag
|
|
|
// use selected device
|
|
|
m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize); |
|
|
cl::Device& device = devices[std::min<unsigned>(_deviceId, devices.size() - 1)]; |
|
|
|
|
|
std::string device_version = device.getInfo<CL_DEVICE_VERSION>(); |
|
|
|
|
|
cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>().c_str() << "(" << device_version.c_str() << ")" << endl; |
|
|
|
|
|
|
|
|
// create buffer for header
|
|
|
if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0) |
|
|
m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32); |
|
|
{ |
|
|
|
|
|
cout << "OpenCL 1.0 is not supported." << endl; |
|
|
|
|
|
return false; |
|
|
|
|
|
} |
|
|
|
|
|
if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0) |
|
|
|
|
|
m_opencl_1_1 = true; |
|
|
|
|
|
|
|
|
|
|
|
// create context
|
|
|
|
|
|
m_context = cl::Context(std::vector<cl::Device>(&device, &device + 1)); |
|
|
|
|
|
m_queue = cl::CommandQueue(m_context, device); |
|
|
|
|
|
|
|
|
|
|
|
// use requested workgroup size, but we require multiple of 8
|
|
|
|
|
|
m_workgroup_size = ((workgroup_size + 7) / 8) * 8; |
|
|
|
|
|
|
|
|
|
|
|
// patch source code
|
|
|
|
|
|
std::ifstream t("ethash_cl_miner_kernel.cl"); |
|
|
|
|
|
std::string code((std::istreambuf_iterator<char>(t)), |
|
|
|
|
|
std::istreambuf_iterator<char>()); |
|
|
|
|
|
add_definition(code, "GROUP_SIZE", m_workgroup_size); |
|
|
|
|
|
add_definition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES)); |
|
|
|
|
|
add_definition(code, "ACCESSES", ETHASH_ACCESSES); |
|
|
|
|
|
add_definition(code, "MAX_OUTPUTS", c_max_search_results); |
|
|
|
|
|
//debugf("%s", code.c_str());
|
|
|
|
|
|
|
|
|
|
|
|
// create miner OpenCL program
|
|
|
|
|
|
cl::Program::Sources sources; |
|
|
|
|
|
sources.push_back({ code.c_str(), code.size() }); |
|
|
|
|
|
|
|
|
|
|
|
cl::Program program(m_context, sources); |
|
|
|
|
|
try |
|
|
|
|
|
{ |
|
|
|
|
|
program.build({ device }); |
|
|
|
|
|
cout << "Printing program log" << endl; |
|
|
|
|
|
cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str(); |
|
|
|
|
|
} |
|
|
|
|
|
catch (cl::Error err) |
|
|
|
|
|
{ |
|
|
|
|
|
cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str(); |
|
|
|
|
|
return false; |
|
|
|
|
|
} |
|
|
|
|
|
#ifdef CHUNKS |
|
|
|
|
|
cout << "loading ethash_hash_chunks" << endl; |
|
|
|
|
|
m_hash_kernel = cl::Kernel(program, "ethash_hash_chunks"); |
|
|
|
|
|
cout << "loading ethash_search_chunks" << endl; |
|
|
|
|
|
m_search_kernel = cl::Kernel(program, "ethash_search_chunks"); |
|
|
|
|
|
|
|
|
|
|
|
#else |
|
|
|
|
|
cout << "loading ethash_hash" << endl; |
|
|
|
|
|
m_hash_kernel = cl::Kernel(program, "ethash_hash"); |
|
|
|
|
|
cout << "loading ethash_search" << endl; |
|
|
|
|
|
m_search_kernel = cl::Kernel(program, "ethash_search"); |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
// create buffer for dag
|
|
|
|
|
|
#ifdef CHUNKS |
|
|
|
|
|
for (unsigned i = 0; i < 4; i++){ |
|
|
|
|
|
|
|
|
|
|
|
cout << "Creating chunky buffer: " << i << endl; |
|
|
|
|
|
m_dags[i] = cl::Buffer(m_context, CL_MEM_READ_ONLY, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7); |
|
|
|
|
|
} |
|
|
|
|
|
#else |
|
|
|
|
|
cout << "Creating one big buffer." << endl; |
|
|
|
|
|
m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize); |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
// create buffer for header
|
|
|
|
|
|
cout << "Creating buffer for header." << endl; |
|
|
|
|
|
m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32); |
|
|
|
|
|
|
|
|
|
|
|
#ifdef CHUNKS |
|
|
|
|
|
void* dag_ptr[4]; |
|
|
|
|
|
for (unsigned i = 0; i < 4; i++) |
|
|
|
|
|
{ |
|
|
|
|
|
cout << "Mapping chunk " << i << endl; |
|
|
|
|
|
dag_ptr[i] = m_queue.enqueueMapBuffer(m_dags[i], true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7); |
|
|
|
|
|
} |
|
|
|
|
|
for (unsigned i = 0; i < 4; i++) |
|
|
|
|
|
{ |
|
|
|
|
|
memcpy(dag_ptr[i], (char *)_dag + i*((_dagSize >> 9) << 7), (i == 3) ? (_dagSize - 3 * ((_dagSize >> 9) << 7)) : (_dagSize >> 9) << 7); |
|
|
|
|
|
m_queue.enqueueUnmapMemObject(m_dags[i], dag_ptr[i]); |
|
|
|
|
|
} |
|
|
|
|
|
#else |
|
|
|
|
|
cout << "Mapping chunk." << endl; |
|
|
|
|
|
m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag); |
|
|
|
|
|
#endif |
|
|
|
|
|
// compute dag on CPU
|
|
|
|
|
|
/*{
|
|
|
|
|
|
m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag); |
|
|
|
|
|
|
|
|
|
|
|
// if this throws then it's because we probably need to subdivide the dag uploads for compatibility
|
|
|
|
|
|
// void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, _dagSize);
|
|
|
|
|
|
// memcpying 1GB: horrible... really. horrible. but necessary since we can't mmap *and* gpumap.
|
|
|
|
|
|
// _fillDAG(dag_ptr);
|
|
|
|
|
|
// m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
|
|
|
|
|
|
}*/ |
|
|
|
|
|
|
|
|
|
|
|
// create mining buffers
|
|
|
|
|
|
for (unsigned i = 0; i != c_num_buffers; ++i) |
|
|
|
|
|
{ |
|
|
|
|
|
cout << "Creating minig buffer " << i <<endl; |
|
|
|
|
|
m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32 * c_hash_batch_size); |
|
|
|
|
|
m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t)); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
// compute dag on CPU
|
|
|
|
|
|
try { |
|
|
|
|
|
m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag); |
|
|
|
|
|
} |
|
|
} |
|
|
catch (...) |
|
|
catch (cl::Error err) |
|
|
{ |
|
|
{ |
|
|
// didn't work. shitty driver. try allocating in CPU RAM and manually memcpying it.
|
|
|
std::cout << err.what() << "(" << err.err() << ")" << std::endl; |
|
|
void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, _dagSize); |
|
|
|
|
|
memcpy(dag_ptr, _dag, _dagSize); |
|
|
|
|
|
m_queue.enqueueUnmapMemObject(m_dag, dag_ptr); |
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// create mining buffers
|
|
|
|
|
|
for (unsigned i = 0; i != c_num_buffers; ++i) |
|
|
|
|
|
{ |
|
|
|
|
|
m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32*c_hash_batch_size); |
|
|
|
|
|
m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t)); |
|
|
|
|
|
} |
|
|
|
|
|
return true; |
|
|
return true; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@ -248,10 +301,22 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, |
|
|
uint isolate |
|
|
uint isolate |
|
|
) |
|
|
) |
|
|
*/ |
|
|
*/ |
|
|
|
|
|
#ifdef CHUNKS |
|
|
|
|
|
cout << "Setting chunk hash arguments." << endl; |
|
|
|
|
|
m_hash_kernel.setArg(1, m_header); |
|
|
|
|
|
m_hash_kernel.setArg(2, m_dags[0]); |
|
|
|
|
|
m_hash_kernel.setArg(3, m_dags[1]); |
|
|
|
|
|
m_hash_kernel.setArg(4, m_dags[2]); |
|
|
|
|
|
m_hash_kernel.setArg(5, m_dags[3]); |
|
|
|
|
|
m_hash_kernel.setArg(6, nonce); |
|
|
|
|
|
m_hash_kernel.setArg(7, ~0u); // have to pass this to stop the compile unrolling the loop
|
|
|
|
|
|
#else |
|
|
|
|
|
cout << "Setting hash arguments." << endl; |
|
|
m_hash_kernel.setArg(1, m_header); |
|
|
m_hash_kernel.setArg(1, m_header); |
|
|
m_hash_kernel.setArg(2, m_dag); |
|
|
m_hash_kernel.setArg(2, m_dag); |
|
|
m_hash_kernel.setArg(3, nonce); |
|
|
m_hash_kernel.setArg(3, nonce); |
|
|
m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
|
|
|
m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
unsigned buf = 0; |
|
|
unsigned buf = 0; |
|
|
for (unsigned i = 0; i < count || !pending.empty(); ) |
|
|
for (unsigned i = 0; i < count || !pending.empty(); ) |
|
@ -297,95 +362,120 @@ void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, |
|
|
|
|
|
|
|
|
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook) |
|
|
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook) |
|
|
{ |
|
|
{ |
|
|
struct pending_batch |
|
|
try { |
|
|
{ |
|
|
struct pending_batch |
|
|
uint64_t start_nonce; |
|
|
{ |
|
|
unsigned buf; |
|
|
uint64_t start_nonce; |
|
|
}; |
|
|
unsigned buf; |
|
|
std::queue<pending_batch> pending; |
|
|
}; |
|
|
|
|
|
std::queue<pending_batch> pending; |
|
|
|
|
|
|
|
|
uint32_t const c_zero = 0; |
|
|
static uint32_t const c_zero = 0; |
|
|
|
|
|
|
|
|
// update header constant buffer
|
|
|
// update header constant buffer
|
|
|
m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header); |
|
|
m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header); |
|
|
for (unsigned i = 0; i != c_num_buffers; ++i) |
|
|
for (unsigned i = 0; i != c_num_buffers; ++i) |
|
|
m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero); |
|
|
m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero); |
|
|
|
|
|
|
|
|
#if CL_VERSION_1_2 && 0 |
|
|
#if CL_VERSION_1_2 && 0 |
|
|
cl::Event pre_return_event; |
|
|
cl::Event pre_return_event; |
|
|
if (!m_opencl_1_1) |
|
|
if (!m_opencl_1_1) |
|
|
m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event); |
|
|
m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event); |
|
|
else |
|
|
else |
|
|
#endif |
|
|
#endif |
|
|
m_queue.finish(); |
|
|
m_queue.finish(); |
|
|
|
|
|
|
|
|
/*
|
|
|
/*
|
|
|
__kernel void ethash_combined_search( |
|
|
__kernel void ethash_combined_search( |
|
|
__global hash32_t* g_hashes, // 0
|
|
|
__global hash32_t* g_hashes, // 0
|
|
|
__constant hash32_t const* g_header, // 1
|
|
|
__constant hash32_t const* g_header, // 1
|
|
|
__global hash128_t const* g_dag, // 2
|
|
|
__global hash128_t const* g_dag, // 2
|
|
|
ulong start_nonce, // 3
|
|
|
ulong start_nonce, // 3
|
|
|
ulong target, // 4
|
|
|
ulong target, // 4
|
|
|
uint isolate // 5
|
|
|
uint isolate // 5
|
|
|
) |
|
|
) |
|
|
*/ |
|
|
*/ |
|
|
m_search_kernel.setArg(1, m_header); |
|
|
#ifdef CHUNKS |
|
|
m_search_kernel.setArg(2, m_dag); |
|
|
cout << "Setting chunk search arguments." << endl; |
|
|
|
|
|
m_search_kernel.setArg(1, m_header); |
|
|
// pass these to stop the compiler unrolling the loops
|
|
|
m_search_kernel.setArg(2, m_dags[0]); |
|
|
m_search_kernel.setArg(4, target); |
|
|
m_search_kernel.setArg(3, m_dags[1]); |
|
|
m_search_kernel.setArg(5, ~0u); |
|
|
m_search_kernel.setArg(4, m_dags[2]); |
|
|
|
|
|
m_search_kernel.setArg(5, m_dags[3]); |
|
|
|
|
|
|
|
|
unsigned buf = 0; |
|
|
// pass these to stop the compiler unrolling the loops
|
|
|
std::random_device engine; |
|
|
m_search_kernel.setArg(7, target); |
|
|
uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine); |
|
|
m_search_kernel.setArg(8, ~0u); |
|
|
for (; ; start_nonce += c_search_batch_size) |
|
|
|
|
|
{ |
|
|
#else |
|
|
// supply output buffer to kernel
|
|
|
cout << "Setting search arguments." << endl; |
|
|
m_search_kernel.setArg(0, m_search_buf[buf]); |
|
|
m_search_kernel.setArg(1, m_header); |
|
|
m_search_kernel.setArg(3, start_nonce); |
|
|
m_search_kernel.setArg(2, m_dag); |
|
|
|
|
|
|
|
|
|
|
|
// pass these to stop the compiler unrolling the loops
|
|
|
|
|
|
m_search_kernel.setArg(4, target); |
|
|
|
|
|
m_search_kernel.setArg(5, ~0u); |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned buf = 0; |
|
|
|
|
|
std::random_device engine; |
|
|
|
|
|
uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine); |
|
|
|
|
|
for (;; start_nonce += c_search_batch_size) |
|
|
|
|
|
{ |
|
|
|
|
|
// supply output buffer to kernel
|
|
|
|
|
|
m_search_kernel.setArg(0, m_search_buf[buf]); |
|
|
|
|
|
#ifdef CHUNKS |
|
|
|
|
|
m_search_kernel.setArg(6, start_nonce); |
|
|
|
|
|
#else |
|
|
|
|
|
m_search_kernel.setArg(3, start_nonce); |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
// execute it!
|
|
|
// execute it!
|
|
|
m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size); |
|
|
m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size); |
|
|
|
|
|
|
|
|
pending.push({start_nonce, buf}); |
|
|
pending.push({ start_nonce, buf }); |
|
|
buf = (buf + 1) % c_num_buffers; |
|
|
buf = (buf + 1) % c_num_buffers; |
|
|
|
|
|
|
|
|
// read results
|
|
|
// read results
|
|
|
if (pending.size() == c_num_buffers) |
|
|
if (pending.size() == c_num_buffers) |
|
|
{ |
|
|
{ |
|
|
pending_batch const& batch = pending.front(); |
|
|
pending_batch const& batch = pending.front(); |
|
|
|
|
|
|
|
|
// could use pinned host pointer instead
|
|
|
// could use pinned host pointer instead
|
|
|
uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t)); |
|
|
uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1 + c_max_search_results) * sizeof(uint32_t)); |
|
|
unsigned num_found = std::min<unsigned>(results[0], c_max_search_results); |
|
|
unsigned num_found = std::min<unsigned>(results[0], c_max_search_results); |
|
|
|
|
|
|
|
|
uint64_t nonces[c_max_search_results]; |
|
|
uint64_t nonces[c_max_search_results]; |
|
|
for (unsigned i = 0; i != num_found; ++i) |
|
|
for (unsigned i = 0; i != num_found; ++i) |
|
|
{ |
|
|
{ |
|
|
nonces[i] = batch.start_nonce + results[i+1]; |
|
|
nonces[i] = batch.start_nonce + results[i + 1]; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results); |
|
|
m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results); |
|
|
|
|
|
|
|
|
bool exit = num_found && hook.found(nonces, num_found); |
|
|
bool exit = num_found && hook.found(nonces, num_found); |
|
|
exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
|
|
|
exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
|
|
|
if (exit) |
|
|
if (exit) |
|
|
break; |
|
|
break; |
|
|
|
|
|
|
|
|
// reset search buffer if we're still going
|
|
|
// reset search buffer if we're still going
|
|
|
if (num_found) |
|
|
if (num_found) |
|
|
m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero); |
|
|
m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero); |
|
|
|
|
|
|
|
|
pending.pop(); |
|
|
pending.pop(); |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// not safe to return until this is ready
|
|
|
// not safe to return until this is ready
|
|
|
#if CL_VERSION_1_2 && 0 |
|
|
#if CL_VERSION_1_2 && 0 |
|
|
if (!m_opencl_1_1) |
|
|
if (!m_opencl_1_1) |
|
|
pre_return_event.wait(); |
|
|
pre_return_event.wait(); |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
} |
|
|
|
|
|
catch (cl::Error err) |
|
|
|
|
|
{ |
|
|
|
|
|
std::cout << err.what() << "(" << err.err() << ")" << std::endl; |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|