You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
333 lines
9.0 KiB
333 lines
9.0 KiB
10 years ago
|
/*
|
||
|
This file is part of c-ethash.
|
||
|
|
||
|
c-ethash is free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU General Public License as published by
|
||
|
the Free Software Foundation, either version 3 of the License, or
|
||
|
(at your option) any later version.
|
||
|
|
||
|
c-ethash is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
/** @file ethash_cl_miner.cpp
|
||
|
* @author Tim Hughes <tim@twistedfury.com>
|
||
|
* @date 2015
|
||
|
*/
|
||
|
|
||
|
|
||
|
#define _CRT_SECURE_NO_WARNINGS
|
||
|
|
||
|
#include <cstdio>
|
||
|
#include <cstdlib>
|
||
|
#include <assert.h>
|
||
|
#include <queue>
|
||
|
#include <vector>
|
||
|
#include <libethash/util.h>
|
||
|
#include <libethash/ethash.h>
|
||
|
#include "ethash_cl_miner.h"
|
||
|
#include "ethash_cl_miner_kernel.h"
|
||
|
|
||
|
#define ETHASH_BYTES 32
|
||
|
|
||
|
// workaround lame platforms
|
||
|
#if !CL_VERSION_1_2
|
||
|
#define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE
|
||
|
#define CL_MEM_HOST_READ_ONLY 0
|
||
|
#endif
|
||
|
|
||
|
#undef min
|
||
|
#undef max
|
||
|
|
||
|
static void add_definition(std::string& source, char const* id, unsigned value)
|
||
|
{
|
||
|
char buf[256];
|
||
|
sprintf(buf, "#define %s %uu\n", id, value);
|
||
|
source.insert(source.begin(), buf, buf + strlen(buf));
|
||
|
}
|
||
|
|
||
|
ethash_cl_miner::ethash_cl_miner()
|
||
|
: m_opencl_1_1()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void ethash_cl_miner::finish()
|
||
|
{
|
||
|
if (m_queue())
|
||
|
{
|
||
|
m_queue.finish();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool ethash_cl_miner::init(ethash_params const& params, std::function<void(void*)> _fillDAG, unsigned workgroup_size)
|
||
|
{
|
||
|
// store params
|
||
|
m_params = params;
|
||
|
|
||
|
// get all platforms
|
||
|
std::vector<cl::Platform> platforms;
|
||
|
cl::Platform::get(&platforms);
|
||
|
if (platforms.empty())
|
||
|
{
|
||
|
debugf("No OpenCL platforms found.\n");
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// use default platform
|
||
|
fprintf(stderr, "Using platform: %s\n", platforms[0].getInfo<CL_PLATFORM_NAME>().c_str());
|
||
|
|
||
|
// get GPU device of the default platform
|
||
|
std::vector<cl::Device> devices;
|
||
|
platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
|
||
|
if (devices.empty())
|
||
|
{
|
||
|
debugf("No OpenCL devices found.\n");
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// use default device
|
||
|
unsigned device_num = 0;
|
||
|
cl::Device& device = devices[device_num];
|
||
|
std::string device_version = device.getInfo<CL_DEVICE_VERSION>();
|
||
|
fprintf(stderr, "Using device: %s (%s)\n", device.getInfo<CL_DEVICE_NAME>().c_str(),device_version.c_str());
|
||
|
|
||
|
if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0)
|
||
|
{
|
||
|
debugf("OpenCL 1.0 is not supported.\n");
|
||
|
return false;
|
||
|
}
|
||
|
if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0)
|
||
|
{
|
||
|
m_opencl_1_1 = true;
|
||
|
}
|
||
|
|
||
|
// create context
|
||
|
m_context = cl::Context(std::vector<cl::Device>(&device, &device + 1));
|
||
|
m_queue = cl::CommandQueue(m_context, device);
|
||
|
|
||
|
// use requested workgroup size, but we require multiple of 8
|
||
|
m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
|
||
|
|
||
|
// patch source code
|
||
|
std::string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
|
||
|
add_definition(code, "GROUP_SIZE", m_workgroup_size);
|
||
|
add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / ETHASH_MIX_BYTES));
|
||
|
add_definition(code, "ACCESSES", ETHASH_ACCESSES);
|
||
|
add_definition(code, "MAX_OUTPUTS", c_max_search_results);
|
||
|
//debugf("%s", code.c_str());
|
||
|
|
||
|
// create miner OpenCL program
|
||
|
cl::Program::Sources sources;
|
||
|
sources.push_back({code.c_str(), code.size()});
|
||
|
|
||
|
cl::Program program(m_context, sources);
|
||
|
try
|
||
|
{
|
||
|
program.build({device});
|
||
|
}
|
||
|
catch (cl::Error err)
|
||
|
{
|
||
|
debugf("%s\n", program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
|
||
|
return false;
|
||
|
}
|
||
|
m_hash_kernel = cl::Kernel(program, "ethash_hash");
|
||
|
m_search_kernel = cl::Kernel(program, "ethash_search");
|
||
|
|
||
|
// create buffer for dag
|
||
|
m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size);
|
||
|
|
||
|
// create buffer for header
|
||
|
m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
|
||
|
|
||
|
// compute dag on CPU
|
||
|
{
|
||
|
// if this throws then it's because we probably need to subdivide the dag uploads for compatibility
|
||
|
void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size);
|
||
|
// memcpying 1GB: horrible... really. horrible. but necessary since we can't mmap *and* gpumap.
|
||
|
_fillDAG(dag_ptr);
|
||
|
m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
|
||
|
}
|
||
|
|
||
|
// create mining buffers
|
||
|
for (unsigned i = 0; i != c_num_buffers; ++i)
|
||
|
{
|
||
|
m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32*c_hash_batch_size);
|
||
|
m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count)
|
||
|
{
|
||
|
struct pending_batch
|
||
|
{
|
||
|
unsigned base;
|
||
|
unsigned count;
|
||
|
unsigned buf;
|
||
|
};
|
||
|
std::queue<pending_batch> pending;
|
||
|
|
||
|
// update header constant buffer
|
||
|
m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header);
|
||
|
|
||
|
/*
|
||
|
__kernel void ethash_combined_hash(
|
||
|
__global hash32_t* g_hashes,
|
||
|
__constant hash32_t const* g_header,
|
||
|
__global hash128_t const* g_dag,
|
||
|
ulong start_nonce,
|
||
|
uint isolate
|
||
|
)
|
||
|
*/
|
||
|
m_hash_kernel.setArg(1, m_header);
|
||
|
m_hash_kernel.setArg(2, m_dag);
|
||
|
m_hash_kernel.setArg(3, nonce);
|
||
|
m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
|
||
|
|
||
|
unsigned buf = 0;
|
||
|
for (unsigned i = 0; i < count || !pending.empty(); )
|
||
|
{
|
||
|
// how many this batch
|
||
|
if (i < count)
|
||
|
{
|
||
|
unsigned const this_count = std::min<unsigned>(count - i, c_hash_batch_size);
|
||
|
unsigned const batch_count = std::max<unsigned>(this_count, m_workgroup_size);
|
||
|
|
||
|
// supply output hash buffer to kernel
|
||
|
m_hash_kernel.setArg(0, m_hash_buf[buf]);
|
||
|
|
||
|
// execute it!
|
||
|
m_queue.enqueueNDRangeKernel(
|
||
|
m_hash_kernel,
|
||
|
cl::NullRange,
|
||
|
cl::NDRange(batch_count),
|
||
|
cl::NDRange(m_workgroup_size)
|
||
|
);
|
||
|
m_queue.flush();
|
||
|
|
||
|
pending.push({i, this_count, buf});
|
||
|
i += this_count;
|
||
|
buf = (buf + 1) % c_num_buffers;
|
||
|
}
|
||
|
|
||
|
// read results
|
||
|
if (i == count || pending.size() == c_num_buffers)
|
||
|
{
|
||
|
pending_batch const& batch = pending.front();
|
||
|
|
||
|
// could use pinned host pointer instead, but this path isn't that important.
|
||
|
uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * ETHASH_BYTES);
|
||
|
memcpy(ret + batch.base*ETHASH_BYTES, hashes, batch.count*ETHASH_BYTES);
|
||
|
m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes);
|
||
|
|
||
|
pending.pop();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
|
||
|
{
|
||
|
struct pending_batch
|
||
|
{
|
||
|
uint64_t start_nonce;
|
||
|
unsigned buf;
|
||
|
};
|
||
|
std::queue<pending_batch> pending;
|
||
|
|
||
|
static uint32_t const c_zero = 0;
|
||
|
|
||
|
// update header constant buffer
|
||
|
m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
|
||
|
for (unsigned i = 0; i != c_num_buffers; ++i)
|
||
|
{
|
||
|
m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
|
||
|
}
|
||
|
|
||
|
#if CL_VERSION_1_2 && 0
|
||
|
cl::Event pre_return_event;
|
||
|
if (!m_opencl_1_1)
|
||
|
{
|
||
|
m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
|
||
|
}
|
||
|
else
|
||
|
#endif
|
||
|
{
|
||
|
m_queue.finish();
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
__kernel void ethash_combined_search(
|
||
|
__global hash32_t* g_hashes, // 0
|
||
|
__constant hash32_t const* g_header, // 1
|
||
|
__global hash128_t const* g_dag, // 2
|
||
|
ulong start_nonce, // 3
|
||
|
ulong target, // 4
|
||
|
uint isolate // 5
|
||
|
)
|
||
|
*/
|
||
|
m_search_kernel.setArg(1, m_header);
|
||
|
m_search_kernel.setArg(2, m_dag);
|
||
|
|
||
|
// pass these to stop the compiler unrolling the loops
|
||
|
m_search_kernel.setArg(4, target);
|
||
|
m_search_kernel.setArg(5, ~0u);
|
||
|
|
||
|
|
||
|
unsigned buf = 0;
|
||
|
for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size)
|
||
|
{
|
||
|
// supply output buffer to kernel
|
||
|
m_search_kernel.setArg(0, m_search_buf[buf]);
|
||
|
m_search_kernel.setArg(3, start_nonce);
|
||
|
|
||
|
// execute it!
|
||
|
m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
|
||
|
|
||
|
pending.push({start_nonce, buf});
|
||
|
buf = (buf + 1) % c_num_buffers;
|
||
|
|
||
|
// read results
|
||
|
if (pending.size() == c_num_buffers)
|
||
|
{
|
||
|
pending_batch const& batch = pending.front();
|
||
|
|
||
|
// could use pinned host pointer instead
|
||
|
uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t));
|
||
|
unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);
|
||
|
|
||
|
uint64_t nonces[c_max_search_results];
|
||
|
for (unsigned i = 0; i != num_found; ++i)
|
||
|
{
|
||
|
nonces[i] = batch.start_nonce + results[i+1];
|
||
|
}
|
||
|
|
||
|
m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
|
||
|
|
||
|
bool exit = num_found && hook.found(nonces, num_found);
|
||
|
exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
|
||
|
if (exit)
|
||
|
break;
|
||
|
|
||
|
// reset search buffer if we're still going
|
||
|
if (num_found)
|
||
|
m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero);
|
||
|
|
||
|
pending.pop();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// not safe to return until this is ready
|
||
|
#if CL_VERSION_1_2 && 0
|
||
|
if (!m_opencl_1_1)
|
||
|
{
|
||
|
pre_return_event.wait();
|
||
|
}
|
||
|
#endif
|
||
|
}
|
||
|
|