You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

333 lines
9.0 KiB

10 years ago
/*
This file is part of c-ethash.
c-ethash is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
c-ethash is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file ethash_cl_miner.cpp
* @author Tim Hughes <tim@twistedfury.com>
* @date 2015
*/
#define _CRT_SECURE_NO_WARNINGS
#include <cstdio>
#include <cstdlib>
#include <assert.h>
#include <queue>
#include <vector>
#include <libethash/util.h>
#include <libethash/ethash.h>
#include "ethash_cl_miner.h"
#include "ethash_cl_miner_kernel.h"
#define ETHASH_BYTES 32
// workaround lame platforms
#if !CL_VERSION_1_2
#define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE
#define CL_MEM_HOST_READ_ONLY 0
#endif
#undef min
#undef max
static void add_definition(std::string& source, char const* id, unsigned value)
{
char buf[256];
sprintf(buf, "#define %s %uu\n", id, value);
source.insert(source.begin(), buf, buf + strlen(buf));
}
ethash_cl_miner::ethash_cl_miner()
: m_opencl_1_1()
{
}
void ethash_cl_miner::finish()
{
if (m_queue())
{
m_queue.finish();
}
}
bool ethash_cl_miner::init(ethash_params const& params, std::function<void(void*)> _fillDAG, unsigned workgroup_size)
{
// store params
m_params = params;
// get all platforms
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
if (platforms.empty())
{
debugf("No OpenCL platforms found.\n");
return false;
}
// use default platform
fprintf(stderr, "Using platform: %s\n", platforms[0].getInfo<CL_PLATFORM_NAME>().c_str());
// get GPU device of the default platform
std::vector<cl::Device> devices;
platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
if (devices.empty())
{
debugf("No OpenCL devices found.\n");
return false;
}
// use default device
unsigned device_num = 0;
cl::Device& device = devices[device_num];
std::string device_version = device.getInfo<CL_DEVICE_VERSION>();
fprintf(stderr, "Using device: %s (%s)\n", device.getInfo<CL_DEVICE_NAME>().c_str(),device_version.c_str());
if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0)
{
debugf("OpenCL 1.0 is not supported.\n");
return false;
}
if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0)
{
m_opencl_1_1 = true;
}
// create context
m_context = cl::Context(std::vector<cl::Device>(&device, &device + 1));
m_queue = cl::CommandQueue(m_context, device);
// use requested workgroup size, but we require multiple of 8
m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
// patch source code
std::string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
add_definition(code, "GROUP_SIZE", m_workgroup_size);
add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / ETHASH_MIX_BYTES));
add_definition(code, "ACCESSES", ETHASH_ACCESSES);
add_definition(code, "MAX_OUTPUTS", c_max_search_results);
//debugf("%s", code.c_str());
// create miner OpenCL program
cl::Program::Sources sources;
sources.push_back({code.c_str(), code.size()});
cl::Program program(m_context, sources);
try
{
program.build({device});
}
catch (cl::Error err)
{
debugf("%s\n", program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
return false;
}
m_hash_kernel = cl::Kernel(program, "ethash_hash");
m_search_kernel = cl::Kernel(program, "ethash_search");
// create buffer for dag
m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size);
// create buffer for header
m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
// compute dag on CPU
{
// if this throws then it's because we probably need to subdivide the dag uploads for compatibility
void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size);
// memcpying 1GB: horrible... really. horrible. but necessary since we can't mmap *and* gpumap.
_fillDAG(dag_ptr);
m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
}
// create mining buffers
for (unsigned i = 0; i != c_num_buffers; ++i)
{
m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32*c_hash_batch_size);
m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
}
return true;
}
void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count)
{
struct pending_batch
{
unsigned base;
unsigned count;
unsigned buf;
};
std::queue<pending_batch> pending;
// update header constant buffer
m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header);
/*
__kernel void ethash_combined_hash(
__global hash32_t* g_hashes,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
uint isolate
)
*/
m_hash_kernel.setArg(1, m_header);
m_hash_kernel.setArg(2, m_dag);
m_hash_kernel.setArg(3, nonce);
m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
unsigned buf = 0;
for (unsigned i = 0; i < count || !pending.empty(); )
{
// how many this batch
if (i < count)
{
unsigned const this_count = std::min<unsigned>(count - i, c_hash_batch_size);
unsigned const batch_count = std::max<unsigned>(this_count, m_workgroup_size);
// supply output hash buffer to kernel
m_hash_kernel.setArg(0, m_hash_buf[buf]);
// execute it!
m_queue.enqueueNDRangeKernel(
m_hash_kernel,
cl::NullRange,
cl::NDRange(batch_count),
cl::NDRange(m_workgroup_size)
);
m_queue.flush();
pending.push({i, this_count, buf});
i += this_count;
buf = (buf + 1) % c_num_buffers;
}
// read results
if (i == count || pending.size() == c_num_buffers)
{
pending_batch const& batch = pending.front();
// could use pinned host pointer instead, but this path isn't that important.
uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * ETHASH_BYTES);
memcpy(ret + batch.base*ETHASH_BYTES, hashes, batch.count*ETHASH_BYTES);
m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes);
pending.pop();
}
}
}
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
struct pending_batch
{
uint64_t start_nonce;
unsigned buf;
};
std::queue<pending_batch> pending;
static uint32_t const c_zero = 0;
// update header constant buffer
m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
for (unsigned i = 0; i != c_num_buffers; ++i)
{
m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
}
#if CL_VERSION_1_2 && 0
cl::Event pre_return_event;
if (!m_opencl_1_1)
{
m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
}
else
#endif
{
m_queue.finish();
}
/*
__kernel void ethash_combined_search(
__global hash32_t* g_hashes, // 0
__constant hash32_t const* g_header, // 1
__global hash128_t const* g_dag, // 2
ulong start_nonce, // 3
ulong target, // 4
uint isolate // 5
)
*/
m_search_kernel.setArg(1, m_header);
m_search_kernel.setArg(2, m_dag);
// pass these to stop the compiler unrolling the loops
m_search_kernel.setArg(4, target);
m_search_kernel.setArg(5, ~0u);
unsigned buf = 0;
for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size)
{
// supply output buffer to kernel
m_search_kernel.setArg(0, m_search_buf[buf]);
m_search_kernel.setArg(3, start_nonce);
// execute it!
m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
pending.push({start_nonce, buf});
buf = (buf + 1) % c_num_buffers;
// read results
if (pending.size() == c_num_buffers)
{
pending_batch const& batch = pending.front();
// could use pinned host pointer instead
uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t));
unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);
uint64_t nonces[c_max_search_results];
for (unsigned i = 0; i != num_found; ++i)
{
nonces[i] = batch.start_nonce + results[i+1];
}
m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
bool exit = num_found && hook.found(nonces, num_found);
exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
if (exit)
break;
// reset search buffer if we're still going
if (num_found)
m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero);
pending.pop();
}
}
// not safe to return until this is ready
#if CL_VERSION_1_2 && 0
if (!m_opencl_1_1)
{
pre_return_event.wait();
}
#endif
}