You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

282 lines
7.5 KiB

/*
This file is part of c-ethash.
c-ethash is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
c-ethash is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file ethash_cu_miner.cpp
* @author Tim Hughes <tim@twistedfury.com>
* @date 2015
*/
#define _CRT_SECURE_NO_WARNINGS
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <assert.h>
#include <queue>
#include <random>
#include <vector>
#include <chrono>
#include <thread>
#include <libethash/util.h>
#include <libethash/ethash.h>
#include <cuda_runtime.h>
#include "ethash_cu_miner.h"
#include "ethash_cu_miner_kernel_globals.h"
#define ETHASH_BYTES 32
// workaround lame platforms
#if !CL_VERSION_1_2
#define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE
#define CL_MEM_HOST_READ_ONLY 0
#endif
#undef min
#undef max
using namespace std;
unsigned const ethash_cu_miner::c_defaultLocalWorkSize = 128;
unsigned const ethash_cu_miner::c_defaultGlobalWorkSizeMultiplier = 2048; // * CL_DEFAULT_LOCAL_WORK_SIZE
ethash_cu_miner::search_hook::~search_hook() {}
ethash_cu_miner::ethash_cu_miner()
{
}
std::string ethash_cu_miner::platform_info(unsigned _deviceId)
{
int runtime_version;
int device_count;
device_count = getNumDevices();
if (device_count == 0)
return std::string();
if (cudaRuntimeGetVersion(&runtime_version) == cudaErrorInvalidValue)
{
cout << cudaGetErrorString(cudaErrorInvalidValue) << endl;
return std::string();
}
// use selected default device
int device_num = std::min<int>((int)_deviceId, device_count - 1);
cudaDeviceProp device_props;
if (cudaGetDeviceProperties(&device_props, device_num) == cudaErrorInvalidDevice)
{
cout << cudaGetErrorString(cudaErrorInvalidDevice) << endl;
return std::string();
}
char platform[5];
int version_major = runtime_version / 1000;
int version_minor = (runtime_version - (version_major * 1000)) / 10;
sprintf(platform, "%d.%d", version_major, version_minor);
char compute[5];
sprintf(compute, "%d.%d", device_props.major, device_props.minor);
return "{ \"platform\": \"CUDA " + std::string(platform) + "\", \"device\": \"" + device_props.name + "\", \"version\": \"Compute " + std::string(compute) + "\" }";
}
int ethash_cu_miner::getNumDevices()
{
int device_count;
if (cudaGetDeviceCount(&device_count) == cudaErrorNoDevice)
{
cout << cudaGetErrorString(cudaErrorNoDevice) << endl;
return 0;
}
return device_count;
}
void ethash_cu_miner::finish()
{
for (unsigned i = 0; i != m_num_buffers; i++) {
cudaStreamDestroy(m_streams[i]);
m_streams[i] = 0;
}
cudaDeviceReset();
}
bool ethash_cu_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned num_buffers, unsigned search_batch_size, unsigned workgroup_size, unsigned _deviceId, bool highcpu)
{
int device_count = getNumDevices();
if (device_count == 0)
return false;
// use selected device
int device_num = std::min<int>((int)_deviceId, device_count - 1);
cudaDeviceProp device_props;
if (cudaGetDeviceProperties(&device_props, device_num) == cudaErrorInvalidDevice)
{
cout << cudaGetErrorString(cudaErrorInvalidDevice) << endl;
return false;
}
cout << "Using device: " << device_props.name << "(" << device_props.major << "." << device_props.minor << ")" << endl;
cudaError_t r = cudaSetDevice(device_num);
if (r != cudaSuccess)
{
cout << cudaGetErrorString(r) << endl;
return false;
}
cudaDeviceReset();
cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte);
m_num_buffers = num_buffers;
m_search_batch_size = search_batch_size;
m_hash_buf = new void *[m_num_buffers];
m_search_buf = new uint32_t *[m_num_buffers];
m_streams = new cudaStream_t[m_num_buffers];
// use requested workgroup size, but we require multiple of 8
m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
m_highcpu = highcpu;
// patch source code
cudaError result;
uint32_t dagSize128 = (unsigned)(_dagSize / ETHASH_MIX_BYTES);
unsigned max_outputs = c_max_search_results;
result = set_constants(&dagSize128, &max_outputs);
// create buffer for dag
result = cudaMalloc(&m_dag_ptr, _dagSize);
// create buffer for header256
result = cudaMalloc(&m_header, 32);
// copy dag to CPU.
result = cudaMemcpy(m_dag_ptr, _dag, _dagSize, cudaMemcpyHostToDevice);
// create mining buffers
for (unsigned i = 0; i != m_num_buffers; ++i)
{
result = cudaMallocHost(&m_hash_buf[i], 32 * c_hash_batch_size);
result = cudaMallocHost(&m_search_buf[i], (c_max_search_results + 1) * sizeof(uint32_t));
result = cudaStreamCreate(&m_streams[i]);
}
if (result != cudaSuccess)
{
cout << cudaGetErrorString(result) << endl;
return false;
}
return true;
}
/**
* Prevent High CPU usage while waiting for an async task
*/
static unsigned waitStream(cudaStream_t stream)
{
unsigned wait_ms = 0;
while (cudaStreamQuery(stream) == cudaErrorNotReady) {
this_thread::sleep_for(chrono::milliseconds(10));
wait_ms += 10;
}
return wait_ms;
}
void ethash_cu_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
struct pending_batch
{
uint64_t start_nonce;
unsigned buf;
};
std::queue<pending_batch> pending;
static uint32_t const c_zero = 0;
// update header constant buffer
cudaMemcpy(m_header, header, 32, cudaMemcpyHostToDevice);
for (unsigned i = 0; i != m_num_buffers; ++i)
{
cudaMemcpy(m_search_buf[i], &c_zero, 4, cudaMemcpyHostToDevice);
}
cudaError err = cudaGetLastError();
if (cudaSuccess != err)
{
throw std::runtime_error(cudaGetErrorString(err));
}
unsigned buf = 0;
std::random_device engine;
uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine);
for (;; start_nonce += m_search_batch_size)
{
run_ethash_search(m_search_batch_size / m_workgroup_size, m_workgroup_size, m_streams[buf], m_search_buf[buf], m_header, m_dag_ptr, start_nonce, target);
pending.push({ start_nonce, buf });
buf = (buf + 1) % m_num_buffers;
// read results
if (pending.size() == m_num_buffers)
{
pending_batch const& batch = pending.front();
uint32_t results[1 + c_max_search_results];
if (!m_highcpu)
waitStream(m_streams[buf]); // 28ms
cudaMemcpyAsync(results, m_search_buf[batch.buf], (1 + c_max_search_results) * sizeof(uint32_t), cudaMemcpyHostToHost, m_streams[batch.buf]);
unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);
uint64_t nonces[c_max_search_results];
for (unsigned i = 0; i != num_found; ++i)
{
nonces[i] = batch.start_nonce + results[i + 1];
//cout << results[i + 1] << ", ";
}
//if (num_found > 0)
// cout << endl;
bool exit = num_found && hook.found(nonces, num_found);
exit |= hook.searched(batch.start_nonce, m_search_batch_size); // always report searched before exit
if (exit)
break;
start_nonce += m_search_batch_size;
// reset search buffer if we're still going
if (num_found)
cudaMemcpyAsync(m_search_buf[batch.buf], &c_zero, 4, cudaMemcpyHostToDevice, m_streams[batch.buf]);
cudaError err = cudaGetLastError();
if (cudaSuccess != err)
{
throw std::runtime_error(cudaGetErrorString(err));
}
pending.pop();
}
}
}