You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
353 lines
10 KiB
353 lines
10 KiB
9 years ago
|
/*
|
||
|
This file is part of c-ethash.
|
||
|
|
||
|
c-ethash is free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU General Public License as published by
|
||
|
the Free Software Foundation, either version 3 of the License, or
|
||
|
(at your option) any later version.
|
||
|
|
||
|
c-ethash is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
9 years ago
|
/** @file ethash_cuda_miner.cpp
|
||
|
* @author Genoil <jw@meneer.net>
|
||
9 years ago
|
* @date 2015
|
||
|
*/
|
||
|
|
||
|
|
||
|
#define _CRT_SECURE_NO_WARNINGS
|
||
|
|
||
|
#include <cstdio>
|
||
|
#include <cstdlib>
|
||
|
#include <iostream>
|
||
|
#include <assert.h>
|
||
|
#include <queue>
|
||
|
#include <random>
|
||
9 years ago
|
#include <atomic>
|
||
|
#include <sstream>
|
||
9 years ago
|
#include <vector>
|
||
|
#include <chrono>
|
||
|
#include <thread>
|
||
|
#include <libethash/util.h>
|
||
|
#include <libethash/ethash.h>
|
||
9 years ago
|
#include <libethash/internal.h>
|
||
9 years ago
|
#include <cuda_runtime.h>
|
||
9 years ago
|
#include "ethash_cuda_miner.h"
|
||
|
#include "ethash_cuda_miner_kernel_globals.h"
|
||
9 years ago
|
|
||
|
|
||
|
#define ETHASH_BYTES 32
|
||
|
|
||
|
// workaround lame platforms
|
||
|
#if !CL_VERSION_1_2
|
||
|
#define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE
|
||
|
#define CL_MEM_HOST_READ_ONLY 0
|
||
|
#endif
|
||
|
|
||
|
#undef min
|
||
|
#undef max
|
||
|
|
||
|
using namespace std;
|
||
|
|
||
9 years ago
|
unsigned const ethash_cuda_miner::c_defaultBlockSize = 128;
|
||
|
unsigned const ethash_cuda_miner::c_defaultGridSize = 2048; // * CL_DEFAULT_LOCAL_WORK_SIZE
|
||
|
unsigned const ethash_cuda_miner::c_defaultNumStreams = 2;
|
||
9 years ago
|
|
||
|
#if defined(_WIN32)
|
||
|
extern "C" __declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString);
|
||
|
static std::atomic_flag s_logSpin = ATOMIC_FLAG_INIT;
|
||
9 years ago
|
#define ETHCUDA_LOG(_contents) \
|
||
9 years ago
|
do \
|
||
|
{ \
|
||
|
std::stringstream ss; \
|
||
|
ss << _contents; \
|
||
|
while (s_logSpin.test_and_set(std::memory_order_acquire)) {} \
|
||
|
OutputDebugStringA(ss.str().c_str()); \
|
||
|
cerr << ss.str() << endl << flush; \
|
||
|
s_logSpin.clear(std::memory_order_release); \
|
||
|
} while (false)
|
||
|
#else
|
||
9 years ago
|
#define ETHCUDA_LOG(_contents) cout << "[CUDA]:" << _contents << endl
|
||
9 years ago
|
#endif
|
||
|
|
||
|
#define CUDA_SAFE_CALL(call) \
|
||
|
do { \
|
||
|
cudaError_t err = call; \
|
||
|
if (cudaSuccess != err) { \
|
||
|
fprintf(stderr, "Cuda error in func '%s' at line %i : %s.\n", \
|
||
|
__FUNCTION__, __LINE__, cudaGetErrorString(err) ); \
|
||
|
exit(EXIT_FAILURE); \
|
||
|
} \
|
||
|
} while (0)
|
||
9 years ago
|
|
||
9 years ago
|
ethash_cuda_miner::search_hook::~search_hook() {}
|
||
9 years ago
|
|
||
9 years ago
|
ethash_cuda_miner::ethash_cuda_miner()
|
||
9 years ago
|
{
|
||
|
}
|
||
|
|
||
9 years ago
|
std::string ethash_cuda_miner::platform_info(unsigned _deviceId)
|
||
9 years ago
|
{
|
||
|
int runtime_version;
|
||
|
int device_count;
|
||
|
|
||
9 years ago
|
device_count = getNumDevices();
|
||
9 years ago
|
|
||
|
if (device_count == 0)
|
||
|
return std::string();
|
||
|
|
||
9 years ago
|
CUDA_SAFE_CALL(cudaRuntimeGetVersion(&runtime_version));
|
||
9 years ago
|
|
||
|
// use selected default device
|
||
|
int device_num = std::min<int>((int)_deviceId, device_count - 1);
|
||
|
cudaDeviceProp device_props;
|
||
9 years ago
|
|
||
|
CUDA_SAFE_CALL(cudaGetDeviceProperties(&device_props, device_num));
|
||
9 years ago
|
|
||
|
char platform[5];
|
||
|
int version_major = runtime_version / 1000;
|
||
|
int version_minor = (runtime_version - (version_major * 1000)) / 10;
|
||
|
sprintf(platform, "%d.%d", version_major, version_minor);
|
||
9 years ago
|
|
||
9 years ago
|
char compute[5];
|
||
|
sprintf(compute, "%d.%d", device_props.major, device_props.minor);
|
||
|
|
||
9 years ago
|
return "{ \"platform\": \"CUDA " + std::string(platform) + "\", \"device\": \"" + std::string(device_props.name) + "\", \"version\": \"Compute " + std::string(compute) + "\" }";
|
||
|
|
||
9 years ago
|
}
|
||
|
|
||
9 years ago
|
unsigned ethash_cuda_miner::getNumDevices()
|
||
9 years ago
|
{
|
||
|
int device_count;
|
||
9 years ago
|
CUDA_SAFE_CALL(cudaGetDeviceCount(&device_count));
|
||
|
return device_count;
|
||
|
}
|
||
9 years ago
|
|
||
9 years ago
|
bool ethash_cuda_miner::configureGPU(
|
||
9 years ago
|
unsigned _blockSize,
|
||
|
unsigned _gridSize,
|
||
|
unsigned _numStreams,
|
||
|
unsigned _extraGPUMemory,
|
||
|
bool _highcpu,
|
||
|
uint64_t _currentBlock
|
||
|
)
|
||
|
{
|
||
|
s_blockSize = _blockSize;
|
||
|
s_gridSize = _gridSize;
|
||
|
s_extraRequiredGPUMem = _extraGPUMemory;
|
||
|
s_numStreams = _numStreams;
|
||
|
s_highCPU = _highcpu;
|
||
|
|
||
|
// by default let's only consider the DAG of the first epoch
|
||
|
uint64_t dagSize = ethash_get_datasize(_currentBlock);
|
||
|
uint64_t requiredSize = dagSize + _extraGPUMemory;
|
||
|
for (unsigned int i = 0; i < getNumDevices(); i++)
|
||
9 years ago
|
{
|
||
9 years ago
|
cudaDeviceProp props;
|
||
|
CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i));
|
||
|
if (props.totalGlobalMem >= requiredSize)
|
||
|
{
|
||
9 years ago
|
ETHCUDA_LOG(
|
||
9 years ago
|
"Found suitable CUDA device [" << string(props.name)
|
||
9 years ago
|
<< "] with " << props.totalGlobalMem << " bytes of GPU memory"
|
||
|
);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
9 years ago
|
ETHCUDA_LOG(
|
||
9 years ago
|
"CUDA device " << string(props.name)
|
||
|
<< " has insufficient GPU memory." << to_string(props.totalGlobalMem) <<
|
||
|
" bytes of memory found < " << to_string(requiredSize) << " bytes of memory required"
|
||
9 years ago
|
);
|
||
9 years ago
|
}
|
||
9 years ago
|
return false;
|
||
|
}
|
||
|
|
||
9 years ago
|
unsigned ethash_cuda_miner::s_extraRequiredGPUMem;
|
||
|
unsigned ethash_cuda_miner::s_blockSize = ethash_cuda_miner::c_defaultBlockSize;
|
||
|
unsigned ethash_cuda_miner::s_gridSize = ethash_cuda_miner::c_defaultGridSize;
|
||
|
unsigned ethash_cuda_miner::s_numStreams = ethash_cuda_miner::c_defaultNumStreams;
|
||
|
bool ethash_cuda_miner::s_highCPU = false;
|
||
9 years ago
|
|
||
9 years ago
|
void ethash_cuda_miner::listDevices()
|
||
9 years ago
|
{
|
||
|
string outString = "\nListing CUDA devices.\nFORMAT: [deviceID] deviceName\n";
|
||
|
for (unsigned int i = 0; i < getNumDevices(); i++)
|
||
|
{
|
||
|
cudaDeviceProp props;
|
||
|
CUDA_SAFE_CALL(cudaGetDeviceProperties(&props, i));
|
||
|
|
||
9 years ago
|
outString += "[" + to_string(i) + "] " + string(props.name) + "\n";
|
||
9 years ago
|
outString += "\tCompute version: " + to_string(props.major) + "." + to_string(props.minor) + "\n";
|
||
|
outString += "\tcudaDeviceProp::totalGlobalMem: " + to_string(props.totalGlobalMem) + "\n";
|
||
|
}
|
||
9 years ago
|
ETHCUDA_LOG(outString);
|
||
9 years ago
|
}
|
||
|
|
||
9 years ago
|
void ethash_cuda_miner::finish()
|
||
9 years ago
|
{
|
||
9 years ago
|
for (unsigned i = 0; i != s_numStreams; i++) {
|
||
9 years ago
|
cudaStreamDestroy(m_streams[i]);
|
||
|
m_streams[i] = 0;
|
||
|
}
|
||
|
cudaDeviceReset();
|
||
|
}
|
||
|
|
||
9 years ago
|
bool ethash_cuda_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned _deviceId)
|
||
9 years ago
|
{
|
||
9 years ago
|
int device_count = getNumDevices();
|
||
9 years ago
|
|
||
|
if (device_count == 0)
|
||
|
return false;
|
||
|
|
||
|
// use selected device
|
||
|
int device_num = std::min<int>((int)_deviceId, device_count - 1);
|
||
|
|
||
|
cudaDeviceProp device_props;
|
||
|
if (cudaGetDeviceProperties(&device_props, device_num) == cudaErrorInvalidDevice)
|
||
|
{
|
||
|
cout << cudaGetErrorString(cudaErrorInvalidDevice) << endl;
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
cout << "Using device: " << device_props.name << "(" << device_props.major << "." << device_props.minor << ")" << endl;
|
||
|
|
||
|
cudaError_t r = cudaSetDevice(device_num);
|
||
|
if (r != cudaSuccess)
|
||
|
{
|
||
|
cout << cudaGetErrorString(r) << endl;
|
||
|
return false;
|
||
|
}
|
||
|
cudaDeviceReset();
|
||
9 years ago
|
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
||
|
cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
|
||
9 years ago
|
|
||
9 years ago
|
m_hash_buf = new void *[s_numStreams];
|
||
|
m_search_buf = new uint32_t *[s_numStreams];
|
||
|
m_streams = new cudaStream_t[s_numStreams];
|
||
9 years ago
|
|
||
|
// patch source code
|
||
|
cudaError result;
|
||
|
|
||
|
uint32_t dagSize128 = (unsigned)(_dagSize / ETHASH_MIX_BYTES);
|
||
|
unsigned max_outputs = c_max_search_results;
|
||
|
|
||
|
result = set_constants(&dagSize128, &max_outputs);
|
||
|
|
||
|
// create buffer for dag
|
||
|
result = cudaMalloc(&m_dag_ptr, _dagSize);
|
||
|
|
||
|
// create buffer for header256
|
||
|
result = cudaMalloc(&m_header, 32);
|
||
|
|
||
|
// copy dag to CPU.
|
||
|
result = cudaMemcpy(m_dag_ptr, _dag, _dagSize, cudaMemcpyHostToDevice);
|
||
|
|
||
|
// create mining buffers
|
||
9 years ago
|
for (unsigned i = 0; i != s_numStreams; ++i)
|
||
9 years ago
|
{
|
||
|
result = cudaMallocHost(&m_hash_buf[i], 32 * c_hash_batch_size);
|
||
|
result = cudaMallocHost(&m_search_buf[i], (c_max_search_results + 1) * sizeof(uint32_t));
|
||
|
result = cudaStreamCreate(&m_streams[i]);
|
||
|
}
|
||
|
if (result != cudaSuccess)
|
||
|
{
|
||
|
cout << cudaGetErrorString(result) << endl;
|
||
|
return false;
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Prevent High CPU usage while waiting for an async task
|
||
|
*/
|
||
|
static unsigned waitStream(cudaStream_t stream)
|
||
|
{
|
||
|
unsigned wait_ms = 0;
|
||
|
while (cudaStreamQuery(stream) == cudaErrorNotReady) {
|
||
|
this_thread::sleep_for(chrono::milliseconds(10));
|
||
|
wait_ms += 10;
|
||
|
}
|
||
|
return wait_ms;
|
||
|
}
|
||
|
|
||
9 years ago
|
void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
|
||
9 years ago
|
{
|
||
|
struct pending_batch
|
||
|
{
|
||
|
uint64_t start_nonce;
|
||
|
unsigned buf;
|
||
|
};
|
||
|
std::queue<pending_batch> pending;
|
||
|
|
||
|
static uint32_t const c_zero = 0;
|
||
|
|
||
|
// update header constant buffer
|
||
|
cudaMemcpy(m_header, header, 32, cudaMemcpyHostToDevice);
|
||
9 years ago
|
for (unsigned i = 0; i != s_numStreams; ++i)
|
||
9 years ago
|
{
|
||
|
cudaMemcpy(m_search_buf[i], &c_zero, 4, cudaMemcpyHostToDevice);
|
||
|
}
|
||
|
cudaError err = cudaGetLastError();
|
||
|
if (cudaSuccess != err)
|
||
|
{
|
||
|
throw std::runtime_error(cudaGetErrorString(err));
|
||
|
}
|
||
|
|
||
|
unsigned buf = 0;
|
||
|
std::random_device engine;
|
||
|
uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine);
|
||
9 years ago
|
for (;; start_nonce += s_gridSize)
|
||
9 years ago
|
{
|
||
9 years ago
|
run_ethash_search(s_gridSize, s_blockSize, m_streams[buf], m_search_buf[buf], m_header, m_dag_ptr, start_nonce, target);
|
||
9 years ago
|
|
||
|
pending.push({ start_nonce, buf });
|
||
9 years ago
|
buf = (buf + 1) % s_numStreams;
|
||
9 years ago
|
|
||
|
// read results
|
||
9 years ago
|
if (pending.size() == s_numStreams)
|
||
9 years ago
|
{
|
||
|
pending_batch const& batch = pending.front();
|
||
|
|
||
|
uint32_t results[1 + c_max_search_results];
|
||
|
|
||
9 years ago
|
if (!s_highCPU)
|
||
9 years ago
|
waitStream(m_streams[buf]); // 28ms
|
||
|
cudaMemcpyAsync(results, m_search_buf[batch.buf], (1 + c_max_search_results) * sizeof(uint32_t), cudaMemcpyHostToHost, m_streams[batch.buf]);
|
||
|
|
||
|
unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);
|
||
|
uint64_t nonces[c_max_search_results];
|
||
|
for (unsigned i = 0; i != num_found; ++i)
|
||
|
{
|
||
|
nonces[i] = batch.start_nonce + results[i + 1];
|
||
|
//cout << results[i + 1] << ", ";
|
||
|
}
|
||
|
//if (num_found > 0)
|
||
|
// cout << endl;
|
||
|
|
||
|
bool exit = num_found && hook.found(nonces, num_found);
|
||
9 years ago
|
exit |= hook.searched(batch.start_nonce, s_gridSize * s_blockSize); // always report searched before exit
|
||
9 years ago
|
if (exit)
|
||
|
break;
|
||
|
|
||
9 years ago
|
start_nonce += s_gridSize * s_blockSize;
|
||
9 years ago
|
// reset search buffer if we're still going
|
||
|
if (num_found)
|
||
|
cudaMemcpyAsync(m_search_buf[batch.buf], &c_zero, 4, cudaMemcpyHostToDevice, m_streams[batch.buf]);
|
||
|
|
||
|
cudaError err = cudaGetLastError();
|
||
|
if (cudaSuccess != err)
|
||
|
{
|
||
|
throw std::runtime_error(cudaGetErrorString(err));
|
||
|
}
|
||
|
pending.pop();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|