Gav Wood
10 years ago
6 changed files with 4982 additions and 0 deletions
@ -0,0 +1,47 @@ |
|||||
|
cmake_minimum_required(VERSION 2.8) |
||||
|
|
||||
|
set(LIBRARY ethash-cl) |
||||
|
#set(CMAKE_BUILD_TYPE Release) |
||||
|
|
||||
|
include(bin2h.cmake) |
||||
|
bin2h(SOURCE_FILE ethash_cl_miner_kernel.cl |
||||
|
VARIABLE_NAME ethash_cl_miner_kernel |
||||
|
HEADER_FILE ${CMAKE_CURRENT_BINARY_DIR}/ethash_cl_miner_kernel.h) |
||||
|
|
||||
|
if (NOT MSVC) |
||||
|
# Initialize CXXFLAGS for c++11 |
||||
|
set(CMAKE_CXX_FLAGS "-Wall -std=c++11") |
||||
|
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g") |
||||
|
set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG") |
||||
|
set(CMAKE_CXX_FLAGS_RELEASE "-O4 -DNDEBUG") |
||||
|
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") |
||||
|
|
||||
|
# Compiler-specific C++11 activation. |
||||
|
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU") |
||||
|
execute_process( |
||||
|
COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) |
||||
|
if (NOT (GCC_VERSION VERSION_GREATER 4.7 OR GCC_VERSION VERSION_EQUAL 4.7)) |
||||
|
message(FATAL_ERROR "${PROJECT_NAME} requires g++ 4.7 or greater.") |
||||
|
endif () |
||||
|
elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") |
||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") |
||||
|
else () |
||||
|
message(FATAL_ERROR "Your C++ compiler does not support C++11.") |
||||
|
endif () |
||||
|
endif() |
||||
|
|
||||
|
set(OpenCL_FOUND TRUE) |
||||
|
set(OpenCL_INCLUDE_DIRS /usr/include/CL) |
||||
|
set(OpenCL_LIBRARIES -lOpenCL) |
||||
|
|
||||
|
if (NOT OpenCL_FOUND) |
||||
|
find_package(OpenCL) |
||||
|
endif() |
||||
|
|
||||
|
if (OpenCL_FOUND) |
||||
|
set(CMAKE_CXX_FLAGS "-std=c++11 -Wall -Wno-unknown-pragmas -Wextra -Werror -pedantic -fPIC ${CMAKE_CXX_FLAGS}") |
||||
|
include_directories(${OpenCL_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR}) |
||||
|
include_directories(..) |
||||
|
add_library(${LIBRARY} ethash_cl_miner.cpp ethash_cl_miner.h cl.hpp) |
||||
|
TARGET_LINK_LIBRARIES(${LIBRARY} ${OpenCL_LIBRARIES} ethash) |
||||
|
endif() |
@ -0,0 +1,86 @@ |
|||||
|
# https://gist.github.com/sivachandran/3a0de157dccef822a230 |
||||
|
include(CMakeParseArguments) |
||||
|
|
||||
|
# Function to wrap a given string into multiple lines at the given column position. |
||||
|
# Parameters: |
||||
|
# VARIABLE - The name of the CMake variable holding the string. |
||||
|
# AT_COLUMN - The column position at which string will be wrapped. |
||||
|
function(WRAP_STRING) |
||||
|
set(oneValueArgs VARIABLE AT_COLUMN) |
||||
|
cmake_parse_arguments(WRAP_STRING "${options}" "${oneValueArgs}" "" ${ARGN}) |
||||
|
|
||||
|
string(LENGTH ${${WRAP_STRING_VARIABLE}} stringLength) |
||||
|
math(EXPR offset "0") |
||||
|
|
||||
|
while(stringLength GREATER 0) |
||||
|
|
||||
|
if(stringLength GREATER ${WRAP_STRING_AT_COLUMN}) |
||||
|
math(EXPR length "${WRAP_STRING_AT_COLUMN}") |
||||
|
else() |
||||
|
math(EXPR length "${stringLength}") |
||||
|
endif() |
||||
|
|
||||
|
string(SUBSTRING ${${WRAP_STRING_VARIABLE}} ${offset} ${length} line) |
||||
|
set(lines "${lines}\n${line}") |
||||
|
|
||||
|
math(EXPR stringLength "${stringLength} - ${length}") |
||||
|
math(EXPR offset "${offset} + ${length}") |
||||
|
endwhile() |
||||
|
|
||||
|
set(${WRAP_STRING_VARIABLE} "${lines}" PARENT_SCOPE) |
||||
|
endfunction() |
||||
|
|
||||
|
# Function to embed contents of a file as byte array in C/C++ header file(.h). The header file |
||||
|
# will contain a byte array and integer variable holding the size of the array. |
||||
|
# Parameters |
||||
|
# SOURCE_FILE - The path of source file whose contents will be embedded in the header file. |
||||
|
# VARIABLE_NAME - The name of the variable for the byte array. The string "_SIZE" will be append |
||||
|
# to this name and will be used a variable name for size variable. |
||||
|
# HEADER_FILE - The path of header file. |
||||
|
# APPEND - If specified appends to the header file instead of overwriting it |
||||
|
# NULL_TERMINATE - If specified a null byte(zero) will be append to the byte array. This will be |
||||
|
# useful if the source file is a text file and we want to use the file contents |
||||
|
# as string. But the size variable holds size of the byte array without this |
||||
|
# null byte. |
||||
|
# Usage: |
||||
|
# bin2h(SOURCE_FILE "Logo.png" HEADER_FILE "Logo.h" VARIABLE_NAME "LOGO_PNG") |
||||
|
function(BIN2H) |
||||
|
set(options APPEND NULL_TERMINATE) |
||||
|
set(oneValueArgs SOURCE_FILE VARIABLE_NAME HEADER_FILE) |
||||
|
cmake_parse_arguments(BIN2H "${options}" "${oneValueArgs}" "" ${ARGN}) |
||||
|
|
||||
|
# reads source file contents as hex string |
||||
|
file(READ ${BIN2H_SOURCE_FILE} hexString HEX) |
||||
|
string(LENGTH ${hexString} hexStringLength) |
||||
|
|
||||
|
# appends null byte if asked |
||||
|
if(BIN2H_NULL_TERMINATE) |
||||
|
set(hexString "${hexString}00") |
||||
|
endif() |
||||
|
|
||||
|
# wraps the hex string into multiple lines at column 32(i.e. 16 bytes per line) |
||||
|
wrap_string(VARIABLE hexString AT_COLUMN 32) |
||||
|
math(EXPR arraySize "${hexStringLength} / 2") |
||||
|
|
||||
|
# adds '0x' prefix and comma suffix before and after every byte respectively |
||||
|
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1, " arrayValues ${hexString}) |
||||
|
# removes trailing comma |
||||
|
string(REGEX REPLACE ", $" "" arrayValues ${arrayValues}) |
||||
|
|
||||
|
# converts the variable name into proper C identifier |
||||
|
IF (${CMAKE_VERSION} GREATER 2.8.10) # fix for legacy cmake |
||||
|
string(MAKE_C_IDENTIFIER "${BIN2H_VARIABLE_NAME}" BIN2H_VARIABLE_NAME) |
||||
|
ENDIF() |
||||
|
string(TOUPPER "${BIN2H_VARIABLE_NAME}" BIN2H_VARIABLE_NAME) |
||||
|
|
||||
|
# declares byte array and the length variables |
||||
|
set(arrayDefinition "const unsigned char ${BIN2H_VARIABLE_NAME}[] = { ${arrayValues} };") |
||||
|
set(arraySizeDefinition "const size_t ${BIN2H_VARIABLE_NAME}_SIZE = ${arraySize};") |
||||
|
|
||||
|
set(declarations "${arrayDefinition}\n\n${arraySizeDefinition}\n\n") |
||||
|
if(BIN2H_APPEND) |
||||
|
file(APPEND ${BIN2H_HEADER_FILE} "${declarations}") |
||||
|
else() |
||||
|
file(WRITE ${BIN2H_HEADER_FILE} "${declarations}") |
||||
|
endif() |
||||
|
endfunction() |
File diff suppressed because it is too large
@ -0,0 +1,332 @@ |
|||||
|
/*
|
||||
|
This file is part of c-ethash. |
||||
|
|
||||
|
c-ethash is free software: you can redistribute it and/or modify |
||||
|
it under the terms of the GNU General Public License as published by |
||||
|
the Free Software Foundation, either version 3 of the License, or |
||||
|
(at your option) any later version. |
||||
|
|
||||
|
c-ethash is distributed in the hope that it will be useful, |
||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
GNU General Public License for more details. |
||||
|
|
||||
|
You should have received a copy of the GNU General Public License |
||||
|
along with cpp-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
*/ |
||||
|
/** @file ethash_cl_miner.cpp
|
||||
|
* @author Tim Hughes <tim@twistedfury.com> |
||||
|
* @date 2015 |
||||
|
*/ |
||||
|
|
||||
|
|
||||
|
#define _CRT_SECURE_NO_WARNINGS |
||||
|
|
||||
|
#include <cstdio> |
||||
|
#include <cstdlib> |
||||
|
#include <assert.h> |
||||
|
#include <queue> |
||||
|
#include <vector> |
||||
|
#include <libethash/util.h> |
||||
|
#include <libethash/ethash.h> |
||||
|
#include "ethash_cl_miner.h" |
||||
|
#include "ethash_cl_miner_kernel.h" |
||||
|
|
||||
|
#define ETHASH_BYTES 32 |
||||
|
|
||||
|
// workaround lame platforms
|
||||
|
#if !CL_VERSION_1_2 |
||||
|
#define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE |
||||
|
#define CL_MEM_HOST_READ_ONLY 0 |
||||
|
#endif |
||||
|
|
||||
|
#undef min |
||||
|
#undef max |
||||
|
|
||||
|
static void add_definition(std::string& source, char const* id, unsigned value) |
||||
|
{ |
||||
|
char buf[256]; |
||||
|
sprintf(buf, "#define %s %uu\n", id, value); |
||||
|
source.insert(source.begin(), buf, buf + strlen(buf)); |
||||
|
} |
||||
|
|
||||
|
ethash_cl_miner::ethash_cl_miner() |
||||
|
: m_opencl_1_1() |
||||
|
{ |
||||
|
} |
||||
|
|
||||
|
void ethash_cl_miner::finish() |
||||
|
{ |
||||
|
if (m_queue()) |
||||
|
{ |
||||
|
m_queue.finish(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
bool ethash_cl_miner::init(ethash_params const& params, std::function<void(void*)> _fillDAG, unsigned workgroup_size) |
||||
|
{ |
||||
|
// store params
|
||||
|
m_params = params; |
||||
|
|
||||
|
// get all platforms
|
||||
|
std::vector<cl::Platform> platforms; |
||||
|
cl::Platform::get(&platforms); |
||||
|
if (platforms.empty()) |
||||
|
{ |
||||
|
debugf("No OpenCL platforms found.\n"); |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
// use default platform
|
||||
|
fprintf(stderr, "Using platform: %s\n", platforms[0].getInfo<CL_PLATFORM_NAME>().c_str()); |
||||
|
|
||||
|
// get GPU device of the default platform
|
||||
|
std::vector<cl::Device> devices; |
||||
|
platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); |
||||
|
if (devices.empty()) |
||||
|
{ |
||||
|
debugf("No OpenCL devices found.\n"); |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
// use default device
|
||||
|
unsigned device_num = 0; |
||||
|
cl::Device& device = devices[device_num]; |
||||
|
std::string device_version = device.getInfo<CL_DEVICE_VERSION>(); |
||||
|
fprintf(stderr, "Using device: %s (%s)\n", device.getInfo<CL_DEVICE_NAME>().c_str(),device_version.c_str()); |
||||
|
|
||||
|
if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0) |
||||
|
{ |
||||
|
debugf("OpenCL 1.0 is not supported.\n"); |
||||
|
return false; |
||||
|
} |
||||
|
if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0) |
||||
|
{ |
||||
|
m_opencl_1_1 = true; |
||||
|
} |
||||
|
|
||||
|
// create context
|
||||
|
m_context = cl::Context(std::vector<cl::Device>(&device, &device + 1)); |
||||
|
m_queue = cl::CommandQueue(m_context, device); |
||||
|
|
||||
|
// use requested workgroup size, but we require multiple of 8
|
||||
|
m_workgroup_size = ((workgroup_size + 7) / 8) * 8; |
||||
|
|
||||
|
// patch source code
|
||||
|
std::string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE); |
||||
|
add_definition(code, "GROUP_SIZE", m_workgroup_size); |
||||
|
add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / ETHASH_MIX_BYTES)); |
||||
|
add_definition(code, "ACCESSES", ETHASH_ACCESSES); |
||||
|
add_definition(code, "MAX_OUTPUTS", c_max_search_results); |
||||
|
//debugf("%s", code.c_str());
|
||||
|
|
||||
|
// create miner OpenCL program
|
||||
|
cl::Program::Sources sources; |
||||
|
sources.push_back({code.c_str(), code.size()}); |
||||
|
|
||||
|
cl::Program program(m_context, sources); |
||||
|
try |
||||
|
{ |
||||
|
program.build({device}); |
||||
|
} |
||||
|
catch (cl::Error err) |
||||
|
{ |
||||
|
debugf("%s\n", program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str()); |
||||
|
return false; |
||||
|
} |
||||
|
m_hash_kernel = cl::Kernel(program, "ethash_hash"); |
||||
|
m_search_kernel = cl::Kernel(program, "ethash_search"); |
||||
|
|
||||
|
// create buffer for dag
|
||||
|
m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size); |
||||
|
|
||||
|
// create buffer for header
|
||||
|
m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32); |
||||
|
|
||||
|
// compute dag on CPU
|
||||
|
{ |
||||
|
// if this throws then it's because we probably need to subdivide the dag uploads for compatibility
|
||||
|
void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size); |
||||
|
// memcpying 1GB: horrible... really. horrible. but necessary since we can't mmap *and* gpumap.
|
||||
|
_fillDAG(dag_ptr); |
||||
|
m_queue.enqueueUnmapMemObject(m_dag, dag_ptr); |
||||
|
} |
||||
|
|
||||
|
// create mining buffers
|
||||
|
for (unsigned i = 0; i != c_num_buffers; ++i) |
||||
|
{ |
||||
|
m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | (!m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0), 32*c_hash_batch_size); |
||||
|
m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t)); |
||||
|
} |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count) |
||||
|
{ |
||||
|
struct pending_batch |
||||
|
{ |
||||
|
unsigned base; |
||||
|
unsigned count; |
||||
|
unsigned buf; |
||||
|
}; |
||||
|
std::queue<pending_batch> pending; |
||||
|
|
||||
|
// update header constant buffer
|
||||
|
m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header); |
||||
|
|
||||
|
/*
|
||||
|
__kernel void ethash_combined_hash( |
||||
|
__global hash32_t* g_hashes, |
||||
|
__constant hash32_t const* g_header, |
||||
|
__global hash128_t const* g_dag, |
||||
|
ulong start_nonce, |
||||
|
uint isolate |
||||
|
) |
||||
|
*/ |
||||
|
m_hash_kernel.setArg(1, m_header); |
||||
|
m_hash_kernel.setArg(2, m_dag); |
||||
|
m_hash_kernel.setArg(3, nonce); |
||||
|
m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
|
||||
|
|
||||
|
unsigned buf = 0; |
||||
|
for (unsigned i = 0; i < count || !pending.empty(); ) |
||||
|
{ |
||||
|
// how many this batch
|
||||
|
if (i < count) |
||||
|
{ |
||||
|
unsigned const this_count = std::min<unsigned>(count - i, c_hash_batch_size); |
||||
|
unsigned const batch_count = std::max<unsigned>(this_count, m_workgroup_size); |
||||
|
|
||||
|
// supply output hash buffer to kernel
|
||||
|
m_hash_kernel.setArg(0, m_hash_buf[buf]); |
||||
|
|
||||
|
// execute it!
|
||||
|
m_queue.enqueueNDRangeKernel( |
||||
|
m_hash_kernel, |
||||
|
cl::NullRange, |
||||
|
cl::NDRange(batch_count), |
||||
|
cl::NDRange(m_workgroup_size) |
||||
|
); |
||||
|
m_queue.flush(); |
||||
|
|
||||
|
pending.push({i, this_count, buf}); |
||||
|
i += this_count; |
||||
|
buf = (buf + 1) % c_num_buffers; |
||||
|
} |
||||
|
|
||||
|
// read results
|
||||
|
if (i == count || pending.size() == c_num_buffers) |
||||
|
{ |
||||
|
pending_batch const& batch = pending.front(); |
||||
|
|
||||
|
// could use pinned host pointer instead, but this path isn't that important.
|
||||
|
uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * ETHASH_BYTES); |
||||
|
memcpy(ret + batch.base*ETHASH_BYTES, hashes, batch.count*ETHASH_BYTES); |
||||
|
m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes); |
||||
|
|
||||
|
pending.pop(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
|
||||
|
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook) |
||||
|
{ |
||||
|
struct pending_batch |
||||
|
{ |
||||
|
uint64_t start_nonce; |
||||
|
unsigned buf; |
||||
|
}; |
||||
|
std::queue<pending_batch> pending; |
||||
|
|
||||
|
static uint32_t const c_zero = 0; |
||||
|
|
||||
|
// update header constant buffer
|
||||
|
m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header); |
||||
|
for (unsigned i = 0; i != c_num_buffers; ++i) |
||||
|
{ |
||||
|
m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero); |
||||
|
} |
||||
|
|
||||
|
#if CL_VERSION_1_2 && 0 |
||||
|
cl::Event pre_return_event; |
||||
|
if (!m_opencl_1_1) |
||||
|
{ |
||||
|
m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event); |
||||
|
} |
||||
|
else |
||||
|
#endif |
||||
|
{ |
||||
|
m_queue.finish(); |
||||
|
} |
||||
|
|
||||
|
/*
|
||||
|
__kernel void ethash_combined_search( |
||||
|
__global hash32_t* g_hashes, // 0
|
||||
|
__constant hash32_t const* g_header, // 1
|
||||
|
__global hash128_t const* g_dag, // 2
|
||||
|
ulong start_nonce, // 3
|
||||
|
ulong target, // 4
|
||||
|
uint isolate // 5
|
||||
|
) |
||||
|
*/ |
||||
|
m_search_kernel.setArg(1, m_header); |
||||
|
m_search_kernel.setArg(2, m_dag); |
||||
|
|
||||
|
// pass these to stop the compiler unrolling the loops
|
||||
|
m_search_kernel.setArg(4, target); |
||||
|
m_search_kernel.setArg(5, ~0u); |
||||
|
|
||||
|
|
||||
|
unsigned buf = 0; |
||||
|
for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size) |
||||
|
{ |
||||
|
// supply output buffer to kernel
|
||||
|
m_search_kernel.setArg(0, m_search_buf[buf]); |
||||
|
m_search_kernel.setArg(3, start_nonce); |
||||
|
|
||||
|
// execute it!
|
||||
|
m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size); |
||||
|
|
||||
|
pending.push({start_nonce, buf}); |
||||
|
buf = (buf + 1) % c_num_buffers; |
||||
|
|
||||
|
// read results
|
||||
|
if (pending.size() == c_num_buffers) |
||||
|
{ |
||||
|
pending_batch const& batch = pending.front(); |
||||
|
|
||||
|
// could use pinned host pointer instead
|
||||
|
uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t)); |
||||
|
unsigned num_found = std::min<unsigned>(results[0], c_max_search_results); |
||||
|
|
||||
|
uint64_t nonces[c_max_search_results]; |
||||
|
for (unsigned i = 0; i != num_found; ++i) |
||||
|
{ |
||||
|
nonces[i] = batch.start_nonce + results[i+1]; |
||||
|
} |
||||
|
|
||||
|
m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results); |
||||
|
|
||||
|
bool exit = num_found && hook.found(nonces, num_found); |
||||
|
exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
|
||||
|
if (exit) |
||||
|
break; |
||||
|
|
||||
|
// reset search buffer if we're still going
|
||||
|
if (num_found) |
||||
|
m_queue.enqueueWriteBuffer(m_search_buf[batch.buf], true, 0, 4, &c_zero); |
||||
|
|
||||
|
pending.pop(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// not safe to return until this is ready
|
||||
|
#if CL_VERSION_1_2 && 0 |
||||
|
if (!m_opencl_1_1) |
||||
|
{ |
||||
|
pre_return_event.wait(); |
||||
|
} |
||||
|
#endif |
||||
|
} |
||||
|
|
@ -0,0 +1,43 @@ |
|||||
|
#pragma once |
||||
|
|
||||
|
#define __CL_ENABLE_EXCEPTIONS |
||||
|
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS |
||||
|
#include "cl.hpp" |
||||
|
#include <time.h> |
||||
|
#include <functional> |
||||
|
#include <libethash/ethash.h> |
||||
|
|
||||
|
class ethash_cl_miner |
||||
|
{ |
||||
|
public: |
||||
|
struct search_hook |
||||
|
{ |
||||
|
// reports progress, return true to abort
|
||||
|
virtual bool found(uint64_t const* nonces, uint32_t count) = 0; |
||||
|
virtual bool searched(uint64_t start_nonce, uint32_t count) = 0; |
||||
|
}; |
||||
|
|
||||
|
public: |
||||
|
ethash_cl_miner(); |
||||
|
|
||||
|
bool init(ethash_params const& params, std::function<void(void*)> _fillDAG, unsigned workgroup_size = 64); |
||||
|
|
||||
|
void finish(); |
||||
|
void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count); |
||||
|
void search(uint8_t const* header, uint64_t target, search_hook& hook); |
||||
|
|
||||
|
private: |
||||
|
enum { c_max_search_results = 63, c_num_buffers = 2, c_hash_batch_size = 1024, c_search_batch_size = 1024*256 }; |
||||
|
|
||||
|
ethash_params m_params; |
||||
|
cl::Context m_context; |
||||
|
cl::CommandQueue m_queue; |
||||
|
cl::Kernel m_hash_kernel; |
||||
|
cl::Kernel m_search_kernel; |
||||
|
cl::Buffer m_dag; |
||||
|
cl::Buffer m_header; |
||||
|
cl::Buffer m_hash_buf[c_num_buffers]; |
||||
|
cl::Buffer m_search_buf[c_num_buffers]; |
||||
|
unsigned m_workgroup_size; |
||||
|
bool m_opencl_1_1; |
||||
|
}; |
@ -0,0 +1,460 @@ |
|||||
|
// author Tim Hughes <tim@twistedfury.com> |
||||
|
// Tested on Radeon HD 7850 |
||||
|
// Hashrate: 15940347 hashes/s |
||||
|
// Bandwidth: 124533 MB/s |
||||
|
// search kernel should fit in <= 84 VGPRS (3 wavefronts) |
||||
|
|
||||
|
#define THREADS_PER_HASH (128 / 16) |
||||
|
#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH) |
||||
|
|
||||
|
#define FNV_PRIME 0x01000193 |
||||
|
|
||||
|
__constant uint2 const Keccak_f1600_RC[24] = { |
||||
|
(uint2)(0x00000001, 0x00000000), |
||||
|
(uint2)(0x00008082, 0x00000000), |
||||
|
(uint2)(0x0000808a, 0x80000000), |
||||
|
(uint2)(0x80008000, 0x80000000), |
||||
|
(uint2)(0x0000808b, 0x00000000), |
||||
|
(uint2)(0x80000001, 0x00000000), |
||||
|
(uint2)(0x80008081, 0x80000000), |
||||
|
(uint2)(0x00008009, 0x80000000), |
||||
|
(uint2)(0x0000008a, 0x00000000), |
||||
|
(uint2)(0x00000088, 0x00000000), |
||||
|
(uint2)(0x80008009, 0x00000000), |
||||
|
(uint2)(0x8000000a, 0x00000000), |
||||
|
(uint2)(0x8000808b, 0x00000000), |
||||
|
(uint2)(0x0000008b, 0x80000000), |
||||
|
(uint2)(0x00008089, 0x80000000), |
||||
|
(uint2)(0x00008003, 0x80000000), |
||||
|
(uint2)(0x00008002, 0x80000000), |
||||
|
(uint2)(0x00000080, 0x80000000), |
||||
|
(uint2)(0x0000800a, 0x00000000), |
||||
|
(uint2)(0x8000000a, 0x80000000), |
||||
|
(uint2)(0x80008081, 0x80000000), |
||||
|
(uint2)(0x00008080, 0x80000000), |
||||
|
(uint2)(0x80000001, 0x00000000), |
||||
|
(uint2)(0x80008008, 0x80000000), |
||||
|
}; |
||||
|
|
||||
|
void keccak_f1600_round(uint2* a, uint r, uint out_size) |
||||
|
{ |
||||
|
#if !__ENDIAN_LITTLE__ |
||||
|
for (uint i = 0; i != 25; ++i) |
||||
|
a[i] = a[i].yx; |
||||
|
#endif |
||||
|
|
||||
|
uint2 b[25]; |
||||
|
uint2 t; |
||||
|
|
||||
|
// Theta |
||||
|
b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; |
||||
|
b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]; |
||||
|
b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]; |
||||
|
b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]; |
||||
|
b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]; |
||||
|
t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31); |
||||
|
a[0] ^= t; |
||||
|
a[5] ^= t; |
||||
|
a[10] ^= t; |
||||
|
a[15] ^= t; |
||||
|
a[20] ^= t; |
||||
|
t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31); |
||||
|
a[1] ^= t; |
||||
|
a[6] ^= t; |
||||
|
a[11] ^= t; |
||||
|
a[16] ^= t; |
||||
|
a[21] ^= t; |
||||
|
t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31); |
||||
|
a[2] ^= t; |
||||
|
a[7] ^= t; |
||||
|
a[12] ^= t; |
||||
|
a[17] ^= t; |
||||
|
a[22] ^= t; |
||||
|
t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31); |
||||
|
a[3] ^= t; |
||||
|
a[8] ^= t; |
||||
|
a[13] ^= t; |
||||
|
a[18] ^= t; |
||||
|
a[23] ^= t; |
||||
|
t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31); |
||||
|
a[4] ^= t; |
||||
|
a[9] ^= t; |
||||
|
a[14] ^= t; |
||||
|
a[19] ^= t; |
||||
|
a[24] ^= t; |
||||
|
|
||||
|
// Rho Pi |
||||
|
b[0] = a[0]; |
||||
|
b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31); |
||||
|
b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29); |
||||
|
b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26); |
||||
|
b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22); |
||||
|
b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17); |
||||
|
b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11); |
||||
|
b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4); |
||||
|
b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28); |
||||
|
b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19); |
||||
|
b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9); |
||||
|
b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30); |
||||
|
b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18); |
||||
|
b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5); |
||||
|
b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23); |
||||
|
b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8); |
||||
|
b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24); |
||||
|
b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7); |
||||
|
b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21); |
||||
|
b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2); |
||||
|
b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14); |
||||
|
b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25); |
||||
|
b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3); |
||||
|
b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12); |
||||
|
b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20); |
||||
|
|
||||
|
// Chi |
||||
|
a[0] = bitselect(b[0] ^ b[2], b[0], b[1]); |
||||
|
a[1] = bitselect(b[1] ^ b[3], b[1], b[2]); |
||||
|
a[2] = bitselect(b[2] ^ b[4], b[2], b[3]); |
||||
|
a[3] = bitselect(b[3] ^ b[0], b[3], b[4]); |
||||
|
if (out_size >= 4) |
||||
|
{ |
||||
|
a[4] = bitselect(b[4] ^ b[1], b[4], b[0]); |
||||
|
a[5] = bitselect(b[5] ^ b[7], b[5], b[6]); |
||||
|
a[6] = bitselect(b[6] ^ b[8], b[6], b[7]); |
||||
|
a[7] = bitselect(b[7] ^ b[9], b[7], b[8]); |
||||
|
a[8] = bitselect(b[8] ^ b[5], b[8], b[9]); |
||||
|
if (out_size >= 8) |
||||
|
{ |
||||
|
a[9] = bitselect(b[9] ^ b[6], b[9], b[5]); |
||||
|
a[10] = bitselect(b[10] ^ b[12], b[10], b[11]); |
||||
|
a[11] = bitselect(b[11] ^ b[13], b[11], b[12]); |
||||
|
a[12] = bitselect(b[12] ^ b[14], b[12], b[13]); |
||||
|
a[13] = bitselect(b[13] ^ b[10], b[13], b[14]); |
||||
|
a[14] = bitselect(b[14] ^ b[11], b[14], b[10]); |
||||
|
a[15] = bitselect(b[15] ^ b[17], b[15], b[16]); |
||||
|
a[16] = bitselect(b[16] ^ b[18], b[16], b[17]); |
||||
|
a[17] = bitselect(b[17] ^ b[19], b[17], b[18]); |
||||
|
a[18] = bitselect(b[18] ^ b[15], b[18], b[19]); |
||||
|
a[19] = bitselect(b[19] ^ b[16], b[19], b[15]); |
||||
|
a[20] = bitselect(b[20] ^ b[22], b[20], b[21]); |
||||
|
a[21] = bitselect(b[21] ^ b[23], b[21], b[22]); |
||||
|
a[22] = bitselect(b[22] ^ b[24], b[22], b[23]); |
||||
|
a[23] = bitselect(b[23] ^ b[20], b[23], b[24]); |
||||
|
a[24] = bitselect(b[24] ^ b[21], b[24], b[20]); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// Iota |
||||
|
a[0] ^= Keccak_f1600_RC[r]; |
||||
|
|
||||
|
#if !__ENDIAN_LITTLE__ |
||||
|
for (uint i = 0; i != 25; ++i) |
||||
|
a[i] = a[i].yx; |
||||
|
#endif |
||||
|
} |
||||
|
|
||||
|
void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate) |
||||
|
{ |
||||
|
for (uint i = in_size; i != 25; ++i) |
||||
|
{ |
||||
|
a[i] = 0; |
||||
|
} |
||||
|
#if __ENDIAN_LITTLE__ |
||||
|
a[in_size] ^= 0x0000000000000001; |
||||
|
a[24-out_size*2] ^= 0x8000000000000000; |
||||
|
#else |
||||
|
a[in_size] ^= 0x0100000000000000; |
||||
|
a[24-out_size*2] ^= 0x0000000000000080; |
||||
|
#endif |
||||
|
|
||||
|
// Originally I unrolled the first and last rounds to interface |
||||
|
// better with surrounding code, however I haven't done this |
||||
|
// without causing the AMD compiler to blow up the VGPR usage. |
||||
|
uint r = 0; |
||||
|
do |
||||
|
{ |
||||
|
// This dynamic branch stops the AMD compiler unrolling the loop |
||||
|
// and additionally saves about 33% of the VGPRs, enough to gain another |
||||
|
// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can |
||||
|
// massage out of the compiler. It doesn't really seem to matter how |
||||
|
// much we try and help the compiler save VGPRs because it seems to throw |
||||
|
// that information away, hence the implementation of keccak here |
||||
|
// doesn't bother. |
||||
|
if (isolate) |
||||
|
{ |
||||
|
keccak_f1600_round((uint2*)a, r++, 25); |
||||
|
} |
||||
|
} |
||||
|
while (r < 23); |
||||
|
|
||||
|
// final round optimised for digest size |
||||
|
keccak_f1600_round((uint2*)a, r++, out_size); |
||||
|
} |
||||
|
|
||||
|
#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; } |
||||
|
|
||||
|
#define countof(x) (sizeof(x) / sizeof(x[0])) |
||||
|
|
||||
|
uint fnv(uint x, uint y) |
||||
|
{ |
||||
|
return x * FNV_PRIME ^ y; |
||||
|
} |
||||
|
|
||||
|
uint4 fnv4(uint4 x, uint4 y) |
||||
|
{ |
||||
|
return x * FNV_PRIME ^ y; |
||||
|
} |
||||
|
|
||||
|
uint fnv_reduce(uint4 v) |
||||
|
{ |
||||
|
return fnv(fnv(fnv(v.x, v.y), v.z), v.w); |
||||
|
} |
||||
|
|
||||
|
typedef union |
||||
|
{ |
||||
|
ulong ulongs[32 / sizeof(ulong)]; |
||||
|
uint uints[32 / sizeof(uint)]; |
||||
|
} hash32_t; |
||||
|
|
||||
|
typedef union |
||||
|
{ |
||||
|
ulong ulongs[64 / sizeof(ulong)]; |
||||
|
uint4 uint4s[64 / sizeof(uint4)]; |
||||
|
} hash64_t; |
||||
|
|
||||
|
typedef union |
||||
|
{ |
||||
|
uint uints[128 / sizeof(uint)]; |
||||
|
uint4 uint4s[128 / sizeof(uint4)]; |
||||
|
} hash128_t; |
||||
|
|
||||
|
hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate) |
||||
|
{ |
||||
|
hash64_t init; |
||||
|
uint const init_size = countof(init.ulongs); |
||||
|
uint const hash_size = countof(header->ulongs); |
||||
|
|
||||
|
// sha3_512(header .. nonce) |
||||
|
ulong state[25]; |
||||
|
copy(state, header->ulongs, hash_size); |
||||
|
state[hash_size] = nonce; |
||||
|
keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate); |
||||
|
|
||||
|
copy(init.ulongs, state, init_size); |
||||
|
return init; |
||||
|
} |
||||
|
|
||||
|
uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate) |
||||
|
{ |
||||
|
uint4 mix = init; |
||||
|
|
||||
|
// share init0 |
||||
|
if (thread_id == 0) |
||||
|
*share = mix.x; |
||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
uint init0 = *share; |
||||
|
|
||||
|
uint a = 0; |
||||
|
do |
||||
|
{ |
||||
|
bool update_share = thread_id == (a/4) % THREADS_PER_HASH; |
||||
|
|
||||
|
#pragma unroll |
||||
|
for (uint i = 0; i != 4; ++i) |
||||
|
{ |
||||
|
if (update_share) |
||||
|
{ |
||||
|
uint m[4] = { mix.x, mix.y, mix.z, mix.w }; |
||||
|
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE; |
||||
|
} |
||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
|
||||
|
mix = fnv4(mix, g_dag[*share].uint4s[thread_id]); |
||||
|
} |
||||
|
} |
||||
|
while ((a += 4) != (ACCESSES & isolate)); |
||||
|
|
||||
|
return fnv_reduce(mix); |
||||
|
} |
||||
|
|
||||
|
hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate) |
||||
|
{ |
||||
|
ulong state[25]; |
||||
|
|
||||
|
hash32_t hash; |
||||
|
uint const hash_size = countof(hash.ulongs); |
||||
|
uint const init_size = countof(init->ulongs); |
||||
|
uint const mix_size = countof(mix->ulongs); |
||||
|
|
||||
|
// keccak_256(keccak_512(header..nonce) .. mix); |
||||
|
copy(state, init->ulongs, init_size); |
||||
|
copy(state + init_size, mix->ulongs, mix_size); |
||||
|
keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate); |
||||
|
|
||||
|
// copy out |
||||
|
copy(hash.ulongs, state, hash_size); |
||||
|
return hash; |
||||
|
} |
||||
|
|
||||
|
hash32_t compute_hash_simple( |
||||
|
__constant hash32_t const* g_header, |
||||
|
__global hash128_t const* g_dag, |
||||
|
ulong nonce, |
||||
|
uint isolate |
||||
|
) |
||||
|
{ |
||||
|
hash64_t init = init_hash(g_header, nonce, isolate); |
||||
|
|
||||
|
hash128_t mix; |
||||
|
for (uint i = 0; i != countof(mix.uint4s); ++i) |
||||
|
{ |
||||
|
mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)]; |
||||
|
} |
||||
|
|
||||
|
uint mix_val = mix.uints[0]; |
||||
|
uint init0 = mix.uints[0]; |
||||
|
uint a = 0; |
||||
|
do |
||||
|
{ |
||||
|
uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE; |
||||
|
uint n = (a+1) % countof(mix.uints); |
||||
|
|
||||
|
#pragma unroll |
||||
|
for (uint i = 0; i != countof(mix.uints); ++i) |
||||
|
{ |
||||
|
mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]); |
||||
|
mix_val = i == n ? mix.uints[i] : mix_val; |
||||
|
} |
||||
|
} |
||||
|
while (++a != (ACCESSES & isolate)); |
||||
|
|
||||
|
// reduce to output |
||||
|
hash32_t fnv_mix; |
||||
|
for (uint i = 0; i != countof(fnv_mix.uints); ++i) |
||||
|
{ |
||||
|
fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]); |
||||
|
} |
||||
|
|
||||
|
return final_hash(&init, &fnv_mix, isolate); |
||||
|
} |
||||
|
|
||||
|
typedef union |
||||
|
{ |
||||
|
struct |
||||
|
{ |
||||
|
hash64_t init; |
||||
|
uint pad; // avoid lds bank conflicts |
||||
|
}; |
||||
|
hash32_t mix; |
||||
|
} compute_hash_share; |
||||
|
|
||||
|
hash32_t compute_hash( |
||||
|
__local compute_hash_share* share, |
||||
|
__constant hash32_t const* g_header, |
||||
|
__global hash128_t const* g_dag, |
||||
|
ulong nonce, |
||||
|
uint isolate |
||||
|
) |
||||
|
{ |
||||
|
uint const gid = get_global_id(0); |
||||
|
|
||||
|
// Compute one init hash per work item. |
||||
|
hash64_t init = init_hash(g_header, nonce, isolate); |
||||
|
|
||||
|
// Threads work together in this phase in groups of 8. |
||||
|
uint const thread_id = gid % THREADS_PER_HASH; |
||||
|
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH; |
||||
|
|
||||
|
hash32_t mix; |
||||
|
uint i = 0; |
||||
|
do |
||||
|
{ |
||||
|
// share init with other threads |
||||
|
if (i == thread_id) |
||||
|
share[hash_id].init = init; |
||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
|
||||
|
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))]; |
||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
|
||||
|
uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate); |
||||
|
|
||||
|
share[hash_id].mix.uints[thread_id] = thread_mix; |
||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
|
||||
|
if (i == thread_id) |
||||
|
mix = share[hash_id].mix; |
||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
} |
||||
|
while (++i != (THREADS_PER_HASH & isolate)); |
||||
|
|
||||
|
return final_hash(&init, &mix, isolate); |
||||
|
} |
||||
|
|
||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
|
__kernel void ethash_hash_simple( |
||||
|
__global hash32_t* g_hashes, |
||||
|
__constant hash32_t const* g_header, |
||||
|
__global hash128_t const* g_dag, |
||||
|
ulong start_nonce, |
||||
|
uint isolate |
||||
|
) |
||||
|
{ |
||||
|
uint const gid = get_global_id(0); |
||||
|
g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); |
||||
|
} |
||||
|
|
||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
|
__kernel void ethash_search_simple( |
||||
|
__global volatile uint* restrict g_output, |
||||
|
__constant hash32_t const* g_header, |
||||
|
__global hash128_t const* g_dag, |
||||
|
ulong start_nonce, |
||||
|
ulong target, |
||||
|
uint isolate |
||||
|
) |
||||
|
{ |
||||
|
uint const gid = get_global_id(0); |
||||
|
hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); |
||||
|
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target) |
||||
|
{ |
||||
|
uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); |
||||
|
g_output[slot] = gid; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
|
__kernel void ethash_hash( |
||||
|
__global hash32_t* g_hashes, |
||||
|
__constant hash32_t const* g_header, |
||||
|
__global hash128_t const* g_dag, |
||||
|
ulong start_nonce, |
||||
|
uint isolate |
||||
|
) |
||||
|
{ |
||||
|
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||
|
|
||||
|
uint const gid = get_global_id(0); |
||||
|
g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); |
||||
|
} |
||||
|
|
||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
|
__kernel void ethash_search( |
||||
|
__global volatile uint* restrict g_output, |
||||
|
__constant hash32_t const* g_header, |
||||
|
__global hash128_t const* g_dag, |
||||
|
ulong start_nonce, |
||||
|
ulong target, |
||||
|
uint isolate |
||||
|
) |
||||
|
{ |
||||
|
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||
|
|
||||
|
uint const gid = get_global_id(0); |
||||
|
hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); |
||||
|
|
||||
|
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target) |
||||
|
{ |
||||
|
uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); |
||||
|
g_output[slot] = gid; |
||||
|
} |
||||
|
} |
Loading…
Reference in new issue