@ -24,6 +24,7 @@
# include <cstdio>
# include <cstdlib>
# include <fstream>
# include <iostream>
# include <assert.h>
# include <queue>
@ -173,195 +174,249 @@ void ethash_cl_miner::finish()
m_queue . finish ( ) ;
}
bool ethash_cl_miner : : init ( uint8_t const * _dag , uint64_t _dagSize , unsigned workgroup_size , unsigned _platformId , unsigned _deviceId )
bool ethash_cl_miner : : init (
uint8_t const * _dag ,
uint64_t _dagSize ,
unsigned workgroup_size ,
unsigned _platformId ,
unsigned _deviceId ,
unsigned _dagChunksNum
)
{
// for now due to the .cl kernels we can only have either 1 big chunk or 4 chunks
assert ( _dagChunksNum = = 1 | | _dagChunksNum = = 4 ) ;
// now create the number of chunk buffers
m_dagChunksNum = _dagChunksNum ;
// get all platforms
std : : vector < cl : : Platform > platforms ;
cl : : Platform : : get ( & platforms ) ;
if ( platforms . empty ( ) )
try
{
ETHCL_LOG ( " No OpenCL platforms found. " ) ;
return false ;
}
// use selected platform
_platformId = std : : min < unsigned > ( _platformId , platforms . size ( ) - 1 ) ;
ETHCL_LOG ( " Using platform: " < < platforms [ _platformId ] . getInfo < CL_PLATFORM_NAME > ( ) . c_str ( ) ) ;
std : : vector < cl : : Platform > platforms ;
cl : : Platform : : get ( & platforms ) ;
if ( platforms . empty ( ) )
{
ETHCL_LOG ( " No OpenCL platforms found. " ) ;
return false ;
}
// get GPU device of the default platform
std : : vector < cl : : Device > devices ;
platforms [ _platformId ] . getDevices ( CL_DEVICE_TYPE_ALL , & devices ) ;
if ( devices . empty ( ) )
{
ETHCL_LOG ( " No OpenCL devices found. " ) ;
return false ;
}
// use selected platform
_platformId = std : : min < unsigned > ( _platformId , platforms . size ( ) - 1 ) ;
ETHCL_LOG ( " Using platform: " < < platforms [ _platformId ] . getInfo < CL_PLATFORM_NAME > ( ) . c_str ( ) ) ;
// use selected device
cl : : Device & device = devices [ std : : min < unsigned > ( _deviceId , devices . size ( ) - 1 ) ] ;
std : : string device_version = device . getInfo < CL_DEVICE_VERSION > ( ) ;
ETHCL_LOG ( " Using device: " < < device . getInfo < CL_DEVICE_NAME > ( ) . c_str ( ) < < " ( " < < device_version . c_str ( ) < < " ) " ) ;
// get GPU device of the default platform
std : : vector < cl : : Device > devices ;
platforms [ _platformId ] . getDevices ( CL_DEVICE_TYPE_ALL , & devices ) ;
if ( devices . empty ( ) )
{
ETHCL_LOG ( " No OpenCL devices found. " ) ;
return false ;
}
if ( strncmp ( " OpenCL 1.0 " , device_version . c_str ( ) , 10 ) = = 0 )
{
ETHCL_LOG ( " OpenCL 1.0 is not supported. " ) ;
return false ;
}
if ( strncmp ( " OpenCL 1.1 " , device_version . c_str ( ) , 10 ) = = 0 )
m_opencl_1_1 = true ;
// use selected device
cl : : Device & device = devices [ std : : min < unsigned > ( _deviceId , devices . size ( ) - 1 ) ] ;
std : : string device_version = device . getInfo < CL_DEVICE_VERSION > ( ) ;
ETHCL_LOG ( " Using device: " < < device . getInfo < CL_DEVICE_NAME > ( ) . c_str ( ) < < " ( " < < device_version . c_str ( ) < < " ) " ) ;
// create context
m_context = cl : : Context ( std : : vector < cl : : Device > ( & device , & device + 1 ) ) ;
m_queue = cl : : CommandQueue ( m_context , device ) ;
if ( strncmp ( " OpenCL 1.0 " , device_version . c_str ( ) , 10 ) = = 0 )
{
ETHCL_LOG ( " OpenCL 1.0 is not supported. " ) ;
return false ;
}
if ( strncmp ( " OpenCL 1.1 " , device_version . c_str ( ) , 10 ) = = 0 )
m_opencl_1_1 = true ;
// create context
m_context = cl : : Context ( std : : vector < cl : : Device > ( & device , & device + 1 ) ) ;
m_queue = cl : : CommandQueue ( m_context , device ) ;
// use requested workgroup size, but we require multiple of 8
m_workgroup_size = ( ( workgroup_size + 7 ) / 8 ) * 8 ;
// patch source code
// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
// into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
std : : string code ( ETHASH_CL_MINER_KERNEL , ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE ) ;
add_definition ( code , " GROUP_SIZE " , m_workgroup_size ) ;
add_definition ( code , " DAG_SIZE " , ( unsigned ) ( _dagSize / ETHASH_MIX_BYTES ) ) ;
add_definition ( code , " ACCESSES " , ETHASH_ACCESSES ) ;
add_definition ( code , " MAX_OUTPUTS " , c_max_search_results ) ;
//debugf("%s", code.c_str());
// create miner OpenCL program
cl : : Program : : Sources sources ;
sources . push_back ( { code . c_str ( ) , code . size ( ) } ) ;
cl : : Program program ( m_context , sources ) ;
try
{
program . build ( { device } ) ;
ETHCL_LOG ( " Printing program log " ) ;
ETHCL_LOG ( program . getBuildInfo < CL_PROGRAM_BUILD_LOG > ( device ) . c_str ( ) ) ;
}
catch ( cl : : Error err )
{
ETHCL_LOG ( program . getBuildInfo < CL_PROGRAM_BUILD_LOG > ( device ) . c_str ( ) ) ;
return false ;
}
if ( _dagChunksNum = = 1 )
{
ETHCL_LOG ( " Loading single big chunk kernels " ) ;
m_hash_kernel = cl : : Kernel ( program , " ethash_hash " ) ;
m_search_kernel = cl : : Kernel ( program , " ethash_search " ) ;
}
else
{
ETHCL_LOG ( " Loading chunk kernels " ) ;
m_hash_kernel = cl : : Kernel ( program , " ethash_hash_chunks " ) ;
m_search_kernel = cl : : Kernel ( program , " ethash_search_chunks " ) ;
}
// use requested workgroup size, but we require multiple of 8
m_workgroup_size = ( ( workgroup_size + 7 ) / 8 ) * 8 ;
// create buffer for dag
if ( _dagChunksNum = = 1 )
{
ETHCL_LOG ( " Creating one big buffer " ) ;
m_dagChunks . push_back ( cl : : Buffer ( m_context , CL_MEM_READ_ONLY , _dagSize ) ) ;
}
else
for ( unsigned i = 0 ; i < _dagChunksNum ; i + + )
{
// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
ETHCL_LOG ( " Creating buffer for chunk " < < i ) ;
m_dagChunks . push_back ( cl : : Buffer (
m_context ,
CL_MEM_READ_ONLY ,
( i = = 3 ) ? ( _dagSize - 3 * ( ( _dagSize > > 9 ) < < 7 ) ) : ( _dagSize > > 9 ) < < 7
) ) ;
}
// patch source code
std : : string code ( ETHASH_CL_MINER_KERNEL , ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE ) ;
add_definition ( code , " GROUP_SIZE " , m_workgroup_size ) ;
add_definition ( code , " DAG_SIZE " , ( unsigned ) ( _dagSize / ETHASH_MIX_BYTES ) ) ;
add_definition ( code , " ACCESSES " , ETHASH_ACCESSES ) ;
add_definition ( code , " MAX_OUTPUTS " , c_max_search_results ) ;
//debugf("%s", code.c_str());
// create buffer for header
ETHCL_LOG ( " Creating buffer for header. " ) ;
m_header = cl : : Buffer ( m_context , CL_MEM_READ_ONLY , 32 ) ;
// create miner OpenCL program
cl : : Program : : Sources sources ;
sources . push_back ( { code . c_str ( ) , code . size ( ) } ) ;
if ( _dagChunksNum = = 1 )
{
ETHCL_LOG ( " Mapping one big chunk. " ) ;
m_queue . enqueueWriteBuffer ( m_dagChunks [ 0 ] , CL_TRUE , 0 , _dagSize , _dag ) ;
}
else
{
// TODO Note: If we ever change to _dagChunksNum other than 4, then the size would need recalculation
void * dag_ptr [ 4 ] ;
for ( unsigned i = 0 ; i < _dagChunksNum ; i + + )
{
ETHCL_LOG ( " Mapping chunk " < < i ) ;
dag_ptr [ i ] = m_queue . enqueueMapBuffer ( m_dagChunks [ i ] , true , m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION , 0 , ( i = = 3 ) ? ( _dagSize - 3 * ( ( _dagSize > > 9 ) < < 7 ) ) : ( _dagSize > > 9 ) < < 7 ) ;
}
for ( unsigned i = 0 ; i < _dagChunksNum ; i + + )
{
memcpy ( dag_ptr [ i ] , ( char * ) _dag + i * ( ( _dagSize > > 9 ) < < 7 ) , ( i = = 3 ) ? ( _dagSize - 3 * ( ( _dagSize > > 9 ) < < 7 ) ) : ( _dagSize > > 9 ) < < 7 ) ;
m_queue . enqueueUnmapMemObject ( m_dagChunks [ i ] , dag_ptr [ i ] ) ;
}
}
cl : : Program program ( m_context , sources ) ;
try
{
program . build ( { device } ) ;
// create mining buffers
for ( unsigned i = 0 ; i ! = c_num_buffers ; + + i )
{
ETHCL_LOG ( " Creating mining buffer " < < i ) ;
m_hash_buf [ i ] = cl : : Buffer ( m_context , CL_MEM_WRITE_ONLY | ( ! m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0 ) , 32 * c_hash_batch_size ) ;
m_search_buf [ i ] = cl : : Buffer ( m_context , CL_MEM_WRITE_ONLY , ( c_max_search_results + 1 ) * sizeof ( uint32_t ) ) ;
}
}
catch ( cl : : Error err )
{
ETHCL_LOG ( program . getBuildInfo < CL_PROGRAM_BUILD_LOG > ( device ) . c_str ( ) ) ;
ETHCL_LOG ( err . what ( ) < < " ( " < < err . err ( ) < < " ) " ) ;
return false ;
}
m_hash_kernel = cl : : Kernel ( program , " ethash_hash " ) ;
m_search_kernel = cl : : Kernel ( program , " ethash_search " ) ;
// create buffer for dag
m_dag = cl : : Buffer ( m_context , CL_MEM_READ_ONLY , _dagSize ) ;
// create buffer for header
m_header = cl : : Buffer ( m_context , CL_MEM_READ_ONLY , 32 ) ;
// compute dag on CPU
try {
m_queue . enqueueWriteBuffer ( m_dag , CL_TRUE , 0 , _dagSize , _dag ) ;
}
catch ( . . . )
{
// didn't work. shitty driver. try allocating in CPU RAM and manually memcpying it.
void * dag_ptr = m_queue . enqueueMapBuffer ( m_dag , true , m_opencl_1_1 ? CL_MAP_WRITE : CL_MAP_WRITE_INVALIDATE_REGION , 0 , _dagSize ) ;
memcpy ( dag_ptr , _dag , _dagSize ) ;
m_queue . enqueueUnmapMemObject ( m_dag , dag_ptr ) ;
}
// create mining buffers
for ( unsigned i = 0 ; i ! = c_num_buffers ; + + i )
{
m_hash_buf [ i ] = cl : : Buffer ( m_context , CL_MEM_WRITE_ONLY | ( ! m_opencl_1_1 ? CL_MEM_HOST_READ_ONLY : 0 ) , 32 * c_hash_batch_size ) ;
m_search_buf [ i ] = cl : : Buffer ( m_context , CL_MEM_WRITE_ONLY , ( c_max_search_results + 1 ) * sizeof ( uint32_t ) ) ;
}
return true ;
}
void ethash_cl_miner : : search ( uint8_t const * header , uint64_t target , search_hook & hook )
{
struct pending_batch
try
{
uint64_t start_nonce ;
unsigned buf ;
} ;
std : : queue < pending_batch > pending ;
struct pending_batch
{
uint64_t start_nonce ;
unsigned buf ;
} ;
std : : queue < pending_batch > pending ;
uint32_t const c_zero = 0 ;
static uint32_t const c_zero = 0 ;
// update header constant buffer
m_queue . enqueueWriteBuffer ( m_header , false , 0 , 32 , header ) ;
for ( unsigned i = 0 ; i ! = c_num_buffers ; + + i )
m_queue . enqueueWriteBuffer ( m_search_buf [ i ] , false , 0 , 4 , & c_zero ) ;
// update header constant buffer
m_queue . enqueueWriteBuffer ( m_header , false , 0 , 32 , header ) ;
for ( unsigned i = 0 ; i ! = c_num_buffers ; + + i )
m_queue . enqueueWriteBuffer ( m_search_buf [ i ] , false , 0 , 4 , & c_zero ) ;
# if CL_VERSION_1_2 && 0
cl : : Event pre_return_event ;
if ( ! m_opencl_1_1 )
m_queue . enqueueBarrierWithWaitList ( NULL , & pre_return_event ) ;
else
cl : : Event pre_return_event ;
if ( ! m_opencl_1_1 )
m_queue . enqueueBarrierWithWaitList ( NULL , & pre_return_event ) ;
else
# endif
m_queue . finish ( ) ;
/*
__kernel void ethash_combined_search (
__global hash32_t * g_hashes , // 0
__constant hash32_t const * g_header , // 1
__global hash128_t const * g_dag , // 2
ulong start_nonce , // 3
ulong target , // 4
uint isolate // 5
)
*/
m_search_kernel . setArg ( 1 , m_header ) ;
m_search_kernel . setArg ( 2 , m_dag ) ;
// pass these to stop the compiler unrolling the loops
m_search_kernel . setArg ( 4 , target ) ;
m_search_kernel . setArg ( 5 , ~ 0u ) ;
unsigned buf = 0 ;
std : : random_device engine ;
uint64_t start_nonce = std : : uniform_int_distribution < uint64_t > ( ) ( engine ) ;
for ( ; ; start_nonce + = c_search_batch_size )
{
// supply output buffer to kernel
m_search_kernel . setArg ( 0 , m_search_buf [ buf ] ) ;
m_search_kernel . setArg ( 3 , start_nonce ) ;
// execute it!
m_queue . enqueueNDRangeKernel ( m_search_kernel , cl : : NullRange , c_search_batch_size , m_workgroup_size ) ;
pending . push ( { start_nonce , buf } ) ;
buf = ( buf + 1 ) % c_num_buffers ;
// read results
if ( pending . size ( ) = = c_num_buffers )
m_queue . finish ( ) ;
unsigned argPos = 2 ;
m_search_kernel . setArg ( 1 , m_header ) ;
for ( unsigned i = 0 ; i < m_dagChunksNum ; + + i , + + argPos )
m_search_kernel . setArg ( argPos , m_dagChunks [ i ] ) ;
// pass these to stop the compiler unrolling the loops
m_search_kernel . setArg ( argPos + 1 , target ) ;
m_search_kernel . setArg ( argPos + 2 , ~ 0u ) ;
unsigned buf = 0 ;
std : : random_device engine ;
uint64_t start_nonce = std : : uniform_int_distribution < uint64_t > ( ) ( engine ) ;
for ( ; ; start_nonce + = c_search_batch_size )
{
pending_batch const & batch = pending . front ( ) ;
// supply output buffer to kernel
m_search_kernel . setArg ( 0 , m_search_buf [ buf ] ) ;
if ( m_dagChunksNum = = 1 )
m_search_kernel . setArg ( 3 , start_nonce ) ;
else
m_search_kernel . setArg ( 6 , start_nonce ) ;
// could use pinned host pointer instead
uint32_t * results = ( uint32_t * ) m_queue . enqueueMapBuffer ( m_search_buf [ batch . buf ] , true , CL_MAP_READ , 0 , ( 1 + c_max_search_results ) * sizeof ( uint32_t ) ) ;
unsigned num_found = std : : min < unsigned > ( results [ 0 ] , c_max_search_results ) ;
// execute it!
m_queue . enqueueNDRangeKernel ( m_search_kernel , cl : : NullRange , c_search_batch_size , m_workgroup_size ) ;
uint64_t nonces [ c_max_search_results ] ;
for ( unsigned i = 0 ; i ! = num_found ; + + i )
pending . push ( { start_nonce , buf } ) ;
buf = ( buf + 1 ) % c_num_buffers ;
// read results
if ( pending . size ( ) = = c_num_buffers )
{
nonces [ i ] = batch . start_nonce + results [ i + 1 ] ;
}
pending_batch const & batch = pending . front ( ) ;
m_queue . enqueueUnmapMemObject ( m_search_buf [ batch . buf ] , results ) ;
bool exit = num_found & & hook . found ( nonces , num_found ) ;
exit | = hook . searched ( batch . start_nonce , c_search_batch_size ) ; // always report searched before exit
if ( exit )
break ;
// could use pinned host pointer instead
uint32_t * results = ( uint32_t * ) m_queue . enqueueMapBuffer ( m_search_buf [ batch . buf ] , true , CL_MAP_READ , 0 , ( 1 + c_max_search_results ) * sizeof ( uint32_t ) ) ;
unsigned num_found = std : : min < unsigned > ( results [ 0 ] , c_max_search_results ) ;
// reset search buffer if we're still going
i f ( num_found )
m_queue . enqueueWriteBuffer ( m_search_buf [ batch . buf ] , true , 0 , 4 , & c_zero ) ;
uint64_t nonces [ c_max_search_results ] ;
for ( unsigned i = 0 ; i ! = num_found ; + + i )
nonces [ i ] = batch . start_nonce + results [ i + 1 ] ;
pending . pop ( ) ;
m_queue . enqueueUnmapMemObject ( m_search_buf [ batch . buf ] , results ) ;
bool exit = num_found & & hook . found ( nonces , num_found ) ;
exit | = hook . searched ( batch . start_nonce , c_search_batch_size ) ; // always report searched before exit
if ( exit )
break ;
// reset search buffer if we're still going
if ( num_found )
m_queue . enqueueWriteBuffer ( m_search_buf [ batch . buf ] , true , 0 , 4 , & c_zero ) ;
pending . pop ( ) ;
}
}
}
// not safe to return until this is ready
// not safe to return until this is ready
# if CL_VERSION_1_2 && 0
if ( ! m_opencl_1_1 )
pre_return_event . wait ( ) ;
if ( ! m_opencl_1_1 )
pre_return_event . wait ( ) ;
# endif
}
catch ( cl : : Error err )
{
ETHCL_LOG ( err . what ( ) < < " ( " < < err . err ( ) < < " ) " ) ;
}
}