Browse Source

cleanup of chunks and msperbatch host code

cl-refactor
Genoil 9 years ago
parent
commit
322534d26e
  1. 13
      ethminer/MinerAux.h
  2. 63
      libethash-cl/ethash_cl_miner.cpp
  3. 15
      libethash-cl/ethash_cl_miner.h
  4. 2
      libethcore/EthashGPUMiner.cpp
  5. 1
      libethcore/EthashGPUMiner.h

13
ethminer/MinerAux.h

@ -211,16 +211,6 @@ public:
m_extraGPUMemory = 1000000 * stol(argv[++i]); m_extraGPUMemory = 1000000 * stol(argv[++i]);
#endif #endif
#if ETH_ETHASHCL || !ETH_TRUE #if ETH_ETHASHCL || !ETH_TRUE
else if (arg == "--cl-ms-per-batch" && i + 1 < argc)
try
{
m_msPerBatch = stol(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--allow-opencl-cpu") else if (arg == "--allow-opencl-cpu")
m_clAllowCPU = true; m_clAllowCPU = true;
#endif #endif
@ -447,7 +437,6 @@ public:
if (!EthashGPUMiner::configureGPU( if (!EthashGPUMiner::configureGPU(
m_localWorkSize, m_localWorkSize,
m_globalWorkSizeMultiplier, m_globalWorkSizeMultiplier,
m_msPerBatch,
m_openclPlatform, m_openclPlatform,
m_openclDevice, m_openclDevice,
m_clAllowCPU, m_clAllowCPU,
@ -540,7 +529,6 @@ public:
<< " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl << " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
<< " --cl-local-work Set the OpenCL local work size. Default is " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl << " --cl-local-work Set the OpenCL local work size. Default is " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl
<< " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier) << " * " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl << " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier) << " * " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl
<< " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(ethash_cl_miner::c_defaultMSPerBatch) << ". If 0 is given then no autoadjustment of global work size will happen" << endl
#endif #endif
#if ETH_ETHASHCUDA || !ETH_TRUE #if ETH_ETHASHCUDA || !ETH_TRUE
<< " --cuda-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl << " --cuda-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
@ -914,7 +902,6 @@ private:
unsigned m_globalWorkSizeMultiplier = ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier; unsigned m_globalWorkSizeMultiplier = ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier;
unsigned m_localWorkSize = ethash_cl_miner::c_defaultLocalWorkSize; unsigned m_localWorkSize = ethash_cl_miner::c_defaultLocalWorkSize;
#endif #endif
unsigned m_msPerBatch = ethash_cl_miner::c_defaultMSPerBatch;
#endif #endif
#if ETH_ETHASHCUDA || !ETH_TRUE #if ETH_ETHASHCUDA || !ETH_TRUE
unsigned m_globalWorkSizeMultiplier = ethash_cuda_miner::c_defaultGridSize; unsigned m_globalWorkSizeMultiplier = ethash_cuda_miner::c_defaultGridSize;

63
libethash-cl/ethash_cl_miner.cpp

@ -55,7 +55,6 @@ using namespace std;
unsigned const ethash_cl_miner::c_defaultLocalWorkSize = 64; unsigned const ethash_cl_miner::c_defaultLocalWorkSize = 64;
unsigned const ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier = 4096; // * CL_DEFAULT_LOCAL_WORK_SIZE unsigned const ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier = 4096; // * CL_DEFAULT_LOCAL_WORK_SIZE
unsigned const ethash_cl_miner::c_defaultMSPerBatch = 0;
// TODO: If at any point we can use libdevcore in here then we should switch to using a LogChannel // TODO: If at any point we can use libdevcore in here then we should switch to using a LogChannel
#if defined(_WIN32) #if defined(_WIN32)
@ -184,7 +183,6 @@ bool ethash_cl_miner::configureGPU(
unsigned _platformId, unsigned _platformId,
unsigned _localWorkSize, unsigned _localWorkSize,
unsigned _globalWorkSize, unsigned _globalWorkSize,
unsigned _msPerBatch,
bool _allowCPU, bool _allowCPU,
unsigned _extraGPUMemory, unsigned _extraGPUMemory,
uint64_t _currentBlock uint64_t _currentBlock
@ -192,7 +190,6 @@ bool ethash_cl_miner::configureGPU(
{ {
s_workgroupSize = _localWorkSize; s_workgroupSize = _localWorkSize;
s_initialGlobalWorkSize = _globalWorkSize; s_initialGlobalWorkSize = _globalWorkSize;
s_msPerBatch = _msPerBatch;
s_allowCPU = _allowCPU; s_allowCPU = _allowCPU;
s_extraRequiredGPUMem = _extraGPUMemory; s_extraRequiredGPUMem = _extraGPUMemory;
// by default let's only consider the DAG of the first epoch // by default let's only consider the DAG of the first epoch
@ -223,7 +220,6 @@ bool ethash_cl_miner::configureGPU(
bool ethash_cl_miner::s_allowCPU = false; bool ethash_cl_miner::s_allowCPU = false;
unsigned ethash_cl_miner::s_extraRequiredGPUMem; unsigned ethash_cl_miner::s_extraRequiredGPUMem;
unsigned ethash_cl_miner::s_msPerBatch = ethash_cl_miner::c_defaultMSPerBatch;
unsigned ethash_cl_miner::s_workgroupSize = ethash_cl_miner::c_defaultLocalWorkSize; unsigned ethash_cl_miner::s_workgroupSize = ethash_cl_miner::c_defaultLocalWorkSize;
unsigned ethash_cl_miner::s_initialGlobalWorkSize = ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier * ethash_cl_miner::c_defaultLocalWorkSize; unsigned ethash_cl_miner::s_initialGlobalWorkSize = ethash_cl_miner::c_defaultGlobalWorkSizeMultiplier * ethash_cl_miner::c_defaultLocalWorkSize;
@ -362,10 +358,6 @@ bool ethash_cl_miner::init(
m_globalWorkSize = s_initialGlobalWorkSize; m_globalWorkSize = s_initialGlobalWorkSize;
if (m_globalWorkSize % s_workgroupSize != 0) if (m_globalWorkSize % s_workgroupSize != 0)
m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize; m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize;
// remember the device's address bits
m_deviceBits = device.getInfo<CL_DEVICE_ADDRESS_BITS>();
// make sure first step of global work size adjustment is large enough
m_stepWorkSizeAdjust = pow(2, m_deviceBits / 2 + 1);
// patch source code // patch source code
// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled // note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
@ -397,13 +389,12 @@ bool ethash_cl_miner::init(
// create buffer for dag // create buffer for dag
try try
{ {
m_dagChunksCount = 1;
ETHCL_LOG("Creating one big buffer for the DAG"); ETHCL_LOG("Creating one big buffer for the DAG");
m_dagChunks.push_back(cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize)); m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);
ETHCL_LOG("Loading single big chunk kernels"); ETHCL_LOG("Loading single big chunk kernels");
m_searchKernel = cl::Kernel(program, "ethash_search"); m_searchKernel = cl::Kernel(program, "ethash_search");
ETHCL_LOG("Mapping one big chunk."); ETHCL_LOG("Mapping one big chunk.");
m_queue.enqueueWriteBuffer(m_dagChunks[0], CL_TRUE, 0, _dagSize, _dag); m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
} }
catch (cl::Error const& err) catch (cl::Error const& err)
{ {
@ -456,13 +447,12 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
#endif #endif
m_queue.finish(); m_queue.finish();
unsigned argPos = 2;
m_searchKernel.setArg(1, m_header); m_searchKernel.setArg(1, m_header);
for (unsigned i = 0; i < m_dagChunksCount; ++i, ++argPos) m_searchKernel.setArg(2, m_dag );
m_searchKernel.setArg(argPos, m_dagChunks[i]);
// pass these to stop the compiler unrolling the loops // pass these to stop the compiler unrolling the loops
m_searchKernel.setArg(argPos + 1, target); m_searchKernel.setArg(4, target);
m_searchKernel.setArg(argPos + 2, ~0u); m_searchKernel.setArg(5, ~0u);
unsigned buf = 0; unsigned buf = 0;
random_device engine; random_device engine;
@ -472,10 +462,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
auto t = chrono::high_resolution_clock::now(); auto t = chrono::high_resolution_clock::now();
// supply output buffer to kernel // supply output buffer to kernel
m_searchKernel.setArg(0, m_searchBuffer[buf]); m_searchKernel.setArg(0, m_searchBuffer[buf]);
if (m_dagChunksCount == 1)
m_searchKernel.setArg(3, start_nonce); m_searchKernel.setArg(3, start_nonce);
else
m_searchKernel.setArg(6, start_nonce);
// execute it! // execute it!
m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize); m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize);
@ -508,44 +495,6 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
pending.pop(); pending.pop();
} }
// adjust global work size depending on last search time
if (s_msPerBatch)
{
// Global work size must be:
// - less than or equal to 2 ^ DEVICE_BITS - 1
// - divisible by local work size (workgroup size)
auto d = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - t);
if (d != chrono::milliseconds(0)) // if duration is zero, we did not get into the actual search, or the search did not finish
{
if (d > chrono::milliseconds(s_msPerBatch * 10 / 9))
{
// Divide the step by 2 when the adjustment direction changes
if (m_wayWorkSizeAdjust > -1)
m_stepWorkSizeAdjust = max<unsigned>(1, m_stepWorkSizeAdjust / 2);
m_wayWorkSizeAdjust = -1;
// cerr << "m_stepWorkSizeAdjust: " << m_stepWorkSizeAdjust << ", m_wayWorkSizeAdjust: " << m_wayWorkSizeAdjust << endl;
// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << s_msPerBatch << " ms." << endl;
m_globalWorkSize = max<unsigned>(128, m_globalWorkSize - m_stepWorkSizeAdjust);
// cerr << "New global work size" << m_globalWorkSize << endl;
}
else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10))
{
// Divide the step by 2 when the adjustment direction changes
if (m_wayWorkSizeAdjust < 1)
m_stepWorkSizeAdjust = max<unsigned>(1, m_stepWorkSizeAdjust / 2);
m_wayWorkSizeAdjust = 1;
// cerr << "m_stepWorkSizeAdjust: " << m_stepWorkSizeAdjust << ", m_wayWorkSizeAdjust: " << m_wayWorkSizeAdjust << endl;
// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << s_msPerBatch << " ms." << endl;
m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize + m_stepWorkSizeAdjust);
// Global work size should never be less than the workgroup size
m_globalWorkSize = max<unsigned>(s_workgroupSize, m_globalWorkSize);
// cerr << "New global work size" << m_globalWorkSize << endl;
}
}
}
} }
// not safe to return until this is ready // not safe to return until this is ready

15
libethash-cl/ethash_cl_miner.h

@ -46,7 +46,6 @@ public:
unsigned _platformId, unsigned _platformId,
unsigned _localWorkSize, unsigned _localWorkSize,
unsigned _globalWorkSize, unsigned _globalWorkSize,
unsigned _msPerBatch,
bool _allowCPU, bool _allowCPU,
unsigned _extraGPUMemory, unsigned _extraGPUMemory,
uint64_t _currentBlock uint64_t _currentBlock
@ -61,16 +60,11 @@ public:
void finish(); void finish();
void search(uint8_t const* _header, uint64_t _target, search_hook& _hook); void search(uint8_t const* _header, uint64_t _target, search_hook& _hook);
void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count);
void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook);
/* -- default values -- */ /* -- default values -- */
/// Default value of the local work size. Also known as workgroup size. /// Default value of the local work size. Also known as workgroup size.
static unsigned const c_defaultLocalWorkSize; static unsigned const c_defaultLocalWorkSize;
/// Default value of the global work size as a multiplier of the local work size /// Default value of the global work size as a multiplier of the local work size
static unsigned const c_defaultGlobalWorkSizeMultiplier; static unsigned const c_defaultGlobalWorkSizeMultiplier;
/// Default value of the milliseconds per global work size (per batch)
static unsigned const c_defaultMSPerBatch;
private: private:
@ -80,18 +74,11 @@ private:
cl::Context m_context; cl::Context m_context;
cl::CommandQueue m_queue; cl::CommandQueue m_queue;
cl::Kernel m_searchKernel; cl::Kernel m_searchKernel;
unsigned int m_dagChunksCount; cl::Buffer m_dag;
std::vector<cl::Buffer> m_dagChunks;
cl::Buffer m_header; cl::Buffer m_header;
cl::Buffer m_searchBuffer[c_bufferCount]; cl::Buffer m_searchBuffer[c_bufferCount];
unsigned m_globalWorkSize; unsigned m_globalWorkSize;
bool m_openclOnePointOne; bool m_openclOnePointOne;
unsigned m_deviceBits;
/// The step used in the work size adjustment
unsigned int m_stepWorkSizeAdjust;
/// The Work Size way of adjustment, > 0 when previously increased, < 0 when previously decreased
int m_wayWorkSizeAdjust = 0;
/// The local work size for the search /// The local work size for the search
static unsigned s_workgroupSize; static unsigned s_workgroupSize;

2
libethcore/EthashGPUMiner.cpp

@ -202,7 +202,6 @@ void EthashGPUMiner::listDevices()
bool EthashGPUMiner::configureGPU( bool EthashGPUMiner::configureGPU(
unsigned _localWorkSize, unsigned _localWorkSize,
unsigned _globalWorkSizeMultiplier, unsigned _globalWorkSizeMultiplier,
unsigned _msPerBatch,
unsigned _platformId, unsigned _platformId,
unsigned _deviceId, unsigned _deviceId,
bool _allowCPU, bool _allowCPU,
@ -223,7 +222,6 @@ bool EthashGPUMiner::configureGPU(
_platformId, _platformId,
_localWorkSize, _localWorkSize,
_globalWorkSizeMultiplier * _localWorkSize, _globalWorkSizeMultiplier * _localWorkSize,
_msPerBatch,
_allowCPU, _allowCPU,
_extraGPUMemory, _extraGPUMemory,
_currentBlock) _currentBlock)

1
libethcore/EthashGPUMiner.h

@ -48,7 +48,6 @@ public:
static bool configureGPU( static bool configureGPU(
unsigned _localWorkSize, unsigned _localWorkSize,
unsigned _globalWorkSizeMultiplier, unsigned _globalWorkSizeMultiplier,
unsigned _msPerBatch,
unsigned _platformId, unsigned _platformId,
unsigned _deviceId, unsigned _deviceId,
bool _allowCPU, bool _allowCPU,

Loading…
Cancel
Save