Browse Source

New OpenCL arguments

- Adding an argument to specify OpenCL global work size.

- Adding an argument to specify the target milliseconds per batch,
  i.e. per global work size (msPerBatch). If this is 0 then no
  automatic adjustment of the global work size happens.
cl-refactor
Lefteris Karapetsas 10 years ago
parent
commit
15fc63d6a2
  1. 25
      ethminer/MinerAux.h
  2. 47
      libethash-cl/ethash_cl_miner.cpp
  3. 13
      libethash-cl/ethash_cl_miner.h
  4. 4
      libethcore/Ethash.cpp
  5. 4
      libethcore/Ethash.h

25
ethminer/MinerAux.h

@ -39,6 +39,7 @@
#include <libdevcore/SHA3.h> #include <libdevcore/SHA3.h>
#include <libethcore/ProofOfWork.h> #include <libethcore/ProofOfWork.h>
#include <libethcore/EthashAux.h> #include <libethcore/EthashAux.h>
#include <libethash-cl/ethash_cl_miner.h>
#include <libethcore/Farm.h> #include <libethcore/Farm.h>
#if ETH_JSONRPC || !ETH_TRUE #if ETH_JSONRPC || !ETH_TRUE
#include <libweb3jsonrpc/WebThreeStubServer.h> #include <libweb3jsonrpc/WebThreeStubServer.h>
@ -128,6 +129,24 @@ public:
cerr << "Bad " << arg << " option: " << argv[i] << endl; cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument()); BOOST_THROW_EXCEPTION(BadArgument());
} }
else if (arg == "--cl-global-work-size" && i + 1 < argc)
try {
m_globalWorkSize = stol(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--cl-ms-per-batch" && i + 1 < argc)
try {
m_msPerBatch = stol(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--list-devices") else if (arg == "--list-devices")
m_shouldListDevices = true; m_shouldListDevices = true;
else if (arg == "--allow-opencl-cpu") else if (arg == "--allow-opencl-cpu")
@ -266,6 +285,8 @@ public:
else if (m_minerType == MinerType::GPU) else if (m_minerType == MinerType::GPU)
{ {
if (!ProofOfWork::GPUMiner::configureGPU( if (!ProofOfWork::GPUMiner::configureGPU(
m_globalWorkSize,
m_msPerBatch,
m_openclPlatform, m_openclPlatform,
m_openclDevice, m_openclDevice,
m_clAllowCPU, m_clAllowCPU,
@ -318,6 +339,8 @@ public:
<< " --list-devices List the detected OpenCL devices and exit." << endl << " --list-devices List the detected OpenCL devices and exit." << endl
<< " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl << " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl
<< " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl << " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
<< " --cl-global-work Set the OpenCL global work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE) << endl
<< " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(CL_DEFAULT_MS_PER_BATCH) << ". If 0 is given then no autoadjustment of global work size will happen" << endl
; ;
} }
@ -506,6 +529,8 @@ private:
unsigned m_miningThreads = UINT_MAX; unsigned m_miningThreads = UINT_MAX;
bool m_shouldListDevices = false; bool m_shouldListDevices = false;
bool m_clAllowCPU = false; bool m_clAllowCPU = false;
unsigned m_globalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE;
unsigned m_msPerBatch = CL_DEFAULT_MS_PER_BATCH;
boost::optional<uint64_t> m_currentBlock; boost::optional<uint64_t> m_currentBlock;
// default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.) // default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.)
unsigned m_extraGPUMemory = 350000000; unsigned m_extraGPUMemory = 350000000;

47
libethash-cl/ethash_cl_miner.cpp

@ -140,11 +140,15 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId)
bool ethash_cl_miner::configureGPU( bool ethash_cl_miner::configureGPU(
unsigned _platformId, unsigned _platformId,
unsigned _globalWorkSize,
unsigned _msPerBatch,
bool _allowCPU, bool _allowCPU,
unsigned _extraGPUMemory, unsigned _extraGPUMemory,
boost::optional<uint64_t> _currentBlock boost::optional<uint64_t> _currentBlock
) )
{ {
s_initialGlobalWorkSize = _globalWorkSize;
s_msPerBatch = _msPerBatch;
s_allowCPU = _allowCPU; s_allowCPU = _allowCPU;
s_extraRequiredGPUMem = _extraGPUMemory; s_extraRequiredGPUMem = _extraGPUMemory;
// by default let's only consider the DAG of the first epoch // by default let's only consider the DAG of the first epoch
@ -175,6 +179,8 @@ bool ethash_cl_miner::configureGPU(
bool ethash_cl_miner::s_allowCPU = false; bool ethash_cl_miner::s_allowCPU = false;
unsigned ethash_cl_miner::s_extraRequiredGPUMem; unsigned ethash_cl_miner::s_extraRequiredGPUMem;
unsigned ethash_cl_miner::s_msPerBatch = CL_DEFAULT_MS_PER_BATCH;
unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE;
bool ethash_cl_miner::searchForAllDevices(function<bool(cl::Device const&)> _callback) bool ethash_cl_miner::searchForAllDevices(function<bool(cl::Device const&)> _callback)
{ {
@ -302,6 +308,7 @@ bool ethash_cl_miner::init(
// use requested workgroup size, but we require multiple of 8 // use requested workgroup size, but we require multiple of 8
m_workgroupSize = ((_workgroupSize + 7) / 8) * 8; m_workgroupSize = ((_workgroupSize + 7) / 8) * 8;
// make sure that global work size is evenly divisible by the local workgroup size // make sure that global work size is evenly divisible by the local workgroup size
m_globalWorkSize = s_initialGlobalWorkSize;
if (m_globalWorkSize % m_workgroupSize != 0) if (m_globalWorkSize % m_workgroupSize != 0)
m_globalWorkSize = ((m_globalWorkSize / m_workgroupSize) + 1) * m_workgroupSize; m_globalWorkSize = ((m_globalWorkSize / m_workgroupSize) + 1) * m_workgroupSize;
// remember the device's address bits // remember the device's address bits
@ -420,9 +427,8 @@ bool ethash_cl_miner::init(
return true; return true;
} }
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook, unsigned _msPerBatch) void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{ {
(void)_msPerBatch;
try try
{ {
struct pending_batch struct pending_batch
@ -502,25 +508,28 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
} }
// adjust global work size depending on last search time // adjust global work size depending on last search time
// Global work size must be: if (s_msPerBatch)
// - less than or equal to 2 ^ DEVICE_BITS - 1
// - divisible by local work size (workgroup size)
auto d = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - t);
if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished
{ {
if (d > chrono::milliseconds(_msPerBatch * 10 / 9)) // Global work size must be:
// - less than or equal to 2 ^ DEVICE_BITS - 1
// - divisible by local work size (workgroup size)
auto d = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - t);
if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished
{ {
// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << _msPerBatch << " ms." << endl; if (d > chrono::milliseconds(s_msPerBatch * 10 / 9))
m_globalWorkSize = max<unsigned>(128, m_globalWorkSize + m_workgroupSize); {
// cerr << "New global work size" << m_globalWorkSize << endl; // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << _msPerBatch << " ms." << endl;
} m_globalWorkSize = max<unsigned>(128, m_globalWorkSize + m_workgroupSize);
else if (d < chrono::milliseconds(_msPerBatch * 9 / 10)) // cerr << "New global work size" << m_globalWorkSize << endl;
{ }
// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << _msPerBatch << " ms." << endl; else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10))
m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize); {
// Global work size should never be less than the workgroup size // cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << _msPerBatch << " ms." << endl;
m_globalWorkSize = max<unsigned>(m_workgroupSize, m_globalWorkSize); m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize);
// cerr << "New global work size" << m_globalWorkSize << endl; // Global work size should never be less than the workgroup size
m_globalWorkSize = max<unsigned>(m_workgroupSize, m_globalWorkSize);
// cerr << "New global work size" << m_globalWorkSize << endl;
}
} }
} }
} }

13
libethash-cl/ethash_cl_miner.h

@ -17,6 +17,9 @@
#include <functional> #include <functional>
#include <libethash/ethash.h> #include <libethash/ethash.h>
#define CL_DEFAULT_GLOBAL_WORK_SIZE 1024 * 16
#define CL_DEFAULT_MS_PER_BATCH 100
class ethash_cl_miner class ethash_cl_miner
{ {
private: private:
@ -45,6 +48,8 @@ public:
static void listDevices(); static void listDevices();
static bool configureGPU( static bool configureGPU(
unsigned _platformId, unsigned _platformId,
unsigned _globalWorkSize,
unsigned _msPerBatch,
bool _allowCPU, bool _allowCPU,
unsigned _extraGPUMemory, unsigned _extraGPUMemory,
boost::optional<uint64_t> _currentBlock boost::optional<uint64_t> _currentBlock
@ -58,7 +63,7 @@ public:
unsigned _deviceId = 0 unsigned _deviceId = 0
); );
void finish(); void finish();
void search(uint8_t const* _header, uint64_t _target, search_hook& _hook, unsigned _msPerBatch = 100); void search(uint8_t const* _header, uint64_t _target, search_hook& _hook);
void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count); void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count);
void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook); void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook);
@ -77,10 +82,14 @@ private:
cl::Buffer m_hashBuffer[c_bufferCount]; cl::Buffer m_hashBuffer[c_bufferCount];
cl::Buffer m_searchBuffer[c_bufferCount]; cl::Buffer m_searchBuffer[c_bufferCount];
unsigned m_workgroupSize; unsigned m_workgroupSize;
unsigned m_globalWorkSize = c_searchBatchSize; unsigned m_globalWorkSize;
bool m_openclOnePointOne; bool m_openclOnePointOne;
unsigned m_deviceBits; unsigned m_deviceBits;
/// The initial global work size for the searches
static unsigned s_initialGlobalWorkSize;
/// The target milliseconds per batch for the search. If 0, then no adjustment will happen
static unsigned s_msPerBatch;
/// Allow CPU to appear as an OpenCL device or not. Default is false /// Allow CPU to appear as an OpenCL device or not. Default is false
static bool s_allowCPU; static bool s_allowCPU;
/// GPU memory required for other things, like window rendering e.t.c. /// GPU memory required for other things, like window rendering e.t.c.

4
libethcore/Ethash.cpp

@ -409,6 +409,8 @@ void Ethash::GPUMiner::listDevices()
} }
bool Ethash::GPUMiner::configureGPU( bool Ethash::GPUMiner::configureGPU(
unsigned _globalWorkSize,
unsigned _msPerBatch,
unsigned _platformId, unsigned _platformId,
unsigned _deviceId, unsigned _deviceId,
bool _allowCPU, bool _allowCPU,
@ -418,7 +420,7 @@ bool Ethash::GPUMiner::configureGPU(
{ {
s_platformId = _platformId; s_platformId = _platformId;
s_deviceId = _deviceId; s_deviceId = _deviceId;
return ethash_cl_miner::configureGPU(_platformId, _allowCPU, _extraGPUMemory, _currentBlock); return ethash_cl_miner::configureGPU(_globalWorkSize, _msPerBatch, _allowCPU, _extraGPUMemory, _currentBlock);
} }
#endif #endif

4
libethcore/Ethash.h

@ -88,7 +88,7 @@ public:
static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); } static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); }
static std::string platformInfo(); static std::string platformInfo();
static void listDevices() {} static void listDevices() {}
static bool configureGPU(unsigned, unsigned, bool, unsigned, boost::optional<uint64_t>) { return false; } static bool configureGPU(unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional<uint64_t>) { return false; }
static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, std::thread::hardware_concurrency()); } static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, std::thread::hardware_concurrency()); }
protected: protected:
void kickOff() override void kickOff() override
@ -118,6 +118,8 @@ public:
static unsigned getNumDevices(); static unsigned getNumDevices();
static void listDevices(); static void listDevices();
static bool configureGPU( static bool configureGPU(
unsigned _globalWorkSize,
unsigned _msPerBatch,
unsigned _platformId, unsigned _platformId,
unsigned _deviceId, unsigned _deviceId,
bool _allowCPU, bool _allowCPU,

Loading…
Cancel
Save