Browse Source

Merge pull request #2298 from LefterisJP/cl_batch_size_adjust

Adjust CL miner work batch size properly
cl-refactor
Gav Wood 10 years ago
parent
commit
80249970c5
  1. 39
      ethminer/MinerAux.h
  2. 68
      libethash-cl/ethash_cl_miner.cpp
  3. 16
      libethash-cl/ethash_cl_miner.h
  4. 30
      libethcore/Ethash.cpp
  5. 11
      libethcore/Ethash.h

39
ethminer/MinerAux.h

@ -128,6 +128,33 @@ public:
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--cl-global-work" && i + 1 < argc)
try {
m_globalWorkSizeMultiplier = stol(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--cl-local-work" && i + 1 < argc)
try {
m_localWorkSize = stol(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--cl-ms-per-batch" && i + 1 < argc)
try {
m_msPerBatch = stol(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--list-devices")
m_shouldListDevices = true;
else if (arg == "--allow-opencl-cpu")
@ -266,16 +293,16 @@ public:
else if (m_minerType == MinerType::GPU)
{
if (!ProofOfWork::GPUMiner::configureGPU(
m_localWorkSize,
m_globalWorkSizeMultiplier,
m_msPerBatch,
m_openclPlatform,
m_openclDevice,
m_clAllowCPU,
m_extraGPUMemory,
m_currentBlock
))
{
cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl;
exit(1);
}
ProofOfWork::GPUMiner::setNumInstances(m_miningThreads);
}
if (mode == OperationMode::DAGInit)
@ -318,6 +345,9 @@ public:
<< " --list-devices List the detected OpenCL devices and exit." << endl
<< " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl
<< " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
<< " --cl-local-work Set the OpenCL local work size. Default is " << toString(dev::eth::Ethash::defaultLocalWorkSize) << endl
<< " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(dev::eth::Ethash::defaultGlobalWorkSizeMultiplier) << " * " << toString(dev::eth::Ethash::defaultLocalWorkSize) << endl
<< " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(dev::eth::Ethash::defaultMSPerBatch) << ". If 0 is given then no autoadjustment of global work size will happen" << endl
;
}
@ -506,6 +536,9 @@ private:
unsigned m_miningThreads = UINT_MAX;
bool m_shouldListDevices = false;
bool m_clAllowCPU = false;
unsigned m_globalWorkSizeMultiplier = dev::eth::Ethash::defaultGlobalWorkSizeMultiplier;
unsigned m_localWorkSize = dev::eth::Ethash::defaultLocalWorkSize;
unsigned m_msPerBatch = dev::eth::Ethash::defaultMSPerBatch;
boost::optional<uint64_t> m_currentBlock;
// default value is 350MB of GPU memory for other stuff (windows system rendering, etc.)
unsigned m_extraGPUMemory = 350000000;

68
libethash-cl/ethash_cl_miner.cpp

@ -33,6 +33,7 @@
#include <vector>
#include <libethash/util.h>
#include <libethash/ethash.h>
#include <libethcore/Ethash.h>
#include <libethash/internal.h>
#include "ethash_cl_miner.h"
#include "ethash_cl_miner_kernel.h"
@ -49,6 +50,7 @@
#undef max
using namespace std;
using namespace dev::eth;
// TODO: If at any point we can use libdevcore in here then we should switch to using a LogChannel
#define ETHCL_LOG(_contents) cout << "[OPENCL]:" << _contents << endl
@ -140,11 +142,17 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId)
bool ethash_cl_miner::configureGPU(
unsigned _platformId,
unsigned _localWorkSize,
unsigned _globalWorkSize,
unsigned _msPerBatch,
bool _allowCPU,
unsigned _extraGPUMemory,
boost::optional<uint64_t> _currentBlock
)
{
s_workgroupSize = _localWorkSize;
s_initialGlobalWorkSize = _globalWorkSize;
s_msPerBatch = _msPerBatch;
s_allowCPU = _allowCPU;
s_extraRequiredGPUMem = _extraGPUMemory;
// by default let's only consider the DAG of the first epoch
@ -175,6 +183,9 @@ bool ethash_cl_miner::configureGPU(
bool ethash_cl_miner::s_allowCPU = false;
unsigned ethash_cl_miner::s_extraRequiredGPUMem;
unsigned ethash_cl_miner::s_msPerBatch = Ethash::defaultMSPerBatch;
unsigned ethash_cl_miner::s_workgroupSize = Ethash::defaultLocalWorkSize;
unsigned ethash_cl_miner::s_initialGlobalWorkSize = Ethash::defaultGlobalWorkSizeMultiplier * Ethash::defaultLocalWorkSize;
bool ethash_cl_miner::searchForAllDevices(function<bool(cl::Device const&)> _callback)
{
@ -254,7 +265,6 @@ void ethash_cl_miner::finish()
bool ethash_cl_miner::init(
uint8_t const* _dag,
uint64_t _dagSize,
unsigned _workgroupSize,
unsigned _platformId,
unsigned _deviceId
)
@ -299,14 +309,18 @@ bool ethash_cl_miner::init(
m_context = cl::Context(vector<cl::Device>(&device, &device + 1));
m_queue = cl::CommandQueue(m_context, device);
// use requested workgroup size, but we require multiple of 8
m_workgroupSize = ((_workgroupSize + 7) / 8) * 8;
// make sure that global work size is evenly divisible by the local workgroup size
m_globalWorkSize = s_initialGlobalWorkSize;
if (m_globalWorkSize % s_workgroupSize != 0)
m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize;
// remember the device's address bits
m_deviceBits = device.getInfo<CL_DEVICE_ADDRESS_BITS>();
// patch source code
// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
// into a byte array by bin2h.cmake. There is no need to load the file by hand at runtime
string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
addDefinition(code, "GROUP_SIZE", m_workgroupSize);
addDefinition(code, "GROUP_SIZE", s_workgroupSize);
addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
@ -415,9 +429,8 @@ bool ethash_cl_miner::init(
return true;
}
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook, unsigned _msPerBatch)
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
{
(void)_msPerBatch;
try
{
struct pending_batch
@ -454,10 +467,9 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
unsigned buf = 0;
random_device engine;
uint64_t start_nonce = uniform_int_distribution<uint64_t>()(engine);
for (;; start_nonce += m_batchSize)
for (;; start_nonce += m_globalWorkSize)
{
// chrono::high_resolution_clock::time_point t = chrono::high_resolution_clock::now();
auto t = chrono::high_resolution_clock::now();
// supply output buffer to kernel
m_searchKernel.setArg(0, m_searchBuffer[buf]);
if (m_dagChunksCount == 1)
@ -466,7 +478,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
m_searchKernel.setArg(6, start_nonce);
// execute it!
m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_batchSize, m_workgroupSize);
m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize);
pending.push({ start_nonce, buf });
buf = (buf + 1) % c_bufferCount;
@ -486,7 +498,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
m_queue.enqueueUnmapMemObject(m_searchBuffer[batch.buf], results);
bool exit = num_found && hook.found(nonces, num_found);
exit |= hook.searched(batch.start_nonce, m_batchSize); // always report searched before exit
exit |= hook.searched(batch.start_nonce, m_globalWorkSize); // always report searched before exit
if (exit)
break;
@ -497,19 +509,31 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
pending.pop();
}
/* chrono::high_resolution_clock::duration d = chrono::high_resolution_clock::now() - t;
if (d > chrono::milliseconds(_msPerBatch * 10 / 9))
// adjust global work size depending on last search time
if (s_msPerBatch)
{
cerr << "Batch of" << m_batchSize << "took" << chrono::duration_cast<chrono::milliseconds>(d).count() << "ms, >>" << _msPerBatch << "ms.";
m_batchSize = max<unsigned>(128, m_batchSize * 9 / 10);
cerr << "New batch size" << m_batchSize;
// Global work size must be:
// - less than or equal to 2 ^ DEVICE_BITS - 1
// - divisible by local work size (workgroup size)
auto d = chrono::duration_cast<chrono::milliseconds>(chrono::high_resolution_clock::now() - t);
if (d != chrono::milliseconds(0)) // if duration is zero, we did not get in the actual searh/or search not finished
{
if (d > chrono::milliseconds(s_msPerBatch * 10 / 9))
{
// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << _msPerBatch << " ms." << endl;
m_globalWorkSize = max<unsigned>(128, m_globalWorkSize + s_workgroupSize);
// cerr << "New global work size" << m_globalWorkSize << endl;
}
else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10))
{
// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << _msPerBatch << " ms." << endl;
m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize - s_workgroupSize);
// Global work size should never be less than the workgroup size
m_globalWorkSize = max<unsigned>(s_workgroupSize, m_globalWorkSize);
// cerr << "New global work size" << m_globalWorkSize << endl;
}
}
}
else if (d < chrono::milliseconds(_msPerBatch * 9 / 10))
{
cerr << "Batch of" << m_batchSize << "took" << chrono::duration_cast<chrono::milliseconds>(d).count() << "ms, <<" << _msPerBatch << "ms.";
m_batchSize = m_batchSize * 10 / 9;
cerr << "New batch size" << m_batchSize;
}*/
}
// not safe to return until this is ready

16
libethash-cl/ethash_cl_miner.h

@ -45,6 +45,9 @@ public:
static void listDevices();
static bool configureGPU(
unsigned _platformId,
unsigned _localWorkSize,
unsigned _globalWorkSize,
unsigned _msPerBatch,
bool _allowCPU,
unsigned _extraGPUMemory,
boost::optional<uint64_t> _currentBlock
@ -53,12 +56,11 @@ public:
bool init(
uint8_t const* _dag,
uint64_t _dagSize,
unsigned _workgroupSize = 64,
unsigned _platformId = 0,
unsigned _deviceId = 0
);
void finish();
void search(uint8_t const* _header, uint64_t _target, search_hook& _hook, unsigned _msPerBatch = 100);
void search(uint8_t const* _header, uint64_t _target, search_hook& _hook);
void hash_chunk(uint8_t* _ret, uint8_t const* _header, uint64_t _nonce, unsigned _count);
void search_chunk(uint8_t const*_header, uint64_t _target, search_hook& _hook);
@ -76,10 +78,16 @@ private:
cl::Buffer m_header;
cl::Buffer m_hashBuffer[c_bufferCount];
cl::Buffer m_searchBuffer[c_bufferCount];
unsigned m_workgroupSize;
unsigned m_batchSize = c_searchBatchSize;
unsigned m_globalWorkSize;
bool m_openclOnePointOne;
unsigned m_deviceBits;
/// The local work size for the search
static unsigned s_workgroupSize;
/// The initial global work size for the searches
static unsigned s_initialGlobalWorkSize;
/// The target milliseconds per batch for the search. If 0, then no adjustment will happen
static unsigned s_msPerBatch;
/// Allow CPU to appear as an OpenCL device or not. Default is false
static bool s_allowCPU;
/// GPU memory required for other things, like window rendering, etc.

30
libethcore/Ethash.cpp

@ -54,6 +54,9 @@ namespace dev
namespace eth
{
const unsigned Ethash::defaultLocalWorkSize = 64;
const unsigned Ethash::defaultGlobalWorkSizeMultiplier = 512; // * CL_DEFAULT_LOCAL_WORK_SIZE
const unsigned Ethash::defaultMSPerBatch = 100;
const Ethash::WorkPackage Ethash::NullWorkPackage = Ethash::WorkPackage();
std::string Ethash::name()
@ -373,7 +376,7 @@ void Ethash::GPUMiner::workLoop()
this_thread::sleep_for(chrono::milliseconds(500));
}
bytesConstRef dagData = dag->data();
m_miner->init(dagData.data(), dagData.size(), 32, s_platformId, device);
m_miner->init(dagData.data(), dagData.size(), s_platformId, device);
}
uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192);
@ -409,6 +412,9 @@ void Ethash::GPUMiner::listDevices()
}
bool Ethash::GPUMiner::configureGPU(
unsigned _localWorkSize,
unsigned _globalWorkSizeMultiplier,
unsigned _msPerBatch,
unsigned _platformId,
unsigned _deviceId,
bool _allowCPU,
@ -418,7 +424,27 @@ bool Ethash::GPUMiner::configureGPU(
{
s_platformId = _platformId;
s_deviceId = _deviceId;
return ethash_cl_miner::configureGPU(_platformId, _allowCPU, _extraGPUMemory, _currentBlock);
if (_localWorkSize != 32 && _localWorkSize != 64 && _localWorkSize != 128)
{
cout << "Given localWorkSize of " << toString(_localWorkSize) << "is invalid. Must be either 32,64, or 128" << endl;
return false;
}
if (!ethash_cl_miner::configureGPU(
_platformId,
_localWorkSize,
_globalWorkSizeMultiplier * _localWorkSize,
_msPerBatch,
_allowCPU,
_extraGPUMemory,
_currentBlock)
)
{
cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl;
return false;
}
return true;
}
#endif

11
libethcore/Ethash.h

@ -88,7 +88,7 @@ public:
static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); }
static std::string platformInfo();
static void listDevices() {}
static bool configureGPU(unsigned, unsigned, bool, unsigned, boost::optional<uint64_t>) { return false; }
static bool configureGPU(unsigned, unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional<uint64_t>) { return false; }
static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, std::thread::hardware_concurrency()); }
protected:
void kickOff() override
@ -118,6 +118,9 @@ public:
static unsigned getNumDevices();
static void listDevices();
static bool configureGPU(
unsigned _localWorkSize,
unsigned _globalWorkSizeMultiplier,
unsigned _msPerBatch,
unsigned _platformId,
unsigned _deviceId,
bool _allowCPU,
@ -147,6 +150,12 @@ public:
#else
using GPUMiner = CPUMiner;
#endif
/// Default value of the local work size. Also known as workgroup size.
static const unsigned defaultLocalWorkSize;
/// Default value of the global work size as a multiplier of the local work size
static const unsigned defaultGlobalWorkSizeMultiplier;
/// Default value of the milliseconds per global work size (per batch)
static const unsigned defaultMSPerBatch;
};
}

Loading…
Cancel
Save