Browse Source

CL Argument for local work size

- Now the user can also set the local work size (workgroup size)

- In addition the global work size is specified in the command line only
  as a multiplier of the local work size.
cl-refactor
Lefteris Karapetsas 10 years ago
parent
commit
db54ff3b4a
  1. 25
      ethminer/MinerAux.h
  2. 22
      libethash-cl/ethash_cl_miner.cpp
  3. 11
      libethash-cl/ethash_cl_miner.h
  4. 26
      libethcore/Ethash.cpp
  5. 5
      libethcore/Ethash.h

25
ethminer/MinerAux.h

@ -129,9 +129,18 @@ public:
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--cl-global-work-size" && i + 1 < argc)
else if (arg == "--cl-global-work" && i + 1 < argc)
try {
m_globalWorkSize = stol(argv[++i]);
m_globalWorkSizeMultiplier = stol(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
else if (arg == "--cl-local-work" && i + 1 < argc)
try {
m_localWorkSize = stol(argv[++i]);
}
catch (...)
{
@ -285,7 +294,8 @@ public:
else if (m_minerType == MinerType::GPU)
{
if (!ProofOfWork::GPUMiner::configureGPU(
m_globalWorkSize,
m_localWorkSize,
m_globalWorkSizeMultiplier,
m_msPerBatch,
m_openclPlatform,
m_openclDevice,
@ -293,10 +303,7 @@ public:
m_extraGPUMemory,
m_currentBlock
))
{
cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl;
exit(1);
}
ProofOfWork::GPUMiner::setNumInstances(m_miningThreads);
}
if (mode == OperationMode::DAGInit)
@ -339,7 +346,8 @@ public:
<< " --list-devices List the detected OpenCL devices and exit." << endl
<< " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl
<< " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." << endl
<< " --cl-global-work Set the OpenCL global work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE) << endl
<< " --cl-local-work Set the OpenCL local work size. Default is " << toString(CL_DEFAULT_LOCAL_WORK_SIZE) << endl
<< " --cl-global-work Set the OpenCL global work size as a multiple of the local work size. Default is " << toString(CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER) << " * " << toString(CL_DEFAULT_LOCAL_WORK_SIZE) << endl
<< " --cl-ms-per-batch Set the OpenCL target milliseconds per batch (global workgroup size). Default is " << toString(CL_DEFAULT_MS_PER_BATCH) << ". If 0 is given then no autoadjustment of global work size will happen" << endl
;
}
@ -529,7 +537,8 @@ private:
unsigned m_miningThreads = UINT_MAX;
bool m_shouldListDevices = false;
bool m_clAllowCPU = false;
unsigned m_globalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE;
unsigned m_globalWorkSizeMultiplier = CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER;
unsigned m_localWorkSize = CL_DEFAULT_LOCAL_WORK_SIZE;
unsigned m_msPerBatch = CL_DEFAULT_MS_PER_BATCH;
boost::optional<uint64_t> m_currentBlock;
// default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.)

22
libethash-cl/ethash_cl_miner.cpp

@ -140,6 +140,7 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId)
bool ethash_cl_miner::configureGPU(
unsigned _platformId,
unsigned _localWorkSize,
unsigned _globalWorkSize,
unsigned _msPerBatch,
bool _allowCPU,
@ -147,6 +148,7 @@ bool ethash_cl_miner::configureGPU(
boost::optional<uint64_t> _currentBlock
)
{
s_workgroupSize = _localWorkSize;
s_initialGlobalWorkSize = _globalWorkSize;
s_msPerBatch = _msPerBatch;
s_allowCPU = _allowCPU;
@ -180,7 +182,8 @@ bool ethash_cl_miner::configureGPU(
bool ethash_cl_miner::s_allowCPU = false;
unsigned ethash_cl_miner::s_extraRequiredGPUMem;
unsigned ethash_cl_miner::s_msPerBatch = CL_DEFAULT_MS_PER_BATCH;
unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE;
unsigned ethash_cl_miner::s_workgroupSize = CL_DEFAULT_LOCAL_WORK_SIZE;
unsigned ethash_cl_miner::s_initialGlobalWorkSize = CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER * CL_DEFAULT_LOCAL_WORK_SIZE;
bool ethash_cl_miner::searchForAllDevices(function<bool(cl::Device const&)> _callback)
{
@ -260,7 +263,6 @@ void ethash_cl_miner::finish()
bool ethash_cl_miner::init(
uint8_t const* _dag,
uint64_t _dagSize,
unsigned _workgroupSize,
unsigned _platformId,
unsigned _deviceId
)
@ -305,12 +307,10 @@ bool ethash_cl_miner::init(
m_context = cl::Context(vector<cl::Device>(&device, &device + 1));
m_queue = cl::CommandQueue(m_context, device);
// use requested workgroup size, but we require multiple of 8
m_workgroupSize = ((_workgroupSize + 7) / 8) * 8;
// make sure that global work size is evenly divisible by the local workgroup size
m_globalWorkSize = s_initialGlobalWorkSize;
if (m_globalWorkSize % m_workgroupSize != 0)
m_globalWorkSize = ((m_globalWorkSize / m_workgroupSize) + 1) * m_workgroupSize;
if (m_globalWorkSize % s_workgroupSize != 0)
m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize;
// remember the device's address bits
m_deviceBits = device.getInfo<CL_DEVICE_ADDRESS_BITS>();
@ -318,7 +318,7 @@ bool ethash_cl_miner::init(
// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
// into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
addDefinition(code, "GROUP_SIZE", m_workgroupSize);
addDefinition(code, "GROUP_SIZE", s_workgroupSize);
addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
@ -476,7 +476,7 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
m_searchKernel.setArg(6, start_nonce);
// execute it!
m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, m_workgroupSize);
m_queue.enqueueNDRangeKernel(m_searchKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize);
pending.push({ start_nonce, buf });
buf = (buf + 1) % c_bufferCount;
@ -519,15 +519,15 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
if (d > chrono::milliseconds(s_msPerBatch * 10 / 9))
{
// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, >> " << _msPerBatch << " ms." << endl;
m_globalWorkSize = max<unsigned>(128, m_globalWorkSize + m_workgroupSize);
m_globalWorkSize = max<unsigned>(128, m_globalWorkSize + s_workgroupSize);
// cerr << "New global work size" << m_globalWorkSize << endl;
}
else if (d < chrono::milliseconds(s_msPerBatch * 9 / 10))
{
// cerr << "Batch of " << m_globalWorkSize << " took " << chrono::duration_cast<chrono::milliseconds>(d).count() << " ms, << " << _msPerBatch << " ms." << endl;
m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize - m_workgroupSize);
m_globalWorkSize = min<unsigned>(pow(2, m_deviceBits) - 1, m_globalWorkSize - s_workgroupSize);
// Global work size should never be less than the workgroup size
m_globalWorkSize = max<unsigned>(m_workgroupSize, m_globalWorkSize);
m_globalWorkSize = max<unsigned>(s_workgroupSize, m_globalWorkSize);
// cerr << "New global work size" << m_globalWorkSize << endl;
}
}

11
libethash-cl/ethash_cl_miner.h

@ -17,7 +17,11 @@
#include <functional>
#include <libethash/ethash.h>
#define CL_DEFAULT_GLOBAL_WORK_SIZE 1024 * 16
/// Default value of the local work size. Also known as workgroup size.
#define CL_DEFAULT_LOCAL_WORK_SIZE 64
/// Default value of the global work size as a multiplier of the local work size
#define CL_DEFAULT_GLOBAL_WORK_SIZE_MULTIPLIER 512 // * CL_DEFAULT_LOCAL_WORK_SIZE
/// Default value of the milliseconds per global work size (per batch)
#define CL_DEFAULT_MS_PER_BATCH 100
class ethash_cl_miner
@ -48,6 +52,7 @@ public:
static void listDevices();
static bool configureGPU(
unsigned _platformId,
unsigned _localWorkSize,
unsigned _globalWorkSize,
unsigned _msPerBatch,
bool _allowCPU,
@ -58,7 +63,6 @@ public:
bool init(
uint8_t const* _dag,
uint64_t _dagSize,
unsigned _workgroupSize = 64,
unsigned _platformId = 0,
unsigned _deviceId = 0
);
@ -81,11 +85,12 @@ private:
cl::Buffer m_header;
cl::Buffer m_hashBuffer[c_bufferCount];
cl::Buffer m_searchBuffer[c_bufferCount];
unsigned m_workgroupSize;
unsigned m_globalWorkSize;
bool m_openclOnePointOne;
unsigned m_deviceBits;
/// The local work size for the search
static unsigned s_workgroupSize;
/// The initial global work size for the searches
static unsigned s_initialGlobalWorkSize;
/// The target milliseconds per batch for the search. If 0, then no adjustment will happen

26
libethcore/Ethash.cpp

@ -373,7 +373,7 @@ void Ethash::GPUMiner::workLoop()
this_thread::sleep_for(chrono::milliseconds(500));
}
bytesConstRef dagData = dag->data();
m_miner->init(dagData.data(), dagData.size(), 32, s_platformId, device);
m_miner->init(dagData.data(), dagData.size(), s_platformId, device);
}
uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192);
@ -409,7 +409,8 @@ void Ethash::GPUMiner::listDevices()
}
bool Ethash::GPUMiner::configureGPU(
unsigned _globalWorkSize,
unsigned _localWorkSize,
unsigned _globalWorkSizeMultiplier,
unsigned _msPerBatch,
unsigned _platformId,
unsigned _deviceId,
@ -420,7 +421,26 @@ bool Ethash::GPUMiner::configureGPU(
{
s_platformId = _platformId;
s_deviceId = _deviceId;
return ethash_cl_miner::configureGPU(_globalWorkSize, _msPerBatch, _allowCPU, _extraGPUMemory, _currentBlock);
if (_localWorkSize != 32 && _localWorkSize != 64 && _localWorkSize != 128)
{
cout << "Given localWorkSize of " << toString(_localWorkSize) << "is invalid. Must be either 32,64, or 128" << endl;
return false;
}
if (!ethash_cl_miner::configureGPU(
_localWorkSize,
_globalWorkSizeMultiplier * _localWorkSize,
_msPerBatch,
_allowCPU,
_extraGPUMemory,
_currentBlock)
)
{
cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl;
return false;
}
return true;
}
#endif

5
libethcore/Ethash.h

@ -88,7 +88,7 @@ public:
static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); }
static std::string platformInfo();
static void listDevices() {}
static bool configureGPU(unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional<uint64_t>) { return false; }
static bool configureGPU(unsigned, unsigned, unsigned, unsigned, unsigned, bool, unsigned, boost::optional<uint64_t>) { return false; }
static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, std::thread::hardware_concurrency()); }
protected:
void kickOff() override
@ -118,7 +118,8 @@ public:
static unsigned getNumDevices();
static void listDevices();
static bool configureGPU(
unsigned _globalWorkSize,
unsigned _localWorkSize,
unsigned _globalWorkSizeMultiplier,
unsigned _msPerBatch,
unsigned _platformId,
unsigned _deviceId,

Loading…
Cancel
Save