Browse Source

amd_bitalign

cl-refactor
Genoil 9 years ago
parent
commit
9276235da6
  1. 15
      libethash-cl/ethash_cl_miner.cpp
  2. 28
      libethash-cl/ethash_cl_miner_kernel.cl

15
libethash-cl/ethash_cl_miner.cpp

@ -327,8 +327,19 @@ bool ethash_cl_miner::init(
// use selected platform
_platformId = min<unsigned>(_platformId, platforms.size() - 1);
ETHCL_LOG("Using platform: " << platforms[_platformId].getInfo<CL_PLATFORM_NAME>().c_str());
string platformName = platforms[_platformId].getInfo<CL_PLATFORM_NAME>();
ETHCL_LOG("Using platform: " << platformName.c_str());
int platformId = 0;
if (platformName == "NVIDIA CUDA")
{
platformId = 1;
}
else if (platformName == "AMD Accelerated Parallel Processing")
{
platformId = 2;
}
// get GPU device of the default platform
vector<cl::Device> devices = getDevices(platforms, _platformId);
if (devices.empty())
@ -367,6 +378,8 @@ bool ethash_cl_miner::init(
addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
addDefinition(code, "PLATFORM", platformId);
//debugf("%s", code.c_str());
// create miner OpenCL program

28
libethash-cl/ethash_cl_miner_kernel.cl

@ -36,6 +36,33 @@ __constant uint2 const Keccak_f1600_RC[24] = {
(uint2)(0x80008008, 0x80000000),
};
#if PLATFORM == 1 // CUDA
// ROL2, NVIDIA variant (compiled when PLATFORM == 1).
// Builds each 32-bit word of the result with one PTX shf.l.wrap.b32
// instruction, passing `offset` through unchanged as the shift amount.
//   a      - input value as a pair of 32-bit words (.x, .y)
//   offset - shift amount handed to the instruction
// NOTE(review): intended as a 64-bit rotate-left of the word pair; this relies
// on the .wrap mode reducing the shift amount modulo 32 - confirm against the
// PTX ISA documentation.
static uint2 ROL2(const uint2 a, const int offset) {
// When offset >= 32 the two input words trade roles as the instruction's
// source operands; pick them once instead of duplicating the asm per branch.
const unsigned int src_first = (offset >= 32) ? a.x : a.y;
const unsigned int src_second = (offset >= 32) ? a.y : a.x;
uint2 rotated;
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(rotated.x) : "r"(src_first), "r"(src_second), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(rotated.y) : "r"(src_second), "r"(src_first), "r"(offset));
return rotated;
}
#elif PLATFORM == 2 // APP
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
// ROL2, AMD variant (compiled when PLATFORM == 2).
// Rotates the value held in the word pair `vv` left by `r` bits using the
// amd_bitalign builtin from the cl_amd_media_ops extension.
//   vv - input value as a pair of 32-bit words (.x, .y)
//   r  - rotate amount; handled for 0..63
//        NOTE(review): values outside 0..63 are not normalized here - confirm
//        that no caller passes them.
// Returns the rotated word pair.
static uint2 ROL2(const uint2 vv, const int r)
{
// amd_bitalign only honours the low five bits of its shift operand, so for
// r == 0 the operand (32 - 0) would be treated as 0 and the call would return
// vv.yx (the two words swapped) rather than vv. Handle the identity rotation
// explicitly.
if (r == 0)
{
return vv;
}
if (r <= 32)
{
// r == 32 gives a shift operand of 0, which yields vv.yx: the desired
// word swap for a rotation by exactly 32.
return amd_bitalign((vv).xy, (vv).yx, 32 - r);
}
else
{
// 33..63: swap the operand roles and shift by the remaining 64 - r bits.
return amd_bitalign((vv).yx, (vv).xy, 64 - r);
}
}
#else
static uint2 ROL2(const uint2 v, const int n)
{
uint2 result;
@ -51,6 +78,7 @@ static uint2 ROL2(const uint2 v, const int n)
}
return result;
}
#endif
static void keccak_f1600_round(uint2* a, uint r, uint out_size)
{

Loading…
Cancel
Save