// REVIEW NOTES. These lines sit above the first `diff --git` header; the patch
// text that follows them is unchanged.
//
// Scope: a unified diff against two files
//   libethash-cl/ethash_cl_miner.cpp        - two hunks inside ethash_cl_miner::init
//   libethash-cl/ethash_cl_miner_kernel.cl  - the ROL2 helper that precedes keccak_f1600_round
//
// Host side (ethash_cl_miner::init):
//   * The selected platform's name is stored in a local `platformName` string and
//     ETHCL_LOG prints that string instead of querying the platform inline.
//   * A new local `platformId` is set by exact string comparison on that name:
//       "NVIDIA CUDA"                          -> 1
//       "AMD Accelerated Parallel Processing"  -> 2
//       any other name                         -> 0
//     `platformId` is a separate variable from the existing `_platformId`, which is
//     the index used to pick an entry of `platforms`.
//   * `platformId` is handed to addDefinition(code, "PLATFORM", ...) right after the
//     existing DAG_SIZE / ACCESSES / MAX_OUTPUTS definitions, which is what lets the
//     kernel source test PLATFORM with #if.
//
// Kernel side (ROL2; presumably a 64-bit rotate-left on a uint2 pair - the callers
// are not visible in this patch):
//   * PLATFORM == 1: ROL2 is built from inline-asm `shf.l.wrap.b32` statements. The
//     order in which a.x / a.y are passed is swapped between the `offset >= 32`
//     branch and the else branch; `offset` itself is passed through unmodified.
//   * PLATFORM == 2: enables the cl_amd_media_ops extension and builds ROL2 from
//     amd_bitalign, using (xy, yx, 32 - r) when r <= 32 and (yx, xy, 64 - r) otherwise.
//   * any other value: the ROL2 that was already in the file is kept and is now
//     enclosed by the added #else ... #endif.
//
// NOTE(review): this copy of the patch is collapsed onto two physical lines, so the
//   leading +/- markers appear inline and the @@ line counts cannot be checked here.
// NOTE(review): `getInfo()` and `vector devices` carry no template arguments in this
//   copy; that looks like angle-bracket text lost in extraction rather than part of
//   the change - confirm against the upstream commit before applying.
// NOTE(review): the asm path is selected for every device on the "NVIDIA CUDA"
//   platform; presumably `shf.l.wrap.b32` is not accepted on all targets - verify
//   the minimum supported device against the PTX ISA notes for `shf`.
// NOTE(review): for r == 0 the AMD path passes 32 as the shift operand; if
//   amd_bitalign only honours the low 5 bits of that operand the result would be
//   the swapped halves - looks safe only if ROL2 is never called with 0; confirm.
diff --git a/libethash-cl/ethash_cl_miner.cpp b/libethash-cl/ethash_cl_miner.cpp index 226d019ec..44ae5efe7 100644 --- a/libethash-cl/ethash_cl_miner.cpp +++ b/libethash-cl/ethash_cl_miner.cpp @@ -327,8 +327,19 @@ bool ethash_cl_miner::init( // use selected platform _platformId = min(_platformId, platforms.size() - 1); - ETHCL_LOG("Using platform: " << platforms[_platformId].getInfo().c_str()); + string platformName = platforms[_platformId].getInfo(); + ETHCL_LOG("Using platform: " << platformName.c_str()); + + int platformId = 0; + if (platformName == "NVIDIA CUDA") + { + platformId = 1; + } + else if (platformName == "AMD Accelerated Parallel Processing") + { + platformId = 2; + } // get GPU device of the default platform vector devices = getDevices(platforms, _platformId); if (devices.empty()) @@ -367,6 +378,8 @@ bool ethash_cl_miner::init( addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES)); addDefinition(code, "ACCESSES", ETHASH_ACCESSES); addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults); + addDefinition(code, "PLATFORM", platformId); + //debugf("%s", code.c_str()); // create miner OpenCL program diff --git a/libethash-cl/ethash_cl_miner_kernel.cl b/libethash-cl/ethash_cl_miner_kernel.cl index b513d0c5a..c1eed75c6 100644 --- a/libethash-cl/ethash_cl_miner_kernel.cl +++ b/libethash-cl/ethash_cl_miner_kernel.cl @@ -36,6 +36,33 @@ __constant uint2 const Keccak_f1600_RC[24] = { (uint2)(0x80008008, 0x80000000), }; +#if PLATFORM == 1 // CUDA +static uint2 ROL2(const uint2 a, const int offset) { + uint2 result; + if (offset >= 32) { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); + } + else { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); + } + return result; +} +#elif 
PLATFORM == 2 // APP +#pragma OPENCL EXTENSION cl_amd_media_ops : enable +static uint2 ROL2(const uint2 vv, const int r) +{ + if (r <= 32) + { + return amd_bitalign((vv).xy, (vv).yx, 32 - r); + } + else + { + return amd_bitalign((vv).yx, (vv).xy, 64 - r); + } +} +#else static uint2 ROL2(const uint2 v, const int n) { uint2 result; @@ -51,6 +78,7 @@ static uint2 ROL2(const uint2 v, const int n) } return result; } +#endif static void keccak_f1600_round(uint2* a, uint r, uint out_size) {