diff --git a/CMakeLists.txt b/CMakeLists.txt index e87d6ac18..4dbca5af7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 2.8.12) set(PROJECT_VERSION "0.9.41") -set(GENOIL_VERSION "1.0.6") +set(GENOIL_VERSION "1.0.7") if (${CMAKE_VERSION} VERSION_GREATER 3.0) cmake_policy(SET CMP0042 OLD) # fix MACOSX_RPATH cmake_policy(SET CMP0048 NEW) # allow VERSION argument in project() diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index 58cca14e1..a5b97e78e 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -835,7 +835,7 @@ private: f.start("cuda"); EthashProofOfWork::WorkPackage current; EthashAux::FullType dag; - while (true) + while (m_running) try { bool completed = false; @@ -909,13 +909,20 @@ private: } catch (jsonrpc::JsonRpcException&) { - for (auto i = 3; --i; this_thread::sleep_for(chrono::seconds(1))) - cerr << "JSON-RPC problem. Probably couldn't connect. Retrying in " << i << "... \r"; - cerr << endl; + if (m_maxFarmRetries > 0) + { + for (auto i = 3; --i; this_thread::sleep_for(chrono::seconds(1))) + cerr << "JSON-RPC problem. Probably couldn't connect. Retrying in " << i << "... \r"; + cerr << endl; + } + else + { + cerr << "JSON-RPC problem. Probably couldn't connect." << endl; + } if (m_farmFailOverURL != "") { m_farmRetries++; - if (m_farmRetries == m_maxFarmRetries) + if (m_farmRetries >= m_maxFarmRetries) { if (_remote == m_farmURL) { _remote = m_farmFailOverURL; @@ -927,6 +934,10 @@ private: } m_farmRetries = 0; } + if (_remote == "exit") + { + m_running = false; + } } } #endif @@ -976,6 +987,7 @@ private: DAGEraseMode m_eraseMode = DAGEraseMode::None; /// Mining options + bool m_running = true; MinerType m_minerType = MinerType::CPU; unsigned m_openclPlatform = 0; unsigned m_openclDevice = 0; diff --git a/libethash-cl/ethash_cl_miner_kernel.cl b/libethash-cl/ethash_cl_miner_kernel.cl index 2bb9a1c59..29ae6a487 100644 --- a/libethash-cl/ethash_cl_miner_kernel.cl +++ b/libethash-cl/ethash_cl_miner_kernel.cl @@ -1,6 +1,6 @@ #define OPENCL_PLATFORM_UNKNOWN 0 #define OPENCL_PLATFORM_NVIDIA 1 -#define OPENCL_PLATFORM_AMD 2 +#define OPENCL_PLATFORM_AMD 2 #define THREADS_PER_HASH (128 / 16) @@ -79,10 +79,19 @@ static uint2 ROL2(const uint2 v, const int n) } #endif +static void chi(uint2 * a, const uint n, const uint2 * t) +{ + a[n+0] = bitselect(t[n + 0] ^ t[n + 2], t[n + 0], t[n + 1]); + a[n+1] = bitselect(t[n + 1] ^ t[n + 3], t[n + 1], t[n + 2]); + a[n+2] = bitselect(t[n + 2] ^ t[n + 4], t[n + 2], t[n + 3]); + a[n+3] = bitselect(t[n + 3] ^ t[n + 0], t[n + 3], t[n + 4]); + a[n+4] = bitselect(t[n + 4] ^ t[n + 1], t[n + 4], t[n + 0]); +} + static void keccak_f1600_round(uint2* a, uint r) { uint2 t[25]; - uint2 u, v; + uint2 u; // Theta t[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; @@ -122,66 +131,47 @@ static void keccak_f1600_round(uint2* a, uint r) a[24] ^= u; // Rho Pi - u = a[1]; - t[0] = a[0]; - t[1] = ROL2(a[6], 44); - t[6] = ROL2(a[9], 20); - t[9] = ROL2(a[22], 61); - t[22] = ROL2(a[14], 39); - t[14] = ROL2(a[20], 18); + + t[0] = a[0]; + t[10] = ROL2(a[1], 1); t[20] = ROL2(a[2], 62); - t[2] = ROL2(a[12], 43); - t[12] = ROL2(a[13], 25); - t[13] = ROL2(a[19], 8); - t[19] = ROL2(a[23], 56); - t[23] = ROL2(a[15], 41); + t[5] = ROL2(a[3], 28); t[15] = ROL2(a[4], 27); - t[4] = ROL2(a[24], 14); - t[24] = ROL2(a[21], 2); - t[21] = ROL2(a[8], 55); - t[8] = ROL2(a[16], 45); + t[16] = ROL2(a[5], 36); - t[5] = ROL2(a[3], 28); - t[3] = ROL2(a[18], 21); - t[18] = ROL2(a[17], 15); + t[1] = ROL2(a[6], 44); + t[11] = ROL2(a[7], 6); + t[21] = ROL2(a[8], 55); + t[6] = ROL2(a[9], 20); + + t[7] = ROL2(a[10], 3); t[17] = ROL2(a[11], 10); - t[11] = ROL2(a[7], 6); - t[7] = ROL2(a[10], 3); - t[10] = ROL2(u, 1); + t[2] = ROL2(a[12], 43); + t[12] = ROL2(a[13], 25); + t[22] = ROL2(a[14], 39); + + t[23] = ROL2(a[15], 41); + t[8] = ROL2(a[16], 45); + t[18] = ROL2(a[17], 15); + t[3] = ROL2(a[18], 21); + t[13] = ROL2(a[19], 8); + + t[14] = ROL2(a[20], 18); + t[24] = ROL2(a[21], 2); + t[9] = ROL2(a[22], 61); + t[19] = ROL2(a[23], 56); + t[4] = ROL2(a[24], 14); // Chi - a[0] = bitselect(t[0] ^ t[2], t[0], t[1]); - a[1] = bitselect(t[1] ^ t[3], t[1], t[2]); - a[2] = bitselect(t[2] ^ t[4], t[2], t[3]); - a[3] = bitselect(t[3] ^ t[0], t[3], t[4]); - a[4] = bitselect(t[4] ^ t[1], t[4], t[0]); + chi(a, 0, t); // Iota a[0] ^= Keccak_f1600_RC[r]; - a[5] = bitselect(t[5] ^ t[7], t[5], t[6]); - a[6] = bitselect(t[6] ^ t[8], t[6], t[7]); - a[7] = bitselect(t[7] ^ t[9], t[7], t[8]); - a[8] = bitselect(t[8] ^ t[5], t[8], t[9]); - a[9] = bitselect(t[9] ^ t[6], t[9], t[5]); - - a[10] = bitselect(t[10] ^ t[12], t[10], t[11]); - a[11] = bitselect(t[11] ^ t[13], t[11], t[12]); - a[12] = bitselect(t[12] ^ t[14], t[12], t[13]); - a[13] = bitselect(t[13] ^ t[10], t[13], t[14]); - a[14] = bitselect(t[14] ^ t[11], t[14], t[10]); - - a[15] = bitselect(t[15] ^ t[17], t[15], t[16]); - a[16] = bitselect(t[16] ^ t[18], t[16], t[17]); - a[17] = bitselect(t[17] ^ t[19], t[17], t[18]); - a[18] = bitselect(t[18] ^ t[15], t[18], t[19]); - a[19] = bitselect(t[19] ^ t[16], t[19], t[15]); - - a[20] = bitselect(t[20] ^ t[22], t[20], t[21]); - a[21] = bitselect(t[21] ^ t[23], t[21], t[22]); - a[22] = bitselect(t[22] ^ t[24], t[22], t[23]); - a[23] = bitselect(t[23] ^ t[20], t[23], t[24]); - a[24] = bitselect(t[24] ^ t[21], t[24], t[20]); + chi(a, 5, t); + chi(a, 10, t); + chi(a, 15, t); + chi(a, 20, t); } static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate) @@ -192,9 +182,9 @@ static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate) // better with surrounding code, however I haven't done this // without causing the AMD compiler to blow up the VGPR usage. - uint r = 0; - uint o = 25; - do + + //uint o = 25; + for (uint r = 0; r < 24;) { // This dynamic branch stops the AMD compiler unrolling the loop // and additionally saves about 33% of the VGPRs, enough to gain another @@ -206,10 +196,10 @@ static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate) if (isolate) { keccak_f1600_round(a, r++); - if (r == 23) o = out_size; + //if (r == 23) o = out_size; } } - while (r < 24); + // final round optimised for digest size //keccak_f1600_round(a, 23, out_size); diff --git a/libethcore/EthashCUDAMiner.cpp b/libethcore/EthashCUDAMiner.cpp index 65777efca..3438d1d10 100644 --- a/libethcore/EthashCUDAMiner.cpp +++ b/libethcore/EthashCUDAMiner.cpp @@ -219,11 +219,13 @@ bool EthashCUDAMiner::configureGPU( uint64_t _currentBlock ) { + if (_blockSize != 32 && _blockSize != 64 && _blockSize != 128) { cout << "Given localWorkSize of " << toString(_blockSize) << "is invalid. Must be either 32,64 or 128" << endl; return false; } + if (!ethash_cuda_miner::configureGPU( s_devices, _blockSize,