From 7bd0d4b381ae422bc80f355cb71ff4a667c5b864 Mon Sep 17 00:00:00 2001 From: Jan Willem Penterman Date: Wed, 2 Sep 2015 14:12:37 +0200 Subject: [PATCH] lop3 instructions work. added simulation mode for kernel testing. --- ethminer/MinerAux.h | 85 +++++++++++++++++++++++++++++++++++++++ libethash-cuda/keccak.cuh | 32 ++++++++------- 2 files changed, 103 insertions(+), 14 deletions(-) diff --git a/ethminer/MinerAux.h b/ethminer/MinerAux.h index 29a69b3ba..c604dbe0e 100644 --- a/ethminer/MinerAux.h +++ b/ethminer/MinerAux.h @@ -103,6 +103,7 @@ public: None, DAGInit, Benchmark, + Simulation, Farm }; @@ -318,6 +319,8 @@ public: } else if (arg == "-M" || arg == "--benchmark") mode = OperationMode::Benchmark; + else if (arg == "-S" || arg == "--simulation") + mode = OperationMode::Simulation; else if ((arg == "-t" || arg == "--mining-threads") && i + 1 < argc) { try @@ -404,6 +407,8 @@ public: doBenchmark(m_minerType, m_phoneHome, m_benchmarkWarmup, m_benchmarkTrial, m_benchmarkTrials); else if (mode == OperationMode::Farm) doFarm(m_minerType, m_farmURL, m_farmRecheckPeriod); + else if (mode == OperationMode::Simulation) + doSimulation(m_minerType); } static void streamHelp(ostream& _out) @@ -423,6 +428,8 @@ public: << " --benchmark-warmup Set the duration of warmup for the benchmark tests (default: 3)." << endl << " --benchmark-trial Set the duration for each trial for the benchmark tests (default: 3)." << endl << " --benchmark-trials Set the duration of warmup for the benchmark tests (default: 5)." << endl + << "Simulation mode:" << endl + << " -S,--simulation Mining test mode. Used to validate kernel optimizations." << endl #if ETH_JSONRPC || !ETH_TRUE << " --phone-home When benchmarking, publish results (default: on)" << endl #endif @@ -557,6 +564,84 @@ private: exit(0); } + void doSimulation(MinerType _m, int difficulty = 20) + { + Ethash::BlockHeader genesis; + genesis.setDifficulty(1 << 18); + cdebug << genesis.boundary(); + + GenericFarm f; + map::SealerDescriptor> sealers; + sealers["cpu"] = GenericFarm::SealerDescriptor{ &EthashCPUMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashCPUMiner(ci); } }; +#if ETH_ETHASHCL + sealers["opencl"] = GenericFarm::SealerDescriptor{ &EthashGPUMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashGPUMiner(ci); } }; +#endif +#if ETH_ETHASHCUDA + sealers["cuda"] = GenericFarm::SealerDescriptor{ &EthashCUDAMiner::instances, [](GenericMiner::ConstructionInfo ci){ return new EthashCUDAMiner(ci); } }; +#endif + f.setSealers(sealers); + + string platformInfo = _m == MinerType::CPU ? "CPU" : (_m == MinerType::CL ? "CL" : "CUDA"); + cout << "Benchmarking on platform: " << platformInfo << endl; + + cout << "Preparing DAG..." << endl; + genesis.prep(); + + genesis.setDifficulty(u256(1) << difficulty); + f.setWork(genesis); + + if (_m == MinerType::CPU) + f.start("cpu"); + else if (_m == MinerType::CL) + f.start("opencl"); + else if (_m == MinerType::CUDA) + f.start("cuda"); + + EthashAux::FullType dag; + + int time = 0; + + EthashProofOfWork::WorkPackage current = EthashProofOfWork::WorkPackage(genesis); + while (true) { + bool completed = false; + EthashProofOfWork::Solution solution; + f.onSolutionFound([&](EthashProofOfWork::Solution sol) + { + solution = sol; + return completed = true; + }); + for (unsigned i = 0; !completed; ++i) + { + cnote << "Mining on difficulty " << difficulty << " " << f.miningProgress(); + this_thread::sleep_for(chrono::milliseconds(1000)); + time++; + } + //cnote << "Solution found"; + cnote << "Difficulty:" << difficulty << " Nonce:" << solution.nonce.hex(); + //cnote << " Mixhash:" << solution.mixHash.hex(); + //cnote << " Header-hash:" << current.headerHash.hex(); + //cnote << " Seedhash:" << current.seedHash.hex(); + //cnote << " Target: " << h256(current.boundary).hex(); + //cnote << " Ethash: " << h256(EthashAux::eval(current.seedHash, current.headerHash, solution.nonce).value).hex(); + if (EthashAux::eval(current.seedHash, current.headerHash, solution.nonce).value < current.boundary) + { + cnote << "SUCCESS: GPU gave correct result!"; + } + else + cwarn << "FAILURE: GPU gave incorrect result!"; + + if (time < 12) + difficulty++; + else if (time > 18) + difficulty--; + time = 0; + genesis.setDifficulty(u256(1) << difficulty); + genesis.noteDirty(); + f.setWork(genesis); + current = EthashProofOfWork::WorkPackage(genesis); + } + } + void doFarm(MinerType _m, string const& _remote, unsigned _recheckPeriod) { map::SealerDescriptor> sealers; diff --git a/libethash-cuda/keccak.cuh b/libethash-cuda/keccak.cuh index 861a0200c..2b0187303 100644 --- a/libethash-cuda/keccak.cuh +++ b/libethash-cuda/keccak.cuh @@ -26,20 +26,24 @@ uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uin uint2 f = lop3xor(a,b,c); return lop3xor(d,e,f); } +#else +__device__ __forceinline__ +uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uint2 e) { + return a ^ b ^ c ^ d ^ e; +} +#endif +#if __CUDA_ARCH__ >= 500 __device__ __forceinline__ uint2 chi(const uint2 a, const uint2 b, const uint2 c) { uint2 result; - asm("lop3.b32 %0, %1, %2, %3, 0x82;" : "=r"(result.x) : "r"(a.x), "r"(b.x), "r"(c.x)); - asm("lop3.b32 %0, %1, %2, %3, 0x82;" : "=r"(result.y) : "r"(a.y), "r"(b.y), "r"(c.y)); + asm("lop3.b32 %0, %1, %2, %3, 0xd2;" : "=r"(result.x) : "r"(a.x), "r"(b.x), "r"(c.x)); + asm("lop3.b32 %0, %1, %2, %3, 0xd2;" : "=r"(result.y) : "r"(a.y), "r"(b.y), "r"(c.y)); return result; } #else -__device__ __forceinline__ -uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uint2 e) { - return a ^ b ^ c ^ d ^ e; -} + __device__ __forceinline__ uint2 chi(const uint2 a, const uint2 b, const uint2 c) { return a ^ (~b) & c; @@ -105,7 +109,7 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size) // squeeze this in here /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ u = s[0]; v = s[1]; - s[0] = chi(s[0], v, s[2]); + s[0] = chi(s[0], s[1], s[2]); /* iota: a[0,0] ^= round constant */ s[0] ^= vectorize(keccak_round_constants[i]); @@ -119,7 +123,7 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size) u = s[5]; v = s[6]; - s[5] = chi(s[5], v, s[7]); + s[5] = chi(s[5], s[6], s[7]); s[6] = chi(s[6], s[7], s[8]); s[7] = chi(s[7], s[8], s[9]); @@ -129,21 +133,21 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size) s[9] = chi(s[9], u, v); u = s[10]; v = s[11]; - s[10] = chi(s[10], v, s[12]); + s[10] = chi(s[10], s[11], s[12]); s[11] = chi(s[11], s[12], s[13]); s[12] = chi(s[12], s[13], s[14]); s[13] = chi(s[13], s[14], u); s[14] = chi(s[14], u, v); u = s[15]; v = s[16]; - s[15] = chi(s[15], v, s[17]); - s[16] = chi(s[16], s[12], s[18]); - s[17] = chi(s[17], s[13], s[19]); - s[18] = chi(s[18], s[14], u); + s[15] = chi(s[15], s[16], s[17]); + s[16] = chi(s[16], s[17], s[18]); + s[17] = chi(s[17], s[18], s[19]); + s[18] = chi(s[18], s[19], u); s[19] = chi(s[19], u, v); u = s[20]; v = s[21]; - s[20] = chi(s[20], v, s[22]); + s[20] = chi(s[20], s[21], s[22]); s[21] = chi(s[21], s[22], s[23]); s[22] = chi(s[22], s[23], s[24]); s[23] = chi(s[23], s[24], u);