Browse Source

lop3 instructions work. added simulation mode for kernel testing.

cl-refactor
Jan Willem Penterman 10 years ago
parent
commit
7bd0d4b381
  1. 85
      ethminer/MinerAux.h
  2. 32
      libethash-cuda/keccak.cuh

85
ethminer/MinerAux.h

@ -103,6 +103,7 @@ public:
None,
DAGInit,
Benchmark,
Simulation,
Farm
};
@ -318,6 +319,8 @@ public:
}
else if (arg == "-M" || arg == "--benchmark")
mode = OperationMode::Benchmark;
else if (arg == "-S" || arg == "--simulation")
mode = OperationMode::Simulation;
else if ((arg == "-t" || arg == "--mining-threads") && i + 1 < argc)
{
try
@ -404,6 +407,8 @@ public:
doBenchmark(m_minerType, m_phoneHome, m_benchmarkWarmup, m_benchmarkTrial, m_benchmarkTrials);
else if (mode == OperationMode::Farm)
doFarm(m_minerType, m_farmURL, m_farmRecheckPeriod);
else if (mode == OperationMode::Simulation)
doSimulation(m_minerType);
}
static void streamHelp(ostream& _out)
@ -423,6 +428,8 @@ public:
<< " --benchmark-warmup <seconds> Set the duration of warmup for the benchmark tests (default: 3)." << endl
<< " --benchmark-trial <seconds> Set the duration for each trial for the benchmark tests (default: 3)." << endl
<< " --benchmark-trials <n> Set the duration of warmup for the benchmark tests (default: 5)." << endl
<< "Simulation mode:" << endl
<< " -S,--simulation Mining test mode. Used to validate kernel optimizations." << endl
#if ETH_JSONRPC || !ETH_TRUE
<< " --phone-home <on/off> When benchmarking, publish results (default: on)" << endl
#endif
@ -557,6 +564,84 @@ private:
exit(0);
}
void doSimulation(MinerType _m, int difficulty = 20)
{
Ethash::BlockHeader genesis;
genesis.setDifficulty(1 << 18);
cdebug << genesis.boundary();
GenericFarm<EthashProofOfWork> f;
map<string, GenericFarm<EthashProofOfWork>::SealerDescriptor> sealers;
sealers["cpu"] = GenericFarm<EthashProofOfWork>::SealerDescriptor{ &EthashCPUMiner::instances, [](GenericMiner<EthashProofOfWork>::ConstructionInfo ci){ return new EthashCPUMiner(ci); } };
#if ETH_ETHASHCL
sealers["opencl"] = GenericFarm<EthashProofOfWork>::SealerDescriptor{ &EthashGPUMiner::instances, [](GenericMiner<EthashProofOfWork>::ConstructionInfo ci){ return new EthashGPUMiner(ci); } };
#endif
#if ETH_ETHASHCUDA
sealers["cuda"] = GenericFarm<EthashProofOfWork>::SealerDescriptor{ &EthashCUDAMiner::instances, [](GenericMiner<EthashProofOfWork>::ConstructionInfo ci){ return new EthashCUDAMiner(ci); } };
#endif
f.setSealers(sealers);
string platformInfo = _m == MinerType::CPU ? "CPU" : (_m == MinerType::CL ? "CL" : "CUDA");
cout << "Benchmarking on platform: " << platformInfo << endl;
cout << "Preparing DAG..." << endl;
genesis.prep();
genesis.setDifficulty(u256(1) << difficulty);
f.setWork(genesis);
if (_m == MinerType::CPU)
f.start("cpu");
else if (_m == MinerType::CL)
f.start("opencl");
else if (_m == MinerType::CUDA)
f.start("cuda");
EthashAux::FullType dag;
int time = 0;
EthashProofOfWork::WorkPackage current = EthashProofOfWork::WorkPackage(genesis);
while (true) {
bool completed = false;
EthashProofOfWork::Solution solution;
f.onSolutionFound([&](EthashProofOfWork::Solution sol)
{
solution = sol;
return completed = true;
});
for (unsigned i = 0; !completed; ++i)
{
cnote << "Mining on difficulty " << difficulty << " " << f.miningProgress();
this_thread::sleep_for(chrono::milliseconds(1000));
time++;
}
//cnote << "Solution found";
cnote << "Difficulty:" << difficulty << " Nonce:" << solution.nonce.hex();
//cnote << " Mixhash:" << solution.mixHash.hex();
//cnote << " Header-hash:" << current.headerHash.hex();
//cnote << " Seedhash:" << current.seedHash.hex();
//cnote << " Target: " << h256(current.boundary).hex();
//cnote << " Ethash: " << h256(EthashAux::eval(current.seedHash, current.headerHash, solution.nonce).value).hex();
if (EthashAux::eval(current.seedHash, current.headerHash, solution.nonce).value < current.boundary)
{
cnote << "SUCCESS: GPU gave correct result!";
}
else
cwarn << "FAILURE: GPU gave incorrect result!";
if (time < 12)
difficulty++;
else if (time > 18)
difficulty--;
time = 0;
genesis.setDifficulty(u256(1) << difficulty);
genesis.noteDirty();
f.setWork(genesis);
current = EthashProofOfWork::WorkPackage(genesis);
}
}
void doFarm(MinerType _m, string const& _remote, unsigned _recheckPeriod)
{
map<string, GenericFarm<EthashProofOfWork>::SealerDescriptor> sealers;

32
libethash-cuda/keccak.cuh

@ -26,20 +26,24 @@ uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uin
uint2 f = lop3xor(a,b,c);
return lop3xor(d,e,f);
}
#else
__device__ __forceinline__
uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uint2 e) {
return a ^ b ^ c ^ d ^ e;
}
#endif
#if __CUDA_ARCH__ >= 500
__device__ __forceinline__
uint2 chi(const uint2 a, const uint2 b, const uint2 c) {
uint2 result;
asm("lop3.b32 %0, %1, %2, %3, 0x82;" : "=r"(result.x) : "r"(a.x), "r"(b.x), "r"(c.x));
asm("lop3.b32 %0, %1, %2, %3, 0x82;" : "=r"(result.y) : "r"(a.y), "r"(b.y), "r"(c.y));
asm("lop3.b32 %0, %1, %2, %3, 0xd2;" : "=r"(result.x) : "r"(a.x), "r"(b.x), "r"(c.x));
asm("lop3.b32 %0, %1, %2, %3, 0xd2;" : "=r"(result.y) : "r"(a.y), "r"(b.y), "r"(c.y));
return result;
}
#else
__device__ __forceinline__
uint2 xor5(const uint2 a, const uint2 b, const uint2 c, const uint2 d, const uint2 e) {
return a ^ b ^ c ^ d ^ e;
}
__device__ __forceinline__
uint2 chi(const uint2 a, const uint2 b, const uint2 c) {
return a ^ (~b) & c;
@ -105,7 +109,7 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size)
// squeeze this in here
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], v, s[2]);
s[0] = chi(s[0], s[1], s[2]);
/* iota: a[0,0] ^= round constant */
s[0] ^= vectorize(keccak_round_constants[i]);
@ -119,7 +123,7 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size)
u = s[5]; v = s[6];
s[5] = chi(s[5], v, s[7]);
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
@ -129,21 +133,21 @@ __device__ __forceinline__ void keccak_f1600_block(uint2* s, uint32_t out_size)
s[9] = chi(s[9], u, v);
u = s[10]; v = s[11];
s[10] = chi(s[10], v, s[12]);
s[10] = chi(s[10], s[11], s[12]);
s[11] = chi(s[11], s[12], s[13]);
s[12] = chi(s[12], s[13], s[14]);
s[13] = chi(s[13], s[14], u);
s[14] = chi(s[14], u, v);
u = s[15]; v = s[16];
s[15] = chi(s[15], v, s[17]);
s[16] = chi(s[16], s[12], s[18]);
s[17] = chi(s[17], s[13], s[19]);
s[18] = chi(s[18], s[14], u);
s[15] = chi(s[15], s[16], s[17]);
s[16] = chi(s[16], s[17], s[18]);
s[17] = chi(s[17], s[18], s[19]);
s[18] = chi(s[18], s[19], u);
s[19] = chi(s[19], u, v);
u = s[20]; v = s[21];
s[20] = chi(s[20], v, s[22]);
s[20] = chi(s[20], s[21], s[22]);
s[21] = chi(s[21], s[22], s[23]);
s[22] = chi(s[22], s[23], s[24]);
s[23] = chi(s[23], s[24], u);

Loading…
Cancel
Save