Browse Source

Merge branch '117'

cl-refactor
Genoil 8 years ago
parent
commit
690a0f75b8
  1. 6
      CMakeLists.txt
  2. 240
      CodingStandards.txt
  3. 38
      EthereumMacOSXBundleInfo.plist.in
  4. 13
      appdmg.json.in
  5. 11
      astylerc
  6. 0
      cmake/scripts/macdeployfix.sh
  7. 351
      ethminer/MinerAux.h
  8. 2
      ethminer/main.cpp
  9. 1
      extdep/getstuff.bat
  10. BIN
      install-folder-bg.png
  11. BIN
      install-folder-bg@2x.png
  12. 57
      libethash-cl/ethash_cl_miner.cpp
  13. 15
      libethash-cl/ethash_cl_miner.h
  14. 75
      libethash-cl/ethash_cl_miner_kernel.cl
  15. 2
      libethash-cuda/CMakeLists.txt
  16. 3
      libethash-cuda/cuda_helper.h
  17. 71
      libethash-cuda/dagger_shared.cuh
  18. 16
      libethash-cuda/dagger_shuffled.cuh
  19. 88
      libethash-cuda/ethash_cuda_miner.cpp
  20. 13
      libethash-cuda/ethash_cuda_miner.h
  21. 193
      libethash-cuda/ethash_cuda_miner_kernel.cu
  22. 26
      libethash-cuda/ethash_cuda_miner_kernel.h
  23. 4
      libethash-cuda/ethash_cuda_miner_kernel_globals.h
  24. 4
      libethash-cuda/fnv.cuh
  25. 186
      libethash-cuda/keccak.cuh
  26. 777
      libethash-cuda/keccak_u64.cuh
  27. 2
      libethcore/EthashAux.cpp
  28. 9
      libethcore/EthashAux.h
  29. 83
      libethcore/EthashCUDAMiner.cpp
  30. 5
      libethcore/EthashCUDAMiner.h
  31. 45
      libethcore/EthashGPUMiner.cpp
  32. 5
      libethcore/EthashGPUMiner.h
  33. 2
      libethcore/EthashSealEngine.cpp
  34. 23
      libethcore/Farm.h
  35. 19
      libethcore/Miner.cpp
  36. 31
      libethcore/Miner.h
  37. 301
      libstratum/EthStratumClient.cpp
  38. 22
      libstratum/EthStratumClient.h
  39. 515
      libstratum/EthStratumClientV2.cpp
  40. 95
      libstratum/EthStratumClientV2.h
  41. 19
      package.sh
  42. BIN
      releases/ethminer-0.9.41-genoil-1.0.1.zip
  43. BIN
      releases/ethminer-0.9.41-genoil-1.0.2.zip
  44. BIN
      releases/ethminer-0.9.41-genoil-1.0.3.zip
  45. BIN
      releases/ethminer-0.9.41-genoil-1.0.4b3.zip
  46. BIN
      releases/ethminer-0.9.41-genoil-1.0.5.zip
  47. BIN
      releases/ethminer-0.9.41-genoil-1.0.6.zip
  48. BIN
      releases/ethminer-0.9.41-genoil-1.0.7.zip
  49. BIN
      releases/ethminer-0.9.41-genoil-1.1.6.zip
  50. BIN
      releases/ethminer-0.9.41-genoil-1.1.7.zip

6
CMakeLists.txt

@ -2,7 +2,7 @@
cmake_minimum_required(VERSION 2.8.12)
set(PROJECT_VERSION "0.9.41")
set(GENOIL_VERSION "1.0.8")
set(GENOIL_VERSION "1.1.7")
if (${CMAKE_VERSION} VERSION_GREATER 3.0)
cmake_policy(SET CMP0042 OLD) # fix MACOSX_RPATH
cmake_policy(SET CMP0048 NEW) # allow VERSION argument in project()
@ -266,11 +266,13 @@ message("-- MINER Build miner ${MINER}")
message("-- GUI Build GUI components ${GUI}")
message("-- ETHASHCL Build OpenCL components ${ETHASHCL}")
message("-- ETHASHCUDA Build CUDA components ${ETHASHCUDA}")
message("-- ETHSTRATUM Build Stratum components (experimental) ${ETHSTRATUM}")
message("-- ETHSTRATUM Build Stratum components ${ETHSTRATUM}")
message("------------------------------------------------------------------------")
message("")
if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC")
set(CMAKE_THREAD_LIBS_INIT pthread)
endif()
include(EthCompilerSettings)
message("-- CXXFLAGS: ${CMAKE_CXX_FLAGS}")

240
CodingStandards.txt

@ -1,240 +0,0 @@
0. Formatting
GOLDEN RULE: Never *ever* use spaces for formatting.
a. Use tabs for indentation!
- tab stops are every 4 characters.
- One indentation level -> exactly one byte (i.e. a tab character) in the source file.
- Never use spaces to line up sequential lines: If you have run-on lines, indent as you would for a block.
b. Line widths:
- Don't worry about having lines of code > 80-char wide.
- Lines of comments should be formatted according to ease of viewing, but simplicity is to be prefered over beauty.
c. Don't use braces for condition-body one-liners.
d. Never place condition bodies on same line as condition.
e. Space between first paren and keyword, but *not* following first paren or preceeding final paren.
f. No spaces when fewer than intra-expression three parens together; when three or more, space according to clarity.
g. No spaces for subscripting or unary operators.
h. No space before ':' but one after it, except in the ternary operator: one on both sides.
i. Space all other operators.
j. Braces, when used, always have their own lines and are at same indentation level as "parent" scope.
(WRONG)
if( a==b[ i ] ) { printf ("Hello\n"); }
foo->bar(someLongVariableName,
anotherLongVariableName,
anotherLongVariableName,
anotherLongVariableName,
anotherLongVariableName);
(RIGHT)
if (a == b[i])
printf("Hello\n"); // NOTE spaces used instead of tab here for clarity - first byte should be '\t'.
foo->bar(
someLongVariableName,
anotherLongVariableName,
anotherLongVariableName,
anotherLongVariableName,
anotherLongVariableName
);
1. Namespaces;
a. No "using namespace" declarations in header files.
b. All symbols should be declared in a namespace except for final applications.
c. Preprocessor symbols should be prefixed with the namespace in all-caps and an underscore.
(WRONG)
#include <cassert>
using namespace std;
tuple<float, float> meanAndSigma(vector<float> const& _v);
(CORRECT)
#include <cassert>
std::tuple<float, float> meanAndSigma(std::vector<float> const& _v);
2. Preprocessor;
a. File comment is always at top, and includes:
- Original author, date.
- Later maintainers (not contributors - they can be seen through VCS log).
- Copyright.
- License (e.g. see COPYING).
b. Never use #ifdef/#define/#endif file guards. Prefer #pragma once as first line below file comment.
c. Prefer static const variable to value macros.
d. Prefer inline constexpr functions to function macros.
e. Split complex macro on multiple lines with '\'.
3. Capitalization;
GOLDEN RULE: Preprocessor: ALL_CAPS; C++: camelCase.
a. Use camelCase for splitting words in names, except where obviously extending STL/boost functionality in which case follow those naming conventions.
b. The following entities' first alpha is upper case:
- Type names.
- Template parameters.
- Enum members.
- static const variables that form an external API.
c. All preprocessor symbols (macros, macro argments) in full uppercase with underscore word separation.
All other entities' first alpha is lower case.
4. Variable prefixes:
a. Leading underscore "_" to parameter names (both normal and template).
- Exception: "o_parameterName" when it is used exclusively for output. See 6(f).
- Exception: "io_parameterName" when it is used for both input and output. See 6(f).
b. Leading "c_" to const variables (unless part of an external API).
c. Leading "g_" to global (non-const) variables.
d. Leading "s_" to static (non-const, non-global) variables.
5. Error reporting:
- Prefer exception to bool/int return type.
6. Declarations:
a. {Typename} + {qualifiers} + {name}.
b. Only one per line.
c. Associate */& with type, not variable (at ends with parser, but more readable, and safe if in conjunction with (b)).
d. Favour declarations close to use; don't habitually declare at top of scope ala C.
e. Always pass non-trivial parameters with a const& suffix.
f. If a function returns multiple values, use std::tuple (std::pair acceptable). Prefer not using */& arguments, except where efficiency requires.
g. Never use a macro where adequate non-preprocessor C++ can be written.
h. Make use of auto whenever type is clear or unimportant:
- Always avoid doubly-stating the type.
- Use to avoid vast and unimportant type declarations.
- However, avoid using auto where type is not immediately obvious from the context, and especially not for arithmetic expressions.
i. Don't pass bools: prefer enumerations instead.
j. Prefer enum class to straight enum.
(WRONG)
const double d = 0;
int i, j;
char *s;
float meanAndSigma(std::vector<float> _v, float* _sigma, bool _approximate);
Derived* x(dynamic_cast<Derived*>(base));
for (map<ComplexTypeOne, ComplexTypeTwo>::iterator i = l.begin(); i != l.end(); ++l) {}
(CORRECT)
enum class Accuracy
{
Approximate,
Exact
};
double const d = 0;
int i;
int j;
char* s;
std::tuple<float, float> meanAndSigma(std::vector<float> const& _v, Accuracy _a);
auto x = dynamic_cast<Derived*>(base);
for (auto i = x.begin(); i != x.end(); ++i) {}
7. Structs & classes
a. Structs to be used when all members public and no virtual functions.
- In this case, members should be named naturally and not prefixed with 'm_'
b. Classes to be used in all other circumstances.
8. Members:
a. One member per line only.
b. Private, non-static, non-const fields prefixed with m_.
c. Avoid public fields, except in structs.
d. Use override, final and const as much as possible.
e. No implementations with the class declaration, except:
- template or force-inline method (though prefer implementation at bottom of header file).
- one-line implementation (in which case include it in same line as declaration).
f. For a property 'foo'
- Member: m_foo;
- Getter: foo() [ also: for booleans, isFoo() ];
- Setter: setFoo();
9. Naming
a. Collection conventions:
- -s means std::vector e.g. using MyTypes = std::vector<MyType>
- -Set means std::set e.g. using MyTypeSet = std::set<MyType>
- -Hash means std::unordered_set e.g. using MyTypeHash = std::unordered_set<MyType>
b. Class conventions:
- -Face means the interface of some shared concept. (e.g. FooFace might be a pure virtual class.)
c. Avoid unpronouncable names;
- If you need to shorten a name favour a pronouncable slice of the original to a scatterred set of consonants.
- e.g. Manager shortens to Man rather than Mgr.
d. Avoid prefixes of initials (e.g. DON'T use IMyInterface, CMyImplementation)
e. Find short, memorable & (at least semi-) descriptive names for commonly used classes or name-fragments.
- A dictionary and thesaurus are your friends.
- Spell correctly.
- Think carefully about the class's purpose.
- Imagine it as an isolated component to try to decontextualise it when considering its name.
- Don't be trapped into naming it (purely) in terms of its implementation.
10. Type-definitions
a. Prefer 'using' to 'typedef'. e.g. using ints = std::vector<int>; rather than typedef std::vector<int> ints;
b. Generally avoid shortening a standard form that already includes all important information:
- e.g. stick to shared_ptr<X> rather than shortening to ptr<X>.
c. Where there are exceptions to this (due to excessive use and clear meaning), note the change prominently and use it consistently.
- e.g. using Guard = std::lock_guard<std::mutex>; ///< Guard is used throughout the codebase since it's clear in meaning and used commonly.
d. In general expressions should be roughly as important/semantically meaningful as the space they occupy.
11. Commenting
a. Comments should be doxygen-compilable, using @notation rather than \notation.
b. Document the interface, not the implementation.
- Documentation should be able to remain completely unchanged, even if the method is reimplemented.
- Comment in terms of the method properties and intended alteration to class state (or what aspects of the state it reports).
- Be careful to scrutinise documentation that extends only to intended purpose and usage.
- Reject documentation that is simply an English transaction of the implementation.
12. Include Headers
a. Includes should go in order of lower level (STL -> boost -> libdevcore -> libdevcrypto -> libethcore -> libethereum) to higher level. Lower levels are basically dependencies to the higher levels. For example:
#include <string>
#include <boost/filesystem.hpp>
#include <libdevcore/Common.h>
#include <libdevcore/CommonData.h>
#include <libdevcore/Exceptions.h>
#include <libdevcore/Log.h>
#include <libdevcrypto/SHA3.h>
#include <libethereum/Defaults.h>
b. The only exception to the above rule is the top of a .cpp file where its corresponding header should be located.
13. Logging
Logging should be performed at appropriate verbosities depending on the logging message.
The more likely a message is to repeat (and thus cuase noise) the higher in verbosity it should be.
Some rules to keep in mind:
- Verbosity == 0 -> Reserved for important stuff that users must see and can understand.
- Verbosity == 1 -> Reserved for stuff that users don't need to see but can understand.
- Verbosity >= 2 -> Anything that is or might be displayed more than once every minute
- Verbosity >= 3 -> Anything that only a developer would understand
- Verbosity >= 4 -> Anything that is low-level (e.g. peer disconnects, timers being cancelled)

38
EthereumMacOSXBundleInfo.plist.in

@ -1,38 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleExecutable</key>
<string>${MACOSX_BUNDLE_EXECUTABLE_NAME}</string>
<key>CFBundleGetInfoString</key>
<string>${MACOSX_BUNDLE_INFO_STRING}</string>
<key>CFBundleIconFile</key>
<string>${MACOSX_BUNDLE_ICON_FILE}</string>
<key>CFBundleIdentifier</key>
<string>${MACOSX_BUNDLE_GUI_IDENTIFIER}</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleLongVersionString</key>
<string>${MACOSX_BUNDLE_LONG_VERSION_STRING}</string>
<key>CFBundleName</key>
<string>${MACOSX_BUNDLE_BUNDLE_NAME}</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>${MACOSX_BUNDLE_SHORT_VERSION_STRING}</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleVersion</key>
<string>${MACOSX_BUNDLE_BUNDLE_VERSION}</string>
<key>CSResourcesFileMapped</key>
<true/>
<key>LSRequiresCarbon</key>
<true/>
<key>NSHumanReadableCopyright</key>
<string>${MACOSX_BUNDLE_COPYRIGHT}</string>
<key>NSHighResolutionCapable</key>
<true/>
</dict>
</plist>

13
appdmg.json.in

@ -1,13 +0,0 @@
{
"title": "Ethereum",
"icon": "appdmg_icon.icns",
"background": "appdmg_background.png",
"icon-size": 55,
"contents": [
{ "x": 242, "y": 240, "type": "link", "path": "/Applications" },
{ "x": 145, "y": 125, "type": "file", "path": "${ETH_ALETHZERO_APP}" },
{ "x": 339, "y": 125, "type": "file", "path": "${ETH_MIX_APP}" }
]
}

11
astylerc

@ -1,11 +0,0 @@
style=allman
indent=force-tab=4
convert-tabs
indent-preprocessor
min-conditional-indent=1
pad-oper
pad-header
unpad-paren
align-pointer=type
keep-one-line-blocks
close-templates

0
cmake/scripts/macdeployfix.sh

351
ethminer/MinerAux.h

@ -60,6 +60,7 @@
#endif
#if ETH_STRATUM || !ETH_TRUE
#include <libstratum/EthStratumClient.h>
#include <libstratum/EthStratumClientV2.h>
#endif
using namespace std;
using namespace dev;
@ -187,6 +188,41 @@ public:
if (p + 1 <= userpass.length())
m_pass = userpass.substr(p+1);
}
else if ((arg == "-SC" || arg == "--stratum-client") && i + 1 < argc)
{
try {
m_stratumClientVersion = atoi(argv[++i]);
if (m_stratumClientVersion > 2) m_stratumClientVersion = 2;
else if (m_stratumClientVersion < 1) m_stratumClientVersion = 1;
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
else if ((arg == "-SP" || arg == "--stratum-protocol") && i + 1 < argc)
{
try {
m_stratumProtocol = atoi(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
else if ((arg == "-SE" || arg == "--stratum-email") && i + 1 < argc)
{
try {
m_email = string(argv[++i]);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
else if ((arg == "-FO" || arg == "--failover-userpass") && i + 1 < argc)
{
string userpass = string(argv[++i]);
@ -311,6 +347,23 @@ public:
else if (arg == "--cuda-streams" && i + 1 < argc)
m_numStreams = stol(argv[++i]);
#endif
else if ((arg == "-L" || arg == "--dag-load-mode") && i + 1 < argc)
{
string mode = argv[++i];
if (mode == "parallel") m_dagLoadMode = DAG_LOAD_MODE_PARALLEL;
else if (mode == "sequential") m_dagLoadMode = DAG_LOAD_MODE_SEQUENTIAL;
else if (mode == "single")
{
m_dagLoadMode = DAG_LOAD_MODE_SINGLE;
m_dagCreateDevice = stol(argv[++i]);
}
else
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
/*
else if (arg == "--phone-home" && i + 1 < argc)
{
string m = argv[++i];
@ -324,6 +377,7 @@ public:
BOOST_THROW_EXCEPTION(BadArgument());
}
}
*/
else if (arg == "--benchmark-warmup" && i + 1 < argc)
try {
m_benchmarkWarmup = stol(argv[++i]);
@ -360,77 +414,9 @@ public:
{
m_minerType = MinerType::CUDA;
}
else if (arg == "--current-block" && i + 1 < argc)
m_currentBlock = stol(argv[++i]);
else if ((arg == "-R" || arg == "--dag-dir") && i + 1 < argc)
{
strcpy(s_dagDir, argv[++i]);
}
else if ((arg == "-E" || arg == "--erase-dags") && i + 1 < argc)
else if (arg == "-X" || arg == "--cuda-opencl")
{
string m = string(argv[++i]);
if (m == "none") m_eraseMode = DAGEraseMode::None;
else if (m == "old") m_eraseMode = DAGEraseMode::Old;
else if (m == "bench") m_eraseMode = DAGEraseMode::Bench;
else if (m == "all") m_eraseMode = DAGEraseMode::All;
else
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
else if (arg == "--no-precompute")
m_precompute = false;
else if ((arg == "-D" || arg == "--create-dag") && i + 1 < argc)
{
string m = boost::to_lower_copy(string(argv[++i]));
mode = OperationMode::DAGInit;
try
{
m_initDAG = stol(m);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << m << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
else if ((arg == "-w" || arg == "--check-pow") && i + 4 < argc)
{
string m;
try
{
Ethash::BlockHeader bi;
m = boost::to_lower_copy(string(argv[++i]));
h256 powHash(m);
m = boost::to_lower_copy(string(argv[++i]));
h256 seedHash;
if (m.size() == 64 || m.size() == 66)
seedHash = h256(m);
else
seedHash = EthashAux::seedHash(stol(m));
m = boost::to_lower_copy(string(argv[++i]));
bi.setDifficulty(u256(m));
auto boundary = bi.boundary();
m = boost::to_lower_copy(string(argv[++i]));
bi.setNonce(h64(m));
auto r = EthashAux::eval(seedHash, powHash, bi.nonce());
bool valid = r.value < boundary;
cout << (valid ? "VALID :-)" : "INVALID :-(") << endl;
cout << r.value << (valid ? " < " : " >= ") << boundary << endl;
cout << " where " << boundary << " = 2^256 / " << bi.difficulty() << endl;
cout << " and " << r.value << " = ethash(" << powHash << ", " << bi.nonce() << ")" << endl;
cout << " with seed as " << seedHash << endl;
if (valid)
cout << "(mixHash = " << r.mixHash << ")" << endl;
cout << "SHA3( light(seed) ) = " << sha3(EthashAux::light(bi.seedHash())->data()) << endl;
exit(0);
}
catch (...)
{
cerr << "Bad " << arg << " option: " << m << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
m_minerType = MinerType::Mixed;
}
else if (arg == "-M" || arg == "--benchmark")
{
@ -494,22 +480,14 @@ public:
void execute()
{
EthashAux::setDAGDirName(s_dagDir);
EthashAux::setDAGEraseMode(m_eraseMode);
EthashAux::eraseDAGs();
if (m_eraseMode == DAGEraseMode::All)
{
m_eraseMode = DAGEraseMode::None;
}
if (m_shouldListDevices)
{
#if ETH_ETHASHCL || !ETH_TRUE
if (m_minerType == MinerType::CL)
if (m_minerType == MinerType::CL || m_minerType == MinerType::Mixed)
EthashGPUMiner::listDevices();
#endif
#if ETH_ETHASHCUDA || !ETH_TRUE
if (m_minerType == MinerType::CUDA)
if (m_minerType == MinerType::CUDA || m_minerType == MinerType::Mixed)
EthashCUDAMiner::listDevices();
#endif
if (m_minerType == MinerType::CPU)
@ -518,8 +496,11 @@ public:
}
if (m_minerType == MinerType::CPU)
EthashCPUMiner::setNumInstances(m_miningThreads);
else if (m_minerType == MinerType::CL)
{
cout << "CPU mining is no longer supported in this miner. Use -G (opencl) or -U (cuda) flag to select GPU platform." << endl;
exit(0);
}
else if (m_minerType == MinerType::CL || m_minerType == MinerType::Mixed)
{
#if ETH_ETHASHCL || !ETH_TRUE
if (m_openclDeviceCount > 0)
@ -535,7 +516,9 @@ public:
m_openclDevice,
m_clAllowCPU,
m_extraGPUMemory,
m_currentBlock
0,
m_dagLoadMode,
m_dagCreateDevice
))
exit(1);
EthashGPUMiner::setNumInstances(m_miningThreads);
@ -544,7 +527,7 @@ public:
exit(1);
#endif
}
else if (m_minerType == MinerType::CUDA)
else if (m_minerType == MinerType::CUDA || m_minerType == MinerType::Mixed)
{
#if ETH_ETHASHCUDA || !ETH_TRUE
if (m_cudaDeviceCount > 0)
@ -560,7 +543,9 @@ public:
m_numStreams,
m_extraGPUMemory,
m_cudaSchedule,
m_currentBlock
0,
m_dagLoadMode,
m_dagCreateDevice
))
exit(1);
#else
@ -568,9 +553,7 @@ public:
exit(1);
#endif
}
if (mode == OperationMode::DAGInit)
doInitDAG(m_initDAG);
else if (mode == OperationMode::Benchmark)
if (mode == OperationMode::Benchmark)
doBenchmark(m_minerType, m_phoneHome, m_benchmarkWarmup, m_benchmarkTrial, m_benchmarkTrials);
else if (mode == OperationMode::Farm)
doFarm(m_minerType, m_activeFarmURL, m_farmRecheckPeriod);
@ -596,14 +579,17 @@ public:
<< " -FS, --failover-stratum <host:port> Failover stratum server at host:port" << endl
<< " -O, --userpass <username.workername:password> Stratum login credentials" << endl
<< " -FO, --failover-userpass <username.workername:password> Failover stratum login credentials (optional, will use normal credentials when omitted)" << endl
<< " --work-timeout <n> reconnect/failover after n seconds of working on the same (stratum) job. Defaults to 60. Don't set lower than max. avg. block time" << endl
<< " --work-timeout <n> reconnect/failover after n seconds of working on the same (stratum) job. Defaults to 180. Don't set lower than max. avg. block time" << endl
<< " -SC, --stratum-client <n> Stratum client version. Defaults to 1 (async client). Use 2 to use the new synchronous client." << endl
<< " -SP, --stratum-protocol <n> Choose which stratum protocol to use:" << endl
<< " 0: official stratum spec: ethpool, ethermine, coinotron, mph, nanopool (default)" << endl
<< " 1: eth-proxy compatible: dwarfpool, f2pool, nanopool" << endl
<< " 2: EthereumStratum/1.0.0: nicehash" << endl
<< " -SE, --stratum-email <s> Email address used in eth-proxy (optional)" << endl
#endif
#if ETH_JSONRPC || ETH_STRATUM || !ETH_TRUE
<< " --farm-recheck <n> Leave n ms between checks for changed work (default: 500). When using stratum, use a high value (i.e. 2000) to get more stable hashrate output" << endl
<< " --no-precompute Don't precompute the next epoch's DAG." << endl
#endif
<< "Ethash verify mode:" << endl
<< " -w,--check-pow <headerHash> <seedHash> <difficulty> <nonce> Check PoW credentials for validity." << endl
<< endl
<< "Benchmarking mode:" << endl
<< " -M [<n>],--benchmark [<n>] Benchmark for mining and exit; Optionally specify block number to benchmark against specific DAG." << endl
@ -612,28 +598,20 @@ public:
<< " --benchmark-trials <n> Set the duration of warmup for the benchmark tests (default: 5)." << endl
<< "Simulation mode:" << endl
<< " -Z [<n>],--simulation [<n>] Mining test mode. Used to validate kernel optimizations. Optionally specify block number." << endl
#if ETH_JSONRPC || !ETH_TRUE
<< " --phone-home <on/off> When benchmarking, publish results (default: off)" << endl
#endif
<< "DAG file management:" << endl
<< " -D,--create-dag <number> Create the DAG in preparation for mining on given block and exit." << endl
<< " -R <s>, --dag-dir <s> Store/Load DAG files in/from the specified directory. Useful for running multiple instances with different configurations." << endl
<< " -E <mode>, --erase-dags <mode> Erase unneeded DAG files. Default is 'none'. Possible values are:" << endl
<< " none - don't erase DAG files (default)" << endl
<< " old - erase all DAG files older than current epoch" << endl
<< " bench - like old, but keep epoch 0 for benchmarking" << endl
<< " all - erase all DAG files. After deleting all files, setting changes to none." << endl
<< "Mining configuration:" << endl
<< " -C,--cpu When mining, use the CPU." << endl
<< " -G,--opencl When mining use the GPU via OpenCL." << endl
<< " -U,--cuda When mining use the GPU via CUDA." << endl
<< " -X,--cuda-opencl Use OpenCL + CUDA in a system with mixed AMD/Nvidia cards. May require setting --opencl-platform 1" << endl
<< " --opencl-platform <n> When mining using -G/--opencl use OpenCL platform n (default: 0)." << endl
<< " --opencl-device <n> When mining using -G/--opencl use OpenCL device n (default: 0)." << endl
<< " --opencl-devices <0 1 ..n> Select which OpenCL devices to mine on. Default is to use all" << endl
<< " -t, --mining-threads <n> Limit number of CPU/GPU miners to n (default: use everything available on selected platform)" << endl
<< " --allow-opencl-cpu Allows CPU to be considered as an OpenCL device if the OpenCL platform supports it." << endl
<< " --list-devices List the detected OpenCL/CUDA devices and exit. Should be combined with -G or -U flag" << endl
<< " --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." << endl
<< " -L, --dag-load-mode <mode> DAG generation mode." << endl
<< " parallel - load DAG on all GPUs at the same time (default)" << endl
<< " sequential - load DAG on GPUs one after another. Use this when the miner crashes during DAG generation" << endl
<< " single <n> - generate DAG on device n, then copy to other devices" << endl
#if ETH_ETHASHCL || !ETH_TRUE
<< " --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. default: 0" << endl
<< " --cl-local-work Set the OpenCL local work size. Default is " << toString(ethash_cl_miner::c_defaultLocalWorkSize) << endl
@ -655,7 +633,6 @@ public:
}
MinerType minerType() const { return m_minerType; }
bool shouldPrecompute() const { return m_precompute; }
private:
void doInitDAG(unsigned _n)
@ -691,16 +668,16 @@ private:
cout << "Benchmarking on platform: " << platformInfo << endl;
cout << "Preparing DAG for block #" << m_benchmarkBlock << endl;
genesis.prep();
//genesis.prep();
genesis.setDifficulty(u256(1) << 63);
f.setWork(genesis);
if (_m == MinerType::CPU)
f.start("cpu");
f.start("cpu", false);
else if (_m == MinerType::CL)
f.start("opencl");
f.start("opencl", false);
else if (_m == MinerType::CUDA)
f.start("cuda");
f.start("cuda", false);
map<uint64_t, WorkingProgress> results;
uint64_t mean = 0;
@ -774,17 +751,17 @@ private:
cout << "Running mining simulation on platform: " << platformInfo << endl;
cout << "Preparing DAG for block #" << m_benchmarkBlock << endl;
genesis.prep();
//genesis.prep();
genesis.setDifficulty(u256(1) << difficulty);
f.setWork(genesis);
if (_m == MinerType::CPU)
f.start("cpu");
f.start("cpu", false);
else if (_m == MinerType::CL)
f.start("opencl");
f.start("opencl", false);
else if (_m == MinerType::CUDA)
f.start("cuda");
f.start("cuda", false);
int time = 0;
@ -806,13 +783,7 @@ private:
this_thread::sleep_for(chrono::milliseconds(1000));
time++;
}
//cnote << "Solution found";
cnote << "Difficulty:" << difficulty << " Nonce:" << solution.nonce.hex();
//cnote << " Mixhash:" << solution.mixHash.hex();
//cnote << " Header-hash:" << current.headerHash.hex();
//cnote << " Seedhash:" << current.seedHash.hex();
//cnote << " Target: " << h256(current.boundary).hex();
//cnote << " Ethash: " << h256(EthashAux::eval(current.seedHash, current.headerHash, solution.nonce).value).hex();
if (EthashAux::eval(current.seedHash, current.headerHash, solution.nonce).value < current.boundary)
{
cnote << "SUCCESS: GPU gave correct result!";
@ -869,11 +840,11 @@ private:
GenericFarm<EthashProofOfWork> f;
f.setSealers(sealers);
if (_m == MinerType::CPU)
f.start("cpu");
f.start("cpu", false);
else if (_m == MinerType::CL)
f.start("opencl");
f.start("opencl", false);
else if (_m == MinerType::CUDA)
f.start("cuda");
f.start("cuda", false);
EthashProofOfWork::WorkPackage current, previous;
boost::mutex x_current;
EthashAux::FullType dag;
@ -911,18 +882,7 @@ private:
Json::Value v = prpc->eth_getWork();
h256 hh(v[0].asString());
h256 newSeedHash(v[1].asString());
if (current.seedHash != newSeedHash)
{
minelog << "Grabbing DAG for" << newSeedHash;
}
if (!(dag = EthashAux::full(newSeedHash, true, [&](unsigned _pc){ cout << "\rCreating DAG. " << _pc << "% done..." << flush; return 0; })))
{
BOOST_THROW_EXCEPTION(DAGCreationFailure());
}
if (m_precompute)
{
EthashAux::computeFull(sha3(newSeedHash), true);
}
if (hh != current.headerHash)
{
x_current.lock();
@ -933,8 +893,6 @@ private:
current.seedHash = newSeedHash;
current.boundary = h256(fromHex(v[2].asString()), h256::AlignRight);
minelog << "Got work package: #" + current.headerHash.hex().substr(0,8);
//minelog << " Seedhash:" << current.seedHash.hex();
//minelog << " Target: " << h256(current.boundary).hex();
f.setWork(current);
x_current.unlock();
}
@ -942,11 +900,6 @@ private:
}
cnote << "Solution found; Submitting to" << _remote << "...";
cnote << " Nonce:" << solution.nonce.hex();
//cnote << " Mixhash:" << solution.mixHash.hex();
//cnote << " Header-hash:" << current.headerHash.hex();
//cnote << " Seedhash:" << solved.seedHash.hex();
//cnote << " Target: " << h256(solved.boundary).hex();
//cnote << " Ethash: " << h256(EthashAux::eval(solved.seedHash, solved.headerHash, solution.nonce).value).hex();
if (EthashAux::eval(current.seedHash, current.headerHash, solution.nonce).value < current.boundary)
{
bool ok = prpc->eth_submitWork("0x" + toString(solution.nonce), "0x" + toString(current.headerHash), "0x" + toString(solution.mixHash));
@ -958,6 +911,7 @@ private:
cwarn << ":-( Not accepted.";
f.rejectedSolution(false);
}
//exit(0);
}
else if (EthashAux::eval(previous.seedHash, previous.headerHash, solution.nonce).value < previous.boundary)
{
@ -970,6 +924,7 @@ private:
cwarn << ":-( Not accepted.";
f.rejectedSolution(true);
}
//exit(0);
}
else {
f.failedSolution();
@ -1030,45 +985,89 @@ private:
m_farmRecheckPeriod = m_defaultStratumFarmRecheckPeriod;
GenericFarm<EthashProofOfWork> f;
EthStratumClient client(&f, m_minerType, m_farmURL, m_port, m_user, m_pass, m_maxFarmRetries, m_worktimeout, m_precompute);
if (m_farmFailOverURL != "")
{
if (m_fuser != "")
// this is very ugly, but if Stratum Client V2 tunrs out to be a success, V1 will be completely removed anyway
if (m_stratumClientVersion == 1) {
EthStratumClient client(&f, m_minerType, m_farmURL, m_port, m_user, m_pass, m_maxFarmRetries, m_worktimeout, m_stratumProtocol, m_email);
if (m_farmFailOverURL != "")
{
client.setFailover(m_farmFailOverURL, m_fport, m_fuser, m_fpass);
if (m_fuser != "")
{
client.setFailover(m_farmFailOverURL, m_fport, m_fuser, m_fpass);
}
else
{
client.setFailover(m_farmFailOverURL, m_fport);
}
}
else
f.setSealers(sealers);
f.onSolutionFound([&](EthashProofOfWork::Solution sol)
{
if (client.isConnected()) {
client.submit(sol);
}
else {
cwarn << "Can't submit solution: Not connected";
}
return false;
});
while (client.isRunning())
{
client.setFailover(m_farmFailOverURL, m_fport);
auto mp = f.miningProgress();
f.resetMiningProgress();
if (client.isConnected())
{
if (client.current())
minelog << "Mining on PoWhash" << "#" + (client.currentHeaderHash().hex().substr(0, 8)) << ": " << mp << f.getSolutionStats();
else if (client.waitState() == MINER_WAIT_STATE_WORK)
minelog << "Waiting for work package...";
}
this_thread::sleep_for(chrono::milliseconds(m_farmRecheckPeriod));
}
}
f.setSealers(sealers);
else if (m_stratumClientVersion == 2) {
EthStratumClientV2 client(&f, m_minerType, m_farmURL, m_port, m_user, m_pass, m_maxFarmRetries, m_worktimeout, m_stratumProtocol, m_email);
if (m_farmFailOverURL != "")
{
if (m_fuser != "")
{
client.setFailover(m_farmFailOverURL, m_fport, m_fuser, m_fpass);
}
else
{
client.setFailover(m_farmFailOverURL, m_fport);
}
}
f.setSealers(sealers);
f.onSolutionFound([&](EthashProofOfWork::Solution sol)
{
client.submit(sol);
return false;
});
while (client.isRunning())
{
auto mp = f.miningProgress();
f.resetMiningProgress();
if (client.isConnected())
f.onSolutionFound([&](EthashProofOfWork::Solution sol)
{
if (client.current())
minelog << "Mining on PoWhash" << "#"+(client.currentHeaderHash().hex().substr(0,8)) << ": " << mp << f.getSolutionStats();
else if (client.waitState() == MINER_WAIT_STATE_WORK)
minelog << "Waiting for work package...";
client.submit(sol);
return false;
});
while (client.isRunning())
{
auto mp = f.miningProgress();
f.resetMiningProgress();
if (client.isConnected())
{
if (client.current())
minelog << "Mining on PoWhash" << "#" + (client.currentHeaderHash().hex().substr(0, 8)) << ": " << mp << f.getSolutionStats();
else if (client.waitState() == MINER_WAIT_STATE_WORK)
minelog << "Waiting for work package...";
}
this_thread::sleep_for(chrono::milliseconds(m_farmRecheckPeriod));
}
this_thread::sleep_for(chrono::milliseconds(m_farmRecheckPeriod));
}
}
#endif
/// Operating mode.
OperationMode mode;
DAGEraseMode m_eraseMode = DAGEraseMode::None;
/// Mining options
bool m_running = true;
@ -1094,17 +1093,13 @@ private:
unsigned m_numStreams = ethash_cuda_miner::c_defaultNumStreams;
unsigned m_cudaSchedule = 4; // sync
#endif
uint64_t m_currentBlock = 0;
static char s_dagDir[256];
// default value was 350MB of GPU memory for other stuff (windows system rendering, e.t.c.)
unsigned m_extraGPUMemory = 0;// 350000000; don't assume miners run desktops...
/// DAG initialisation param.
unsigned m_initDAG = 0;
unsigned m_dagLoadMode = 0; // parallel
unsigned m_dagCreateDevice = 0;
/// Benchmarking params
bool m_phoneHome = false;
unsigned m_benchmarkWarmup = 3;
unsigned m_benchmarkWarmup = 15;
unsigned m_benchmarkTrial = 3;
unsigned m_benchmarkTrials = 5;
unsigned m_benchmarkBlock = 0;
@ -1119,17 +1114,17 @@ private:
unsigned m_farmRecheckPeriod = 500;
unsigned m_defaultStratumFarmRecheckPeriod = 2000;
bool m_farmRecheckSet = false;
int m_worktimeout = 90;
bool m_precompute = true;
int m_worktimeout = 180;
#if ETH_STRATUM || !ETH_TRUE
int m_stratumClientVersion = 1;
int m_stratumProtocol = STRATUM_PROTOCOL_STRATUM;
string m_user;
string m_pass;
string m_port;
string m_fuser = "";
string m_fpass = "";
string m_email = "";
#endif
string m_fport = "";
};
char MinerCLI::s_dagDir[256] = "";

2
ethminer/main.cpp

@ -72,7 +72,7 @@ int main(int argc, char** argv)
cout << "=====================================================================" << endl;
cout << "Forked from github.com/ethereum/cpp-ethereum" << endl;
cout << "CUDA kernel ported from Tim Hughes' OpenCL kernel" << endl;
cout << "With contributions from nerdralph, RoBiK, tpruvot and sp_ " << endl << endl;
cout << "With contributions from nicehash, nerdralph, RoBiK and sp_ " << endl << endl;
cout << "Please consider a donation to:" << endl;
cout << "ETH: 0xeb9310b185455f863f526dab3d245809f6854b4d" << endl << endl;

1
extdep/getstuff.bat

@ -36,3 +36,4 @@ cd ..
goto :EOF

BIN
install-folder-bg.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

BIN
install-folder-bg@2x.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

57
libethash-cl/ethash_cl_miner.cpp

@ -205,6 +205,7 @@ bool ethash_cl_miner::configureGPU(
s_initialGlobalWorkSize = _globalWorkSize;
s_allowCPU = _allowCPU;
s_extraRequiredGPUMem = _extraGPUMemory;
// by default let's only consider the DAG of the first epoch
uint64_t dagSize = ethash_get_datasize(_currentBlock);
uint64_t requiredSize = dagSize + _extraGPUMemory;
@ -324,9 +325,11 @@ void ethash_cl_miner::finish()
m_queue.finish();
}
bool ethash_cl_miner::init(
uint8_t const* _dag,
uint64_t _dagSize,
ethash_light_t _light,
uint8_t const* _lightData,
uint64_t _lightSize,
unsigned _platformId,
unsigned _deviceId
)
@ -399,12 +402,17 @@ bool ethash_cl_miner::init(
if (m_globalWorkSize % s_workgroupSize != 0)
m_globalWorkSize = ((m_globalWorkSize / s_workgroupSize) + 1) * s_workgroupSize;
uint64_t dagSize = ethash_get_datasize(_light->block_number);
uint32_t dagSize128 = (unsigned)(dagSize / ETHASH_MIX_BYTES);
uint32_t lightSize64 = (unsigned)(_lightSize / sizeof(node));
// patch source code
// note: ETHASH_CL_MINER_KERNEL is simply ethash_cl_miner_kernel.cl compiled
// into a byte array by bin2h.cmake. There is no need to load the file by hand in runtime
string code(ETHASH_CL_MINER_KERNEL, ETHASH_CL_MINER_KERNEL + ETHASH_CL_MINER_KERNEL_SIZE);
addDefinition(code, "GROUP_SIZE", s_workgroupSize);
addDefinition(code, "DAG_SIZE", (unsigned)(_dagSize / ETHASH_MIX_BYTES));
addDefinition(code, "DAG_SIZE", dagSize128);
addDefinition(code, "LIGHT_SIZE", lightSize64);
addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
addDefinition(code, "PLATFORM", platformId);
@ -430,16 +438,19 @@ bool ethash_cl_miner::init(
// create buffer for dag
try
{
ETHCL_LOG("Creating one big buffer for the DAG");
m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, _dagSize);
ETHCL_LOG("Loading single big chunk kernels");
ETHCL_LOG("Creating cache buffer");
m_light = cl::Buffer(m_context, CL_MEM_READ_ONLY, _lightSize);
ETHCL_LOG("Creating DAG buffer");
m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, dagSize);
ETHCL_LOG("Loading kernels");
m_searchKernel = cl::Kernel(program, "ethash_search");
ETHCL_LOG("Mapping one big chunk.");
m_queue.enqueueWriteBuffer(m_dag, CL_TRUE, 0, _dagSize, _dag);
m_dagKernel = cl::Kernel(program, "ethash_calculate_dag_item");
ETHCL_LOG("Writing cache buffer");
m_queue.enqueueWriteBuffer(m_light, CL_TRUE, 0, _lightSize, _lightData);
}
catch (cl::Error const& err)
{
ETHCL_LOG("Allocating/mapping single buffer failed with: " << err.what() << "(" << err.err() << "). GPU can't allocate the DAG in a single chunk. Bailing.");
ETHCL_LOG("Allocating/mapping DAG buffer failed with: " << err.what() << "(" << err.err() << "). GPU can't allocate the DAG in a single chunk. Bailing.");
return false;
}
// create buffer for header
@ -456,6 +467,28 @@ bool ethash_cl_miner::init(
ETHCL_LOG("Creating mining buffer " << i);
m_searchBuffer[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_maxSearchResults + 1) * sizeof(uint32_t));
}
ETHCL_LOG("Generating DAG data");
uint32_t const work = (uint32_t)(dagSize / sizeof(node));
//while (work < blocks * threads) blocks /= 2;
uint32_t fullRuns = work / m_globalWorkSize;
uint32_t const restWork = work % m_globalWorkSize;
if (restWork > 0) fullRuns++;
m_dagKernel.setArg(1, m_light);
m_dagKernel.setArg(2, m_dag);
m_dagKernel.setArg(3, ~0u);
for (uint32_t i = 0; i < fullRuns; i++)
{
m_dagKernel.setArg(0, i * m_globalWorkSize);
m_queue.enqueueNDRangeKernel(m_dagKernel, cl::NullRange, m_globalWorkSize, s_workgroupSize);
m_queue.finish();
printf("OPENCL#%d: %.0f%%\n", _deviceId, 100.0f * (float)i / (float)fullRuns);
}
}
catch (cl::Error const& err)
{
@ -471,7 +504,7 @@ typedef struct
unsigned buf;
} pending_batch;
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook, bool _ethStratum, uint64_t _startN)
{
try
{
@ -498,7 +531,9 @@ void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook
unsigned buf = 0;
random_device engine;
uint64_t start_nonce = uniform_int_distribution<uint64_t>()(engine);
uint64_t start_nonce;
if (_ethStratum) start_nonce = _startN;
else start_nonce = uniform_int_distribution<uint64_t>()(engine);
for (;; start_nonce += m_globalWorkSize)
{
// supply output buffer to kernel

15
libethash-cl/ethash_cl_miner.h

@ -52,13 +52,14 @@ public:
);
bool init(
uint8_t const* _dag,
uint64_t _dagSize,
unsigned _platformId = 0,
unsigned _deviceId = 0
);
ethash_light_t _light,
uint8_t const* _lightData,
uint64_t _lightSize,
unsigned _platformId,
unsigned _deviceId
);
void finish();
void search(uint8_t const* _header, uint64_t _target, search_hook& _hook);
void search(uint8_t const* _header, uint64_t _target, search_hook& _hook, bool _ethStratum, uint64_t _startN);
/* -- default values -- */
/// Default value of the local work size. Also known as workgroup size.
@ -74,7 +75,9 @@ private:
cl::Context m_context;
cl::CommandQueue m_queue;
cl::Kernel m_searchKernel;
cl::Kernel m_dagKernel;
cl::Buffer m_dag;
cl::Buffer m_light;
cl::Buffer m_header;
cl::Buffer m_searchBuffer[c_bufferCount];
unsigned m_globalWorkSize;

75
libethash-cl/ethash_cl_miner_kernel.cl

@ -1,11 +1,36 @@
#define OPENCL_PLATFORM_UNKNOWN 0
#define OPENCL_PLATFORM_NVIDIA 1
#define OPENCL_PLATFORM_AMD 2
#define OPENCL_PLATFORM_AMD 2
#ifndef ACCESSES
#define ACCESSES 64
#endif
#ifndef GROUP_SIZE
#define GROUP_SIZE 128
#endif
#ifndef MAX_OUTPUTS
#define MAX_OUTPUTS 63U
#endif
#ifndef PLATFORM
#define PLATFORM 2
#endif
#ifndef DAG_SIZE
#define DAG_SIZE 8388593
#endif
#ifndef LIGHT_SIZE
#define LIGHT_SIZE 262139
#endif
#define ETHASH_DATASET_PARENTS 256
#define NODE_WORDS (64/4)
#define THREADS_PER_HASH (128 / 16)
#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
#define FNV_PRIME 0x01000193
__constant uint2 const Keccak_f1600_RC[24] = {
@ -176,8 +201,6 @@ static void keccak_f1600_round(uint2* a, uint r)
static void keccak_f1600_no_absorb(uint2* a, uint out_size, uint isolate)
{
// Originally I unrolled the first and last rounds to interface
// better with surrounding code, however I haven't done this
// without causing the AMD compiler to blow up the VGPR usage.
@ -227,6 +250,18 @@ typedef struct
ulong ulongs[32 / sizeof(ulong)];
} hash32_t;
typedef union {
uint words[64 / sizeof(uint)];
uint2 uint2s[64 / sizeof(uint2)];
uint4 uint4s[64 / sizeof(uint4)];
} hash64_t;
typedef union {
uint words[200 / sizeof(uint)];
uint2 uint2s[200 / sizeof(uint2)];
uint4 uint4s[200 / sizeof(uint4)];
} hash200_t;
typedef struct
{
uint4 uint4s[128 / sizeof(uint4)];
@ -334,3 +369,35 @@ __kernel void ethash_search(
g_output[slot] = gid;
}
}
static void SHA3_512(uint2* s, uint isolate)
{
for (uint i = 8; i != 25; ++i)
{
s[i] = (uint2){ 0, 0 };
}
s[8].x = 0x00000001;
s[8].y = 0x80000000;
keccak_f1600_no_absorb(s, 8, isolate);
}
__kernel void ethash_calculate_dag_item(uint start, __global hash64_t const* g_light, __global hash64_t * g_dag, uint isolate)
{
uint const node_index = start + get_global_id(0);
if (node_index > DAG_SIZE * 2) return;
hash200_t dag_node;
copy(dag_node.uint4s, g_light[node_index % LIGHT_SIZE].uint4s, 4);
dag_node.words[0] ^= node_index;
SHA3_512(dag_node.uint2s, isolate);
for (uint i = 0; i != ETHASH_DATASET_PARENTS; ++i) {
uint parent_index = fnv(node_index ^ i, dag_node.words[i % NODE_WORDS]) % LIGHT_SIZE;
for (uint w = 0; w != 4; ++w) {
dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], g_light[parent_index].uint4s[w]);
}
}
SHA3_512(dag_node.uint2s, isolate);
copy(g_dag[node_index].uint4s, dag_node.uint4s, 4);
}

2
libethash-cuda/CMakeLists.txt

@ -13,7 +13,7 @@ LIST(APPEND CUDA_NVCC_FLAGS_DEBUG -G)
if(COMPUTE AND (COMPUTE GREATER 0))
LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${COMPUTE},code=sm_${COMPUTE})
else(COMPUTE AND (COMPUTE GREATER 0))
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_30,code=sm_30;-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_20;-gencode arch=compute_30,code=sm_30;-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52;-gencode arch=compute_61,code=sm_61)
endif(COMPUTE AND (COMPUTE GREATER 0))

3
libethash-cuda/cuda_helper.h

@ -10,6 +10,7 @@
#include <device_launch_parameters.h>
#define __launch_bounds__(max_tpb, min_blocks)
#define asm("a" : "=l"(result) : "l"(a))
#define __CUDA_ARCH__ 520 // highlight shuffle code by default.
uint32_t __byte_perm(uint32_t x, uint32_t y, uint32_t z);
uint32_t __shfl(uint32_t x, uint32_t y, uint32_t z);
@ -337,7 +338,7 @@ uint64_t shl_t64(uint64_t x, uint32_t n)
}
#ifndef USE_ROT_ASM_OPT
#define USE_ROT_ASM_OPT 1
#define USE_ROT_ASM_OPT 2
#endif
// 64-bit ROTATE RIGHT

71
libethash-cuda/dagger_shared.cuh

@ -0,0 +1,71 @@
#include "ethash_cuda_miner_kernel_globals.h"
#include "ethash_cuda_miner_kernel.h"
typedef union {
uint4 uint4s[4];
uint64_t ulongs[8];
uint32_t uints[16];
} compute_hash_share;
__device__ uint64_t compute_hash(
uint64_t nonce
)
{
// sha3_512(header .. nonce)
uint64_t state[25];
state[4] = nonce;
keccak_f1600_init(state);
// Threads work together in this phase in groups of 8.
const int thread_id = threadIdx.x & (THREADS_PER_HASH - 1);
const int hash_id = threadIdx.x >> 3;
extern __shared__ compute_hash_share share[];
for (int i = 0; i < THREADS_PER_HASH; i++)
{
// share init with other threads
if (i == thread_id)
copy(share[hash_id].ulongs, state, 8);
__syncthreads();
uint4 mix = share[hash_id].uint4s[thread_id & 3];
__syncthreads();
uint32_t *share0 = share[hash_id].uints;
// share init0
if (thread_id == 0)
*share0 = mix.x;
__syncthreads();
uint32_t init0 = *share0;
for (uint32_t a = 0; a < ACCESSES; a += 4)
{
int t = bfe(a, 2u, 3u);
for (uint32_t b = 0; b < 4; b++)
{
if (thread_id == t) {
*share0 = fnv(init0 ^ (a + b), ((uint32_t *)&mix)[b]) % d_dag_size;
}
__syncthreads();
mix = fnv4(mix, d_dag[*share0].uint4s[thread_id]);
}
}
share[hash_id].uints[thread_id] = fnv_reduce(mix);
__syncthreads();
if (i == thread_id)
copy(state + 8, share[hash_id].ulongs, 4);
__syncthreads();
}
// keccak_256(keccak_512(header..nonce) .. mix);
return keccak_f1600_final(state);
}

16
libethash-cuda/dagger_shuffled.cuh

@ -1,12 +1,8 @@
#include "ethash_cuda_miner_kernel_globals.h"
#include "ethash_cuda_miner_kernel.h"
#include "keccak.cuh"
#include "fnv.cuh"
#include "cuda_helper.h"
#define ACCESSES 64
#define THREADS_PER_HASH (128 / 16)
__device__ uint64_t compute_hash_shuffle(
__device__ uint64_t compute_hash(
uint64_t nonce
)
{
@ -19,7 +15,6 @@ __device__ uint64_t compute_hash_shuffle(
// Threads work together in this phase in groups of 8.
const int thread_id = threadIdx.x & (THREADS_PER_HASH - 1);
const int start_lane = threadIdx.x & ~(THREADS_PER_HASH - 1);
const int mix_idx = thread_id & 3;
uint4 mix;
@ -47,11 +42,11 @@ __device__ uint64_t compute_hash_shuffle(
mix = vectorize2(shuffle[6], shuffle[7]);
}
uint32_t init0 = __shfl(shuffle[0].x, start_lane);
uint32_t init0 = __shfl(shuffle[0].x, 0, THREADS_PER_HASH);
for (uint32_t a = 0; a < ACCESSES; a += 4)
{
int t = ((a >> 2) & (THREADS_PER_HASH - 1));
int t = bfe(a, 2u, 3u);
for (uint32_t b = 0; b < 4; b++)
{
@ -60,8 +55,7 @@ __device__ uint64_t compute_hash_shuffle(
shuffle[0].x = fnv(init0 ^ (a + b), ((uint32_t *)&mix)[b]) % d_dag_size;
}
shuffle[0].x = __shfl(shuffle[0].x, t, THREADS_PER_HASH);
mix = fnv4(mix, (&d_dag[shuffle[0].x])->uint4s[thread_id]);
mix = fnv4(mix, d_dag[shuffle[0].x].uint4s[thread_id]);
}
}

88
libethash-cuda/ethash_cuda_miner.cpp

@ -199,7 +199,7 @@ void ethash_cuda_miner::finish()
CUDA_SAFE_CALL(cudaDeviceReset());
}
bool ethash_cuda_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned _deviceId)
bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, volatile void** hostDAG)
{
try
{
@ -224,25 +224,61 @@ bool ethash_cuda_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned _d
m_search_buf = new volatile uint32_t *[s_numStreams];
m_streams = new cudaStream_t[s_numStreams];
uint32_t dagSize128 = (unsigned)(_dagSize / ETHASH_MIX_BYTES);
uint64_t dagSize = ethash_get_datasize(_light->block_number);
uint32_t dagSize128 = (unsigned)(dagSize / ETHASH_MIX_BYTES);
uint32_t lightSize64 = (unsigned)(_lightSize / sizeof(node));
// create buffer for cache
hash64_t * light = NULL;
if (!*hostDAG)
{
CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast<void**>(&light), _lightSize));
// copy dag cache to CPU.
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(light), _lightData, _lightSize, cudaMemcpyHostToDevice));
}
// create buffer for dag
hash128_t * dag;
CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast<void**>(&dag), _dagSize));
// copy dag to CPU.
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(dag), _dag, _dagSize, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast<void**>(&dag), dagSize));
// create mining buffers
for (unsigned i = 0; i != s_numStreams; ++i)
{
CUDA_SAFE_CALL(cudaMallocHost(&m_search_buf[i], SEARCH_RESULT_BUFFER_SIZE * sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaStreamCreate(&m_streams[i]));
}
set_constants(dag, dagSize128);
set_constants(dag, dagSize128, light, lightSize64);
memset(&m_current_header, 0, sizeof(hash32_t));
m_current_target = 0;
m_current_nonce = 0;
m_current_index = 0;
m_sharedBytes = device_props.major * 100 < SHUFFLE_MIN_VER ? (64 * s_blockSize) / 8 : 0 ;
if (!*hostDAG)
{
cout << "Generating DAG for GPU #" << device_num << endl;
ethash_generate_dag(dagSize, s_gridSize, s_blockSize, m_streams[0], device_num);
if (_cpyToHost)
{
uint8_t* memoryDAG = new uint8_t[dagSize];
if (!memoryDAG) throw std::runtime_error("Failed to init host memory for DAG, not enough memory?");
cout << "Copying DAG from GPU #" << device_num << " to host" << endl;
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(memoryDAG), dag, dagSize, cudaMemcpyDeviceToHost));
*hostDAG = (void*)memoryDAG;
}
}
else
{
cout << "Copying DAG from host to GPU #" << device_num << endl;
const void* hdag = (const void*)(*hostDAG);
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(dag), hdag, dagSize, cudaMemcpyHostToDevice));
}
return true;
}
catch (runtime_error)
@ -251,7 +287,7 @@ bool ethash_cuda_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned _d
}
}
void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_hook& hook, bool _ethStratum, uint64_t _startN)
{
bool initialize = false;
bool exit = false;
@ -267,14 +303,34 @@ void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_ho
set_target(m_current_target);
initialize = true;
}
if (initialize)
if (_ethStratum)
{
random_device engine;
m_current_nonce = uniform_int_distribution<uint64_t>()(engine);
m_current_index = 0;
CUDA_SAFE_CALL(cudaDeviceSynchronize());
for (unsigned int i = 0; i < s_numStreams; i++)
m_search_buf[i][0] = 0;
if (initialize)
{
m_starting_nonce = 0;
m_current_index = 0;
CUDA_SAFE_CALL(cudaDeviceSynchronize());
for (unsigned int i = 0; i < s_numStreams; i++)
m_search_buf[i][0] = 0;
}
if (m_starting_nonce != _startN)
{
// reset nonce counter
m_starting_nonce = _startN;
m_current_nonce = m_starting_nonce;
}
}
else
{
if (initialize)
{
random_device engine;
m_current_nonce = uniform_int_distribution<uint64_t>()(engine);
m_current_index = 0;
CUDA_SAFE_CALL(cudaDeviceSynchronize());
for (unsigned int i = 0; i < s_numStreams; i++)
m_search_buf[i][0] = 0;
}
}
uint64_t batch_size = s_gridSize * s_blockSize;
for (; !exit; m_current_index++, m_current_nonce += batch_size)
@ -294,7 +350,7 @@ void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_ho
for (unsigned int j = 0; j < found_count; j++)
nonces[j] = nonce_base + buffer[j + 1];
}
run_ethash_search(s_gridSize, s_blockSize, stream, buffer, m_current_nonce);
run_ethash_search(s_gridSize, s_blockSize, m_sharedBytes, stream, buffer, m_current_nonce);
if (m_current_index >= s_numStreams)
{
exit = found_count && hook.found(nonces, found_count);

13
libethash-cuda/ethash_cuda_miner.h

@ -34,13 +34,11 @@ public:
unsigned _scheduleFlag,
uint64_t _currentBlock
);
bool init(
uint8_t const* _dag,
uint64_t _dagSize,
unsigned _deviceId = 0
);
bool init(ethash_light_t _light, uint8_t const* _lightData, uint64_t _lightSize, unsigned _deviceId, bool _cpyToHost, volatile void** hostDAG);
void finish();
void search(uint8_t const* header, uint64_t target, search_hook& hook);
void search(uint8_t const* header, uint64_t target, search_hook& hook, bool _ethStratum, uint64_t _startN);
/* -- default values -- */
/// Default value of the block size. Also known as workgroup size.
@ -54,8 +52,11 @@ private:
hash32_t m_current_header;
uint64_t m_current_target;
uint64_t m_current_nonce;
uint64_t m_starting_nonce;
uint64_t m_current_index;
uint32_t m_sharedBytes;
volatile uint32_t ** m_search_buf;
cudaStream_t * m_streams;

193
libethash-cuda/ethash_cuda_miner_kernel.cu

@ -8,131 +8,142 @@
#include "ethash_cuda_miner_kernel_globals.h"
#include "cuda_helper.h"
#include "keccak.cuh"
#include "fnv.cuh"
#define ACCESSES 64
#define THREADS_PER_HASH (128 / 16)
__device__ uint64_t compute_hash_shuffle(
uint64_t nonce
)
{
// sha3_512(header .. nonce)
uint2 state[25];
state[4] = vectorize(nonce);
keccak_f1600_init(state);
// Threads work together in this phase in groups of 8.
const int thread_id = threadIdx.x & (THREADS_PER_HASH - 1);
const int start_lane = threadIdx.x & ~(THREADS_PER_HASH - 1);
const int mix_idx = thread_id & 3;
uint4 mix;
uint2 shuffle[8];
for (int i = 0; i < THREADS_PER_HASH; i++)
{
// share init among threads
for (int j = 0; j < 8; j++) {
shuffle[j].x = __shfl(state[j].x, i, THREADS_PER_HASH);
shuffle[j].y = __shfl(state[j].y, i, THREADS_PER_HASH);
}
// ugly but avoids local reads/writes
if (mix_idx < 2) {
if (mix_idx == 0)
mix = vectorize2(shuffle[0], shuffle[1]);
else
mix = vectorize2(shuffle[2], shuffle[3]);
}
else {
if (mix_idx == 2)
mix = vectorize2(shuffle[4], shuffle[5]);
else
mix = vectorize2(shuffle[6], shuffle[7]);
}
#define copy(dst, src, count) for (int i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
uint32_t init0 = __shfl(shuffle[0].x, start_lane);
for (uint32_t a = 0; a < ACCESSES; a += 4)
{
int t = ((a >> 2) & (THREADS_PER_HASH - 1));
for (uint32_t b = 0; b < 4; b++)
{
if (thread_id == t)
{
shuffle[0].x = fnv(init0 ^ (a + b), ((uint32_t *)&mix)[b]) % d_dag_size;
}
shuffle[0].x = __shfl(shuffle[0].x, t, THREADS_PER_HASH);
mix = fnv4(mix, (&d_dag[shuffle[0].x])->uint4s[thread_id]);
}
}
uint32_t thread_mix = fnv_reduce(mix);
// update mix accross threads
shuffle[0].x = __shfl(thread_mix, 0, THREADS_PER_HASH);
shuffle[0].y = __shfl(thread_mix, 1, THREADS_PER_HASH);
shuffle[1].x = __shfl(thread_mix, 2, THREADS_PER_HASH);
shuffle[1].y = __shfl(thread_mix, 3, THREADS_PER_HASH);
shuffle[2].x = __shfl(thread_mix, 4, THREADS_PER_HASH);
shuffle[2].y = __shfl(thread_mix, 5, THREADS_PER_HASH);
shuffle[3].x = __shfl(thread_mix, 6, THREADS_PER_HASH);
shuffle[3].y = __shfl(thread_mix, 7, THREADS_PER_HASH);
if (i == thread_id) {
//move mix into state:
state[8] = shuffle[0];
state[9] = shuffle[1];
state[10] = shuffle[2];
state[11] = shuffle[3];
}
}
// keccak_256(keccak_512(header..nonce) .. mix);
return keccak_f1600_final(state);
}
#if __CUDA_ARCH__ < SHUFFLE_MIN_VER
#include "keccak_u64.cuh"
#include "dagger_shared.cuh"
#define TPB 128
#define BPSM 4
#else
#include "keccak.cuh"
#include "dagger_shuffled.cuh"
#define TPB 896
#define BPSM 1
#endif
__global__ void
__launch_bounds__(896, 1)
__launch_bounds__(TPB, BPSM)
ethash_search(
volatile uint32_t* g_output,
uint64_t start_nonce
)
{
uint32_t const gid = blockIdx.x * blockDim.x + threadIdx.x;
uint64_t hash = compute_hash_shuffle(start_nonce + gid);
uint64_t hash = compute_hash(start_nonce + gid);
if (cuda_swab64(hash) > d_target) return;
uint32_t index = atomicInc(const_cast<uint32_t*>(g_output), SEARCH_RESULT_BUFFER_SIZE - 1) + 1;
g_output[index] = gid;
__threadfence_system();
}
void run_ethash_search(
uint32_t blocks,
uint32_t threads,
uint32_t sharedbytes,
cudaStream_t stream,
volatile uint32_t* g_output,
uint64_t start_nonce
)
{
ethash_search <<<blocks, threads, 0, stream >>>(g_output, start_nonce);
ethash_search << <blocks, threads, sharedbytes, stream >> >(g_output, start_nonce);
CUDA_SAFE_CALL(cudaGetLastError());
}
#define ETHASH_DATASET_PARENTS 256
#define NODE_WORDS (64/4)
__global__ void
__launch_bounds__(128, 7)
ethash_calculate_dag_item(uint32_t start)
{
uint32_t const node_index = start + blockIdx.x * blockDim.x + threadIdx.x;
if (node_index > d_dag_size * 2) return;
hash200_t dag_node;
copy(dag_node.uint4s, d_light[node_index % d_light_size].uint4s, 4);
dag_node.words[0] ^= node_index;
SHA3_512(dag_node.uint2s);
const int thread_id = threadIdx.x & 3;
for (uint32_t i = 0; i != ETHASH_DATASET_PARENTS; ++i) {
uint32_t parent_index = fnv(node_index ^ i, dag_node.words[i % NODE_WORDS]) % d_light_size;
#if __CUDA_ARCH__ < SHUFFLE_MIN_VER
for (unsigned w = 0; w != 4; ++w) {
dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], d_light[parent_index].uint4s[w]);
}
#else
for (uint32_t t = 0; t < 4; t++) {
uint32_t shuffle_index = __shfl(parent_index, t, 4);
uint4 p4 = d_light[shuffle_index].uint4s[thread_id];
for (int w = 0; w < 4; w++) {
uint4 s4 = make_uint4(__shfl(p4.x, w, 4), __shfl(p4.y, w, 4), __shfl(p4.z, w, 4), __shfl(p4.w, w, 4));
if (t == thread_id) {
dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], s4);
}
}
}
#endif
}
SHA3_512(dag_node.uint2s);
hash64_t * dag_nodes = (hash64_t *)d_dag;
#if __CUDA_ARCH__ < SHUFFLE_MIN_VER
for (uint32_t i = 0; i < 4; i++) {
dag_nodes[node_index].uint4s[i] = dag_node.uint4s[i];
}
#else
for (uint32_t t = 0; t < 4; t++) {
uint32_t shuffle_index = __shfl(node_index, t, 4);
uint4 s[4];
for (uint32_t w = 0; w < 4; w++) {
s[w] = make_uint4(__shfl(dag_node.uint4s[w].x, t, 4), __shfl(dag_node.uint4s[w].y, t, 4), __shfl(dag_node.uint4s[w].z, t, 4), __shfl(dag_node.uint4s[w].w, t, 4));
}
dag_nodes[shuffle_index].uint4s[thread_id] = s[thread_id];
}
#endif
}
void ethash_generate_dag(
uint64_t dag_size,
uint32_t blocks,
uint32_t threads,
cudaStream_t stream,
int device
)
{
uint32_t const work = (uint32_t)(dag_size / sizeof(hash64_t));
uint32_t fullRuns = work / (blocks * threads);
uint32_t const restWork = work % (blocks * threads);
if (restWork > 0) fullRuns++;
for (uint32_t i = 0; i < fullRuns; i++)
{
ethash_calculate_dag_item <<<blocks, threads, 0, stream >>>(i * blocks * threads);
CUDA_SAFE_CALL(cudaDeviceSynchronize());
printf("CUDA#%d: %.0f%%\n",device, 100.0f * (float)i / (float)fullRuns);
}
//printf("GPU#%d 100%%\n");
CUDA_SAFE_CALL(cudaGetLastError());
}
void set_constants(
hash128_t* _dag,
uint32_t _dag_size
uint32_t _dag_size,
hash64_t * _light,
uint32_t _light_size
)
{
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_dag, &_dag, sizeof(hash128_t *)));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_dag_size, &_dag_size, sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_light, &_light, sizeof(hash64_t *)));
CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_light_size, &_light_size, sizeof(uint32_t)));
}
void set_header(

26
libethash-cuda/ethash_cuda_miner_kernel.h

@ -6,6 +6,8 @@
#include <cuda_runtime.h>
#define SEARCH_RESULT_BUFFER_SIZE 64
#define ACCESSES 64
#define THREADS_PER_HASH (128 / 16)
typedef struct
{
@ -17,10 +19,23 @@ typedef struct
uint4 uint4s[128 / sizeof(uint4)];
} hash128_t;
typedef union {
uint32_t words[64 / sizeof(uint32_t)];
uint2 uint2s[64 / sizeof(uint2)];
uint4 uint4s[64 / sizeof(uint4)];
} hash64_t;
typedef union {
uint32_t words[200 / sizeof(uint32_t)];
uint2 uint2s[200 / sizeof(uint2)];
uint4 uint4s[200 / sizeof(uint4)];
} hash200_t;
void set_constants(
hash128_t* _dag,
uint32_t _dag_size
uint32_t _dag_size,
hash64_t * _light,
uint32_t _light_size
);
void set_header(
@ -34,11 +49,20 @@ void set_target(
void run_ethash_search(
uint32_t search_batch_size,
uint32_t workgroup_size,
uint32_t sharedbytes,
cudaStream_t stream,
volatile uint32_t* g_output,
uint64_t start_nonce
);
void ethash_generate_dag(
uint64_t dag_size,
uint32_t blocks,
uint32_t threads,
cudaStream_t stream,
int device
);
#define CUDA_SAFE_CALL(call) \
do { \

4
libethash-cuda/ethash_cuda_miner_kernel_globals.h

@ -1,10 +1,14 @@
#ifndef _ETHASH_CUDA_MINER_KERNEL_GLOBALS_H_
#define _ETHASH_CUDA_MINER_KERNEL_GLOBALS_H_
#define SHUFFLE_MIN_VER 300
//#include "cuda_helper.h"
__constant__ uint32_t d_dag_size;
__constant__ hash128_t* d_dag;
__constant__ uint32_t d_light_size;
__constant__ hash64_t* d_light;
__constant__ hash32_t d_header;
__constant__ uint64_t d_target;

4
libethash-cuda/fnv.cuh

@ -1,3 +1,4 @@
#define FNV_PRIME 0x01000193
#define fnv(x,y) ((x) * FNV_PRIME ^(y))
@ -15,4 +16,5 @@ __device__ uint4 fnv4(uint4 a, uint4 b)
__device__ uint32_t fnv_reduce(uint4 v)
{
return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
}
}

186
libethash-cuda/keccak.cuh

@ -588,4 +588,190 @@ __device__ __forceinline__ uint64_t keccak_f1600_final(uint2* s)
/* iota: a[0,0] ^= round constant */
//s[0] ^= vectorize(keccak_round_constants[23]);
return devectorize(s[0]) ^ keccak_round_constants[23];
}
__device__ __forceinline__ void SHA3_512(uint2* s) {
uint2 t[5], u, v;
for (uint32_t i = 8; i < 25; i++)
{
s[i] = make_uint2(0, 0);
}
s[8].x = 1;
s[8].y = 0x80000000;
for (int i = 0; i < 23; i++)
{
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = xor5(s[0], s[5], s[10], s[15], s[20]);
t[1] = xor5(s[1], s[6], s[11], s[16], s[21]);
t[2] = xor5(s[2], s[7], s[12], s[17], s[22]);
t[3] = xor5(s[3], s[8], s[13], s[18], s[23]);
t[4] = xor5(s[4], s[9], s[14], s[19], s[24]);
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROL2(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[5] = xor3(s[5], t[4], u);
s[10] = xor3(s[10], t[4], u);
s[15] = xor3(s[15], t[4], u);
s[20] = xor3(s[20], t[4], u);
u = ROL2(t[2], 1);
s[1] = xor3(s[1], t[0], u);
s[6] = xor3(s[6], t[0], u);
s[11] = xor3(s[11], t[0], u);
s[16] = xor3(s[16], t[0], u);
s[21] = xor3(s[21], t[0], u);
u = ROL2(t[3], 1);
s[2] = xor3(s[2], t[1], u);
s[7] = xor3(s[7], t[1], u);
s[12] = xor3(s[12], t[1], u);
s[17] = xor3(s[17], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROL2(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[8] = xor3(s[8], t[2], u);
s[13] = xor3(s[13], t[2], u);
s[18] = xor3(s[18], t[2], u);
s[23] = xor3(s[23], t[2], u);
u = ROL2(t[0], 1);
s[4] = xor3(s[4], t[3], u);
s[9] = xor3(s[9], t[3], u);
s[14] = xor3(s[14], t[3], u);
s[19] = xor3(s[19], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROL2(s[6], 44);
s[6] = ROL2(s[9], 20);
s[9] = ROL2(s[22], 61);
s[22] = ROL2(s[14], 39);
s[14] = ROL2(s[20], 18);
s[20] = ROL2(s[2], 62);
s[2] = ROL2(s[12], 43);
s[12] = ROL2(s[13], 25);
s[13] = ROL2(s[19], 8);
s[19] = ROL2(s[23], 56);
s[23] = ROL2(s[15], 41);
s[15] = ROL2(s[4], 27);
s[4] = ROL2(s[24], 14);
s[24] = ROL2(s[21], 2);
s[21] = ROL2(s[8], 55);
s[8] = ROL2(s[16], 45);
s[16] = ROL2(s[5], 36);
s[5] = ROL2(s[3], 28);
s[3] = ROL2(s[18], 21);
s[18] = ROL2(s[17], 15);
s[17] = ROL2(s[11], 10);
s[11] = ROL2(s[7], 6);
s[7] = ROL2(s[10], 3);
s[10] = ROL2(u, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
u = s[5]; v = s[6];
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
s[8] = chi(s[8], s[9], u);
s[9] = chi(s[9], u, v);
u = s[10]; v = s[11];
s[10] = chi(s[10], s[11], s[12]);
s[11] = chi(s[11], s[12], s[13]);
s[12] = chi(s[12], s[13], s[14]);
s[13] = chi(s[13], s[14], u);
s[14] = chi(s[14], u, v);
u = s[15]; v = s[16];
s[15] = chi(s[15], s[16], s[17]);
s[16] = chi(s[16], s[17], s[18]);
s[17] = chi(s[17], s[18], s[19]);
s[18] = chi(s[18], s[19], u);
s[19] = chi(s[19], u, v);
u = s[20]; v = s[21];
s[20] = chi(s[20], s[21], s[22]);
s[21] = chi(s[21], s[22], s[23]);
s[22] = chi(s[22], s[23], s[24]);
s[23] = chi(s[23], s[24], u);
s[24] = chi(s[24], u, v);
/* iota: a[0,0] ^= round constant */
s[0] ^= vectorize(keccak_round_constants[i]);
}
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = xor5(s[0], s[5], s[10], s[15], s[20]);
t[1] = xor5(s[1], s[6], s[11], s[16], s[21]);
t[2] = xor5(s[2], s[7], s[12], s[17], s[22]);
t[3] = xor5(s[3], s[8], s[13], s[18], s[23]);
t[4] = xor5(s[4], s[9], s[14], s[19], s[24]);
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROL2(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[10] = xor3(s[10], t[4], u);
u = ROL2(t[2], 1);
s[6] = xor3(s[6], t[0], u);
s[16] = xor3(s[16], t[0], u);
u = ROL2(t[3], 1);
s[12] = xor3(s[12], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROL2(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[18] = xor3(s[18], t[2], u);
u = ROL2(t[0], 1);
s[9] = xor3(s[9], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROL2(s[6], 44);
s[6] = ROL2(s[9], 20);
s[9] = ROL2(s[22], 61);
s[2] = ROL2(s[12], 43);
s[4] = ROL2(s[24], 14);
s[8] = ROL2(s[16], 45);
s[5] = ROL2(s[3], 28);
s[3] = ROL2(s[18], 21);
s[7] = ROL2(s[10], 3);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
/* iota: a[0,0] ^= round constant */
s[0] ^= vectorize(keccak_round_constants[23]);
}

777
libethash-cuda/keccak_u64.cuh

@ -0,0 +1,777 @@
#include "cuda_helper.h"
__device__ __constant__ uint64_t const keccak_round_constants[24] = {
0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808AULL,
0x8000000080008000ULL, 0x000000000000808BULL, 0x0000000080000001ULL,
0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008AULL,
0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000AULL,
0x000000008000808BULL, 0x800000000000008BULL, 0x8000000000008089ULL,
0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
0x000000000000800AULL, 0x800000008000000AULL, 0x8000000080008081ULL,
0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
};
__device__ __forceinline__
uint64_t xor5(const uint64_t a, const uint64_t b, const uint64_t c, const uint64_t d, const uint64_t e) {
return a ^ b ^ c ^ d ^ e;
}
__device__ __forceinline__
uint64_t xor3(const uint64_t a, const uint64_t b, const uint64_t c) {
return a ^ b ^ c;
}
__device__ __forceinline__
uint64_t chi(const uint64_t a, const uint64_t b, const uint64_t c) {
return a ^ (~b) & c;
}
__device__ __forceinline__ void keccak_f1600_init(uint64_t * s)
{
uint64_t t[5], u, v;
devectorize4(d_header.uint4s[0], s[0], s[1]);
devectorize4(d_header.uint4s[1], s[2], s[3]);
for (uint32_t i = 5; i < 25; i++)
{
s[i] = 0;
}
s[5] = 0x0000000000000001;
s[8] = 0x8000000000000000;
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = s[0] ^ s[5];
t[1] = s[1];
t[2] = s[2];
t[3] = s[3] ^ s[8];
t[4] = s[4];
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROTL64(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[5] = xor3(s[5], t[4], u);
s[10] = xor3(s[10], t[4], u);
s[15] = xor3(s[15], t[4], u);
s[20] = xor3(s[20], t[4], u);
u = ROTL64(t[2], 1);
s[1] = xor3(s[1], t[0], u);
s[6] = xor3(s[6], t[0], u);
s[11] = xor3(s[11], t[0], u);
s[16] = xor3(s[16], t[0], u);
s[21] = xor3(s[21], t[0], u);
u = ROTL64(t[3], 1);
s[2] = xor3(s[2], t[1], u);
s[7] = xor3(s[7], t[1], u);
s[12] = xor3(s[12], t[1], u);
s[17] = xor3(s[17], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROTL64(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[8] = xor3(s[8], t[2], u);
s[13] = xor3(s[13], t[2], u);
s[18] = xor3(s[18], t[2], u);
s[23] = xor3(s[23], t[2], u);
u = ROTL64(t[0], 1);
s[4] = xor3(s[4], t[3], u);
s[9] = xor3(s[9], t[3], u);
s[14] = xor3(s[14], t[3], u);
s[19] = xor3(s[19], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROTL64(s[6], 44);
s[6] = ROTL64(s[9], 20);
s[9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[2], 62);
s[2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[4], 27);
s[4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[8], 55);
s[8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[5], 36);
s[5] = ROTL64(s[3], 28);
s[3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[7], 6);
s[7] = ROTL64(s[10], 3);
s[10] = ROTL64(u, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
u = s[5]; v = s[6];
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
s[8] = chi(s[8], s[9], u);
s[9] = chi(s[9], u, v);
u = s[10]; v = s[11];
s[10] = chi(s[10], s[11], s[12]);
s[11] = chi(s[11], s[12], s[13]);
s[12] = chi(s[12], s[13], s[14]);
s[13] = chi(s[13], s[14], u);
s[14] = chi(s[14], u, v);
u = s[15]; v = s[16];
s[15] = chi(s[15], s[16], s[17]);
s[16] = chi(s[16], s[17], s[18]);
s[17] = chi(s[17], s[18], s[19]);
s[18] = chi(s[18], s[19], u);
s[19] = chi(s[19], u, v);
u = s[20]; v = s[21];
s[20] = chi(s[20], s[21], s[22]);
s[21] = chi(s[21], s[22], s[23]);
s[22] = chi(s[22], s[23], s[24]);
s[23] = chi(s[23], s[24], u);
s[24] = chi(s[24], u, v);
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[0];
for (int i = 1; i < 23; i++)
{
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = xor5(s[0] , s[5] , s[10] , s[15] , s[20]);
t[1] = xor5(s[1] , s[6] , s[11] , s[16] , s[21]);
t[2] = xor5(s[2] , s[7] , s[12] , s[17] , s[22]);
t[3] = xor5(s[3] , s[8] , s[13] , s[18] , s[23]);
t[4] = xor5(s[4] , s[9] , s[14] , s[19] , s[24]);
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROTL64(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[5] = xor3(s[5], t[4], u);
s[10] = xor3(s[10], t[4], u);
s[15] = xor3(s[15], t[4], u);
s[20] = xor3(s[20], t[4], u);
u = ROTL64(t[2], 1);
s[1] = xor3(s[1], t[0], u);
s[6] = xor3(s[6], t[0], u);
s[11] = xor3(s[11], t[0], u);
s[16] = xor3(s[16], t[0], u);
s[21] = xor3(s[21], t[0], u);
u = ROTL64(t[3], 1);
s[2] = xor3(s[2], t[1], u);
s[7] = xor3(s[7], t[1], u);
s[12] = xor3(s[12], t[1], u);
s[17] = xor3(s[17], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROTL64(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[8] = xor3(s[8], t[2], u);
s[13] = xor3(s[13], t[2], u);
s[18] = xor3(s[18], t[2], u);
s[23] = xor3(s[23], t[2], u);
u = ROTL64(t[0], 1);
s[4] = xor3(s[4], t[3], u);
s[9] = xor3(s[9], t[3], u);
s[14] = xor3(s[14], t[3], u);
s[19] = xor3(s[19], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROTL64(s[6], 44);
s[6] = ROTL64(s[9], 20);
s[9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[2], 62);
s[2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[4], 27);
s[4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[8], 55);
s[8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[5], 36);
s[5] = ROTL64(s[3], 28);
s[3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[7], 6);
s[7] = ROTL64(s[10], 3);
s[10] = ROTL64(u, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
u = s[5]; v = s[6];
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
s[8] = chi(s[8], s[9], u);
s[9] = chi(s[9], u, v);
u = s[10]; v = s[11];
s[10] = chi(s[10], s[11], s[12]);
s[11] = chi(s[11], s[12], s[13]);
s[12] = chi(s[12], s[13], s[14]);
s[13] = chi(s[13], s[14], u);
s[14] = chi(s[14], u, v);
u = s[15]; v = s[16];
s[15] = chi(s[15], s[16], s[17]);
s[16] = chi(s[16], s[17], s[18]);
s[17] = chi(s[17], s[18], s[19]);
s[18] = chi(s[18], s[19], u);
s[19] = chi(s[19], u, v);
u = s[20]; v = s[21];
s[20] = chi(s[20], s[21], s[22]);
s[21] = chi(s[21], s[22], s[23]);
s[22] = chi(s[22], s[23], s[24]);
s[23] = chi(s[23], s[24], u);
s[24] = chi(s[24], u, v);
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[i];
}
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = xor5(s[0], s[5], s[10], s[15], s[20]);
t[1] = xor5(s[1], s[6], s[11], s[16], s[21]);
t[2] = xor5(s[2], s[7], s[12], s[17], s[22]);
t[3] = xor5(s[3], s[8], s[13], s[18], s[23]);
t[4] = xor5(s[4], s[9], s[14], s[19], s[24]);
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROTL64(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[10] = xor3(s[10], t[4], u);
u = ROTL64(t[2], 1);
s[6] = xor3(s[6], t[0], u);
s[16] = xor3(s[16], t[0], u);
u = ROTL64(t[3], 1);
s[12] = xor3(s[12], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROTL64(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[18] = xor3(s[18], t[2], u);
u = ROTL64(t[0], 1);
s[9] = xor3(s[9], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROTL64(s[6], 44);
s[6] = ROTL64(s[9], 20);
s[9] = ROTL64(s[22], 61);
s[2] = ROTL64(s[12], 43);
s[4] = ROTL64(s[24], 14);
s[8] = ROTL64(s[16], 45);
s[5] = ROTL64(s[3], 28);
s[3] = ROTL64(s[18], 21);
s[7] = ROTL64(s[10], 3);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[23];
}
__device__ __forceinline__ uint64_t keccak_f1600_final(uint64_t* s)
{
uint64_t t[5], u, v;
for (uint32_t i = 12; i < 25; i++)
{
s[i] = 0;
}
s[12] = 0x0000000000000001;
s[16] = 0x8000000000000000;
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = xor3(s[0], s[5], s[10]);
t[1] = xor3(s[1], s[6], s[11]) ^ s[16];
t[2] = xor3(s[2], s[7], s[12]);
t[3] = s[3] ^ s[8];
t[4] = s[4] ^ s[9];
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROTL64(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[5] = xor3(s[5], t[4], u);
s[10] = xor3(s[10], t[4], u);
s[15] = xor3(s[15], t[4], u);
s[20] = xor3(s[20], t[4], u);
u = ROTL64(t[2], 1);
s[1] = xor3(s[1], t[0], u);
s[6] = xor3(s[6], t[0], u);
s[11] = xor3(s[11], t[0], u);
s[16] = xor3(s[16], t[0], u);
s[21] = xor3(s[21], t[0], u);
u = ROTL64(t[3], 1);
s[2] = xor3(s[2], t[1], u);
s[7] = xor3(s[7], t[1], u);
s[12] = xor3(s[12], t[1], u);
s[17] = xor3(s[17], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROTL64(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[8] = xor3(s[8], t[2], u);
s[13] = xor3(s[13], t[2], u);
s[18] = xor3(s[18], t[2], u);
s[23] = xor3(s[23], t[2], u);
u = ROTL64(t[0], 1);
s[4] = xor3(s[4], t[3], u);
s[9] = xor3(s[9], t[3], u);
s[14] = xor3(s[14], t[3], u);
s[19] = xor3(s[19], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROTL64(s[6], 44);
s[6] = ROTL64(s[9], 20);
s[9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[2], 62);
s[2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[4], 27);
s[4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[8], 55);
s[8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[5], 36);
s[5] = ROTL64(s[3], 28);
s[3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[7], 6);
s[7] = ROTL64(s[10], 3);
s[10] = ROTL64(u, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
u = s[5]; v = s[6];
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
s[8] = chi(s[8], s[9], u);
s[9] = chi(s[9], u, v);
u = s[10]; v = s[11];
s[10] = chi(s[10], s[11], s[12]);
s[11] = chi(s[11], s[12], s[13]);
s[12] = chi(s[12], s[13], s[14]);
s[13] = chi(s[13], s[14], u);
s[14] = chi(s[14], u, v);
u = s[15]; v = s[16];
s[15] = chi(s[15], s[16], s[17]);
s[16] = chi(s[16], s[17], s[18]);
s[17] = chi(s[17], s[18], s[19]);
s[18] = chi(s[18], s[19], u);
s[19] = chi(s[19], u, v);
u = s[20]; v = s[21];
s[20] = chi(s[20], s[21], s[22]);
s[21] = chi(s[21], s[22], s[23]);
s[22] = chi(s[22], s[23], s[24]);
s[23] = chi(s[23], s[24], u);
s[24] = chi(s[24], u, v);
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[0];
for (int i = 1; i < 23; i++)
{
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = xor5(s[0], s[5], s[10], s[15], s[20]);
t[1] = xor5(s[1], s[6], s[11], s[16], s[21]);
t[2] = xor5(s[2], s[7], s[12], s[17], s[22]);
t[3] = xor5(s[3], s[8], s[13], s[18], s[23]);
t[4] = xor5(s[4], s[9], s[14], s[19], s[24]);
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROTL64(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[5] = xor3(s[5], t[4], u);
s[10] = xor3(s[10], t[4], u);
s[15] = xor3(s[15], t[4], u);
s[20] = xor3(s[20], t[4], u);
u = ROTL64(t[2], 1);
s[1] = xor3(s[1], t[0], u);
s[6] = xor3(s[6], t[0], u);
s[11] = xor3(s[11], t[0], u);
s[16] = xor3(s[16], t[0], u);
s[21] = xor3(s[21], t[0], u);
u = ROTL64(t[3], 1);
s[2] = xor3(s[2], t[1], u);
s[7] = xor3(s[7], t[1], u);
s[12] = xor3(s[12], t[1], u);
s[17] = xor3(s[17], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROTL64(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[8] = xor3(s[8], t[2], u);
s[13] = xor3(s[13], t[2], u);
s[18] = xor3(s[18], t[2], u);
s[23] = xor3(s[23], t[2], u);
u = ROTL64(t[0], 1);
s[4] = xor3(s[4], t[3], u);
s[9] = xor3(s[9], t[3], u);
s[14] = xor3(s[14], t[3], u);
s[19] = xor3(s[19], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROTL64(s[6], 44);
s[6] = ROTL64(s[9], 20);
s[9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[2], 62);
s[2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[4], 27);
s[4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[8], 55);
s[8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[5], 36);
s[5] = ROTL64(s[3], 28);
s[3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[7], 6);
s[7] = ROTL64(s[10], 3);
s[10] = ROTL64(u, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
u = s[5]; v = s[6];
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
s[8] = chi(s[8], s[9], u);
s[9] = chi(s[9], u, v);
u = s[10]; v = s[11];
s[10] = chi(s[10], s[11], s[12]);
s[11] = chi(s[11], s[12], s[13]);
s[12] = chi(s[12], s[13], s[14]);
s[13] = chi(s[13], s[14], u);
s[14] = chi(s[14], u, v);
u = s[15]; v = s[16];
s[15] = chi(s[15], s[16], s[17]);
s[16] = chi(s[16], s[17], s[18]);
s[17] = chi(s[17], s[18], s[19]);
s[18] = chi(s[18], s[19], u);
s[19] = chi(s[19], u, v);
u = s[20]; v = s[21];
s[20] = chi(s[20], s[21], s[22]);
s[21] = chi(s[21], s[22], s[23]);
s[22] = chi(s[22], s[23], s[24]);
s[23] = chi(s[23], s[24], u);
s[24] = chi(s[24], u, v);
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[i];
}
t[0] = xor5(s[0], s[5], s[10], s[15], s[20]);
t[1] = xor5(s[1], s[6], s[11], s[16], s[21]);
t[2] = xor5(s[2], s[7], s[12], s[17], s[22]);
t[3] = xor5(s[3], s[8], s[13], s[18], s[23]);
t[4] = xor5(s[4], s[9], s[14], s[19], s[24]);
s[0] = xor3(s[0], t[4], ROTL64(t[1], 1));
s[6] = xor3(s[6], t[0], ROTL64(t[2], 1));
s[12] = xor3(s[12], t[1], ROTL64(t[3], 1));
s[1] = ROTL64(s[6], 44);
s[2] = ROTL64(s[12], 43);
s[0] = chi(s[0], s[1], s[2]);
/* iota: a[0,0] ^= round constant */
//s[0] ^= vectorize(keccak_round_constants[23]);
return s[0] ^ keccak_round_constants[23];
}
__device__ __forceinline__ void SHA3_512(uint2* s2) {
uint64_t * s = (uint64_t*)s2; //dirty
uint64_t t[5], u, v;
for (uint32_t i = 9; i < 25; i++)
{
s[i] = 0;
}
s[8] = 0x8000000000000001;
for (int i = 0; i < 23; i++)
{
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = xor5(s[0], s[5], s[10], s[15], s[20]);
t[1] = xor5(s[1], s[6], s[11], s[16], s[21]);
t[2] = xor5(s[2], s[7], s[12], s[17], s[22]);
t[3] = xor5(s[3], s[8], s[13], s[18], s[23]);
t[4] = xor5(s[4], s[9], s[14], s[19], s[24]);
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROTL64(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[5] = xor3(s[5], t[4], u);
s[10] = xor3(s[10], t[4], u);
s[15] = xor3(s[15], t[4], u);
s[20] = xor3(s[20], t[4], u);
u = ROTL64(t[2], 1);
s[1] = xor3(s[1], t[0], u);
s[6] = xor3(s[6], t[0], u);
s[11] = xor3(s[11], t[0], u);
s[16] = xor3(s[16], t[0], u);
s[21] = xor3(s[21], t[0], u);
u = ROTL64(t[3], 1);
s[2] = xor3(s[2], t[1], u);
s[7] = xor3(s[7], t[1], u);
s[12] = xor3(s[12], t[1], u);
s[17] = xor3(s[17], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROTL64(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[8] = xor3(s[8], t[2], u);
s[13] = xor3(s[13], t[2], u);
s[18] = xor3(s[18], t[2], u);
s[23] = xor3(s[23], t[2], u);
u = ROTL64(t[0], 1);
s[4] = xor3(s[4], t[3], u);
s[9] = xor3(s[9], t[3], u);
s[14] = xor3(s[14], t[3], u);
s[19] = xor3(s[19], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROTL64(s[6], 44);
s[6] = ROTL64(s[9], 20);
s[9] = ROTL64(s[22], 61);
s[22] = ROTL64(s[14], 39);
s[14] = ROTL64(s[20], 18);
s[20] = ROTL64(s[2], 62);
s[2] = ROTL64(s[12], 43);
s[12] = ROTL64(s[13], 25);
s[13] = ROTL64(s[19], 8);
s[19] = ROTL64(s[23], 56);
s[23] = ROTL64(s[15], 41);
s[15] = ROTL64(s[4], 27);
s[4] = ROTL64(s[24], 14);
s[24] = ROTL64(s[21], 2);
s[21] = ROTL64(s[8], 55);
s[8] = ROTL64(s[16], 45);
s[16] = ROTL64(s[5], 36);
s[5] = ROTL64(s[3], 28);
s[3] = ROTL64(s[18], 21);
s[18] = ROTL64(s[17], 15);
s[17] = ROTL64(s[11], 10);
s[11] = ROTL64(s[7], 6);
s[7] = ROTL64(s[10], 3);
s[10] = ROTL64(u, 1);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
u = s[5]; v = s[6];
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
s[8] = chi(s[8], s[9], u);
s[9] = chi(s[9], u, v);
u = s[10]; v = s[11];
s[10] = chi(s[10], s[11], s[12]);
s[11] = chi(s[11], s[12], s[13]);
s[12] = chi(s[12], s[13], s[14]);
s[13] = chi(s[13], s[14], u);
s[14] = chi(s[14], u, v);
u = s[15]; v = s[16];
s[15] = chi(s[15], s[16], s[17]);
s[16] = chi(s[16], s[17], s[18]);
s[17] = chi(s[17], s[18], s[19]);
s[18] = chi(s[18], s[19], u);
s[19] = chi(s[19], u, v);
u = s[20]; v = s[21];
s[20] = chi(s[20], s[21], s[22]);
s[21] = chi(s[21], s[22], s[23]);
s[22] = chi(s[22], s[23], s[24]);
s[23] = chi(s[23], s[24], u);
s[24] = chi(s[24], u, v);
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[i];
}
/* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
t[0] = xor5(s[0], s[5], s[10], s[15], s[20]);
t[1] = xor5(s[1], s[6], s[11], s[16], s[21]);
t[2] = xor5(s[2], s[7], s[12], s[17], s[22]);
t[3] = xor5(s[3], s[8], s[13], s[18], s[23]);
t[4] = xor5(s[4], s[9], s[14], s[19], s[24]);
/* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
/* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
u = ROTL64(t[1], 1);
s[0] = xor3(s[0], t[4], u);
s[10] = xor3(s[10], t[4], u);
u = ROTL64(t[2], 1);
s[6] = xor3(s[6], t[0], u);
s[16] = xor3(s[16], t[0], u);
u = ROTL64(t[3], 1);
s[12] = xor3(s[12], t[1], u);
s[22] = xor3(s[22], t[1], u);
u = ROTL64(t[4], 1);
s[3] = xor3(s[3], t[2], u);
s[18] = xor3(s[18], t[2], u);
u = ROTL64(t[0], 1);
s[9] = xor3(s[9], t[3], u);
s[24] = xor3(s[24], t[3], u);
/* rho pi: b[..] = rotl(a[..], ..) */
u = s[1];
s[1] = ROTL64(s[6], 44);
s[6] = ROTL64(s[9], 20);
s[9] = ROTL64(s[22], 61);
s[2] = ROTL64(s[12], 43);
s[4] = ROTL64(s[24], 14);
s[8] = ROTL64(s[16], 45);
s[5] = ROTL64(s[3], 28);
s[3] = ROTL64(s[18], 21);
s[7] = ROTL64(s[10], 3);
/* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
u = s[0]; v = s[1];
s[0] = chi(s[0], s[1], s[2]);
s[1] = chi(s[1], s[2], s[3]);
s[2] = chi(s[2], s[3], s[4]);
s[3] = chi(s[3], s[4], u);
s[4] = chi(s[4], u, v);
s[5] = chi(s[5], s[6], s[7]);
s[6] = chi(s[6], s[7], s[8]);
s[7] = chi(s[7], s[8], s[9]);
/* iota: a[0,0] ^= round constant */
s[0] ^= keccak_round_constants[23];
}

2
libethcore/EthashAux.cpp

@ -52,6 +52,8 @@ const unsigned EthashProofOfWork::defaultGlobalWorkSizeMultiplier = 4096; // * C
const unsigned EthashProofOfWork::defaultMSPerBatch = 0;
const EthashProofOfWork::WorkPackage EthashProofOfWork::NullWorkPackage = EthashProofOfWork::WorkPackage();
//unsigned EthashProofOfWork::s_dagLoadMode = 0;
EthashAux::~EthashAux()
{
}

9
libethcore/EthashAux.h

@ -32,6 +32,7 @@ namespace dev
namespace eth
{
struct DAGChannel: public LogChannel { static const char* name(); static const int verbosity = 1; };
/// Proof of work definition for Ethash.
@ -52,17 +53,20 @@ struct EthashProofOfWork
struct WorkPackage
{
WorkPackage() = default;
WorkPackage(Ethash::BlockHeader const& _bh):
WorkPackage(Ethash::BlockHeader const& _bh) :
boundary(_bh.boundary()),
headerHash(_bh.hashWithout()),
seedHash(_bh.seedHash())
{}
{ }
void reset() { headerHash = h256(); }
operator bool() const { return headerHash != h256(); }
h256 boundary;
h256 headerHash; ///< When h256() means "pause until notified a new work package is available".
h256 seedHash;
uint64_t startNonce = 0;
int exSizeBits = -1;
};
static const WorkPackage NullWorkPackage;
@ -73,6 +77,7 @@ struct EthashProofOfWork
static const unsigned defaultGlobalWorkSizeMultiplier;
/// Default value of the milliseconds per global work size (per batch)
static const unsigned defaultMSPerBatch;
};
enum class DAGEraseMode

83
libethcore/EthashCUDAMiner.cpp

@ -112,16 +112,8 @@ EthashCUDAMiner::EthashCUDAMiner(ConstructionInfo const& _ci) :
Worker("cudaminer" + toString(index())),
m_hook( new EthashCUDAHook(this))
{
/*
#if defined(WIN32)
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
int num_cpus = sysinfo.dwNumberOfProcessors;
SetThreadAffinityMask(GetCurrentThread(), 1 << (index() % num_cpus));
SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
#endif
*/
}
EthashCUDAMiner::~EthashCUDAMiner()
{
pause();
@ -149,38 +141,65 @@ void EthashCUDAMiner::workLoop()
// take local copy of work since it may end up being overwritten by kickOff/pause.
try {
WorkPackage w = work();
//cnote << "seedhash" << "#" + m_minerSeed.hex().substr(0, 16);
cnote << "set work to" << "#" + w.headerHash.hex().substr(0, 8) + ", target " << "#" + w.boundary.hex().substr(0, 16);
cnote << "set work; seed: " << "#" + w.seedHash.hex().substr(0, 8) + ", target: " << "#" + w.boundary.hex().substr(0, 12);
if (!m_miner || m_minerSeed != w.seedHash)
{
unsigned device = s_devices[index()] > -1 ? s_devices[index()] : index();
if (s_dagLoadMode == DAG_LOAD_MODE_SEQUENTIAL)
{
while (s_dagLoadIndex < index()) {
this_thread::sleep_for(chrono::seconds(1));
}
}
else if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE)
{
if (device != s_dagCreateDevice)
{
// wait until DAG is created on selected device
while (s_dagInHostMemory == NULL) {
this_thread::sleep_for(chrono::seconds(1));
}
}
else
{
// reset load index
s_dagLoadIndex = 0;
}
}
cnote << "Initialising miner...";
m_minerSeed = w.seedHash;
delete m_miner;
m_miner = new ethash_cuda_miner;
unsigned device = s_devices[index()] > -1 ? s_devices[index()] : index();
EthashAux::LightType light;
light = EthashAux::light(w.seedHash);
//bytesConstRef dagData = dag->data();
bytesConstRef lightData = light->data();
m_miner->init(light->light, lightData.data(), lightData.size(), device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), &s_dagInHostMemory);
s_dagLoadIndex++;
EthashAux::FullType dag;
while (true)
if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE)
{
if ((dag = EthashAux::full(w.seedHash, true)))
break;
if (shouldStop())
if (s_dagLoadIndex >= s_numInstances && s_dagInHostMemory)
{
delete m_miner;
m_miner = nullptr;
return;
// all devices have loaded DAG, we can free now
delete[] s_dagInHostMemory;
s_dagInHostMemory = NULL;
cout << "Freeing DAG from host" << endl;
}
cnote << "Awaiting DAG";
this_thread::sleep_for(chrono::milliseconds(500));
}
bytesConstRef dagData = dag->data();
m_miner->init(dagData.data(), dagData.size(), device);
}
uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192);
m_miner->search(w.headerHash.data(), upper64OfBoundary, *m_hook);
uint64_t startN;
if (w.exSizeBits >= 0)
startN = w.startNonce | ((uint64_t)index() << (64 - 4 - w.exSizeBits)); // this can support up to 16 devices
m_miner->search(w.headerHash.data(), upper64OfBoundary, *m_hook, (w.exSizeBits >= 0), startN);
}
catch (std::runtime_error const& _e)
{
@ -217,17 +236,15 @@ bool EthashCUDAMiner::configureGPU(
unsigned _numStreams,
unsigned _extraGPUMemory,
unsigned _scheduleFlag,
uint64_t _currentBlock
uint64_t _currentBlock,
unsigned _dagLoadMode,
unsigned _dagCreateDevice
)
{
s_dagLoadMode = _dagLoadMode;
s_dagCreateDevice = _dagCreateDevice;
_blockSize = ((_blockSize + 7) / 8) * 8;
/*
if (_blockSize != 32 && _blockSize != 64 && _blockSize != 128)
{
cout << "Given localWorkSize of " << toString(_blockSize) << "is invalid. Must be either 32,64 or 128" << endl;
return false;
}
*/
if (!ethash_cuda_miner::configureGPU(
s_devices,
_blockSize,

5
libethcore/EthashCUDAMiner.h

@ -53,7 +53,9 @@ namespace eth
unsigned _numStreams,
unsigned _extraGPUMemory,
unsigned _scheduleFlag,
uint64_t _currentBlock
uint64_t _currentBlock,
unsigned _dagLoadMode,
unsigned _dagCreateDevice
);
static void setNumInstances(unsigned _instances)
{
@ -84,6 +86,7 @@ namespace eth
static unsigned s_deviceId;
static unsigned s_numInstances;
static int s_devices[16];
};
}
}

45
libethcore/EthashGPUMiner.cpp

@ -107,7 +107,7 @@ int EthashGPUMiner::s_devices[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
EthashGPUMiner::EthashGPUMiner(ConstructionInfo const& _ci):
GenericMiner<EthashProofOfWork>(_ci),
Worker("gpuminer" + toString(index())),
Worker("openclminer" + toString(index())),
m_hook(new EthashCLHook(this))
{
}
@ -139,9 +139,16 @@ void EthashGPUMiner::workLoop()
// take local copy of work since it may end up being overwritten by kickOff/pause.
try {
WorkPackage w = work();
cnote << "set work to:" << "#" + w.headerHash.hex().substr(0, 8) + ", target " << "#" + w.boundary.hex().substr(0, 16);
cnote << "set work; seed: " << "#" + w.seedHash.hex().substr(0, 8) + ", target: " << "#" + w.boundary.hex().substr(0, 12);
if (!m_miner || m_minerSeed != w.seedHash)
{
if (s_dagLoadMode == DAG_LOAD_MODE_SEQUENTIAL)
{
while (s_dagLoadIndex < index()) {
this_thread::sleep_for(chrono::seconds(1));
}
}
cnote << "Initialising miner...";
m_minerSeed = w.seedHash;
@ -150,6 +157,7 @@ void EthashGPUMiner::workLoop()
unsigned device = s_devices[index()] > -1 ? s_devices[index()] : index();
/*
EthashAux::FullType dag;
while (true)
{
@ -164,12 +172,21 @@ void EthashGPUMiner::workLoop()
cnote << "Awaiting DAG";
this_thread::sleep_for(chrono::milliseconds(500));
}
bytesConstRef dagData = dag->data();
m_miner->init(dagData.data(), dagData.size(), s_platformId, device);
*/
EthashAux::LightType light;
light = EthashAux::light(w.seedHash);
bytesConstRef lightData = light->data();
m_miner->init(light->light, lightData.data(), lightData.size(), s_platformId, device);
s_dagLoadIndex++;
}
uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192);
m_miner->search(w.headerHash.data(), upper64OfBoundary, *m_hook);
uint64_t startN;
if (w.exSizeBits >= 0)
startN = w.startNonce | ((uint64_t)index() << (64 - 4 - w.exSizeBits)); // this can support up to 16 devices
m_miner->search(w.headerHash.data(), upper64OfBoundary, *m_hook, (w.exSizeBits >= 0), startN);
}
catch (cl::Error const& _e)
{
@ -207,27 +224,27 @@ bool EthashGPUMiner::configureGPU(
unsigned _deviceId,
bool _allowCPU,
unsigned _extraGPUMemory,
uint64_t _currentBlock
uint64_t _currentBlock,
unsigned _dagLoadMode,
unsigned _dagCreateDevice
)
{
s_dagLoadMode = _dagLoadMode;
s_dagCreateDevice = _dagCreateDevice;
s_platformId = _platformId;
s_deviceId = _deviceId;
_localWorkSize = ((_localWorkSize + 7) / 8) * 8;
/*
if (_localWorkSize != 32 && _localWorkSize != 64 && _localWorkSize != 128 && _localWorkSize != 256)
{
cout << "Given localWorkSize of " << toString(_localWorkSize) << "is invalid. Must be either 32,64,128 or 256" << endl;
return false;
}
*/
if (!ethash_cl_miner::configureGPU(
_platformId,
_localWorkSize,
_globalWorkSizeMultiplier * _localWorkSize,
_allowCPU,
_extraGPUMemory,
_currentBlock)
_currentBlock
)
)
{
cout << "No GPU device with sufficient memory was found. Can't GPU mine. Remove the -G argument" << endl;

5
libethcore/EthashGPUMiner.h

@ -52,7 +52,9 @@ public:
unsigned _deviceId,
bool _allowCPU,
unsigned _extraGPUMemory,
uint64_t _currentBlock
uint64_t _currentBlock,
unsigned _dagLoadMode,
unsigned _dagCreateDevice
);
static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, getNumDevices()); }
static void setDevices(unsigned * _devices, unsigned _selectedDeviceCount)
@ -81,6 +83,7 @@ private:
static unsigned s_deviceId;
static unsigned s_numInstances;
static int s_devices[16];
};
}

2
libethcore/EthashSealEngine.cpp

@ -59,7 +59,7 @@ void EthashSealEngine::generateSeal(BlockInfo const& _bi)
{
m_sealing = Ethash::BlockHeader(_bi);
m_farm.setWork(m_sealing);
m_farm.start(m_sealer);
m_farm.start(m_sealer, false);
m_farm.setWork(m_sealing); // TODO: take out one before or one after...
bytes shouldPrecompute = option("precomputeDAG");
if (!shouldPrecompute.empty() && shouldPrecompute[0] == 1)

23
libethcore/Farm.h

@ -67,7 +67,7 @@ public:
void setWork(WorkPackage const& _wp)
{
WriteGuard l(x_minerWork);
if (_wp.headerHash == m_work.headerHash)
if (_wp.headerHash == m_work.headerHash && _wp.startNonce == m_work.startNonce)
return;
m_work = _wp;
for (auto const& m: m_miners)
@ -80,7 +80,7 @@ public:
/**
* @brief Start a number of miners.
*/
bool start(std::string const& _sealer)
bool start(std::string const& _sealer, bool mixed)
{
WriteGuard l(x_minerWork);
if (!m_miners.empty() && m_lastSealer == _sealer)
@ -88,10 +88,23 @@ public:
if (!m_sealers.count(_sealer))
return false;
m_miners.clear();
if (!mixed)
{
m_miners.clear();
}
auto ins = m_sealers[_sealer].instances();
m_miners.reserve(ins);
for (unsigned i = 0; i < ins; ++i)
unsigned start = 0;
if (!mixed)
{
m_miners.reserve(ins);
}
else
{
start = m_miners.size();
ins += start;
m_miners.reserve(ins);
}
for (unsigned i = start; i < ins; ++i)
{
m_miners.push_back(std::shared_ptr<Miner>(m_sealers[_sealer].create(std::make_pair(this, i))));
m_miners.back()->setWork(m_work);

19
libethcore/Miner.cpp

@ -0,0 +1,19 @@
#include "Miner.h"
#include "EthashAux.h"
using namespace dev;
using namespace eth;
template <>
unsigned dev::eth::GenericMiner<dev::eth::EthashProofOfWork>::s_dagLoadMode = 0;
template <>
volatile unsigned dev::eth::GenericMiner<dev::eth::EthashProofOfWork>::s_dagLoadIndex = 0;
template <>
unsigned dev::eth::GenericMiner<dev::eth::EthashProofOfWork>::s_dagCreateDevice = 0;
template <>
volatile void* dev::eth::GenericMiner<dev::eth::EthashProofOfWork>::s_dagInHostMemory = NULL;

31
libethcore/Miner.h

@ -24,6 +24,7 @@
#include <thread>
#include <list>
#include <atomic>
#include <string>
#include <boost/timer.hpp>
#include <libdevcore/Common.h>
#include <libdevcore/Log.h>
@ -34,6 +35,24 @@
#define MINER_WAIT_STATE_WORK 1
#define MINER_WAIT_STATE_DAG 2
#define DAG_LOAD_MODE_PARALLEL 0
#define DAG_LOAD_MODE_SEQUENTIAL 1
#define DAG_LOAD_MODE_SINGLE 2
#define STRATUM_PROTOCOL_STRATUM 0
#define STRATUM_PROTOCOL_ETHPROXY 1
#define STRATUM_PROTOCOL_ETHEREUMSTRATUM 2
using namespace std;
typedef struct {
string host;
string port;
string user;
string pass;
} cred_t;
namespace dev
{
@ -44,7 +63,8 @@ enum class MinerType
{
CPU,
CL,
CUDA
CUDA,
Mixed
};
struct MineInfo: public WorkingProgress {};
@ -91,6 +111,7 @@ inline std::ostream& operator<<(std::ostream& os, SolutionStats s)
template <class PoW> class GenericMiner;
/**
* @brief Class for hosting one or more Miners.
* @warning Must be implemented in a threadsafe manner since it will be called from multiple
@ -162,6 +183,7 @@ public:
protected:
// REQUIRED TO BE REIMPLEMENTED BY A SUBCLASS:
/**
@ -199,6 +221,10 @@ protected:
void accumulateHashes(unsigned _n) { m_hashCount += _n; }
static unsigned s_dagLoadMode;
static volatile unsigned s_dagLoadIndex;
static unsigned s_dagCreateDevice;
static volatile void* s_dagInHostMemory;
private:
FarmFace* m_farm = nullptr;
unsigned m_index;
@ -207,6 +233,9 @@ private:
WorkPackage m_work;
mutable Mutex x_work;
bool m_dagLoaded = false;
};
}

301
libstratum/EthStratumClient.cpp

@ -1,10 +1,33 @@
#include "EthStratumClient.h"
#include <libdevcore/Log.h>
#include <libethash/endian.h>
using boost::asio::ip::tcp;
EthStratumClient::EthStratumClient(GenericFarm<EthashProofOfWork> * f, MinerType m, string const & host, string const & port, string const & user, string const & pass, int const & retries, int const & worktimeout, bool const & precompute)
static void diffToTarget(uint32_t *target, double diff)
{
uint32_t target2[8];
uint64_t m;
int k;
for (k = 6; k > 0 && diff > 1.0; k--)
diff /= 4294967296.0;
m = (uint64_t)(4294901760.0 / diff);
if (m == 0 && k == 6)
memset(target2, 0xff, 32);
else {
memset(target2, 0, 32);
target2[k] = (uint32_t)m;
target2[k + 1] = (uint32_t)(m >> 32);
}
for (int i = 0; i < 32; i++)
((uint8_t*)target)[31 - i] = ((uint8_t*)target2)[i];
}
EthStratumClient::EthStratumClient(GenericFarm<EthashProofOfWork> * f, MinerType m, string const & host, string const & port, string const & user, string const & pass, int const & retries, int const & worktimeout, int const & protocol, string const & email)
: m_socket(m_io_service)
{
m_minerType = m;
@ -17,11 +40,13 @@ EthStratumClient::EthStratumClient(GenericFarm<EthashProofOfWork> * f, MinerType
m_authorized = false;
m_connected = false;
m_precompute = precompute;
m_pending = 0;
m_maxRetries = retries;
m_worktimeout = worktimeout;
m_protocol = protocol;
m_email = email;
p_farm = f;
p_worktimer = nullptr;
connect();
@ -65,25 +90,16 @@ void EthStratumClient::connect()
void EthStratumClient::reconnect()
{
/*
if (p_farm->isMining())
{
cnote << "Stopping farm";
p_farm->stop();
}
*/
if (p_worktimer) {
p_worktimer->cancel();
p_worktimer = nullptr;
}
m_io_service.reset();
m_socket.close();
//m_socket.close(); // leads to crashes on Linux
m_authorized = false;
m_connected = false;
if (!m_failover.host.empty())
{
m_retries++;
@ -153,15 +169,46 @@ void EthStratumClient::connect_handler(const boost::system::error_code& ec, tcp:
{
cnote << "Starting farm";
if (m_minerType == MinerType::CPU)
p_farm->start("cpu");
p_farm->start("cpu", false);
else if (m_minerType == MinerType::CL)
p_farm->start("opencl");
p_farm->start("opencl", false);
else if (m_minerType == MinerType::CUDA)
p_farm->start("cuda");
p_farm->start("cuda", false);
else if (m_minerType == MinerType::Mixed) {
p_farm->start("cuda", false);
p_farm->start("opencl", true);
}
}
std::ostream os(&m_requestBuffer);
os << "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": []}\n";
string user;
size_t p;
switch (m_protocol) {
case STRATUM_PROTOCOL_STRATUM:
os << "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": []}\n";
break;
case STRATUM_PROTOCOL_ETHPROXY:
p = p_active->user.find_first_of(".");
user = p_active->user.substr(0, p);
if (p + 1 <= p_active->user.length())
m_worker = p_active->user.substr(p + 1);
else
m_worker = "";
if (m_email.empty())
{
os << "{\"id\": 1, \"worker\":\"" << m_worker << "\", \"method\": \"eth_submitLogin\", \"params\": [\"" << user << "\"]}\n";
}
else
{
os << "{\"id\": 1, \"worker\":\"" << m_worker << "\", \"method\": \"eth_submitLogin\", \"params\": [\"" << user << "\", \"" << m_email << "\"]}\n";
}
break;
case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
os << "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": [\"ethminer/" << ETH_PROJECT_VERSION << "\",\"EthereumStratum/1.0.0\"]}\n";
break;
}
async_write(m_socket, m_requestBuffer,
boost::bind(&EthStratumClient::handleResponse, this,
@ -213,7 +260,7 @@ void EthStratumClient::readResponse(const boost::system::error_code& ec, std::si
std::string response;
getline(is, response);
if (response.front() == '{' && response.back() == '}')
if (!response.empty() && response.front() == '{' && response.back() == '}')
{
Json::Value responseObject;
Json::Reader reader;
@ -222,12 +269,12 @@ void EthStratumClient::readResponse(const boost::system::error_code& ec, std::si
processReponse(responseObject);
m_response = response;
}
else
else
{
cwarn << "Parse response failed: " << reader.getFormattedErrorMessages();
}
}
else
else if (m_protocol != STRATUM_PROTOCOL_ETHPROXY)
{
cwarn << "Discarding incomplete response";
}
@ -242,6 +289,16 @@ void EthStratumClient::readResponse(const boost::system::error_code& ec, std::si
}
}
void EthStratumClient::processExtranonce(std::string& enonce)
{
m_extraNonceHexSize = enonce.length();
cnote << "Extranonce set to " << enonce;
for (int i = enonce.length(); i < 16; ++i) enonce += "0";
m_extraNonce = h64(enonce);
}
void EthStratumClient::processReponse(Json::Value& responseObject)
{
Json::Value error = responseObject.get("error", new Json::Value);
@ -255,16 +312,37 @@ void EthStratumClient::processReponse(Json::Value& responseObject)
int id = responseObject.get("id", Json::Value::null).asInt();
switch (id)
{
case 1:
cnote << "Subscribed to stratum server";
os << "{\"id\": 2, \"method\": \"mining.authorize\", \"params\": [\"" << p_active->user << "\",\"" << p_active->pass << "\"]}\n";
case 1:
if (m_protocol == STRATUM_PROTOCOL_ETHEREUMSTRATUM)
{
m_nextWorkDifficulty = 1;
params = responseObject.get("result", Json::Value::null);
if (params.isArray())
{
std::string enonce = params.get((Json::Value::ArrayIndex)1, "").asString();
processExtranonce(enonce);
}
os << "{\"id\": 2, \"method\": \"mining.extranonce.subscribe\", \"params\": []}\n";
}
if (m_protocol != STRATUM_PROTOCOL_ETHPROXY)
{
cnote << "Subscribed to stratum server";
os << "{\"id\": 3, \"method\": \"mining.authorize\", \"params\": [\"" << p_active->user << "\",\"" << p_active->pass << "\"]}\n";
}
else
{
m_authorized = true;
os << "{\"id\": 5, \"method\": \"eth_getWork\", \"params\": []}\n"; // not strictly required but it does speed up initialization
}
async_write(m_socket, m_requestBuffer,
boost::bind(&EthStratumClient::handleResponse, this,
boost::asio::placeholders::error));
break;
case 2:
// nothing to do...
break;
case 3:
m_authorized = responseObject.get("result", Json::Value::null).asBool();
if (!m_authorized)
{
@ -285,76 +363,121 @@ void EthStratumClient::processReponse(Json::Value& responseObject)
}
break;
default:
string method = responseObject.get("method", "").asString();
string method, workattr;
unsigned index;
if (m_protocol != STRATUM_PROTOCOL_ETHPROXY)
{
method = responseObject.get("method", "").asString();
workattr = "params";
index = 1;
}
else
{
method = "mining.notify";
workattr = "result";
index = 0;
}
if (method == "mining.notify")
{
params = responseObject.get("params", Json::Value::null);
params = responseObject.get(workattr.c_str(), Json::Value::null);
if (params.isArray())
{
string job = params.get((Json::Value::ArrayIndex)0, "").asString();
string sHeaderHash = params.get((Json::Value::ArrayIndex)1, "").asString();
string sSeedHash = params.get((Json::Value::ArrayIndex)2, "").asString();
string sShareTarget = params.get((Json::Value::ArrayIndex)3, "").asString();
//bool cleanJobs = params.get((Json::Value::ArrayIndex)4, "").asBool();
// coinmine.pl fix
int l = sShareTarget.length();
if (l < 66)
sShareTarget = "0x" + string(66 - l, '0') + sShareTarget.substr(2);
if (sHeaderHash != "" && sSeedHash != "" && sShareTarget != "")
{
cnote << "Received new job #" + job.substr(0,8);
//cnote << "Header hash: " + sHeaderHash;
//cnote << "Seed hash: " + sSeedHash;
//cnote << "Share target: " + sShareTarget;
h256 seedHash = h256(sSeedHash);
h256 headerHash = h256(sHeaderHash);
EthashAux::FullType dag;
if (m_protocol == STRATUM_PROTOCOL_ETHEREUMSTRATUM)
{
string sSeedHash = params.get((Json::Value::ArrayIndex)1, "").asString();
string sHeaderHash = params.get((Json::Value::ArrayIndex)2, "").asString();
if (seedHash != m_current.seedHash)
{
cnote << "Grabbing DAG for" << seedHash;
}
if (!(dag = EthashAux::full(seedHash, true, [&](unsigned _pc){ m_waitState = _pc < 100 ? MINER_WAIT_STATE_DAG : MINER_WAIT_STATE_WORK; cnote << "Creating DAG. " << _pc << "% done..."; return 0; })))
{
BOOST_THROW_EXCEPTION(DAGCreationFailure());
}
if (m_precompute)
if (sHeaderHash != "" && sSeedHash != "")
{
EthashAux::computeFull(sha3(seedHash), true);
}
if (headerHash != m_current.headerHash)
{
//x_current.lock();
if (p_worktimer)
p_worktimer->cancel();
cnote << "Received new job #" + job;
h256 seedHash = h256(sSeedHash);
h256 headerHash = h256(sHeaderHash);
m_previous.headerHash = m_current.headerHash;
m_previous.seedHash = m_current.seedHash;
m_previous.boundary = m_current.boundary;
m_previous.startNonce = m_current.startNonce;
m_previous.exSizeBits = m_previous.exSizeBits;
m_previousJob = m_job;
m_current.headerHash = h256(sHeaderHash);
m_current.seedHash = seedHash;
m_current.boundary = h256(sShareTarget);// , h256::AlignRight);
m_current.boundary = h256();
diffToTarget((uint32_t*)m_current.boundary.data(), m_nextWorkDifficulty);
m_current.startNonce = ethash_swap_u64(*((uint64_t*)m_extraNonce.data()));
m_current.exSizeBits = m_extraNonceHexSize * 4;
m_job = job;
p_farm->setWork(m_current);
//x_current.unlock();
p_worktimer = new boost::asio::deadline_timer(m_io_service, boost::posix_time::seconds(m_worktimeout));
p_worktimer->async_wait(boost::bind(&EthStratumClient::work_timeout_handler, this, boost::asio::placeholders::error));
}
}
else
{
string sHeaderHash = params.get((Json::Value::ArrayIndex)index++, "").asString();
string sSeedHash = params.get((Json::Value::ArrayIndex)index++, "").asString();
string sShareTarget = params.get((Json::Value::ArrayIndex)index++, "").asString();
// coinmine.pl fix
int l = sShareTarget.length();
if (l < 66)
sShareTarget = "0x" + string(66 - l, '0') + sShareTarget.substr(2);
if (sHeaderHash != "" && sSeedHash != "" && sShareTarget != "")
{
cnote << "Received new job #" + job.substr(0, 8);
h256 seedHash = h256(sSeedHash);
h256 headerHash = h256(sHeaderHash);
if (headerHash != m_current.headerHash)
{
//x_current.lock();
if (p_worktimer)
p_worktimer->cancel();
m_previous.headerHash = m_current.headerHash;
m_previous.seedHash = m_current.seedHash;
m_previous.boundary = m_current.boundary;
m_previousJob = m_job;
m_current.headerHash = h256(sHeaderHash);
m_current.seedHash = seedHash;
m_current.boundary = h256(sShareTarget);
m_job = job;
p_farm->setWork(m_current);
//x_current.unlock();
p_worktimer = new boost::asio::deadline_timer(m_io_service, boost::posix_time::seconds(m_worktimeout));
p_worktimer->async_wait(boost::bind(&EthStratumClient::work_timeout_handler, this, boost::asio::placeholders::error));
}
}
}
}
}
else if (method == "mining.set_difficulty")
else if (method == "mining.set_difficulty" && m_protocol == STRATUM_PROTOCOL_ETHEREUMSTRATUM)
{
params = responseObject.get("params", Json::Value::null);
if (params.isArray())
{
m_nextWorkDifficulty = params.get((Json::Value::ArrayIndex)0, 1).asDouble();
if (m_nextWorkDifficulty <= 0.0001) m_nextWorkDifficulty = 0.0001;
cnote << "Difficulty set to " << m_nextWorkDifficulty;
}
}
else if (method == "mining.set_extranonce" && m_protocol == STRATUM_PROTOCOL_ETHEREUMSTRATUM)
{
params = responseObject.get("params", Json::Value::null);
if (params.isArray())
{
std::string enonce = params.get((Json::Value::ArrayIndex)0, "").asString();
processExtranonce(enonce);
}
}
else if (method == "client.get_version")
{
@ -384,11 +507,30 @@ bool EthStratumClient::submit(EthashProofOfWork::Solution solution) {
x_current.unlock();
cnote << "Solution found; Submitting to" << p_active->host << "...";
cnote << " Nonce:" << "0x" + solution.nonce.hex();
string minernonce;
if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM)
cnote << " Nonce:" << "0x" + solution.nonce.hex();
else
minernonce = solution.nonce.hex().substr(m_extraNonceHexSize, 16 - m_extraNonceHexSize);
if (EthashAux::eval(tempWork.seedHash, tempWork.headerHash, solution.nonce).value < tempWork.boundary)
{
string json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"0x" + solution.nonce.hex() + "\",\"0x" + tempWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
string json;
switch (m_protocol) {
case STRATUM_PROTOCOL_STRATUM:
json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"0x" + solution.nonce.hex() + "\",\"0x" + tempWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
break;
case STRATUM_PROTOCOL_ETHPROXY:
json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + solution.nonce.hex() + "\",\"0x" + tempWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
break;
case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"" + minernonce + "\"]}\n";
break;
}
std::ostream os(&m_requestBuffer);
os << json;
m_stale = false;
@ -399,7 +541,20 @@ bool EthStratumClient::submit(EthashProofOfWork::Solution solution) {
}
else if (EthashAux::eval(tempPreviousWork.seedHash, tempPreviousWork.headerHash, solution.nonce).value < tempPreviousWork.boundary)
{
string json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"0x" + solution.nonce.hex() + "\",\"0x" + tempPreviousWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
string json;
switch (m_protocol) {
case STRATUM_PROTOCOL_STRATUM:
json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"0x" + solution.nonce.hex() + "\",\"0x" + tempPreviousWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
break;
case STRATUM_PROTOCOL_ETHPROXY:
json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + solution.nonce.hex() + "\",\"0x" + tempPreviousWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
break;
case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"" + minernonce + "\"]}\n";
break;
}
std::ostream os(&m_requestBuffer);
os << json;
m_stale = true;

22
libstratum/EthStratumClient.h

@ -17,24 +17,18 @@ using boost::asio::ip::tcp;
using namespace dev;
using namespace dev::eth;
typedef struct {
string host;
string port;
string user;
string pass;
} cred_t;
class EthStratumClient
{
public:
EthStratumClient(GenericFarm<EthashProofOfWork> * f, MinerType m, string const & host, string const & port, string const & user, string const & pass, int const & retries, int const & worktimeout, bool const & precompute);
EthStratumClient(GenericFarm<EthashProofOfWork> * f, MinerType m, string const & host, string const & port, string const & user, string const & pass, int const & retries, int const & worktimeout, int const & protocol, string const & email);
~EthStratumClient();
void setFailover(string const & host, string const & port);
void setFailover(string const & host, string const & port, string const & user, string const & pass);
bool isRunning() { return m_running; }
bool isConnected() { return m_connected; }
bool isConnected() { return m_connected && m_authorized; }
h256 currentHeaderHash() { return m_current.headerHash; }
bool current() { return m_current; }
unsigned waitState() { return m_waitState; }
@ -59,9 +53,10 @@ private:
cred_t m_primary;
cred_t m_failover;
string m_worker; // eth-proxy only;
bool m_authorized;
bool m_connected;
bool m_precompute;
bool m_running = true;
int m_retries = 0;
@ -93,4 +88,13 @@ private:
boost::asio::deadline_timer * p_worktimer;
int m_protocol;
string m_email;
double m_nextWorkDifficulty;
h64 m_extraNonce;
int m_extraNonceHexSize;
void processExtranonce(std::string& enonce);
};

515
libstratum/EthStratumClientV2.cpp

@ -0,0 +1,515 @@
#include "EthStratumClientV2.h"
#include <libdevcore/Log.h>
#include <libethash/endian.h>
using boost::asio::ip::tcp;
static void diffToTarget(uint32_t *target, double diff)
{
uint32_t target2[8];
uint64_t m;
int k;
for (k = 6; k > 0 && diff > 1.0; k--)
diff /= 4294967296.0;
m = (uint64_t)(4294901760.0 / diff);
if (m == 0 && k == 6)
memset(target2, 0xff, 32);
else {
memset(target2, 0, 32);
target2[k] = (uint32_t)m;
target2[k + 1] = (uint32_t)(m >> 32);
}
for (int i = 0; i < 32; i++)
((uint8_t*)target)[31 - i] = ((uint8_t*)target2)[i];
}
EthStratumClientV2::EthStratumClientV2(GenericFarm<EthashProofOfWork> * f, MinerType m, string const & host, string const & port, string const & user, string const & pass, int const & retries, int const & worktimeout, int const & protocol, string const & email)
: Worker("stratum"),
m_socket(m_io_service)
{
m_minerType = m;
m_primary.host = host;
m_primary.port = port;
m_primary.user = user;
m_primary.pass = pass;
p_active = &m_primary;
m_authorized = false;
m_connected = false;
m_maxRetries = retries;
m_worktimeout = worktimeout;
m_protocol = protocol;
m_email = email;
p_farm = f;
p_worktimer = nullptr;
startWorking();
}
EthStratumClientV2::~EthStratumClientV2()
{
}
void EthStratumClientV2::setFailover(string const & host, string const & port)
{
setFailover(host, port, p_active->user, p_active->pass);
}
void EthStratumClientV2::setFailover(string const & host, string const & port, string const & user, string const & pass)
{
m_failover.host = host;
m_failover.port = port;
m_failover.user = user;
m_failover.pass = pass;
}
void EthStratumClientV2::workLoop()
{
while (m_running)
{
try {
if (!m_connected)
{
//m_io_service.run();
//boost::thread t(boost::bind(&boost::asio::io_service::run, &m_io_service));
connect();
}
read_until(m_socket, m_responseBuffer, "\n");
std::istream is(&m_responseBuffer);
std::string response;
getline(is, response);
if (!response.empty() && response.front() == '{' && response.back() == '}')
{
Json::Value responseObject;
Json::Reader reader;
if (reader.parse(response.c_str(), responseObject))
{
processReponse(responseObject);
m_response = response;
}
else
{
cwarn << "Parse response failed: " << reader.getFormattedErrorMessages();
}
}
else if (m_protocol != STRATUM_PROTOCOL_ETHPROXY)
{
cwarn << "Discarding incomplete response";
}
}
catch (std::exception const& _e) {
cwarn << _e.what();
reconnect();
}
}
}
void EthStratumClientV2::connect()
{
cnote << "Connecting to stratum server " << p_active->host + ":" + p_active->port;
tcp::resolver r(m_io_service);
tcp::resolver::query q(p_active->host, p_active->port);
tcp::resolver::iterator endpoint_iterator = r.resolve(q);
tcp::resolver::iterator end;
boost::system::error_code error = boost::asio::error::host_not_found;
while (error && endpoint_iterator != end)
{
m_socket.close();
m_socket.connect(*endpoint_iterator++, error);
}
if (error)
{
cerr << "Could not connect to stratum server " << p_active->host + ":" + p_active->port + ", " << error.message();
reconnect();
}
else
{
cnote << "Connected!";
m_connected = true;
if (!p_farm->isMining())
{
cnote << "Starting farm";
if (m_minerType == MinerType::CPU)
p_farm->start("cpu", false);
else if (m_minerType == MinerType::CL)
p_farm->start("opencl", false);
else if (m_minerType == MinerType::CUDA)
p_farm->start("cuda", false);
else if (m_minerType == MinerType::Mixed) {
p_farm->start("cuda", false);
p_farm->start("opencl", true);
}
}
std::ostream os(&m_requestBuffer);
string user;
size_t p;
switch (m_protocol) {
case STRATUM_PROTOCOL_STRATUM:
os << "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": []}\n";
break;
case STRATUM_PROTOCOL_ETHPROXY:
p = p_active->user.find_first_of(".");
user = p_active->user.substr(0, p);
if (p + 1 <= p_active->user.length())
m_worker = p_active->user.substr(p + 1);
else
m_worker = "";
if (m_email.empty())
{
os << "{\"id\": 1, \"worker\":\"" << m_worker << "\", \"method\": \"eth_submitLogin\", \"params\": [\"" << user << "\"]}\n";
}
else
{
os << "{\"id\": 1, \"worker\":\"" << m_worker << "\", \"method\": \"eth_submitLogin\", \"params\": [\"" << user << "\", \"" << m_email << "\"]}\n";
}
break;
case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
os << "{\"id\": 1, \"method\": \"mining.subscribe\", \"params\": [\"ethminer/" << ETH_PROJECT_VERSION << "\",\"EthereumStratum/1.0.0\"]}\n";
break;
}
write(m_socket, m_requestBuffer);
}
}
void EthStratumClientV2::reconnect()
{
if (p_worktimer) {
p_worktimer->cancel();
p_worktimer = nullptr;
}
//m_io_service.reset();
//m_socket.close(); // leads to crashes on Linux
m_authorized = false;
m_connected = false;
if (!m_failover.host.empty())
{
m_retries++;
if (m_retries > m_maxRetries)
{
if (m_failover.host == "exit") {
disconnect();
return;
}
else if (p_active == &m_primary)
{
p_active = &m_failover;
}
else {
p_active = &m_primary;
}
m_retries = 0;
}
}
cnote << "Reconnecting in 3 seconds...";
boost::asio::deadline_timer timer(m_io_service, boost::posix_time::seconds(3));
timer.wait();
}
void EthStratumClientV2::disconnect()
{
cnote << "Disconnecting";
m_connected = false;
m_running = false;
if (p_farm->isMining())
{
cnote << "Stopping farm";
p_farm->stop();
}
m_socket.close();
//m_io_service.stop();
}
void EthStratumClientV2::processExtranonce(std::string& enonce)
{
m_extraNonceHexSize = enonce.length();
cnote << "Extranonce set to " << enonce;
for (int i = enonce.length(); i < 16; ++i) enonce += "0";
m_extraNonce = h64(enonce);
}
void EthStratumClientV2::processReponse(Json::Value& responseObject)
{
Json::Value error = responseObject.get("error", new Json::Value);
if (error.isArray())
{
string msg = error.get(1, "Unknown error").asString();
cnote << msg;
}
std::ostream os(&m_requestBuffer);
Json::Value params;
int id = responseObject.get("id", Json::Value::null).asInt();
switch (id)
{
case 1:
if (m_protocol == STRATUM_PROTOCOL_ETHEREUMSTRATUM)
{
m_nextWorkDifficulty = 1;
params = responseObject.get("result", Json::Value::null);
if (params.isArray())
{
std::string enonce = params.get((Json::Value::ArrayIndex)1, "").asString();
processExtranonce(enonce);
}
os << "{\"id\": 2, \"method\": \"mining.extranonce.subscribe\", \"params\": []}\n";
}
if (m_protocol != STRATUM_PROTOCOL_ETHPROXY)
{
cnote << "Subscribed to stratum server";
os << "{\"id\": 3, \"method\": \"mining.authorize\", \"params\": [\"" << p_active->user << "\",\"" << p_active->pass << "\"]}\n";
write(m_socket, m_requestBuffer);
}
else
{
m_authorized = true;
os << "{\"id\": 5, \"method\": \"eth_getWork\", \"params\": []}\n"; // not strictly required but it does speed up initialization
write(m_socket, m_requestBuffer);
}
break;
case 2:
// nothing to do...
break;
case 3:
m_authorized = responseObject.get("result", Json::Value::null).asBool();
if (!m_authorized)
{
cnote << "Worker not authorized:" << p_active->user;
disconnect();
return;
}
cnote << "Authorized worker " << p_active->user;
break;
case 4:
if (responseObject.get("result", false).asBool()) {
cnote << "B-) Submitted and accepted.";
p_farm->acceptedSolution(m_stale);
}
else {
cwarn << ":-( Not accepted.";
p_farm->rejectedSolution(m_stale);
}
break;
default:
string method, workattr;
unsigned index;
if (m_protocol != STRATUM_PROTOCOL_ETHPROXY)
{
method = responseObject.get("method", "").asString();
workattr = "params";
index = 1;
}
else
{
method = "mining.notify";
workattr = "result";
index = 0;
}
if (method == "mining.notify")
{
params = responseObject.get(workattr, Json::Value::null);
if (params.isArray())
{
string job = params.get((Json::Value::ArrayIndex)0, "").asString();
if (m_protocol == STRATUM_PROTOCOL_ETHEREUMSTRATUM)
{
string job = params.get((Json::Value::ArrayIndex)0, "").asString();
string sSeedHash = params.get((Json::Value::ArrayIndex)1, "").asString();
string sHeaderHash = params.get((Json::Value::ArrayIndex)2, "").asString();
if (sHeaderHash != "" && sSeedHash != "")
{
cnote << "Received new job #" + job;
h256 seedHash = h256(sSeedHash);
h256 headerHash = h256(sHeaderHash);
m_previous.headerHash = m_current.headerHash;
m_previous.seedHash = m_current.seedHash;
m_previous.boundary = m_current.boundary;
m_previous.startNonce = m_current.startNonce;
m_previous.exSizeBits = m_previous.exSizeBits;
m_previousJob = m_job;
m_current.headerHash = h256(sHeaderHash);
m_current.seedHash = seedHash;
m_current.boundary = h256();
diffToTarget((uint32_t*)m_current.boundary.data(), m_nextWorkDifficulty);
m_current.startNonce = ethash_swap_u64(*((uint64_t*)m_extraNonce.data()));
m_current.exSizeBits = m_extraNonceHexSize * 4;
m_job = job;
p_farm->setWork(m_current);
}
}
else
{
string sHeaderHash = params.get((Json::Value::ArrayIndex)index++, "").asString();
string sSeedHash = params.get((Json::Value::ArrayIndex)index++, "").asString();
string sShareTarget = params.get((Json::Value::ArrayIndex)index++, "").asString();
// coinmine.pl fix
int l = sShareTarget.length();
if (l < 66)
sShareTarget = "0x" + string(66 - l, '0') + sShareTarget.substr(2);
if (sHeaderHash != "" && sSeedHash != "" && sShareTarget != "")
{
cnote << "Received new job #" + job.substr(0, 8);
h256 seedHash = h256(sSeedHash);
h256 headerHash = h256(sHeaderHash);
if (headerHash != m_current.headerHash)
{
//x_current.lock();
//if (p_worktimer)
// p_worktimer->cancel();
m_previous.headerHash = m_current.headerHash;
m_previous.seedHash = m_current.seedHash;
m_previous.boundary = m_current.boundary;
m_previousJob = m_job;
m_current.headerHash = h256(sHeaderHash);
m_current.seedHash = seedHash;
m_current.boundary = h256(sShareTarget);
m_job = job;
p_farm->setWork(m_current);
//x_current.unlock();
//p_worktimer = new boost::asio::deadline_timer(m_io_service, boost::posix_time::seconds(m_worktimeout));
//p_worktimer->async_wait(boost::bind(&EthStratumClientV2::work_timeout_handler, this, boost::asio::placeholders::error));
}
}
}
}
}
else if (method == "mining.set_difficulty" && m_protocol == STRATUM_PROTOCOL_ETHEREUMSTRATUM)
{
params = responseObject.get("params", Json::Value::null);
if (params.isArray())
{
m_nextWorkDifficulty = params.get((Json::Value::ArrayIndex)0, 1).asDouble();
if (m_nextWorkDifficulty <= 0.0001) m_nextWorkDifficulty = 0.0001;
cnote << "Difficulty set to " << m_nextWorkDifficulty;
}
}
else if (method == "mining.set_extranonce" && m_protocol == STRATUM_PROTOCOL_ETHEREUMSTRATUM)
{
params = responseObject.get("params", Json::Value::null);
if (params.isArray())
{
std::string enonce = params.get((Json::Value::ArrayIndex)0, "").asString();
processExtranonce(enonce);
}
}
else if (method == "client.get_version")
{
os << "{\"error\": null, \"id\" : " << id << ", \"result\" : \"" << ETH_PROJECT_VERSION << "\"}\n";
write(m_socket, m_requestBuffer);
}
break;
}
}
void EthStratumClientV2::work_timeout_handler(const boost::system::error_code& ec) {
if (!ec) {
cnote << "No new work received in" << m_worktimeout << "seconds.";
reconnect();
}
}
bool EthStratumClientV2::submit(EthashProofOfWork::Solution solution) {
x_current.lock();
EthashProofOfWork::WorkPackage tempWork(m_current);
string temp_job = m_job;
EthashProofOfWork::WorkPackage tempPreviousWork(m_previous);
string temp_previous_job = m_previousJob;
x_current.unlock();
cnote << "Solution found; Submitting to" << p_active->host << "...";
string minernonce;
if (m_protocol != STRATUM_PROTOCOL_ETHEREUMSTRATUM)
cnote << " Nonce:" << "0x" + solution.nonce.hex();
else
minernonce = solution.nonce.hex().substr(m_extraNonceHexSize, 16 - m_extraNonceHexSize);
if (EthashAux::eval(tempWork.seedHash, tempWork.headerHash, solution.nonce).value < tempWork.boundary)
{
string json;
switch (m_protocol) {
case STRATUM_PROTOCOL_STRATUM:
json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"0x" + solution.nonce.hex() + "\",\"0x" + tempWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
break;
case STRATUM_PROTOCOL_ETHPROXY:
json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + solution.nonce.hex() + "\",\"0x" + tempWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
break;
case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_job + "\",\"" + minernonce + "\"]}\n";
break;
}
std::ostream os(&m_requestBuffer);
os << json;
m_stale = false;
write(m_socket, m_requestBuffer);
return true;
}
else if (EthashAux::eval(tempPreviousWork.seedHash, tempPreviousWork.headerHash, solution.nonce).value < tempPreviousWork.boundary)
{
string json;
switch (m_protocol) {
case STRATUM_PROTOCOL_STRATUM:
json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"0x" + solution.nonce.hex() + "\",\"0x" + tempPreviousWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
break;
case STRATUM_PROTOCOL_ETHPROXY:
json = "{\"id\": 4, \"worker\":\"" + m_worker + "\", \"method\": \"eth_submitWork\", \"params\": [\"0x" + solution.nonce.hex() + "\",\"0x" + tempPreviousWork.headerHash.hex() + "\",\"0x" + solution.mixHash.hex() + "\"]}\n";
break;
case STRATUM_PROTOCOL_ETHEREUMSTRATUM:
json = "{\"id\": 4, \"method\": \"mining.submit\", \"params\": [\"" + p_active->user + "\",\"" + temp_previous_job + "\",\"" + minernonce + "\"]}\n";
break;
} std::ostream os(&m_requestBuffer);
os << json;
m_stale = true;
cwarn << "Submitting stale solution.";
write(m_socket, m_requestBuffer);
return true;
}
else {
m_stale = false;
cwarn << "FAILURE: GPU gave incorrect result!";
p_farm->failedSolution();
}
return false;
}

95
libstratum/EthStratumClientV2.h

@ -0,0 +1,95 @@
#include <iostream>
#include <boost/array.hpp>
#include <boost/asio.hpp>
#include <boost/bind.hpp>
#include <json/json.h>
#include <libdevcore/Log.h>
#include <libdevcore/FixedHash.h>
#include <libdevcore/Worker.h>
#include <libethcore/Farm.h>
#include <libethcore/EthashAux.h>
#include <libethcore/Miner.h>
#include "BuildInfo.h"
using namespace std;
using namespace boost::asio;
using boost::asio::ip::tcp;
using namespace dev;
using namespace dev::eth;
class EthStratumClientV2 : public Worker
{
public:
EthStratumClientV2(GenericFarm<EthashProofOfWork> * f, MinerType m, string const & host, string const & port, string const & user, string const & pass, int const & retries, int const & worktimeout, int const & protocol, string const & email);
~EthStratumClientV2();
void setFailover(string const & host, string const & port);
void setFailover(string const & host, string const & port, string const & user, string const & pass);
bool isRunning() { return m_running; }
bool isConnected() { return m_connected && m_authorized; }
h256 currentHeaderHash() { return m_current.headerHash; }
bool current() { return m_current; }
unsigned waitState() { return m_waitState; }
bool submit(EthashProofOfWork::Solution solution);
void reconnect();
private:
void workLoop() override;
void connect();
void disconnect();
void work_timeout_handler(const boost::system::error_code& ec);
void processReponse(Json::Value& responseObject);
MinerType m_minerType;
cred_t * p_active;
cred_t m_primary;
cred_t m_failover;
string m_worker; // eth-proxy only;
bool m_authorized;
bool m_connected;
bool m_running = true;
int m_retries = 0;
int m_maxRetries;
int m_worktimeout = 60;
int m_waitState = MINER_WAIT_STATE_WORK;
string m_response;
GenericFarm<EthashProofOfWork> * p_farm;
mutex x_current;
EthashProofOfWork::WorkPackage m_current;
EthashProofOfWork::WorkPackage m_previous;
bool m_stale = false;
string m_job;
string m_previousJob;
EthashAux::FullType m_dag;
boost::asio::io_service m_io_service;
tcp::socket m_socket;
boost::asio::streambuf m_requestBuffer;
boost::asio::streambuf m_responseBuffer;
boost::asio::deadline_timer * p_worktimer;
int m_protocol;
string m_email;
double m_nextWorkDifficulty;
h64 m_extraNonce;
int m_extraNonceHexSize;
void processExtranonce(std::string& enonce);
};

19
package.sh

@ -1,19 +0,0 @@
#!/bin/bash
opwd="$PWD"
br=$(git branch | grep '\*' | sed 's/^..//')
n=cpp-ethereum-src-$(date "+%Y%m%d%H%M%S" --date="1970-01-01 $(git log -1 --date=short --pretty=format:%ct) sec GMT")-$(grep "Version = " libdevcore/Common.cpp | sed 's/^[^"]*"//' | sed 's/".*$//')-$(git rev-parse HEAD | cut -c1-6)
cd /tmp
git clone "$opwd" $n
cd $n
git checkout $br
rm -f package.sh
cd ..
tar c $n | bzip2 -- > $opwd/../${n}.tar.bz2
rm -rf $n
cd $opwd
echo "SHA1(${n}.tar.bz2) = $(shasum $opwd/../${n}.tar.bz2 | cut -d' ' -f 1)"

BIN
releases/ethminer-0.9.41-genoil-1.0.1.zip

Binary file not shown.

BIN
releases/ethminer-0.9.41-genoil-1.0.2.zip

Binary file not shown.

BIN
releases/ethminer-0.9.41-genoil-1.0.3.zip

Binary file not shown.

BIN
releases/ethminer-0.9.41-genoil-1.0.4b3.zip

Binary file not shown.

BIN
releases/ethminer-0.9.41-genoil-1.0.5.zip

Binary file not shown.

BIN
releases/ethminer-0.9.41-genoil-1.0.6.zip

Binary file not shown.

BIN
releases/ethminer-0.9.41-genoil-1.0.7.zip

Binary file not shown.

BIN
releases/ethminer-0.9.41-genoil-1.1.6.zip

Binary file not shown.

BIN
releases/ethminer-0.9.41-genoil-1.1.7.zip

Binary file not shown.
Loading…
Cancel
Save