work in progress

9 years ago · 6b091ff10b
9 changed files with 112 additions and 192 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -2,7 +2,7 @@
 cmake_minimum_required(VERSION 2.8.12)
 set(PROJECT_VERSION "0.9.41")
-set(GENOIL_VERSION "1.0.3")
+set(GENOIL_VERSION "1.0.4")
 if (${CMAKE_VERSION} VERSION_GREATER 3.0)
 	cmake_policy(SET CMP0042 OLD) 	# fix MACOSX_RPATH
 	cmake_policy(SET CMP0048 NEW) 	# allow VERSION argument in project()
@ -42,7 +42,7 @@ else ()
 endif ()
 #defaults:
-set(D_CMAKE_BUILD_TYPE "RelWithDebInfo")
+set(D_CMAKE_BUILD_TYPE "Release")
 set(D_SERPENT ${DECENT_PLATFORM})
 set(D_USENPM OFF)
 set(D_GUI ON)
@ -61,6 +61,7 @@ set(D_ROCKSDB OFF)
 set(D_OLYMPIC OFF)
 set(D_MINER ON)
 set(D_ETHKEY ON)
 set(D_ETHSTRATUM OFF)
 if (BUNDLE STREQUAL "minimal")
 	set(D_SERPENT OFF)
@ -128,6 +129,7 @@ elseif (BUNDLE STREQUAL "miner")
 	set(D_JSONRPC ON)
 	set(D_JSCONSOLE OFF)
 	set(D_EVMJIT OFF)
 	set(D_ETHSTRATUM ON)
 elseif (BUNDLE STREQUAL "cudaminer")
 	set(D_SERPENT OFF)
 	set(D_USENPM OFF)
@ -142,6 +144,7 @@ elseif (BUNDLE STREQUAL "cudaminer")
 	set(D_JSONRPC ON)
 	set(D_JSCONSOLE OFF)
 	set(D_EVMJIT OFF)
 	set(D_ETHSTRATUM ON)
 elseif (BUNDLE STREQUAL "release")      # release builds
 	set(D_SERPENT ${DECENT_PLATFORM})
 	set(D_USENPM OFF)
@ -178,6 +181,10 @@ function(configureProject)
 		add_definitions(-DETH_ETHASHCUDA)
 	endif()
 	if (ETHSTRATUM)
 		add_definitions(-DETH_STRATUM)
 	endif()
 	if (EVMJIT)
 		add_definitions(-DETH_EVMJIT)
 	endif()
@ -315,6 +322,7 @@ eth_format_option(ETHASHCUDA)
 eth_format_option(JSCONSOLE)
 eth_format_option(OLYMPIC)
 eth_format_option(SERPENT)
 eth_format_option(ETHSTRATUM)
 if (JSCONSOLE)
 	set(JSONRPC ON)
@ -363,6 +371,7 @@ message("-- GUI              Build GUI components                     ${GUI}")
 message("-- TESTS            Build tests                              ${TESTS}")
 message("-- ETHASHCL         Build OpenCL components                  ${ETHASHCL}")
 message("-- ETHASHCUDA       Build CUDA components                    ${ETHASHCUDA}")
 message("-- ETHSTRATUM       Build Stratum components                 ${ETHSTRATUM}")
 message("-- JSCONSOLE        Build with javascript console            ${JSCONSOLE}")
 message("-- EVMJIT           Build LLVM-based JIT EVM                 ${EVMJIT}")
 message("------------------------------------------------------------------------")
@ -454,6 +463,9 @@ if (GENERAL OR MINER)
 	if (ETHASHCUDA)
 		add_subdirectory(libethash-cuda)
 	endif ()
 	if(ETHSTRATUM)
 		add_subdirectory(libstratum)
 	endif()
 endif ()
 add_subdirectory(libethcore)
--- a/ethminer/CMakeLists.txt
+++ b/ethminer/CMakeLists.txt
@ -37,6 +37,10 @@ target_link_libraries(${EXECUTABLE} ethcore)
 target_link_libraries(${EXECUTABLE} ethash)
 target_link_libraries(${EXECUTABLE} devcrypto)
 if (ETHSTRATUM)
 target_link_libraries(${EXECUTABLE} ethstratum)
 endif()
 if (DEFINED WIN32 AND NOT DEFINED CMAKE_COMPILER_IS_MINGW)
 	eth_copy_dlls("${EXECUTABLE}" MHD_DLLS)
 	eth_copy_dlls("${EXECUTABLE}" OpenCL_DLLS)
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@ -60,6 +60,9 @@
 #include "PhoneHome.h"
 #include "FarmClient.h"
 #endif
 #if ETH_STRATUM || !ETH_TRUE
 #include <libstratum/EthStratumClient.h>
 #endif
 using namespace std;
 using namespace dev;
 using namespace dev::eth;
@ -105,7 +108,8 @@ public:
 		DAGInit,
 		Benchmark,
 		Simulation,
-		Farm
+		Farm,
 		Stratum
 	};
@ -128,6 +132,44 @@ public:
 				cerr << "Bad " << arg << " option: " << argv[i] << endl;
 				BOOST_THROW_EXCEPTION(BadArgument());
 			}
 #if ETH_STRATUM || !ETH_TRUE
 		if ((arg == "-S" || arg == "--stratum") && i + 1 < argc)
 		{
 			mode = OperationMode::Stratum;
 			string url = string(argv[++i]);
 			size_t p = url.find_last_of(":");
 			if (p > 0) 
 			{
 				m_farmURL = url.substr(0, p);
 				if (p + 1 <= url.length())
 					m_port = url.substr(p+1);
 			}
 			else
 			{
 				m_farmURL = url;
 			}
 		}
 		else if ((arg == "-O" || arg == "--userpass") && i + 1 < argc)
 		{
 			string userpass = string(argv[++i]);
 			size_t p = userpass.find_first_of(":");
 			m_user = userpass.substr(0, p);
 			if (p + 1 <= userpass.length())
 				m_pass = userpass.substr(p+1);
 		}
 		else if ((arg == "-u" || arg == "--user") && i + 1 < argc)
 		{
 			m_user = string(argv[++i]);
 		}
 		else if ((arg == "-p" || arg == "--pass") && i + 1 < argc)
 		{
 			m_pass = string(argv[++i]);
 		}
 		else if ((arg == "-o" || arg == "--port") && i + 1 < argc)
 		{
 			m_port = string(argv[++i]);
 		}
 #endif
 		else if (arg == "--opencl-platform" && i + 1 < argc)
 			try {
 				m_openclPlatform = stol(argv[++i]);
@ -347,7 +389,7 @@ public:
 				}
 			}
 		}
-		else if (arg == "-S" || arg == "--simulation") {
+		else if (arg == "-Z" || arg == "--simulation") {
 			mode = OperationMode::Simulation;
 			if (i + 1 < argc)
 			{
@ -456,6 +498,10 @@ public:
 			doFarm(m_minerType, m_farmURL, m_farmRecheckPeriod);
 		else if (mode == OperationMode::Simulation)
 			doSimulation(m_minerType);
 #if ETH_STRATUM || !ETH_TRUE
 		else if (mode == OperationMode::Stratum)
 			doStratum(m_minerType, m_farmURL, m_port, m_user, m_pass);
 #endif
 	}
 	static void streamHelp(ostream& _out)
@ -824,6 +870,27 @@ private:
 		exit(0);
 	}
 	void doStratum(MinerType _m, string const & host, string const & port, string const & user, string const & pass)
 	{
 		EthashAux::setCustomDirName(s_dagDir);
 		map<string, GenericFarm<EthashProofOfWork>::SealerDescriptor> sealers;
 		sealers["cpu"] = GenericFarm<EthashProofOfWork>::SealerDescriptor{ &EthashCPUMiner::instances, [](GenericMiner<EthashProofOfWork>::ConstructionInfo ci){ return new EthashCPUMiner(ci); } };
 #if ETH_ETHASHCL
 		sealers["opencl"] = GenericFarm<EthashProofOfWork>::SealerDescriptor{ &EthashGPUMiner::instances, [](GenericMiner<EthashProofOfWork>::ConstructionInfo ci){ return new EthashGPUMiner(ci); } };
 #endif
 #if ETH_ETHASHCUDA
 		sealers["cuda"] = GenericFarm<EthashProofOfWork>::SealerDescriptor{ &EthashCUDAMiner::instances, [](GenericMiner<EthashProofOfWork>::ConstructionInfo ci){ return new EthashCUDAMiner(ci); } };
 #endif
 		EthStratumClient client(host, port, user, pass);
 		client.connect();
 		while (true)
 			this_thread::sleep_for(chrono::milliseconds(1000));
 	}
 	/// Operating mode.
 	OperationMode mode;
@ -867,6 +934,13 @@ private:
 	string m_farmURL = "http://127.0.0.1:8545";
 	unsigned m_farmRecheckPeriod = 500;
 	bool m_precompute = true;
 #if ETH_STRATUM || !ETH_TRUE
 	string m_user;
 	string m_pass;
 	string m_port;
 #endif
 };
 char MinerCLI::s_dagDir[256] = "";
--- a/libethash-cuda/CMakeLists.txt
+++ b/libethash-cuda/CMakeLists.txt
@ -13,7 +13,7 @@ LIST(APPEND CUDA_NVCC_FLAGS_DEBUG -G)
 if(COMPUTE AND (COMPUTE GREATER 0))
 	LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${COMPUTE},code=sm_${COMPUTE})
 else(COMPUTE AND (COMPUTE GREATER 0))
-	set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_20;-gencode arch=compute_30,code=sm_30;-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52)
+	set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52)
 endif(COMPUTE AND (COMPUTE GREATER 0))
--- a/libethash-cuda/dagger.cuh
+++ b/libethash-cuda/dagger.cuh
@ -1,22 +0,0 @@
 #define copy(dst, src, count) for (uint32_t i = 0; i < count; i++) { (dst)[i] = (src)[i]; }
 #define ACCESSES 64
 #define THREADS_PER_HASH (128 / 16)
 #define FNV_PRIME	0x01000193
 #define fnv(x,y) ((x) * FNV_PRIME ^(y))
 __device__ uint4 fnv4(uint4 a, uint4 b)
 {
 	uint4 c;
 	c.x = a.x * FNV_PRIME ^ b.x;
 	c.y = a.y * FNV_PRIME ^ b.y;
 	c.z = a.z * FNV_PRIME ^ b.z;
 	c.w = a.w * FNV_PRIME ^ b.w;
 	return c;
 }
 __device__ uint32_t fnv_reduce(uint4 v)
 {
 	return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
 }
--- a/libethash-cuda/dagger_shared.cuh
+++ b/libethash-cuda/dagger_shared.cuh
@ -1,119 +0,0 @@
 #include "ethash_cuda_miner_kernel_globals.h"
 #include "ethash_cuda_miner_kernel.h"
 #include "keccak.cuh"
 #include "dagger.cuh"
 typedef union
 {
 	hash64_t init;
 	hash32_t mix;
 } compute_hash_share;
 __device__ hash64_t init_hash(uint64_t nonce)
 {
 	hash64_t init;
 	// sha3_512(header .. nonce)
 	uint64_t state[25];
 	state[0] = d_header.uint64s[0];
 	state[1] = d_header.uint64s[1];
 	state[2] = d_header.uint64s[2];
 	state[3] = d_header.uint64s[3];
 	state[4] = nonce;
 	keccak_f1600_init((uint2 *)state);
 	copy(init.uint64s, state, 8);
 	return init;
 }
 __device__ uint32_t inner_loop(uint4 mix, uint32_t thread_id, uint32_t* share)
 {
 	// share init0
 	if (thread_id == 0)
 		*share = mix.x;
 	uint32_t init0 = *share;
 	uint32_t a = 0;
 	do
 	{
 		bool update_share = thread_id == ((a >> 2) & (THREADS_PER_HASH - 1));
 		//#pragma unroll 4
 		for (uint32_t i = 0; i < 4; i++)
 		{
 			if (update_share)
 			{
 				uint32_t m[4] = { mix.x, mix.y, mix.z, mix.w };
 				*share = fnv(init0 ^ (a + i), m[i]) % d_dag_size;
 			}
 			__threadfence_block();
 #if __CUDA_ARCH__ >= 350
 			mix = fnv4(mix, __ldg((&d_dag[*share])->uint4s + thread_id));
 #else
 			mix = fnv4(mix, (&d_dag[*share])->uint4s[thread_id]);
 #endif
 		}
 	} while ((a += 4) != ACCESSES);
 	return fnv_reduce(mix);
 }
 __device__ hash32_t final_hash(hash64_t const* init, hash32_t const* mix)
 {
 	uint64_t state[25];
 	hash32_t hash;
 	// keccak_256(keccak_512(header..nonce) .. mix);
 	copy(state, init->uint64s, 8);
 	copy(state + 8, mix->uint64s, 4);
 	keccak_f1600_final((uint2 *)state);
 	// copy out
 	copy(hash.uint64s, state, 4);
 	return hash;
 }
 __device__ hash32_t compute_hash(
 	uint64_t nonce
 	)
 {
 	extern __shared__  compute_hash_share share[];
 	// Compute one init hash per work item.
 	hash64_t init = init_hash(nonce);
 	// Threads work together in this phase in groups of 8.
 	uint32_t const thread_id = threadIdx.x & (THREADS_PER_HASH - 1);
 	uint32_t const hash_id = threadIdx.x >> 3;
 	hash32_t mix;
 	for (int i = 0; i < THREADS_PER_HASH; i++)
 	{
 		// share init with other threads
 		if (i == thread_id)
 			share[hash_id].init = init;
 		uint4 thread_init = share[hash_id].init.uint4s[thread_id & 3];
 		uint32_t thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uint32s);
 		share[hash_id].mix.uint32s[thread_id] = thread_mix;
 		if (i == thread_id)
 			mix = share[hash_id].mix;
 	}
 	return final_hash(&init, &mix);
 }
--- a/libethash-cuda/dagger_shuffled.cuh
+++ b/libethash-cuda/dagger_shuffled.cuh
@ -1,7 +1,10 @@
 #include "ethash_cuda_miner_kernel_globals.h"
 #include "ethash_cuda_miner_kernel.h"
 #include "keccak.cuh"
-#include "dagger.cuh"
+#include "fnv.cuh"
 #define ACCESSES 64
 #define THREADS_PER_HASH (128 / 16)
 __device__ uint64_t compute_hash_shuffle(
 	uint64_t nonce
@ -26,8 +29,8 @@ __device__ uint64_t compute_hash_shuffle(
 	{
 		// share init among threads
 		for (int j = 0; j < 8; j++) {
-			shuffle[j].x = __shfl(state[j].x, start_lane + i);
+			shuffle[j].x = __shfl(state[j].x, i, THREADS_PER_HASH);
-			shuffle[j].y = __shfl(state[j].y, start_lane + i);
+			shuffle[j].y = __shfl(state[j].y, i, THREADS_PER_HASH);
 		}
 		// ugly but avoids local reads/writes
@ -56,7 +59,7 @@ __device__ uint64_t compute_hash_shuffle(
 				{	
 					shuffle[0].x = fnv(init0 ^ (a + b), ((uint32_t *)&mix)[b]) % d_dag_size;
 				}
-				shuffle[0].x = __shfl(shuffle[0].x, start_lane + t);
+				shuffle[0].x = __shfl(shuffle[0].x, t, THREADS_PER_HASH);
 				mix = fnv4(mix, (&d_dag[shuffle[0].x])->uint4s[thread_id]);
 			}
@ -66,14 +69,14 @@ __device__ uint64_t compute_hash_shuffle(
 		// update mix accross threads
-		shuffle[0].x = __shfl(thread_mix, start_lane + 0);
+		shuffle[0].x = __shfl(thread_mix, 0, THREADS_PER_HASH);
-		shuffle[0].y = __shfl(thread_mix, start_lane + 1);
+		shuffle[0].y = __shfl(thread_mix, 1, THREADS_PER_HASH);
-		shuffle[1].x = __shfl(thread_mix, start_lane + 2);
+		shuffle[1].x = __shfl(thread_mix, 2, THREADS_PER_HASH);
-		shuffle[1].y = __shfl(thread_mix, start_lane + 3);
+		shuffle[1].y = __shfl(thread_mix, 3, THREADS_PER_HASH);
-		shuffle[2].x = __shfl(thread_mix, start_lane + 4);
+		shuffle[2].x = __shfl(thread_mix, 4, THREADS_PER_HASH);
-		shuffle[2].y = __shfl(thread_mix, start_lane + 5);
+		shuffle[2].y = __shfl(thread_mix, 5, THREADS_PER_HASH);
-		shuffle[3].x = __shfl(thread_mix, start_lane + 6);
+		shuffle[3].x = __shfl(thread_mix, 6, THREADS_PER_HASH);
-		shuffle[3].y = __shfl(thread_mix, start_lane + 7);
+		shuffle[3].y = __shfl(thread_mix, 7, THREADS_PER_HASH);
 		if (i == thread_id) {
 			//move mix into state:
--- a/libethash-cuda/ethash_cuda_miner_kernel.cu
+++ b/libethash-cuda/ethash_cuda_miner_kernel.cu
@ -8,12 +8,7 @@
 #include "ethash_cuda_miner_kernel_globals.h"
 #include "cuda_helper.h"
 #define SHUFFLE_MIN_VER 350
 #if __CUDA_ARCH__ >= SHUFFLE_MIN_VER
 #include "dagger_shuffled.cuh"
 #else
 #include "dagger_shared.cuh"
 #endif
 __global__ void 
 __launch_bounds__(128, 7)
@ -23,11 +18,7 @@ ethash_search(
 	)
 {
 	uint32_t const gid = blockIdx.x * blockDim.x + threadIdx.x;	
 #if __CUDA_ARCH__ >= SHUFFLE_MIN_VER
 	uint64_t hash = compute_hash_shuffle(start_nonce + gid);
 #else
 	uint64_t hash = compute_hash(start_nonce + gid).uint64s[0];
 #endif
 	if (cuda_swab64(hash) > d_target) return;
 	uint32_t index = atomicInc(const_cast<uint32_t*>(g_output), SEARCH_RESULT_BUFFER_SIZE - 1) + 1;
 	g_output[index] = gid;
@ -42,11 +33,7 @@ void run_ethash_search(
 	uint64_t start_nonce
 )
 {
 #if __CUDA_ARCH__ >= SHUFFLE_MIN_VER
 	ethash_search <<<blocks, threads, 0, stream >>>(g_output, start_nonce);
 #else
 	ethash_search <<<blocks, threads, (sizeof(compute_hash_share) * threads) / THREADS_PER_HASH, stream>>>(g_output, start_nonce);
 #endif
 	CUDA_SAFE_CALL(cudaGetLastError());
 }
--- a/libethash-cuda/ethash_cuda_miner_kernel.h
+++ b/libethash-cuda/ethash_cuda_miner_kernel.h
@ -7,32 +7,13 @@
 #define SEARCH_RESULT_BUFFER_SIZE 64
-typedef union
+typedef struct
 {
 	uint64_t uint64s[16 / sizeof(uint64_t)];
 	uint32_t uint32s[16 / sizeof(uint32_t)];
 } hash16_t;
 typedef union
 {
 	uint32_t uint32s[32 / sizeof(uint32_t)];
 	uint64_t uint64s[32 / sizeof(uint64_t)];
 	uint2 uint2s[32 / sizeof(uint2)];
 	uint4 uint4s[32 / sizeof(uint4)];
 } hash32_t;
-
+typedef struct
 typedef union
 {
 	uint32_t uint32s[64 / sizeof(uint32_t)];
 	uint64_t uint64s[64 / sizeof(uint64_t)];
 	uint4	 uint4s[64 / sizeof(uint4)];
 } hash64_t;
 typedef union
 {
 	uint32_t uint32s[128 / sizeof(uint32_t)];
 	uint4	 uint4s[128 / sizeof(uint4)];
 } hash128_t;