builds on CUDA 6.5 again

10 years ago · 0e1f01e447
5 changed files with 45 additions and 18 deletions
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@ -27,6 +27,7 @@
 #include <fstream>
 #include <iostream>
 #include <signal.h>
+#include <random>

 #include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/trim_all.hpp>
@ -612,7 +613,10 @@ private:
 			});
 			for (unsigned i = 0; !completed; ++i)
 			{
-				cnote << "Mining on difficulty " << difficulty << " " << f.miningProgress();
+				auto mp = f.miningProgress();
+				f.resetMiningProgress();
+
+				cnote << "Mining on difficulty " << difficulty << " " << mp;
 				this_thread::sleep_for(chrono::milliseconds(1000));
 				time++;
 			}
@ -637,8 +641,24 @@ private:
 			time = 0;
 			genesis.setDifficulty(u256(1) << difficulty);
 			genesis.noteDirty();
-			f.setWork(genesis);
-			current = EthashProofOfWork::WorkPackage(genesis);
+			//f.setWork(genesis);
+		
+			h256 hh;
+			std::random_device engine;
+			hh.randomize(engine);
+			h256 newSeedHash = h256();
+
+			current.headerHash = hh;
+			current.seedHash = newSeedHash;
+			current.boundary = genesis.boundary();
+			minelog << "Generated random work package:";
+			minelog << "  Header-hash:" << current.headerHash.hex();
+			minelog << "  Seedhash:" << current.seedHash.hex();
+			minelog << "  Target: " << h256(current.boundary).hex();
+			f.setWork(current);
+			
+
+			//current = EthashProofOfWork::WorkPackage(genesis);
 		}
 	}

@ -690,6 +710,7 @@ private:
 						minelog << "Getting work package...";

 					auto rate = mp.rate();
+
 					try
 					{
 						rpc.eth_submitHashrate(toJS((u256)rate), "0x" + id.hex());
--- a/libethash-cuda/CMakeLists.txt
+++ b/libethash-cuda/CMakeLists.txt
@ -13,7 +13,7 @@ LIST(APPEND CUDA_NVCC_FLAGS_DEBUG -G)
 if(COMPUTE AND (COMPUTE GREATER 0))
 	LIST(APPEND CUDA_NVCC_FLAGS -gencode arch=compute_${COMPUTE},code=sm_${COMPUTE})
 else(COMPUTE AND (COMPUTE GREATER 0))
-	set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_20;-gencode arch=compute_30,code=sm_30;-gencode arch=compute_32,code=sm_32;-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52)
+	set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_20,code=sm_20;-gencode arch=compute_30,code=sm_30;-gencode arch=compute_35,code=sm_35;-gencode arch=compute_50,code=sm_50;-gencode arch=compute_52,code=sm_52)
 endif(COMPUTE AND (COMPUTE GREATER 0))


--- a/libethash-cuda/ethash_cuda_miner.cpp
+++ b/libethash-cuda/ethash_cuda_miner.cpp
@ -44,10 +44,6 @@
 #define ETHASH_BYTES 32

 // workaround lame platforms
-#if !CL_VERSION_1_2
-#define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE
-#define CL_MEM_HOST_READ_ONLY 0
-#endif

 #undef min
 #undef max
@ -221,7 +217,7 @@ bool ethash_cuda_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned _d
 		return false;
 	}

-	cout << "Using device: " << device_props.name << "(" << device_props.major << "." << device_props.minor << ")" << endl;
+	cout << "Using device: " << device_props.name << " (Compute " << device_props.major << "." << device_props.minor << ")" << endl;

 	cudaError_t r = cudaSetDevice(device_num);
 	if (r != cudaSuccess)
@ -233,7 +229,6 @@ bool ethash_cuda_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned _d
 	cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 	cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);

-	m_hash_buf	 = new void *[s_numStreams];
 	m_search_buf = new uint32_t *[s_numStreams];
 	m_streams = new cudaStream_t[s_numStreams];

@ -257,7 +252,6 @@ bool ethash_cuda_miner::init(uint8_t const* _dag, uint64_t _dagSize, unsigned _d
 	// create mining buffers
 	for (unsigned i = 0; i != s_numStreams; ++i)
 	{		
-		result = cudaMallocHost(&m_hash_buf[i], 32 * c_hash_batch_size);
 		result = cudaMallocHost(&m_search_buf[i], (c_max_search_results + 1) * sizeof(uint32_t));
 		result = cudaStreamCreate(&m_streams[i]);
 	}
@ -297,7 +291,7 @@ void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_ho
 	cudaMemcpy(m_header, header, 32, cudaMemcpyHostToDevice);
 	for (unsigned i = 0; i != s_numStreams; ++i)
 	{
-		cudaMemcpy(m_search_buf[i], &c_zero, 4, cudaMemcpyHostToDevice);
+		cudaMemcpyAsync(m_search_buf[i], &c_zero, 4, cudaMemcpyHostToDevice, m_streams[i]);
 	}
 	cudaError err = cudaGetLastError();
 	if (cudaSuccess != err)
@ -308,7 +302,7 @@ void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_ho
 	unsigned buf = 0;
 	std::random_device engine;
 	uint64_t start_nonce = std::uniform_int_distribution<uint64_t>()(engine);
-	for (;; start_nonce += s_gridSize)
+	for (;;)
 	{
 		run_ethash_search(s_gridSize, s_blockSize, m_streams[buf], m_search_buf[buf], m_header, m_dag_ptr, start_nonce, target);
 		
@ -324,7 +318,8 @@ void ethash_cuda_miner::search(uint8_t const* header, uint64_t target, search_ho

 			if (!s_highCPU)
 				waitStream(m_streams[buf]); // 28ms
-			cudaMemcpyAsync(results, m_search_buf[batch.buf], (1 + c_max_search_results) * sizeof(uint32_t), cudaMemcpyHostToHost, m_streams[batch.buf]);
+
+			cudaMemcpyAsync(results, m_search_buf[batch.buf], (1 + c_max_search_results) * sizeof(uint32_t), cudaMemcpyDeviceToHost, m_streams[batch.buf]);

 			unsigned num_found = std::min<unsigned>(results[0], c_max_search_results);
 			uint64_t nonces[c_max_search_results];
--- a/libethash-cuda/keccak.cuh
+++ b/libethash-cuda/keccak.cuh
@ -11,8 +11,7 @@ __device__ __constant__ uint64_t const keccak_round_constants[24] = {
 	0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
 };

-
-#if __CUDA_ARCH__ >= 500
+#if __CUDA_ARCH__ >= 500 && CUDART_VERSION >= 7050
 __device__ __forceinline__ 
 uint2 xor3(const uint2 a, const uint2 b, const uint2 c) {
 	uint2 result;
@ -37,7 +36,7 @@ uint2 xor3(const uint2 a, const uint2 b, const uint2 c) {
 }
 #endif

-#if __CUDA_ARCH__ >= 500
+#if __CUDA_ARCH__ >= 500 && CUDART_VERSION >= 7050
 __device__ __forceinline__
 uint2 chi(const uint2 a, const uint2 b, const uint2 c) {
 	uint2 result;
--- a/libethcore/EthashCUDAMiner.cpp
+++ b/libethcore/EthashCUDAMiner.cpp
@ -23,6 +23,10 @@ along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.

 #if ETH_ETHASHCUDA || !ETH_TRUE

+#if defined(WIN32)
+#include <Windows.h>
+#endif
+
 #include "EthashCUDAMiner.h"
 #include <thread>
 #include <chrono>
@ -108,8 +112,16 @@ EthashCUDAMiner::EthashCUDAMiner(ConstructionInfo const& _ci) :
 	Worker("cudaminer" + toString(index())),
 m_hook( new EthashCUDAHook(this))
 {
+/*
+#if defined(WIN32)
+	SYSTEM_INFO sysinfo;
+	GetSystemInfo(&sysinfo);
+	int num_cpus = sysinfo.dwNumberOfProcessors;
+	SetThreadAffinityMask(GetCurrentThread(), 1 << (index() % num_cpus));
+	SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
+#endif
+*/
 }
-
 EthashCUDAMiner::~EthashCUDAMiner()
 {
 	pause();