Allow forcing single chunk DAG upload to GPU

A new argument, --force-single-chunk, is added. It allows the user to override automatic chunk detection and force the DAG to be uploaded in a single chunk. This should only be used if the user is 100% certain that their card can actually enqueue a DAG for writing that is bigger than CL_DEVICE_MAX_MEM_ALLOC_SIZE. OpenCL says this is undefined behaviour, so use it at your own risk. Still, some cards seem to be able to upload the DAG in a single chunk even if OpenCL thinks they can't, hence the decision to add this option.
10 years ago · b84c28e084
5 changed files with 31 additions and 7 deletions
--- a/ethminer/MinerAux.h
+++ b/ethminer/MinerAux.h
@ -134,6 +134,8 @@ public:
 			m_clAllowCPU = true;
 		else if (arg == "--cl-extragpu-mem" && i + 1 < argc)
 			m_extraGPUMemory = 1000000 * stol(argv[++i]);
+		else if (arg == "--force-single-chunk")
+			m_forceSingleChunk = true;
 		else if (arg == "--phone-home" && i + 1 < argc)
 		{
 			string m = argv[++i];
@ -271,6 +273,7 @@ public:
 					m_openclDevice,
 					m_clAllowCPU,
 					m_extraGPUMemory,
+					m_forceSingleChunk,
 					m_currentBlock
 				))
 			{
@ -318,6 +321,7 @@ public:
 			<< "    --list-devices List the detected OpenCL devices and exit." <<endl
 			<< "    --current-block Let the miner know the current block number at configuration time. Will help determine DAG size and required GPU memory." <<endl
 			<< "    --cl-extragpu-mem Set the memory (in MB) you believe your GPU requires for stuff other than mining. Windows rendering e.t.c.." <<endl
+			<< "    --force-single-chunk Force DAG uploading in a single chunk against OpenCL's judgement. Use at your own risk." <<endl
 			;
 	}

@ -507,6 +511,7 @@ private:
 	unsigned m_miningThreads = UINT_MAX;
 	bool m_shouldListDevices = false;
 	bool m_clAllowCPU = false;
+	bool m_forceSingleChunk = false;
 	boost::optional<uint64_t> m_currentBlock;
 	// default value is 350MB of GPU memory for other stuff (windows system rendering, e.t.c.)
 	unsigned m_extraGPUMemory = 350000000;
--- a/libethash-cl/ethash_cl_miner.cpp
+++ b/libethash-cl/ethash_cl_miner.cpp
@ -137,9 +137,15 @@ unsigned ethash_cl_miner::getNumDevices(unsigned _platformId)
 	return devices.size();
 }

-bool ethash_cl_miner::configureGPU(bool _allowCPU, unsigned _extraGPUMemory, boost::optional<uint64_t> _currentBlock)
+bool ethash_cl_miner::configureGPU(
+	bool _allowCPU,
+	unsigned _extraGPUMemory,
+	bool _forceSingleChunk,
+	boost::optional<uint64_t> _currentBlock
+)
 {
 	s_allowCPU = _allowCPU;
+	s_forceSingleChunk = _forceSingleChunk;
 	s_extraRequiredGPUMem = _extraGPUMemory;
 	// by default let's only consider the DAG of the first epoch
 	uint64_t dagSize = _currentBlock ? ethash_get_datasize(*_currentBlock) : 1073739904U;
@ -168,6 +174,7 @@ bool ethash_cl_miner::configureGPU(bool _allowCPU, unsigned _extraGPUMemory, boo
 }

 bool ethash_cl_miner::s_allowCPU = false;
+bool ethash_cl_miner::s_forceSingleChunk = false;
 unsigned ethash_cl_miner::s_extraRequiredGPUMem;

 bool ethash_cl_miner::searchForAllDevices(function<bool(cl::Device const&)> _callback)
@ -284,15 +291,18 @@ bool ethash_cl_miner::init(
 		// configure chunk number depending on max allocateable memory
 		cl_ulong result;
 		device.getInfo(CL_DEVICE_MAX_MEM_ALLOC_SIZE, &result);
-		if (result >= _dagSize)
+		if (s_forceSingleChunk || result >= _dagSize)
 		{
 			m_dagChunksNum = 1;
-			ETHCL_LOG("Using 1 big chunk. Max OpenCL allocateable memory is" << result);
+			ETHCL_LOG(
+				((result <= _dagSize && s_forceSingleChunk) ? "Forcing single chunk. Good luck!\n" : "") <<
+				"Using 1 big chunk. Max OpenCL allocateable memory is " << result
+			);
 		}
 		else
 		{
 			m_dagChunksNum = 4;
-			ETHCL_LOG("Using 4 chunks. Max OpenCL allocateable memory is" << result);
+			ETHCL_LOG("Using 4 chunks. Max OpenCL allocateable memory is " << result);
 		}

 		if (strncmp("OpenCL 1.0", device_version.c_str(), 10) == 0)
--- a/libethash-cl/ethash_cl_miner.h
+++ b/libethash-cl/ethash_cl_miner.h
@ -41,7 +41,12 @@ public:
 	static unsigned getNumDevices(unsigned _platformId = 0);
 	static std::string platform_info(unsigned _platformId = 0, unsigned _deviceId = 0);
 	static void listDevices();
-	static bool configureGPU(bool _allowCPU, unsigned _extraGPUMemory, boost::optional<uint64_t> _currentBlock);
+	static bool configureGPU(
+		bool _allowCPU,
+		unsigned _extraGPUMemory,
+		bool _forceSingleChunk,
+		boost::optional<uint64_t> _currentBlock
+	);

 	bool init(
 		uint8_t const* _dag,
@ -74,6 +79,8 @@ private:
 	unsigned m_workgroup_size;
 	bool m_opencl_1_1;

+	/// Force dag upload to GPU in a single chunk even if OpenCL thinks you can't do it. Use at your own risk.
+	static bool s_forceSingleChunk;
 	/// Allow CPU to appear as an OpenCL device or not. Default is false
 	static bool s_allowCPU;
 	/// GPU memory required for other things, like window rendering e.t.c.
--- a/libethcore/Ethash.cpp
+++ b/libethcore/Ethash.cpp
@ -386,12 +386,13 @@ bool Ethash::GPUMiner::configureGPU(
 	unsigned _deviceId,
 	bool _allowCPU,
 	unsigned _extraGPUMemory,
+	bool _forceSingleChunk,
 	boost::optional<uint64_t> _currentBlock
 )
 {
 	s_platformId = _platformId;
 	s_deviceId = _deviceId;
-	return ethash_cl_miner::configureGPU(_allowCPU, _extraGPUMemory, _currentBlock);
+	return ethash_cl_miner::configureGPU(_allowCPU, _extraGPUMemory, _forceSingleChunk, _currentBlock);
 }

 #endif
--- a/libethcore/Ethash.h
+++ b/libethcore/Ethash.h
@ -88,7 +88,7 @@ public:
 		static unsigned instances() { return s_numInstances > 0 ? s_numInstances : std::thread::hardware_concurrency(); }
 		static std::string platformInfo();
 		static void listDevices() {}
-		static bool configureGPU(unsigned, unsigned, bool, unsigned, boost::optional<uint64_t>) { return false; }
+		static bool configureGPU(unsigned, unsigned, bool, unsigned, bool, boost::optional<uint64_t>) { return false; }
 		static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, std::thread::hardware_concurrency()); }
 	protected:
 		void kickOff() override
@ -122,6 +122,7 @@ public:
 			unsigned _deviceId,
 			bool _allowCPU,
 			unsigned _extraGPUMemory,
+			bool _forceSingleChunk,
 			boost::optional<uint64_t> _currentBlock
 		);
 		static void setNumInstances(unsigned _instances) { s_numInstances = std::min<unsigned>(_instances, getNumDevices()); }