Browse Source

nvidia maxregs

cl-refactor
Genoil 9 years ago
parent
commit
a5be252960
  1. 30
      libethash-cl/ethash_cl_miner.cpp
  2. 15
      libethash-cl/ethash_cl_miner_kernel.cl

30
libethash-cl/ethash_cl_miner.cpp

@ -42,6 +42,10 @@
#define ETHASH_BYTES 32
#define OPENCL_PLATFORM_UNKNOWN 0
#define OPENCL_PLATFORM_NVIDIA 1
#define OPENCL_PLATFORM_AMD 2
// workaround lame platforms
#if !CL_VERSION_1_2
#define CL_MAP_WRITE_INVALIDATE_REGION CL_MAP_WRITE
@ -331,14 +335,14 @@ bool ethash_cl_miner::init(
string platformName = platforms[_platformId].getInfo<CL_PLATFORM_NAME>();
ETHCL_LOG("Using platform: " << platformName.c_str());
int platformId = 0;
int platformId = OPENCL_PLATFORM_UNKNOWN;
if (platformName == "NVIDIA CUDA")
{
platformId = 1;
platformId = OPENCL_PLATFORM_NVIDIA;
}
else if (platformName == "AMD Accelerated Parallel Processing")
{
platformId = 2;
platformId = OPENCL_PLATFORM_AMD;
}
// get GPU device of the default platform
vector<cl::Device> devices = getDevices(platforms, _platformId);
@ -361,6 +365,21 @@ bool ethash_cl_miner::init(
if (strncmp("OpenCL 1.1", device_version.c_str(), 10) == 0)
m_openclOnePointOne = true;
char options[256];
int computeCapability = 0;
if (platformId == OPENCL_PLATFORM_NVIDIA) {
cl_uint computeCapabilityMajor;
cl_uint computeCapabilityMinor;
clGetDeviceInfo(device(), CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, sizeof(cl_uint), &computeCapabilityMajor, NULL);
clGetDeviceInfo(device(), CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, sizeof(cl_uint), &computeCapabilityMinor, NULL);
computeCapability = computeCapabilityMajor * 10 + computeCapabilityMinor;
int maxregs = computeCapability >= 35 ? 72 : 63;
//printf("-cl-nv-maxrregcount=%d -cl-nv-arch sm_%d -cl-nv-verbose", maxregs, computeCapability);
sprintf(options, "-cl-nv-verbose -cl-nv-maxrregcount=%d", maxregs);// , computeCapability);
}
// create context
m_context = cl::Context(vector<cl::Device>(&device, &device + 1));
m_queue = cl::CommandQueue(m_context, device);
@ -379,9 +398,12 @@ bool ethash_cl_miner::init(
addDefinition(code, "ACCESSES", ETHASH_ACCESSES);
addDefinition(code, "MAX_OUTPUTS", c_maxSearchResults);
addDefinition(code, "PLATFORM", platformId);
addDefinition(code, "COMPUTE", computeCapability);
//debugf("%s", code.c_str());
// create miner OpenCL program
cl::Program::Sources sources;
sources.push_back({ code.c_str(), code.size() });
@ -389,7 +411,7 @@ bool ethash_cl_miner::init(
cl::Program program(m_context, sources);
try
{
program.build({ device });
program.build({ device }, options);
ETHCL_LOG("Printing program log");
ETHCL_LOG(program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
}

15
libethash-cl/ethash_cl_miner_kernel.cl

@ -1,8 +1,7 @@
// author Tim Hughes <tim@twistedfury.com>
// Tested on Radeon HD 7850
// Hashrate: 15940347 hashes/s
// Bandwidth: 124533 MB/s
// search kernel should fit in <= 84 VGPRS (3 wavefronts)
#define OPENCL_PLATFORM_UNKNOWN 0
#define OPENCL_PLATFORM_NVIDIA 1
#define OPENCL_PLATFORM_AMD 2
#define THREADS_PER_HASH (128 / 16)
#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
@ -36,7 +35,7 @@ __constant uint2 const Keccak_f1600_RC[24] = {
(uint2)(0x80008008, 0x80000000),
};
#if PLATFORM == 1 // CUDA
#if PLATFORM == OPENCL_PLATFORM_NVIDIA && COMPUTE >= 35
static uint2 ROL2(const uint2 a, const int offset) {
uint2 result;
if (offset >= 32) {
@ -49,7 +48,7 @@ static uint2 ROL2(const uint2 a, const int offset) {
}
return result;
}
#elif PLATFORM == 2 // APP
#elif PLATFORM == OPENCL_PLATFORM_AMD
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
static uint2 ROL2(const uint2 vv, const int r)
{
@ -259,7 +258,9 @@ typedef union {
uint uints[16];
} compute_hash_share;
#if PLATFORM != OPENCL_PLATFORM_NVIDIA // use maxrregs on nv
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
#endif
__kernel void ethash_search(
__global volatile uint* restrict g_output,
__constant hash32_t const* g_header,

Loading…
Cancel
Save