diff --git a/cpu-miner.c b/cpu-miner.c
index 01100be..ef3efa4 100755
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -53,7 +53,8 @@ extern "C" {
 #endif
 
 	int cuda_num_devices();
-	void cuda_deviceinfo();
+	int cuda_deviceinfo(int);
+	void cuda_set_device_config(int);
 	int cuda_finddevice(char *name);
 #ifdef __cplusplus
 }
@@ -1452,34 +1453,28 @@ void parse_device_config(int device, char *config, int *blocks, int *threads)
 	char *p;
 	int tmp_blocks, tmp_threads;
 
-	if(config == NULL) goto usedefault;
-
+	if(config == NULL)
+		return;
 	p = strtok(config, "x");
 	if(!p)
-		goto usedefault;
+		return;
 
 	tmp_threads = atoi(p);
 	if(tmp_threads < 4 || tmp_threads > 1024)
-		goto usedefault;
+		return;
 
 	p = strtok(NULL, "x");
 	if(!p)
-		goto usedefault;
+		return;
 
 	tmp_blocks = atoi(p);
 	if(tmp_blocks < 1)
-		goto usedefault;
+		return;
 
 	*blocks = tmp_blocks;
 	*threads = tmp_threads;
 
 	return;
-
-usedefault:
-	*blocks = 4 * device_mpcount[device];
-	*threads = opt_cn_threads;
-	return;
-
 }
 
 static void parse_arg(int key, char *arg)
@@ -1943,7 +1938,9 @@ int main(int argc, char *argv[])
 	/* parse command line */
 	parse_cmdline(argc, argv);
 	color_init();
-	cuda_deviceinfo();
+
+	cuda_deviceinfo(num_processors);
+	cuda_set_device_config(num_processors);
 
 	if(!opt_benchmark && !rpc_url)
 	{
diff --git a/cryptonight/cryptonight.cu b/cryptonight/cryptonight.cu
index 1cb9ba4..485815c 100755
--- a/cryptonight/cryptonight.cu
+++ b/cryptonight/cryptonight.cu
@@ -47,69 +47,94 @@ extern "C" int cuda_num_devices()
 	}
 	return GPU_N;
 }
 
-extern "C" void cuda_deviceinfo()
+/**
+ * Choose a default launch configuration for each of the first GPU_N
+ * devices whose device_config[i][0] is still 0; entries that are already
+ * non-zero are left untouched. Reads device_mpcount[] and device_arch[],
+ * which cuda_deviceinfo() fills in, so that function must run first.
+ */
+extern "C" void cuda_set_device_config(int GPU_N)
 {
-	cudaError_t err;
-	int GPU_N;
-	err = cudaGetDeviceCount(&GPU_N);
-	if(err != cudaSuccess)
-	{
-		if(err != cudaErrorNoDevice)
-			applog(LOG_ERR, "No CUDA device found!");
-		else
-			applog(LOG_ERR, "Unable to query number of CUDA devices!");
-		exit(1);
-	}
-
 	for(int i = 0; i < GPU_N; i++)
 	{
-		cudaDeviceProp props;
-		cudaError_t err = cudaGetDeviceProperties(&props, device_map[i]);
-		if(err != cudaSuccess)
+		if(device_config[i][0] == 0)
 		{
-			printf("\nGPU %d: %s\n%s line %d\n", device_map[i], cudaGetErrorString(err), __FILE__, __LINE__);
-			exit(1);
-		}
-
-		device_name[i] = strdup(props.name);
-		device_mpcount[i] = props.multiProcessorCount;
-		device_arch[i][0] = props.major;
-		device_arch[i][1] = props.minor;
-
-		device_config[i][0] = props.multiProcessorCount * (props.major < 3 ? 2 : 3);
-		device_config[i][1] = 64;
+			device_config[i][0] = device_mpcount[i] * 4;
+			device_config[i][1] = 64;
 
-		/* sm_20 devices can only run 512 threads per cuda block
-		* `cryptonight_core_gpu_phase1` and `cryptonight_core_gpu_phase3` starts
-		* `8 * ctx->device_threads` threads per block
-		*/
-		if(props.major < 6) {
-
-			//Try to stay under 950 threads ( 1900MiB memory per for hashes )
-			while(device_config[i][0] * device_config[i][1] >= 950 && device_config[i][1] > 2)
+			/* sm_20 devices can only run 512 threads per cuda block
+			* `cryptonight_core_gpu_phase1` and `cryptonight_core_gpu_phase3` starts
+			* `8 * ctx->device_threads` threads per block
+			*/
+			if(device_arch[i][0] < 6)
 			{
-				device_config[i][1] /= 2;
+				//Try to stay under 950 threads ( 1900MiB memory per for hashes )
+				while(device_config[i][0] * device_config[i][1] >= 950 && device_config[i][1] > 2)
+				{
+					device_config[i][1] /= 2;
+				}
 			}
-
 			//Stay within 85% of the available RAM
 			while(device_config[i][1] > 2)
 			{
 				size_t freeMemory = 0;
 				size_t totalMemoery = 0;
 
-				cudaMemGetInfo(&freeMemory, &totalMemoery);
-				freeMemory = (freeMemory * size_t(85)) / 100;
+				/* NOTE(review): cudaMemGetInfo() reports on the calling thread's
+				 * current device; nothing here calls cudaSetDevice(device_map[i]),
+				 * so confirm the intended device is current at this point. */
+				cudaError_t err = cudaMemGetInfo(&freeMemory, &totalMemoery);
+				if(err == cudaSuccess)
+				{
+					freeMemory = (freeMemory * size_t(85)) / 100;
 
-				if(freeMemory > size_t(device_config[i][0]) * size_t(device_config[i][1]) * size_t(2u * 1024u * 1024u)) {
-					break;
-				} else {
-					device_config[i][1] /= 2;
+					if(freeMemory > size_t(device_config[i][0]) * size_t(device_config[i][1]) * 2097832)
+					{
+						break;
+					}
+					else
+					{
+						device_config[i][1] /= 2;
+					}
 				}
+				else
+				{
+					/* The query failed and nothing else in this loop changes
+					 * device_config[i][1]; stop instead of looping forever. */
+					applog(LOG_WARNING, "GPU #%d: CUDA error: %s", device_map[i], cudaGetErrorString(err));
+					break;
+				}
 			}
 		}
 	}
 }
+
+/**
+ * Query the properties of the first GPU_N entries of device_map[] and cache
+ * the name, multiprocessor count and compute capability in device_name[],
+ * device_mpcount[] and device_arch[]. Prints the CUDA error and exits the
+ * process if a query fails. Returns GPU_N unchanged.
+ */
+extern "C" int cuda_deviceinfo(int GPU_N)
+{
+	for(int i = 0; i < GPU_N; i++)
+	{
+		cudaDeviceProp props;
+		cudaError_t err = cudaGetDeviceProperties(&props, device_map[i]);
+		if(err != cudaSuccess)
+		{
+			printf("\nGPU %d: %s\n%s line %d\n", device_map[i], cudaGetErrorString(err), __FILE__, __LINE__);
+			exit(1);
+		}
+
+		device_name[i] = strdup(props.name);
+		device_mpcount[i] = props.multiProcessorCount;
+		device_arch[i][0] = props.major;
+		device_arch[i][1] = props.minor;
+	}
+	return GPU_N;
+}
 
 static bool substringsearch(const char *haystack, const char *needle, int &match)
 {