forked from cms-patatrack/pixeltrack-standalone
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetCachingDeviceAllocator.h
86 lines (78 loc) · 3.71 KB
/
getCachingDeviceAllocator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#ifndef HeterogeneousCore_CUDACore_src_getCachingDeviceAllocator
#define HeterogeneousCore_CUDACore_src_getCachingDeviceAllocator
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <limits>
#include <cuda_runtime.h>
#include "CUDACore/cudaCheck.h"
#include "CUDACore/deviceCount.h"
#include "CachingDeviceAllocator.h"
namespace cms::cuda::allocator {
// Device memory allocation policy. The active one is selected at compile time below.
enum class Policy {
  Synchronous = 0,
  Asynchronous = 1,
  Caching = 2,
};

// Selection order:
//  - Caching, unless CUDA_DISABLE_CACHING_ALLOCATOR is defined;
//  - otherwise Asynchronous, if CUDA_VERSION >= 11020 and CUDA_DISABLE_ASYNC_ALLOCATOR is not defined;
//  - otherwise Synchronous.
#if !defined(CUDA_DISABLE_CACHING_ALLOCATOR)
constexpr Policy policy{Policy::Caching};
#elif CUDA_VERSION >= 11020 && !defined(CUDA_DISABLE_ASYNC_ALLOCATOR)
constexpr Policy policy{Policy::Asynchronous};
#else
constexpr Policy policy{Policy::Synchronous};
#endif

// Growth factor between consecutive bins (bin_growth in cub::CachingDeviceAllocator).
constexpr unsigned int binGrowth{2};

// Exponent of the smallest bin: its size is binGrowth^minBin bytes
// (min_bin in cub::CachingDeviceAllocator).
constexpr unsigned int minBin{8};

// Exponent of the largest bin: its size is binGrowth^maxBin bytes
// (max_bin in cub::CachingDeviceAllocator). Note that, unlike in cub,
// allocations larger than binGrowth^maxBin are set to fail.
constexpr unsigned int maxBin{30};

// Absolute cap, in bytes, on the storage held by the allocator. 0 means no limit.
constexpr size_t maxCachedBytes{0};

// Fraction of device memory the allocator may hold. minCachedBytes() applies it
// to the free memory reported by cudaMemGetInfo for each device and keeps the
// smallest result; if maxCachedBytes is non-zero, the smaller of the two limits
// is used.
constexpr double maxCachedFraction{0.8};

// When true, getCachingDeviceAllocator() prints the allocator settings to
// std::cout, and the flag is also passed on to the allocator's constructor.
constexpr bool debug{false};
// Returns the upper limit, in bytes, on the memory the caching allocator may hold.
//
// For every device reported by deviceCount(), maxCachedFraction times the free
// memory reported by cudaMemGetInfo is computed, and the smallest value over all
// devices is kept. If maxCachedBytes is non-zero the result is additionally
// capped at maxCachedBytes. With no devices the loop does not run, so the result
// is the maximum of size_t (or maxCachedBytes, if non-zero).
//
// The function switches the current CUDA device while querying and makes the
// device that was current on entry current again before returning. The status
// of every CUDA runtime call is handed to cudaCheck.
inline size_t minCachedBytes() {
  size_t ret = std::numeric_limits<size_t>::max();

  // Remember the active device: the loop below changes it to query each device.
  int currentDevice = 0;
  cudaCheck(cudaGetDevice(&currentDevice));

  const int numberOfDevices = deviceCount();
  for (int i = 0; i < numberOfDevices; ++i) {
    size_t freeMemory = 0;
    size_t totalMemory = 0;  // required by cudaMemGetInfo, otherwise unused
    cudaCheck(cudaSetDevice(i));
    cudaCheck(cudaMemGetInfo(&freeMemory, &totalMemory));
    ret = std::min(ret, static_cast<size_t>(maxCachedFraction * freeMemory));
  }

  // Restore the device that was active on entry.
  cudaCheck(cudaSetDevice(currentDevice));

  if (maxCachedBytes > 0) {
    ret = std::min(ret, maxCachedBytes);
  }
  return ret;
}
// Gives access to the shared notcub::CachingDeviceAllocator instance.
//
// The allocator is a function-local static, constructed on the first call from
// binGrowth, minBin, maxBin and minCachedBytes(). When `debug` is true, every
// call first prints the bin configuration and the cache limit to std::cout.
inline notcub::CachingDeviceAllocator& getCachingDeviceAllocator() {
  if (debug) {
    // Units are tried from largest to smallest; a bin is printed in the first
    // unit that divides its size exactly, and in plain bytes otherwise.
    struct Unit {
      int shift;
      const char* suffix;
    };
    constexpr Unit units[] = {{30, " GB\n"}, {20, " MB\n"}, {10, " kB\n"}};

    std::cout << "cub::CachingDeviceAllocator settings\n";
    std::cout << " bin growth " << binGrowth << "\n";
    std::cout << " min bin " << minBin << "\n";
    std::cout << " max bin " << maxBin << "\n";
    std::cout << " resulting bins:\n";

    for (auto exponent = minBin; exponent <= maxBin; ++exponent) {
      const auto bytes = notcub::CachingDeviceAllocator::IntPow(binGrowth, exponent);
      bool printed = false;
      for (const Unit& unit : units) {
        if (bytes >= (1 << unit.shift) and bytes % (1 << unit.shift) == 0) {
          std::cout << " " << std::setw(8) << (bytes >> unit.shift) << unit.suffix;
          printed = true;
          break;
        }
      }
      if (not printed) {
        std::cout << " " << std::setw(9) << bytes << " B\n";
      }
    }

    const auto cacheLimitMB = minCachedBytes() >> 20;
    std::cout << " maximum amount of cached memory: " << cacheLimitMB << " MB\n";
  }

  // the public interface is thread safe
  static notcub::CachingDeviceAllocator allocator{
      binGrowth,
      minBin,
      maxBin,
      minCachedBytes(),
      false,  // do not skip cleanup
      debug};
  return allocator;
}
} // namespace cms::cuda::allocator
#endif