Skip to content

Commit

Permalink
[CUDADEV] Update to the code base from the Patatrack CMSSW release
Browse files Browse the repository at this point in the history
Highlights:
  - remove the dependency on the CUB external
  - reduce caching allocator memory usage
  - update CUDA to version 11.0 and C++17
  - update the CUDA code base from CMSSW 11.2.x
  - import more tests from the cuda directory
  • Loading branch information
fwyzard committed Nov 5, 2020
1 parent 5a19e8e commit 56003a9
Show file tree
Hide file tree
Showing 99 changed files with 2,648 additions and 1,703 deletions.
12 changes: 7 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,12 @@ export LIB_DIR := $(BASE_DIR)/lib
export TEST_DIR := $(BASE_DIR)/test

# System external definitions
CUDA_BASE := /usr/local/cuda-10.2
ifneq ($(wildcard $(CUDA_BASE)),)
CUDA_BASE := /usr/local/cuda-11.0
ifeq ($(wildcard $(CUDA_BASE)),)
# CUDA platform not found
CUDA_BASE :=
else
# CUDA platform at $(CUDA_BASE)
CUDA_LIBDIR := $(CUDA_BASE)/lib64
USER_CUDAFLAGS :=
export CUDA_DEPS := $(CUDA_BASE)/lib64/libcudart.so
Expand All @@ -38,15 +42,13 @@ export CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64 -lcudart -lcudadevrt
export CUDA_NVCC := $(CUDA_BASE)/bin/nvcc
define CUFLAGS_template
$(2)NVCC_FLAGS := $$(foreach ARCH,$(1),-gencode arch=compute_$$(ARCH),code=sm_$$(ARCH)) -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --expt-relaxed-constexpr --expt-extended-lambda --generate-line-info --source-in-ptx --cudart=shared
$(2)NVCC_COMMON := -std=c++14 -O3 $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS) $(USER_CXXFLAGS)'
$(2)NVCC_COMMON := -std=c++17 -O3 $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS) $(USER_CXXFLAGS)'
$(2)CUDA_CUFLAGS := -dc $$($(2)NVCC_COMMON) $(USER_CUDAFLAGS)
$(2)CUDA_DLINKFLAGS := -dlink $$($(2)NVCC_COMMON)
endef
$(eval $(call CUFLAGS_template,$(CUDA_ARCH),))
export CUDA_CUFLAGS
export CUDA_DLINKFLAGS
else # no CUDA
CUDA_BASE=
endif

# Input data definitions
Expand Down
7 changes: 7 additions & 0 deletions src/cuda/CUDACore/deviceAllocatorStatus.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include "CUDACore/deviceAllocatorStatus.h"

#include "getCachingDeviceAllocator.h"

namespace cms::cuda {
allocator::GpuCachedBytes deviceAllocatorStatus() { return allocator::getCachingDeviceAllocator().CacheStatus(); }
} // namespace cms::cuda
23 changes: 23 additions & 0 deletions src/cuda/CUDACore/deviceAllocatorStatus.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef HeterogeneousCore_CUDAUtilities_deviceAllocatorStatus_h
#define HeterogeneousCore_CUDAUtilities_deviceAllocatorStatus_h

#include <map>

namespace cms {
namespace cuda {
namespace allocator {
struct TotalBytes {
size_t free;
size_t live;
size_t liveRequested; // CMS: monitor also requested amount
TotalBytes() { free = live = liveRequested = 0; }
};
/// Map type of device ordinals to the number of cached bytes cached by each device
using GpuCachedBytes = std::map<int, TotalBytes>;
} // namespace allocator

allocator::GpuCachedBytes deviceAllocatorStatus();
} // namespace cuda
} // namespace cms

#endif
70 changes: 38 additions & 32 deletions src/cudadev/CUDACore/AtomicPairCounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,48 +5,54 @@

#include "CUDACore/cudaCompat.h"

class AtomicPairCounter {
public:
using c_type = unsigned long long int;
namespace cms {
namespace cuda {

AtomicPairCounter() {}
AtomicPairCounter(c_type i) { counter.ac = i; }
class AtomicPairCounter {
public:
using c_type = unsigned long long int;

__device__ __host__ AtomicPairCounter& operator=(c_type i) {
counter.ac = i;
return *this;
}
AtomicPairCounter() {}
AtomicPairCounter(c_type i) { counter.ac = i; }

struct Counters {
uint32_t n; // in a "One to Many" association is the number of "One"
uint32_t m; // in a "One to Many" association is the total number of associations
};
__device__ __host__ AtomicPairCounter& operator=(c_type i) {
counter.ac = i;
return *this;
}

union Atomic2 {
Counters counters;
c_type ac;
};
struct Counters {
uint32_t n; // in a "One to Many" association is the number of "One"
uint32_t m; // in a "One to Many" association is the total number of associations
};

static constexpr c_type incr = 1UL << 32;
union Atomic2 {
Counters counters;
c_type ac;
};

__device__ __host__ Counters get() const { return counter.counters; }
static constexpr c_type incr = 1UL << 32;

// increment n by 1 and m by i. return previous value
__host__ __device__ __forceinline__ Counters add(uint32_t i) {
c_type c = i;
c += incr;
Atomic2 ret;
__device__ __host__ Counters get() const { return counter.counters; }

// increment n by 1 and m by i. return previous value
__host__ __device__ __forceinline__ Counters add(uint32_t i) {
c_type c = i;
c += incr;
Atomic2 ret;
#ifdef __CUDA_ARCH__
ret.ac = atomicAdd(&counter.ac, c);
ret.ac = atomicAdd(&counter.ac, c);
#else
ret.ac = counter.ac;
counter.ac += c;
ret.ac = counter.ac;
counter.ac += c;
#endif
return ret.counters;
}
return ret.counters;
}

private:
Atomic2 counter;
};

private:
Atomic2 counter;
};
} // namespace cuda
} // namespace cms

#endif // HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
49 changes: 0 additions & 49 deletions src/cudadev/CUDACore/CUDAHostAllocator.h

This file was deleted.

Loading

0 comments on commit 56003a9

Please sign in to comment.