Skip to content

Commit

Permalink
[cudadev] Update to the code base from the Patatrack CMSSW release
Browse files Browse the repository at this point in the history
Highlights:
  - remove the dependency on the CUB external
  - reduce caching allocator memory usage
  - update CUDA to version 11.0 and C++17
  - update the code base from CMSSW 11.2.x
  - import more tests from the cuda directory
  • Loading branch information
fwyzard committed Nov 5, 2020
1 parent 5a19e8e commit ccba578
Show file tree
Hide file tree
Showing 97 changed files with 2,616 additions and 1,703 deletions.
12 changes: 7 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,12 @@ export LIB_DIR := $(BASE_DIR)/lib
export TEST_DIR := $(BASE_DIR)/test

# System external definitions
CUDA_BASE := /usr/local/cuda-10.2
ifneq ($(wildcard $(CUDA_BASE)),)
CUDA_BASE := /usr/local/cuda-11.0
ifeq ($(wildcard $(CUDA_BASE)),)
# CUDA platform not found
CUDA_BASE :=
else
# CUDA platform at $(CUDA_BASE)
CUDA_LIBDIR := $(CUDA_BASE)/lib64
USER_CUDAFLAGS :=
export CUDA_DEPS := $(CUDA_BASE)/lib64/libcudart.so
Expand All @@ -38,15 +42,13 @@ export CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64 -lcudart -lcudadevrt
export CUDA_NVCC := $(CUDA_BASE)/bin/nvcc
define CUFLAGS_template
$(2)NVCC_FLAGS := $$(foreach ARCH,$(1),-gencode arch=compute_$$(ARCH),code=sm_$$(ARCH)) -Wno-deprecated-gpu-targets -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored --expt-relaxed-constexpr --expt-extended-lambda --generate-line-info --source-in-ptx --cudart=shared
$(2)NVCC_COMMON := -std=c++14 -O3 $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS) $(USER_CXXFLAGS)'
$(2)NVCC_COMMON := -std=c++17 -O3 $$($(2)NVCC_FLAGS) -ccbin $(CXX) --compiler-options '$(HOST_CXXFLAGS) $(USER_CXXFLAGS)'
$(2)CUDA_CUFLAGS := -dc $$($(2)NVCC_COMMON) $(USER_CUDAFLAGS)
$(2)CUDA_DLINKFLAGS := -dlink $$($(2)NVCC_COMMON)
endef
$(eval $(call CUFLAGS_template,$(CUDA_ARCH),))
export CUDA_CUFLAGS
export CUDA_DLINKFLAGS
else # no CUDA
CUDA_BASE=
endif

# Input data definitions
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ The application is designed to require minimal dependencies on the system:
* GNU Make, `curl`, `md5sum`, `tar`
* CMake for `kokkostest` and `kokkos` programs
* C++17 capable compiler that works with `nvcc`, in the current setup this pretty much means GCC 8
* CUDA 10.2 runtime and drivers (real drivers are not needed for building)
* CUDA 11.0 runtime and drivers (real drivers are not needed for building)
* [Intel oneAPI Base Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/base-toolkit.html)

All other external dependencies (listed below) are downloaded and built automatically.
Expand Down
70 changes: 38 additions & 32 deletions src/cudadev/CUDACore/AtomicPairCounter.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,48 +5,54 @@

#include "CUDACore/cudaCompat.h"

class AtomicPairCounter {
public:
using c_type = unsigned long long int;
namespace cms {
namespace cuda {

AtomicPairCounter() {}
AtomicPairCounter(c_type i) { counter.ac = i; }
class AtomicPairCounter {
public:
using c_type = unsigned long long int;

__device__ __host__ AtomicPairCounter& operator=(c_type i) {
counter.ac = i;
return *this;
}
AtomicPairCounter() {}
AtomicPairCounter(c_type i) { counter.ac = i; }

struct Counters {
uint32_t n; // in a "One to Many" association is the number of "One"
uint32_t m; // in a "One to Many" association is the total number of associations
};
__device__ __host__ AtomicPairCounter& operator=(c_type i) {
counter.ac = i;
return *this;
}

union Atomic2 {
Counters counters;
c_type ac;
};
struct Counters {
uint32_t n; // in a "One to Many" association is the number of "One"
uint32_t m; // in a "One to Many" association is the total number of associations
};

static constexpr c_type incr = 1UL << 32;
union Atomic2 {
Counters counters;
c_type ac;
};

__device__ __host__ Counters get() const { return counter.counters; }
static constexpr c_type incr = 1UL << 32;

// increment n by 1 and m by i. return previous value
__host__ __device__ __forceinline__ Counters add(uint32_t i) {
c_type c = i;
c += incr;
Atomic2 ret;
__device__ __host__ Counters get() const { return counter.counters; }

// increment n by 1 and m by i. return previous value
__host__ __device__ __forceinline__ Counters add(uint32_t i) {
c_type c = i;
c += incr;
Atomic2 ret;
#ifdef __CUDA_ARCH__
ret.ac = atomicAdd(&counter.ac, c);
ret.ac = atomicAdd(&counter.ac, c);
#else
ret.ac = counter.ac;
counter.ac += c;
ret.ac = counter.ac;
counter.ac += c;
#endif
return ret.counters;
}
return ret.counters;
}

private:
Atomic2 counter;
};

private:
Atomic2 counter;
};
} // namespace cuda
} // namespace cms

#endif // HeterogeneousCore_CUDAUtilities_interface_AtomicPairCounter_h
49 changes: 0 additions & 49 deletions src/cudadev/CUDACore/CUDAHostAllocator.h

This file was deleted.

Loading

0 comments on commit ccba578

Please sign in to comment.