Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to clang format 17 #34

Merged
merged 5 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .clang-format
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
Language: Cpp
BasedOnStyle: Google
ColumnLimit: 108
ColumnLimit: 90
NamespaceIndentation: All
SortIncludes: false
IndentWidth: 2
Expand All @@ -11,8 +11,10 @@ PenaltyExcessCharacter: 100
AlignAfterOpenBracket: Align
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
BinPackParameters: false
AlwaysBreakTemplateDeclarations: Yes
ReflowComments: false
BinPackArguments: false
BinPackParameters: false
DerivePointerAlignment: false
PointerAlignment: Left
ReferenceAlignment: Left
2 changes: 1 addition & 1 deletion .github/workflows/clang_format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ jobs:
- name: Run clang-format style check
uses: jidicula/[email protected]
with:
clang-format-version: '16'
clang-format-version: '17'
check-path: ${{ matrix.path }}
exclude-regex: 'CLUEstering/include/test/doctest.h'
3 changes: 2 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/AllocatorPolicy.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ namespace cms::alpakatools {
template <typename TDev>
constexpr inline AllocatorPolicy allocator_policy = AllocatorPolicy::Synchronous;

#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
#if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || \
defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
template <>
constexpr inline AllocatorPolicy allocator_policy<alpaka::DevCpu> =
#if !defined ALPAKA_DISABLE_CACHING_ALLOCATOR
Expand Down
22 changes: 17 additions & 5 deletions CLUEstering/alpaka/AlpakaCore/CachedBufAlloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ namespace cms::alpakatools {

//! The caching memory allocator implementation for the pinned host memory
template <typename TElem, typename TDim, typename TIdx>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueCudaRtNonBlocking, void> {
struct CachedBufAlloc<TElem,
TDim,
TIdx,
alpaka::DevCpu,
alpaka::QueueCudaRtNonBlocking,
void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
alpaka::QueueCudaRtNonBlocking queue,
Expand Down Expand Up @@ -96,7 +101,12 @@ namespace cms::alpakatools {

//! The caching memory allocator implementation for the pinned host memory
template <typename TElem, typename TDim, typename TIdx>
struct CachedBufAlloc<TElem, TDim, TIdx, alpaka::DevCpu, alpaka::QueueHipRtNonBlocking, void> {
struct CachedBufAlloc<TElem,
TDim,
TIdx,
alpaka::DevCpu,
alpaka::QueueHipRtNonBlocking,
void> {
template <typename TExtent>
ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevCpu const& dev,
alpaka::QueueHipRtNonBlocking queue,
Expand Down Expand Up @@ -152,9 +162,11 @@ namespace cms::alpakatools {
} // namespace traits

template <typename TElem, typename TIdx, typename TExtent, typename TQueue, typename TDev>
ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) {
return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::allocCachedBuf(
dev, queue, extent);
ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev,
TQueue queue,
TExtent const& extent = TExtent()) {
return traits::CachedBufAlloc<TElem, alpaka::Dim<TExtent>, TIdx, TDev, TQueue>::
allocCachedBuf(dev, queue, extent);
}

} // namespace cms::alpakatools
Expand Down
105 changes: 62 additions & 43 deletions CLUEstering/alpaka/AlpakaCore/CachingAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,16 @@ namespace cms::alpakatools {
template <typename TDevice, typename TQueue>
class CachingAllocator {
public:
using Device = TDevice; // the "memory device", where the memory will be allocated
using Queue = TQueue; // the queue used to submit the memory operations
using Device = TDevice; // the "memory device", where the memory will be allocated
using Queue = TQueue; // the queue used to submit the memory operations
using Event = alpaka::Event<Queue>; // the events used to synchronise the operations
using Buffer = alpaka::Buf<Device, std::byte, alpaka::DimInt<1u>, size_t>;

// The "memory device" type can either be the same as the "synchronisation device" type, or be the host CPU.
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the \"synchronisation device\" "
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or
std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the "
"\"synchronisation device\" "
"type, or be the "
"host CPU.");

Expand All @@ -112,8 +114,8 @@ namespace cms::alpakatools {
size_t maxCachedBytes, // total storage for the allocator (0 means no limit);
double
maxCachedFraction, // fraction of total device memory taken for the allocator (0 means no limit);
// if both maxCachedBytes and maxCachedFraction are non-zero,
// the smallest resulting value is used.
// if both maxCachedBytes and maxCachedFraction are non-zero,
// the smallest resulting value is used.
bool reuseSameQueueAllocations, // reuse non-ready allocations if they are in the same queue as the new one;
// this is safe only if all memory operations are scheduled in the same queue
bool debug)
Expand All @@ -135,7 +137,8 @@ namespace cms::alpakatools {
<< " resulting bins:\n";
for (auto bin = minBin_; bin <= maxBin_; ++bin) {
auto binSize = detail::power(binGrowth, bin);
out << " " << std::right << std::setw(12) << detail::as_bytes(binSize) << '\n';
out << " " << std::right << std::setw(12) << detail::as_bytes(binSize)
<< '\n';
}
out << " maximum amount of cached memory: " << detail::as_bytes(maxCachedBytes_);
std::cout << out.str() << std::endl;
Expand Down Expand Up @@ -201,22 +204,26 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " returned " << block.bytes
<< " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get()
<< " , event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
<< " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " returned "
<< block.bytes << " bytes at " << ptr << " from associated queue "
<< block.queue->m_spQueueImpl.get() << " , event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), "
<< liveBlocks_.size() << " live blocks (" << cachedBytes_.live
<< " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}
} else {
// if the buffer is not recached, it is automatically freed when block goes out of scope
if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << block.bytes
<< " bytes at " << ptr << " from associated queue " << block.queue->m_spQueueImpl.get()
<< ", event " << block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), " << liveBlocks_.size()
<< " live blocks (" << cachedBytes_.live << " bytes) outstanding." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed "
<< block.bytes << " bytes at " << ptr << " from associated queue "
<< block.queue->m_spQueueImpl.get() << ", event "
<< block.event->m_spEventImpl.get() << " .\n\t\t " << cachedBlocks_.size()
<< " available blocks cached (" << cachedBytes_.free << " bytes), "
<< liveBlocks_.size() << " live blocks (" << cachedBytes_.live
<< " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}
}
Expand Down Expand Up @@ -257,10 +264,11 @@ namespace cms::alpakatools {
return std::make_tuple(minBin_, minBinBytes_);
}
if (bytes > maxBinBytes_) {
throw std::runtime_error("Requested allocation size " + std::to_string(bytes) +
" bytes is too large for the caching detail with maximum bin " +
std::to_string(maxBinBytes_) +
" bytes. You might want to increase the maximum bin size");
throw std::runtime_error(
"Requested allocation size " + std::to_string(bytes) +
" bytes is too large for the caching detail with maximum bin " +
std::to_string(maxBinBytes_) +
" bytes. You might want to increase the maximum bin size");
}
unsigned int bin = minBin_;
size_t binBytes = minBinBytes_;
Expand Down Expand Up @@ -301,11 +309,13 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " reused cached block at "
<< block.buffer->data() << " (" << block.bytes << " bytes) for queue "
<< block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get()
<< " (previously associated with stream " << iBlock->second.queue->m_spQueueImpl.get()
<< " , event " << iBlock->second.event->m_spEventImpl.get() << ")." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_)
<< " reused cached block at " << block.buffer->data() << " ("
<< block.bytes << " bytes) for queue " << block.queue->m_spQueueImpl.get()
<< ", event " << block.event->m_spEventImpl.get()
<< " (previously associated with stream "
<< iBlock->second.queue->m_spQueueImpl.get() << " , event "
<< iBlock->second.event->m_spEventImpl.get() << ")." << std::endl;
std::cout << out.str() << std::endl;
}

Expand All @@ -324,11 +334,14 @@ namespace cms::alpakatools {
return alpaka::allocBuf<std::byte, size_t>(device_, bytes);
} else if constexpr (std::is_same_v<Device, alpaka::DevCpu>) {
// allocate pinned host memory accessible by the queue's platform
return alpaka::allocMappedBuf<alpaka::Pltf<alpaka::Dev<Queue>>, std::byte, size_t>(device_, bytes);
return alpaka::allocMappedBuf<alpaka::Pltf<alpaka::Dev<Queue>>, std::byte, size_t>(
device_, bytes);
} else {
// unsupported combination
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the \"synchronisation device\" "
static_assert(std::is_same_v<Device, alpaka::Dev<Queue>> or
std::is_same_v<Device, alpaka::DevCpu>,
"The \"memory device\" type can either be the same as the "
"\"synchronisation device\" "
"type, or be "
"the host CPU.");
}
Expand All @@ -341,8 +354,9 @@ namespace cms::alpakatools {
// the allocation attempt failed: free all cached blocks on the device and retry
if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " failed to allocate "
<< block.bytes << " bytes for queue " << block.queue->m_spQueueImpl.get()
out << "\t" << deviceType_ << " " << alpaka::getName(device_)
<< " failed to allocate " << block.bytes << " bytes for queue "
<< block.queue->m_spQueueImpl.get()
<< ", retrying after freeing cached allocations" << std::endl;
std::cout << out.str() << std::endl;
}
Expand All @@ -366,10 +380,10 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " allocated new block at "
<< block.buffer->data() << " (" << block.bytes << " bytes associated with queue "
<< block.queue->m_spQueueImpl.get() << ", event " << block.event->m_spEventImpl.get() << "."
<< std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_)
<< " allocated new block at " << block.buffer->data() << " (" << block.bytes
<< " bytes associated with queue " << block.queue->m_spQueueImpl.get()
<< ", event " << block.event->m_spEventImpl.get() << "." << std::endl;
std::cout << out.str() << std::endl;
}
}
Expand All @@ -383,10 +397,11 @@ namespace cms::alpakatools {

if (debug_) {
std::ostringstream out;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed " << iBlock->second.bytes
<< " bytes.\n\t\t " << (cachedBlocks_.size() - 1) << " available blocks cached ("
<< cachedBytes_.free << " bytes), " << liveBlocks_.size() << " live blocks ("
<< cachedBytes_.live << " bytes) outstanding." << std::endl;
out << "\t" << deviceType_ << " " << alpaka::getName(device_) << " freed "
<< iBlock->second.bytes << " bytes.\n\t\t " << (cachedBlocks_.size() - 1)
<< " available blocks cached (" << cachedBytes_.free << " bytes), "
<< liveBlocks_.size() << " live blocks (" << cachedBytes_.live
<< " bytes) outstanding." << std::endl;
std::cout << out.str() << std::endl;
}

Expand All @@ -395,18 +410,22 @@ namespace cms::alpakatools {
}

// TODO replace with a tbb::concurrent_multimap ?
using CachedBlocks = std::multimap<unsigned int, BlockDescriptor>; // ordered by the allocation bin
using CachedBlocks =
std::multimap<unsigned int, BlockDescriptor>; // ordered by the allocation bin
// TODO replace with a tbb::concurrent_map ?
using BusyBlocks = std::map<void*, BlockDescriptor>; // ordered by the address of the allocated memory
using BusyBlocks =
std::map<void*, BlockDescriptor>; // ordered by the address of the allocated memory

inline static const std::string deviceType_ = boost::core::demangle(typeid(Device).name());
inline static const std::string deviceType_ =
boost::core::demangle(typeid(Device).name());

mutable std::mutex mutex_;
Device device_; // the device where the memory is allocated

CachedBytes cachedBytes_;
CachedBlocks cachedBlocks_; // Set of cached device allocations available for reuse
BusyBlocks liveBlocks_; // map of pointers to the live device allocations currently in use
BusyBlocks
liveBlocks_; // map of pointers to the live device allocations currently in use

const unsigned int binGrowth_; // Geometric growth factor for bin-sizes
const unsigned int minBin_;
Expand Down
24 changes: 17 additions & 7 deletions CLUEstering/alpaka/AlpakaCore/HostOnlyTask.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,22 @@ namespace alpaka {
struct Enqueue<QueueCudaRtNonBlocking, HostOnlyTask> {
using TApi = ApiCudaRt;

static void CUDART_CB callback(cudaStream_t /*queue*/, cudaError_t /*status*/, void* arg) {
static void CUDART_CB callback(cudaStream_t /*queue*/,
cudaError_t /*status*/,
void* arg) {
//ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(status);
std::unique_ptr<HostOnlyTask> pTask(static_cast<HostOnlyTask*>(arg));
(*pTask)();
}

ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task) -> void {
ALPAKA_FN_HOST static auto enqueue(QueueCudaRtNonBlocking& queue, HostOnlyTask task)
-> void {
auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(cudaStreamAddCallback(
alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
cudaStreamAddCallback(alpaka::getNativeHandle(queue),
callback,
static_cast<void*>(pTask.release()),
0u));
}
};
#endif // ALPAKA_ACC_GPU_CUDA_ENABLED
Expand All @@ -52,10 +58,14 @@ namespace alpaka {
(*pTask)();
}

ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task) -> void {
ALPAKA_FN_HOST static auto enqueue(QueueHipRtNonBlocking& queue, HostOnlyTask task)
-> void {
auto pTask = std::make_unique<HostOnlyTask>(std::move(task));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(hipStreamAddCallback(
alpaka::getNativeHandle(queue), callback, static_cast<void*>(pTask.release()), 0u));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
hipStreamAddCallback(alpaka::getNativeHandle(queue),
callback,
static_cast<void*>(pTask.release()),
0u));
}
};
#endif // ALPAKA_ACC_GPU_HIP_ENABLED
Expand Down
3 changes: 2 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/alpakaConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ namespace alpaka_common {

// trick to force expanding ALPAKA_ACCELERATOR_NAMESPACE before stringification inside DEFINE_FWK_MODULE
#define DEFINE_FWK_ALPAKA_MODULE2(name) DEFINE_FWK_MODULE(name)
#define DEFINE_FWK_ALPAKA_MODULE(name) DEFINE_FWK_ALPAKA_MODULE2(ALPAKA_ACCELERATOR_NAMESPACE::name)
#define DEFINE_FWK_ALPAKA_MODULE(name) \
DEFINE_FWK_ALPAKA_MODULE2(ALPAKA_ACCELERATOR_NAMESPACE::name)

#define DEFINE_FWK_ALPAKA_EVENTSETUP_MODULE2(name) DEFINE_FWK_EVENTSETUP_MODULE(name)
#define DEFINE_FWK_ALPAKA_EVENTSETUP_MODULE(name) \
Expand Down
3 changes: 2 additions & 1 deletion CLUEstering/alpaka/AlpakaCore/alpakaDevices.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
namespace cms::alpakatools {

// alpaka host device
inline const alpaka_common::DevHost host = alpaka::getDevByIdx<alpaka_common::PltfHost>(0u);
inline const alpaka_common::DevHost host =
alpaka::getDevByIdx<alpaka_common::PltfHost>(0u);

// alpaka accelerator devices
template <typename TPlatform>
Expand Down
12 changes: 8 additions & 4 deletions CLUEstering/alpaka/AlpakaCore/alpakaFwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,14 @@ namespace alpaka {
template <typename TApi, bool TBlocking>
class QueueUniformCudaHipRt;
}
using QueueCudaRtBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, true>;
using QueueCudaRtNonBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, false>;
using QueueHipRtBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, true>;
using QueueHipRtNonBlocking = uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, false>;
using QueueCudaRtBlocking =
uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, true>;
using QueueCudaRtNonBlocking =
uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiCudaRt, false>;
using QueueHipRtBlocking =
uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, true>;
using QueueHipRtNonBlocking =
uniform_cuda_hip::detail::QueueUniformCudaHipRt<ApiHipRt, false>;

// Events
template <typename TDev>
Expand Down
Loading
Loading