Skip to content

Commit

Permalink
Fix Resize Issue of Fab with the Async Arena (AMReX-Codes#3663)
Browse files Browse the repository at this point in the history
## Summary

Previously there was an issue with resizing Fabs using The_Async_Arena.
The issue was the previous allocation during resize might be done on a
different stream. This commit fixes the issue and makes the following
patterns work.

    FArrayBox tmp0(The_Async_Arena());
    FArrayBox tmp1(The_Async_Arena());
    FArrayBox tmp2;
    for (MFIter ...) {
        tmp0.resize(box,ncomp,The_Async_Arena());
        tmp1.resize(box,ncomp);
        tmp2.resize(box,ncomp,The_Async_Arena());
    }

## Additional background

AMReX-Astro/Castro#2677

## Checklist

The proposed changes:
- [x] fix a bug or incorrect behavior in AMReX
- [ ] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX
users
- [ ] include documentation in the code and/or rst files, if appropriate
  • Loading branch information
WeiqunZhang authored and guj committed Dec 13, 2023
1 parent df89db3 commit 9348cac
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 1 deletion.
5 changes: 5 additions & 0 deletions Src/Base/AMReX_Arena.H
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ public:
*/
virtual void registerForProfiling (const std::string& memory_name);

#ifdef AMREX_USE_GPU
//! Is this GPU stream ordered memory allocator?
[[nodiscard]] virtual bool isStreamOrderedArena () const { return false; }
#endif

/**
* \brief Given a minimum required arena size of sz bytes, this returns
* the next largest arena size that will align to align_size bytes
Expand Down
25 changes: 24 additions & 1 deletion Src/Base/AMReX_BaseFab.H
Original file line number Diff line number Diff line change
Expand Up @@ -1631,6 +1631,9 @@ protected:
Long truesize = 0L; //!< nvar*numpts that was allocated on heap.
bool ptr_owner = false; //!< Owner of T*?
bool shared_memory = false; //!< Is the memory allocated in shared memory?
#ifdef AMREX_USE_GPU
gpuStream_t alloc_stream{};
#endif
};

template <class T>
Expand Down Expand Up @@ -1902,6 +1905,9 @@ BaseFab<T>::define ()
this->truesize = this->nvar*this->domain.numPts();
this->ptr_owner = true;
this->dptr = static_cast<T*>(this->alloc(this->truesize*sizeof(T)));
#ifdef AMREX_USE_GPU
this->alloc_stream = Gpu::gpuStream();
#endif

placementNew(this->dptr, this->truesize);

Expand Down Expand Up @@ -2003,6 +2009,9 @@ BaseFab<T>::BaseFab (BaseFab<T>&& rhs) noexcept
dptr(rhs.dptr), domain(rhs.domain),
nvar(rhs.nvar), truesize(rhs.truesize),
ptr_owner(rhs.ptr_owner), shared_memory(rhs.shared_memory)
#ifdef AMREX_USE_GPU
, alloc_stream(rhs.alloc_stream)
#endif
{
rhs.dptr = nullptr;
rhs.ptr_owner = false;
Expand All @@ -2021,6 +2030,9 @@ BaseFab<T>::operator= (BaseFab<T>&& rhs) noexcept
truesize = rhs.truesize;
ptr_owner = rhs.ptr_owner;
shared_memory = rhs.shared_memory;
#ifdef AMREX_USE_GPU
alloc_stream = rhs.alloc_stream;
#endif

rhs.dptr = nullptr;
rhs.ptr_owner = false;
Expand Down Expand Up @@ -2062,7 +2074,11 @@ BaseFab<T>::resize (const Box& b, int n, Arena* ar)
this->dptr = nullptr;
define();
}
else if (this->nvar*this->domain.numPts() > this->truesize)
else if (this->nvar*this->domain.numPts() > this->truesize
#ifdef AMREX_USE_GPU
|| (arena()->isStreamOrderedArena() && alloc_stream != Gpu::gpuStream())
#endif
)
{
if (this->shared_memory) {
amrex::Abort("BaseFab::resize: BaseFab in shared memory cannot increase size");
Expand Down Expand Up @@ -2114,7 +2130,14 @@ BaseFab<T>::clear () noexcept

placementDelete(this->dptr, this->truesize);

#ifdef AMREX_USE_GPU
auto current_stream = Gpu::Device::gpuStream();
Gpu::Device::setStream(alloc_stream);
#endif
this->free(this->dptr);
#ifdef AMREX_USE_GPU
Gpu::Device::setStream(current_stream);
#endif

if (this->nvar > 1) {
amrex::update_fab_stats(-this->truesize/this->nvar, -this->truesize, sizeof(T));
Expand Down
1 change: 1 addition & 0 deletions Src/Base/AMReX_GpuTypes.H
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct Dim1 {
struct gpuStream_t {
sycl::queue* queue = nullptr;
bool operator== (gpuStream_t const& rhs) noexcept { return queue == rhs.queue; }
bool operator!= (gpuStream_t const& rhs) noexcept { return queue != rhs.queue; }
};

#endif
Expand Down
5 changes: 5 additions & 0 deletions Src/Base/AMReX_PArena.H
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ public:
[[nodiscard]] bool isDevice () const final;
[[nodiscard]] bool isPinned () const final;

#ifdef AMREX_USE_GPU
//! Is this CUDA stream ordered memory allocator?
[[nodiscard]] bool isStreamOrderedArena () const final { return true; }
#endif

#ifdef AMREX_CUDA_GE_11_2
private:
cudaMemPool_t m_pool;
Expand Down

0 comments on commit 9348cac

Please sign in to comment.