Skip to content

Commit

Permalink
Plotfile Tools: GPU support (AMReX-Codes#3626)
Browse files Browse the repository at this point in the history
## Summary

Add GPU support for fcompare, fextrema, fsnapshot and fvolumesum.

## Checklist

The proposed changes:
- [x] fix a bug or incorrect behavior in AMReX
- [x] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX
users
- [ ] include documentation in the code and/or rst files, if appropriate
  • Loading branch information
WeiqunZhang authored and guj committed Dec 13, 2023
1 parent 6f45e30 commit fe145ad
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 105 deletions.
2 changes: 1 addition & 1 deletion Src/Base/AMReX_PlotFileDataImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ PlotFileDataImpl::get (int level, std::string const& varname) noexcept
int gid = mfi.index();
FArrayBox& dstfab = mf[mfi];
std::unique_ptr<FArrayBox> srcfab(m_vismf[level]->readFAB(gid, icomp));
dstfab.copy<RunOn::Host>(*srcfab);
dstfab.copy<RunOn::Device>(*srcfab);
}
}
return mf;
Expand Down
35 changes: 18 additions & 17 deletions Tools/Plotfile/fextrema.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <AMReX.H>
#include <AMReX_Print.H>
#include <AMReX_ParReduce.H>
#include <AMReX_PlotFileUtil.H>
#include <AMReX_MultiFabUtil.H>
#include <algorithm>
Expand Down Expand Up @@ -80,23 +81,23 @@ void main_main()
pf.boxArray(ilev+1), ratio);
for (int ivar = 0; ivar < var_names.size(); ++ivar) {
const MultiFab& mf = pf.get(ilev, var_names[ivar]);
for (MFIter mfi(mf); mfi.isValid(); ++mfi) {
const Box& bx = mfi.validbox();
const auto lo = amrex::lbound(bx);
const auto hi = amrex::ubound(bx);
const auto& ifab = mask.array(mfi);
const auto& fab = mf.array(mfi);
for (int k = lo.z; k <= hi.z; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
for (int i = lo.x; i <= hi.x; ++i) {
if (ifab(i,j,k) == 0) {
vvmin[ivar] = std::min(fab(i,j,k),vvmin[ivar]);
vvmax[ivar] = std::max(fab(i,j,k),vvmax[ivar]);
}
}
}
}
}
auto const& ma = mf.const_arrays();
auto const& ima = mask.const_arrays();
auto rr = ParReduce(TypeList<ReduceOpMin,ReduceOpMax>{},
TypeList<Real,Real>{}, mf,
[=] AMREX_GPU_DEVICE (int bno, int i, int j, int k)
-> GpuTuple<Real,Real>
{
if (ima[bno](i,j,k) == 0) {
auto x = ma[bno](i,j,k);
return {x,x};
} else {
return {std::numeric_limits<Real>::max(),
std::numeric_limits<Real>::lowest()};
}
});
vvmin[ivar] = std::min(amrex::get<0>(rr), vvmin[ivar]);
vvmax[ivar] = std::max(amrex::get<1>(rr), vvmax[ivar]);
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions Tools/Plotfile/fsnapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,15 +278,15 @@ void main_main()
gmx = std::log10(gmx);
}

BaseFab<unsigned char> intdat;
BaseFab<unsigned char> intdat(The_Pinned_Arena());
for (int idir = ndir_begin; idir < ndir_end; ++idir) {
intdat.resize(finebox[idir],1);
const int width = (idir == 0) ? finebox[idir].length(1) : finebox[idir].length(0);
const int height = (idir == 2) ? finebox[idir].length(1) : finebox[idir].length(2);
const auto& intarr = intdat.array();
const auto& realarr = datamf[idir].array(0);
Real fac = Real(253.999) / (gmx-gmn);
amrex::LoopOnCpu(finebox[idir], [=] (int i, int j, int k)
amrex::ParallelFor(finebox[idir], [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
int jj = (idir == 2) ? height - 1 - j : j; // flip the data in second image direction
int kk = (idir == 2) ? k : height - 1 - k;
Expand Down
130 changes: 45 additions & 85 deletions Tools/Plotfile/fvolumesum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <AMReX_Print.H>
#include <AMReX_PlotFileUtil.H>
#include <AMReX_MultiFabUtil.H>
#include <AMReX_ParReduce.H>
#include <AMReX_ParallelDescriptor.H>
#include <limits>
#include <iterator>
Expand Down Expand Up @@ -80,7 +81,6 @@ void main_main()

const int dim = pf.spaceDim();


int fine_level = pf.finestLevel();

Vector<Real> pos;
Expand All @@ -95,6 +95,35 @@ void main_main()

Array<Real,AMREX_SPACEDIM> dx = pf.cellSize(ilev);

Real volfac = AMREX_D_TERM(dx[0], *dx[1], *dx[2]);

if (coord == 1) {
AMREX_ALWAYS_ASSERT(AMREX_SPACEDIM == 2);
// axisymmetric V = pi (r_r**2 - r_l**2) * dz
// = pi dr * dz * (r_r + r_l)
// = 2 pi r dr dz
volfac *= 2 * pi; // 2 * pi * dr * dz part here
} else if (coord == 2) {
AMREX_ALWAYS_ASSERT(AMREX_SPACEDIM == 1);
// 1-d spherical V = 4/3 pi (r_r**3 - r_l**3)
volfac *= (4.0_rt/3.0_rt) * pi; // 4/3 * pi * dr part here
}

auto xlo = problo[0];
auto dx0 = dx[0];
AMREX_ASSERT(coord == 0 || coord == 1 || coord == 2);
auto f_vol = [=] AMREX_GPU_DEVICE (int i) {
if (coord == 0) {
return volfac;
} else if (coord == 1) {
return volfac * (xlo + (Real(i)+0.5_rt)*dx0);
} else {
Real r_r = xlo + Real(i+1)*dx0;
Real r_l = xlo + Real(i )*dx0;
return volfac * (r_r*r_r + r_l*r_r + r_l*r_l);
}
};

if (ilev < fine_level) {
IntVect ratio{pf.refRatio(ilev)};
for (int idim = dim; idim < AMREX_SPACEDIM; ++idim) {
Expand All @@ -103,97 +132,28 @@ void main_main()
const iMultiFab mask = makeFineMask(pf.boxArray(ilev), pf.DistributionMap(ilev),
pf.boxArray(ilev+1), ratio);
const MultiFab& mf = pf.get(ilev, var_name);
for (MFIter mfi(mf); mfi.isValid(); ++mfi) {
const Box& bx = mfi.validbox();
if (bx.ok()) {
const auto& m = mask.array(mfi);
const auto& fab = mf.array(mfi);
const auto lo = amrex::lbound(bx);
const auto hi = amrex::ubound(bx);
for (int k = lo.z; k <= hi.z; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
for (int i = lo.x; i <= hi.x; ++i) {
if (m(i,j,k) == 0) { // not covered by fine
Array<Real,AMREX_SPACEDIM> p
= {AMREX_D_DECL(problo[0]+static_cast<Real>(i+0.5)*dx[0],
problo[1]+static_cast<Real>(j+0.5)*dx[1],
problo[2]+static_cast<Real>(k+0.5)*dx[2])};

// compute the volume
Real vol = std::numeric_limits<Real>::quiet_NaN();
if (coord == 0) {
// Cartesian
vol = 1.0_rt;
for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
vol *= dx[idim];
}
} else if (coord == 1) {
// axisymmetric V = pi (r_r**2 - r_l**2) * dz
// = pi dr * dz * (r_r + r_l)
// = 2 pi r dr dz
vol = 2 * pi * p[0] * dx[0] * dx[1];
} else if (coord == 2) {
// 1-d spherical V = 4/3 pi (r_r**3 - r_l**3)
Real r_r = problo[0]+static_cast<Real>(i+1)*dx[0];
Real r_l = problo[0]+static_cast<Real>(i)*dx[0];
vol = (4.0_rt/3.0_rt) * pi * dx[0] * (r_r*r_r + r_l*r_r + r_l*r_l);
}

lsum += fab(i,j,k) * vol;
}
}
}
}
}
}
auto const& ima = mask.const_arrays();
auto const& ma = mf.const_arrays();
lsum += ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, mf,
[=] AMREX_GPU_DEVICE (int bno, int i, int j, int k)
-> GpuTuple<Real>
{
return { (ima[bno](i,j,k) == 0) ? ma[bno](i,j,k)*f_vol(i) : 0._rt };
});
} else {
const MultiFab& mf = pf.get(ilev, var_name);
for (MFIter mfi(mf); mfi.isValid(); ++mfi) {
const Box& bx = mfi.validbox();
if (bx.ok()) {
const auto& fab = mf.array(mfi);
const auto lo = amrex::lbound(bx);
const auto hi = amrex::ubound(bx);
for (int k = lo.z; k <= hi.z; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
for (int i = lo.x; i <= hi.x; ++i) {
Array<Real,AMREX_SPACEDIM> p
= {AMREX_D_DECL(problo[0]+static_cast<Real>(i+0.5)*dx[0],
problo[1]+static_cast<Real>(j+0.5)*dx[1],
problo[2]+static_cast<Real>(k+0.5)*dx[2])};

// compute the volume
Real vol = std::numeric_limits<Real>::quiet_NaN();
if (coord == 0) {
// Cartesian
vol = 1.0_rt;
for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
vol *= dx[idim];
}
} else if (coord == 1) {
// axisymmetric V = pi (r_r**2 - r_l**2) * dz
// = pi dr * dz * (r_r + r_l)
// = 2 pi r dr dz
vol = 2 * pi * p[0] * dx[0] * dx[1];
} else if (coord == 2) {
// 1-d spherical V = 4/3 pi (r_r**3 - r_l**3)
Real r_r = problo[0]+static_cast<Real>(i+1)*dx[0];
Real r_l = problo[0]+static_cast<Real>(i)*dx[0];
vol = (4.0_rt/3.0_rt) * pi * dx[0] * (r_r*r_r + r_l*r_r + r_l*r_l);
}

lsum += fab(i,j,k) * vol;
}
}
}
}
}
auto const& ma = mf.const_arrays();
lsum += ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, mf,
[=] AMREX_GPU_DEVICE (int bno, int i, int j, int k)
-> GpuTuple<Real>
{
return { ma[bno](i,j,k)*f_vol(i) };
});
}
}

ParallelDescriptor::ReduceRealSum(lsum);


if (ParallelDescriptor::IOProcessor()) {
std::cout << "integral of " << var_name << " = " << lsum << std::endl;

Expand Down

0 comments on commit fe145ad

Please sign in to comment.