From 24cfd9d8b87eb4335ecf893454449ff7d28013e9 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Fri, 19 Aug 2022 18:54:45 +0000 Subject: [PATCH 01/39] Update OCCA memory interop call. --- backends/occa/ceed-occa-vector.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backends/occa/ceed-occa-vector.hpp b/backends/occa/ceed-occa-vector.hpp index 94d2ee5ed3..2c94c2e7d9 100644 --- a/backends/occa/ceed-occa-vector.hpp +++ b/backends/occa/ceed-occa-vector.hpp @@ -25,7 +25,8 @@ namespace ceed { ::occa::memory arrayToMemory(const TM *array) { if (array) { ::occa::memory mem((::occa::modeMemory_t*) array); - return mem.as(::occa::dtype::get()); + mem.setDtype(::occa::dtype::get()); + return mem; } return ::occa::null; } From bf505e7520fcaea0f7b339b7fd26ddcf36581750 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Fri, 19 Aug 2022 19:51:41 +0000 Subject: [PATCH 02/39] Removes deprecated kernelBuilder calls; builds directly from the device. --- backends/occa/ceed-occa-elem-restriction.cpp | 85 +++++++++----------- backends/occa/ceed-occa-elem-restriction.hpp | 7 +- 2 files changed, 41 insertions(+), 51 deletions(-) diff --git a/backends/occa/ceed-occa-elem-restriction.cpp b/backends/occa/ceed-occa-elem-restriction.cpp index 84df900f17..adc63cf441 100644 --- a/backends/occa/ceed-occa-elem-restriction.cpp +++ b/backends/occa/ceed-occa-elem-restriction.cpp @@ -55,7 +55,7 @@ namespace ceed { setupTransposeIndices(); - setupKernelBuilders(); + // setupKernelBuilders(); } void ElemRestriction::setupFromHostMemory(CeedCopyMode copyMode, @@ -175,23 +175,22 @@ namespace ceed { delete [] transposeDofIndices_h; } - void ElemRestriction::setupKernelBuilders() { - ::occa::properties kernelProps; - kernelProps["defines/CeedInt"] = ::occa::dtype::get().name(); - kernelProps["defines/CeedScalar"] = ::occa::dtype::get().name(); - - kernelProps["defines/COMPONENT_COUNT"] = ceedComponentCount; - kernelProps["defines/ELEMENT_SIZE"] = ceedElementSize; - 
kernelProps["defines/TILE_SIZE"] = 64; - kernelProps["defines/USES_INDICES"] = usesIndices(); - - applyKernelBuilder = ::occa::kernelBuilder::fromString( - occa_elem_restriction_source, "applyRestriction", kernelProps - ); - - applyTransposeKernelBuilder = ::occa::kernelBuilder::fromString( - occa_elem_restriction_source, "applyRestrictionTranspose", kernelProps - ); + void ElemRestriction::setKernelProperties() { + kernelProperties["defines/CeedInt"] = ::occa::dtype::get().name(); + kernelProperties["defines/CeedScalar"] = ::occa::dtype::get().name(); + kernelProperties["defines/COMPONENT_COUNT"] = ceedComponentCount; + kernelProperties["defines/ELEMENT_SIZE"] = ceedElementSize; + kernelProperties["defines/TILE_SIZE"] = 64; + kernelProperties["defines/USES_INDICES"] = usesIndices(); + kernelProperties["defines/USER_STRIDES"] = StrideType::USER_STRIDES; + kernelProperties["defines/NOT_STRIDED"] = StrideType::NOT_STRIDED; + kernelProperties["defines/BACKEND_STRIDES"] = StrideType::BACKEND_STRIDES; + kernelProperties["defines/STRIDE_TYPE"] = ceedStrideType; + kernelProperties["defines/NODE_COUNT"] = transposeQuadIndices.length(); + kernelProperties["defines/NODE_STRIDE"] = ceedNodeStride; + kernelProperties["defines/COMPONENT_STRIDE"] = ceedComponentStride; + kernelProperties["defines/ELEMENT_STRIDE"] = ceedElementStride; + kernelProperties["defines/UNSTRIDED_COMPONENT_STRIDE"] = ceedUnstridedComponentStride; } ElemRestriction* ElemRestriction::getElemRestriction(CeedElemRestriction r, @@ -300,42 +299,32 @@ namespace ceed { Vector &v) { const bool rIsTransposed = (rTransposeMode != CEED_NOTRANSPOSE); - ::occa::properties kernelProps; - kernelProps["defines/USER_STRIDES"] = StrideType::USER_STRIDES; - kernelProps["defines/NOT_STRIDED"] = StrideType::NOT_STRIDED; - kernelProps["defines/BACKEND_STRIDES"] = StrideType::BACKEND_STRIDES; - kernelProps["defines/STRIDE_TYPE"] = ceedStrideType; - - kernelProps["defines/NODE_COUNT"] = transposeQuadIndices.length(); - 
kernelProps["defines/NODE_STRIDE"] = ceedNodeStride; - kernelProps["defines/COMPONENT_STRIDE"] = ceedComponentStride; - kernelProps["defines/ELEMENT_STRIDE"] = ceedElementStride; - kernelProps["defines/UNSTRIDED_COMPONENT_STRIDE"] = ceedUnstridedComponentStride; - + // Todo: refactor if (rIsTransposed) { - ::occa::kernel applyTranspose = applyTransposeKernelBuilder.build( - getDevice(), - kernelProps - ); - - applyTranspose(ceedElementCount, - transposeQuadIndices, - transposeDofOffsets, - transposeDofIndices, - u.getConstKernelArg(), - v.getKernelArg()); + if(!restrictionTransposeKernel.isInitialized()) { + restrictionTransposeKernel = getDevice().buildKernelFromString( + occa_elem_restriction_source, + "applyRestrictionTranspose", + kernelProperties); + } + restrictionTransposeKernel(ceedElementCount, + transposeQuadIndices, + transposeDofOffsets, + transposeDofIndices, + u.getConstKernelArg(), + v.getKernelArg()); } else { - ::occa::kernel apply = applyKernelBuilder.build( - getDevice(), - kernelProps - ); - - apply(ceedElementCount, + if(!restrictionKernel.isInitialized()) { + restrictionKernel = getDevice().buildKernelFromString( + occa_elem_restriction_source, + "applyRestriction", + kernelProperties); + } + restrictionKernel(ceedElementCount, indices, u.getConstKernelArg(), v.getKernelArg()); } - return CEED_ERROR_SUCCESS; } diff --git a/backends/occa/ceed-occa-elem-restriction.hpp b/backends/occa/ceed-occa-elem-restriction.hpp index a664297ee1..211b8e0541 100644 --- a/backends/occa/ceed-occa-elem-restriction.hpp +++ b/backends/occa/ceed-occa-elem-restriction.hpp @@ -53,8 +53,9 @@ namespace ceed { ::occa::memory transposeDofOffsets; ::occa::memory transposeDofIndices; - ::occa::kernelBuilder applyKernelBuilder; - ::occa::kernelBuilder applyTransposeKernelBuilder; + ::occa::json kernelProperties; + ::occa::kernel restrictionKernel; + ::occa::kernel restrictionTransposeKernel; ElemRestriction(); @@ -74,7 +75,7 @@ namespace ceed { void setupTransposeIndices(); 
- void setupKernelBuilders(); + void setKernelProperties(); static ElemRestriction* getElemRestriction(CeedElemRestriction r, const bool assertValid = true); From 1bd33c70699b8960cdad51da6f81903f5bc7ebf8 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Fri, 19 Aug 2022 20:04:07 +0000 Subject: [PATCH 03/39] Uses `std::memset` and `std::memcpy`. --- backends/occa/ceed-occa-elem-restriction.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/backends/occa/ceed-occa-elem-restriction.cpp b/backends/occa/ceed-occa-elem-restriction.cpp index adc63cf441..77cf174643 100644 --- a/backends/occa/ceed-occa-elem-restriction.cpp +++ b/backends/occa/ceed-occa-elem-restriction.cpp @@ -15,6 +15,7 @@ // testbed platforms, in support of the nation's exascale computing imperative. #include +#include #include "./ceed-occa-elem-restriction.hpp" #include "./ceed-occa-kernels.hpp" @@ -69,7 +70,7 @@ namespace ceed { } else { const size_t bytes = entries * sizeof(CeedInt); hostIndices = (CeedInt*) ::malloc(bytes); - ::memcpy(hostIndices, indices_h, bytes); + std::memcpy(hostIndices, indices_h, bytes); } if (hostIndices) { @@ -102,7 +103,7 @@ namespace ceed { const CeedInt elementEntryCount = ceedElementCount * ceedElementSize; bool *indexIsUsed = new bool[ceedLVectorSize]; - ::memset(indexIsUsed, 0, ceedLVectorSize * sizeof(bool)); + std::memset(indexIsUsed, 0, ceedLVectorSize * sizeof(bool)); for (CeedInt i = 0; i < elementEntryCount; ++i) { indexIsUsed[hostIndices[i]] = true; @@ -119,7 +120,7 @@ namespace ceed { CeedInt *transposeDofOffsets_h = new CeedInt[dofOffsetCount]; CeedInt *transposeDofIndices_h = new CeedInt[elementEntryCount]; - ::memset(transposeDofOffsets_h, 0, dofOffsetCount * sizeof(CeedInt)); + std::memset(transposeDofOffsets_h, 0, dofOffsetCount * sizeof(CeedInt)); // Compute ids CeedInt offsetId = 0; From 2303fb43cc9628502ff0b39249c4b995eff5bdf0 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Fri, 19 Aug 2022 20:04:54 +0000 Subject: [PATCH 04/39] Uses 
`std::to_string` instead of internal `occa::toString`. --- backends/occa/ceed-occa-qfunction.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/backends/occa/ceed-occa-qfunction.cpp b/backends/occa/ceed-occa-qfunction.cpp index 7d448ff9df..d08100649f 100644 --- a/backends/occa/ceed-occa-qfunction.cpp +++ b/backends/occa/ceed-occa-qfunction.cpp @@ -15,6 +15,7 @@ // testbed platforms, in support of the nation's exascale computing imperative. #include +#include #include "ceed-occa-qfunction.hpp" #include "ceed-occa-qfunctioncontext.hpp" @@ -154,8 +155,8 @@ namespace ceed { // Set and define in for the q point for (int i = 0; i < args.inputCount(); ++i) { const CeedInt fieldSize = args.getQfInput(i).size; - const std::string qIn_i = "qIn" + ::occa::toString(i); - const std::string in_i = "in" + ::occa::toString(i); + const std::string qIn_i = "qIn" + std::to_string(i); + const std::string in_i = "in" + std::to_string(i); ss << " CeedScalar " << qIn_i << "[" << fieldSize << "];" << std::endl << " in[" << i << "] = " << qIn_i << ";" << std::endl @@ -168,7 +169,7 @@ namespace ceed { // Set out for the q point for (int i = 0; i < args.outputCount(); ++i) { const CeedInt fieldSize = args.getQfOutput(i).size; - const std::string qOut_i = "qOut" + ::occa::toString(i); + const std::string qOut_i = "qOut" + std::to_string(i); ss << " CeedScalar " << qOut_i << "[" << fieldSize << "];" << std::endl << " out[" << i << "] = " << qOut_i << ";" << std::endl; @@ -179,8 +180,8 @@ namespace ceed { // Copy out for the q point for (int i = 0; i < args.outputCount(); ++i) { const CeedInt fieldSize = args.getQfOutput(i).size; - const std::string qOut_i = "qOut" + ::occa::toString(i); - const std::string out_i = "out" + ::occa::toString(i); + const std::string qOut_i = "qOut" + std::to_string(i); + const std::string out_i = "out" + std::to_string(i); ss << " for (int qi = 0; qi < " << fieldSize << "; ++qi) {" << std::endl << " " << out_i << "[q + (OCCA_Q * 
qi)] = " << qOut_i << "[qi];" << std::endl @@ -204,7 +205,7 @@ namespace ceed { for (CeedInt i = 0; i < args.inputCount(); i++) { Vector *u = Vector::from(U[i]); if (!u) { - return ceedError("Incorrect qFunction input field: U[" + ::occa::toString(i) + "]"); + return ceedError("Incorrect qFunction input field: U[" + std::to_string(i) + "]"); } qFunctionKernel.pushArg(u->getConstKernelArg()); } @@ -212,7 +213,7 @@ namespace ceed { for (CeedInt i = 0; i < args.outputCount(); i++) { Vector *v = Vector::from(V[i]); if (!v) { - return ceedError("Incorrect qFunction output field: V[" + ::occa::toString(i) + "]"); + return ceedError("Incorrect qFunction output field: V[" + std::to_string(i) + "]"); } qFunctionKernel.pushArg(v->getKernelArg()); } From 6aa2114494e8c77ced5868dcd2ce3a1b8c83d4fd Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Fri, 19 Aug 2022 20:05:36 +0000 Subject: [PATCH 05/39] Uses `std::memcpy`. --- backends/occa/ceed-occa-qfunctioncontext.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backends/occa/ceed-occa-qfunctioncontext.cpp b/backends/occa/ceed-occa-qfunctioncontext.cpp index 8296b5760a..f86d31c4fe 100644 --- a/backends/occa/ceed-occa-qfunctioncontext.cpp +++ b/backends/occa/ceed-occa-qfunctioncontext.cpp @@ -14,6 +14,8 @@ // software, applications, hardware, advanced system engineering and early // testbed platforms, in support of the nation's exascale computing imperative. +#include + #include "ceed-occa-qfunctioncontext.hpp" namespace ceed { @@ -120,7 +122,7 @@ namespace ceed { switch (mtype) { case CEED_MEM_HOST: setCurrentHostCtxBufferIfNeeded(); - ::memcpy(currentHostBuffer, data, ctxSize); + std::memcpy(currentHostBuffer, data, ctxSize); syncState = SyncState::host; return CEED_ERROR_SUCCESS; case CEED_MEM_DEVICE: From a268ee58b615ec0d69797ef5b39bbfa4e55c3264 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Fri, 19 Aug 2022 20:15:44 +0000 Subject: [PATCH 06/39] Sets kernel properties. 
--- backends/occa/ceed-occa-elem-restriction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/occa/ceed-occa-elem-restriction.cpp b/backends/occa/ceed-occa-elem-restriction.cpp index 77cf174643..150983cfcb 100644 --- a/backends/occa/ceed-occa-elem-restriction.cpp +++ b/backends/occa/ceed-occa-elem-restriction.cpp @@ -55,8 +55,6 @@ namespace ceed { } setupTransposeIndices(); - - // setupKernelBuilders(); } void ElemRestriction::setupFromHostMemory(CeedCopyMode copyMode, @@ -303,6 +301,7 @@ namespace ceed { // Todo: refactor if (rIsTransposed) { if(!restrictionTransposeKernel.isInitialized()) { + setKernelProperties(); restrictionTransposeKernel = getDevice().buildKernelFromString( occa_elem_restriction_source, "applyRestrictionTranspose", @@ -316,6 +315,7 @@ namespace ceed { v.getKernelArg()); } else { if(!restrictionKernel.isInitialized()) { + setKernelProperties(); restrictionKernel = getDevice().buildKernelFromString( occa_elem_restriction_source, "applyRestriction", From 4e0990008484f1e15f79484300fd312d9f683ee0 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Fri, 19 Aug 2022 21:08:14 +0000 Subject: [PATCH 07/39] Removes deprecated kernelBuilder calls; builds directly from the device. 
--- backends/occa/ceed-occa-simplex-basis.cpp | 154 +++++++++------------- backends/occa/ceed-occa-simplex-basis.hpp | 31 ++--- 2 files changed, 73 insertions(+), 112 deletions(-) diff --git a/backends/occa/ceed-occa-simplex-basis.cpp b/backends/occa/ceed-occa-simplex-basis.cpp index a9183f1b26..ee75ac9744 100644 --- a/backends/occa/ceed-occa-simplex-basis.cpp +++ b/backends/occa/ceed-occa-simplex-basis.cpp @@ -38,7 +38,7 @@ namespace ceed { grad = device.malloc(P * Q * dim, grad_); qWeight = device.malloc(Q, qWeight_); - setupKernelBuilders(); + setKernelProperties(); } SimplexBasis::~SimplexBasis() {} @@ -52,7 +52,20 @@ namespace ceed { return occa_simplex_basis_cpu_function_source; } - void SimplexBasis::setupKernelBuilders() { + void SimplexBasis::setKernelProperties() { + kernelProperties["defines/CeedInt"] = ::occa::dtype::get().name(); + kernelProperties["defines/CeedScalar"] = ::occa::dtype::get().name(); + kernelProperties["defines/DIM"] = dim; + kernelProperties["defines/Q"] = Q; + kernelProperties["defines/P"] = P; + kernelProperties["defines/MAX_PQ"] = P > Q ? P : Q; + kernelProperties["defines/BASIS_COMPONENT_COUNT"] = ceedComponentCount; + if(usingGpuDevice()) { + kernelProperties["defines/ELEMENTS_PER_BLOCK"] = Q <= 1024 ? (1024 / Q) : 1; + } + } + + ::occa::kernel SimplexBasis::buildKernel(const std::string& kernelName) { std::string kernelSource; if (usingGpuDevice()) { kernelSource = occa_simplex_basis_gpu_source; @@ -62,122 +75,77 @@ namespace ceed { kernelSource += occa_simplex_basis_cpu_kernel_source; } - ::occa::properties kernelProps; - kernelProps["defines/CeedInt"] = ::occa::dtype::get().name(); - kernelProps["defines/CeedScalar"] = ::occa::dtype::get().name(); - kernelProps["defines/DIM"] = dim; - kernelProps["defines/Q"] = Q; - kernelProps["defines/P"] = P; - kernelProps["defines/MAX_PQ"] = P > Q ? 
P : Q; - kernelProps["defines/BASIS_COMPONENT_COUNT"] = ceedComponentCount; - - interpKernelBuilder = ::occa::kernelBuilder::fromString( - kernelSource, "interp", kernelProps - ); - gradKernelBuilder = ::occa::kernelBuilder::fromString( - kernelSource, "grad" , kernelProps - ); - weightKernelBuilder = ::occa::kernelBuilder::fromString( - kernelSource, "weight", kernelProps - ); + return getDevice().buildKernelFromString(kernelSource, + kernelName, + kernelProperties); } int SimplexBasis::applyInterp(const CeedInt elementCount, const bool transpose, Vector &U, Vector &V) { - ::occa::kernel interpKernel = ( - usingGpuDevice() - ? getGpuInterpKernel(transpose) - : getCpuInterpKernel(transpose) - ); - - interpKernel(elementCount, - interp, - U.getConstKernelArg(), - V.getKernelArg()); - + if(transpose) { + if(!interpTKernel.isInitialized()) { + kernelProperties["defines/TRANSPOSE"] = transpose; + interpTKernel = buildKernel("interp"); + } + + interpTKernel(elementCount, + interp, + U.getConstKernelArg(), + V.getKernelArg()); + } else { + if(!interpKernel.isInitialized()) { + kernelProperties["defines/TRANSPOSE"] = transpose; + interpKernel = buildKernel("interp"); + } + + interpKernel(elementCount, + interp, + U.getConstKernelArg(), + V.getKernelArg()); + } return CEED_ERROR_SUCCESS; } - ::occa::kernel SimplexBasis::getCpuInterpKernel(const bool transpose) { - return buildCpuEvalKernel(interpKernelBuilder, - transpose); - } - - ::occa::kernel SimplexBasis::getGpuInterpKernel(const bool transpose) { - return buildGpuEvalKernel(interpKernelBuilder, - transpose); - } - int SimplexBasis::applyGrad(const CeedInt elementCount, const bool transpose, Vector &U, Vector &V) { - ::occa::kernel gradKernel = ( - usingGpuDevice() - ? 
getGpuGradKernel(transpose) - : getCpuGradKernel(transpose) - ); + if(transpose) { + if(!gradTKernel.isInitialized()) { + kernelProperties["defines/TRANSPOSE"] = transpose; + gradTKernel = buildKernel("grad"); + } - gradKernel(elementCount, - grad, - U.getConstKernelArg(), - V.getKernelArg()); + gradTKernel(elementCount, + grad, + U.getConstKernelArg(), + V.getKernelArg()); + } else { + if(!gradKernel.isInitialized()) { + kernelProperties["defines/TRANSPOSE"] = transpose; + gradKernel = buildKernel("grad"); + } + gradKernel(elementCount, + grad, + U.getConstKernelArg(), + V.getKernelArg()); + } return CEED_ERROR_SUCCESS; } - ::occa::kernel SimplexBasis::getCpuGradKernel(const bool transpose) { - return buildCpuEvalKernel(gradKernelBuilder, - transpose); - } - - ::occa::kernel SimplexBasis::getGpuGradKernel(const bool transpose) { - return buildGpuEvalKernel(gradKernelBuilder, - transpose); - } - int SimplexBasis::applyWeight(const CeedInt elementCount, Vector &W) { - ::occa::kernel weightKernel = ( - usingGpuDevice() - ? 
getGpuWeightKernel() - : getCpuWeightKernel() - ); - + if(!weightKernel.isInitialized()) { + weightKernel = buildKernel("weight"); + } weightKernel(elementCount, qWeight, W.getKernelArg()); return CEED_ERROR_SUCCESS; } - ::occa::kernel SimplexBasis::getCpuWeightKernel() { - return buildCpuEvalKernel(weightKernelBuilder, - false); - } - - ::occa::kernel SimplexBasis::getGpuWeightKernel() { - return buildGpuEvalKernel(weightKernelBuilder, - false); - } - - ::occa::kernel SimplexBasis::buildCpuEvalKernel(::occa::kernelBuilder &kernelBuilder, - const bool transpose) { - ::occa::properties kernelProps; - kernelProps["defines/TRANSPOSE"] = transpose; - - return kernelBuilder.build(getDevice(), kernelProps); - } - - ::occa::kernel SimplexBasis::buildGpuEvalKernel(::occa::kernelBuilder &kernelBuilder, - const bool transpose) { - ::occa::properties kernelProps; - kernelProps["defines/TRANSPOSE"] = transpose; - kernelProps["defines/ELEMENTS_PER_BLOCK"] = Q <= 1024 ? (1024 / Q) : 1; - - return kernelBuilder.build(getDevice(), kernelProps); - } - int SimplexBasis::apply(const CeedInt elementCount, CeedTransposeMode tmode, CeedEvalMode emode, diff --git a/backends/occa/ceed-occa-simplex-basis.hpp b/backends/occa/ceed-occa-simplex-basis.hpp index 94a766cbd4..69e02fe9f8 100644 --- a/backends/occa/ceed-occa-simplex-basis.hpp +++ b/backends/occa/ceed-occa-simplex-basis.hpp @@ -26,9 +26,13 @@ namespace ceed { ::occa::memory interp; ::occa::memory grad; ::occa::memory qWeight; - ::occa::kernelBuilder interpKernelBuilder; - ::occa::kernelBuilder gradKernelBuilder; - ::occa::kernelBuilder weightKernelBuilder; + + ::occa::json kernelProperties; + ::occa::kernel interpKernel; + ::occa::kernel interpTKernel; + ::occa::kernel gradKernel; + ::occa::kernel gradTKernel; + ::occa::kernel weightKernel; SimplexBasis(CeedBasis basis, CeedInt dim, @@ -44,36 +48,25 @@ namespace ceed { const char* getFunctionSource() const; - void setupKernelBuilders(); + void setKernelProperties(); + + std::string 
getKernelSource() const; + + ::occa::kernel buildKernel(const std::string& kernelName); int applyInterp(const CeedInt elementCount, const bool transpose, Vector &U, Vector &V); - ::occa::kernel getCpuInterpKernel(const bool transpose); - ::occa::kernel getGpuInterpKernel(const bool transpose); - int applyGrad(const CeedInt elementCount, const bool transpose, Vector &U, Vector &V); - ::occa::kernel getCpuGradKernel(const bool transpose); - ::occa::kernel getGpuGradKernel(const bool transpose); - int applyWeight(const CeedInt elementCount, Vector &W); - ::occa::kernel getCpuWeightKernel(); - ::occa::kernel getGpuWeightKernel(); - - ::occa::kernel buildCpuEvalKernel(::occa::kernelBuilder &kernelBuilder, - const bool transpose); - - ::occa::kernel buildGpuEvalKernel(::occa::kernelBuilder &kernelBuilder, - const bool transpose); - int apply(const CeedInt elementCount, CeedTransposeMode tmode, CeedEvalMode emode, From c3c9e38a1a0e6343d309519e4b6486020aaa2b20 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Mon, 22 Aug 2022 19:41:04 +0000 Subject: [PATCH 08/39] Removes deprecated kernelBuilder calls; builds directly from the device. --- backends/occa/ceed-occa-simplex-basis.cpp | 2 +- backends/occa/ceed-occa-tensor-basis.cpp | 173 ++++++++++------------ backends/occa/ceed-occa-tensor-basis.hpp | 36 ++--- 3 files changed, 93 insertions(+), 118 deletions(-) diff --git a/backends/occa/ceed-occa-simplex-basis.cpp b/backends/occa/ceed-occa-simplex-basis.cpp index ee75ac9744..4cfb997ff1 100644 --- a/backends/occa/ceed-occa-simplex-basis.cpp +++ b/backends/occa/ceed-occa-simplex-basis.cpp @@ -61,7 +61,7 @@ namespace ceed { kernelProperties["defines/MAX_PQ"] = P > Q ? P : Q; kernelProperties["defines/BASIS_COMPONENT_COUNT"] = ceedComponentCount; if(usingGpuDevice()) { - kernelProperties["defines/ELEMENTS_PER_BLOCK"] = Q <= 1024 ? (1024 / Q) : 1; + kernelProperties["defines/ELEMENTS_PER_BLOCK"] = (Q <= 1024) ? 
(1024 / Q) : 1; } } diff --git a/backends/occa/ceed-occa-tensor-basis.cpp b/backends/occa/ceed-occa-tensor-basis.cpp index 20e1d21f95..f89033666d 100644 --- a/backends/occa/ceed-occa-tensor-basis.cpp +++ b/backends/occa/ceed-occa-tensor-basis.cpp @@ -45,7 +45,7 @@ namespace ceed { grad1D = device.malloc(P1D * Q1D, grad1D_); qWeight1D = device.malloc(Q1D, qWeight1D_); - setupKernelBuilders(); + setKernelProperties(); } TensorBasis::~TensorBasis() {} @@ -54,6 +54,17 @@ namespace ceed { return true; } + void TensorBasis::setKernelProperties() { + kernelProperties["defines/CeedInt"] = ::occa::dtype::get().name(); + kernelProperties["defines/CeedScalar"] = ::occa::dtype::get().name(); + kernelProperties["defines/Q1D"] = Q1D; + kernelProperties["defines/P1D"] = P1D; + kernelProperties["defines/BASIS_COMPONENT_COUNT"] = ceedComponentCount; + if(usingGpuDevice()) { + kernelProperties["defines/MAX_PQ"] = (Q1D > P1D) ? Q1D : P1D; + } + } + const char* TensorBasis::getFunctionSource() const { // TODO: Add gpu function sources when split const char *cpuFunctionSources[3] = { @@ -64,7 +75,7 @@ namespace ceed { return cpuFunctionSources[dim - 1]; } - void TensorBasis::setupKernelBuilders() { + std::string TensorBasis::getKernelSource() const { const char *cpuFunctionSources[3] = { occa_tensor_basis_1d_cpu_function_source, occa_tensor_basis_2d_cpu_function_source, @@ -89,49 +100,45 @@ namespace ceed { kernelSource += '\n'; kernelSource += cpuKernelSources[dim - 1]; } + return kernelSource; + } - ::occa::properties kernelProps; - kernelProps["defines/CeedInt"] = ::occa::dtype::get().name(); - kernelProps["defines/CeedScalar"] = ::occa::dtype::get().name(); - kernelProps["defines/Q1D"] = Q1D; - kernelProps["defines/P1D"] = P1D; - kernelProps["defines/BASIS_COMPONENT_COUNT"] = ceedComponentCount; - - interpKernelBuilder = ::occa::kernelBuilder::fromString( - kernelSource, "interp", kernelProps - ); - gradKernelBuilder = ::occa::kernelBuilder::fromString( - kernelSource, "grad" , 
kernelProps - ); - weightKernelBuilder = ::occa::kernelBuilder::fromString( - kernelSource, "weight", kernelProps - ); + ::occa::kernel TensorBasis::buildKernel(const std::string& kernelName) { + std::string kernelSource = getKernelSource(); + return getDevice().buildKernelFromString(kernelSource, + kernelName, + kernelProperties); } int TensorBasis::applyInterp(const CeedInt elementCount, const bool transpose, Vector &U, Vector &V) { - ::occa::kernel interp = ( - usingGpuDevice() - ? getGpuInterpKernel(transpose) - : getCpuInterpKernel(transpose) - ); - - interp(elementCount, - interp1D, - U.getConstKernelArg(), - V.getKernelArg()); - + if(transpose) { + if(!interpTKernel.isInitialized()) { + kernelProperties["defines/TRANSPOSE"] = transpose; + kernelProperties["defines/ELEMENTS_PER_BLOCK"] = elementsPerBlockInterp(); + interpTKernel = buildKernel("interp"); + } + interpTKernel(elementCount, + interp1D, + U.getConstKernelArg(), + V.getKernelArg()); + } else { + if(!interpKernel.isInitialized()) { + kernelProperties["defines/TRANSPOSE"] = transpose; + kernelProperties["defines/ELEMENTS_PER_BLOCK"] = elementsPerBlockInterp(); + interpKernel = buildKernel("interp"); + } + interpKernel(elementCount, + interp1D, + U.getConstKernelArg(), + V.getKernelArg()); + } return CEED_ERROR_SUCCESS; } - ::occa::kernel TensorBasis::getCpuInterpKernel(const bool transpose) { - return buildCpuEvalKernel(interpKernelBuilder, - transpose); - } - - ::occa::kernel TensorBasis::getGpuInterpKernel(const bool transpose) { + int TensorBasis::elementsPerBlockInterp() const { int elementsPerBlock; if (dim == 1) { elementsPerBlock = 32; @@ -145,36 +152,41 @@ namespace ceed { } else { elementsPerBlock = 1; } - - return buildGpuEvalKernel(interpKernelBuilder, - transpose, - elementsPerBlock); + return elementsPerBlock; } int TensorBasis::applyGrad(const CeedInt elementCount, const bool transpose, Vector &U, Vector &V) { - ::occa::kernel grad = ( - usingGpuDevice() - ? 
getGpuGradKernel(transpose) - : getCpuGradKernel(transpose) - ); - - grad(elementCount, - interp1D, grad1D, - U.getConstKernelArg(), - V.getKernelArg()); - + + if(transpose) { + if(!gradTKernel.isInitialized()) { + kernelProperties["defines/TRANSPOSE"] = transpose; + kernelProperties["defines/ELEMENTS_PER_BLOCK"] = elementsPerBlockGrad(); + gradTKernel = buildKernel("grad"); + } + gradTKernel(elementCount, + interp1D, + grad1D, + U.getConstKernelArg(), + V.getKernelArg()); + } else { + if(!gradKernel.isInitialized()) { + kernelProperties["defines/TRANSPOSE"] = transpose; + kernelProperties["defines/ELEMENTS_PER_BLOCK"] = elementsPerBlockGrad(); + gradKernel = buildKernel("grad"); + } + gradKernel(elementCount, + interp1D, + grad1D, + U.getConstKernelArg(), + V.getKernelArg()); + } return CEED_ERROR_SUCCESS; } - ::occa::kernel TensorBasis::getCpuGradKernel(const bool transpose) { - return buildCpuEvalKernel(gradKernelBuilder, - transpose); - } - - ::occa::kernel TensorBasis::getGpuGradKernel(const bool transpose) { + int TensorBasis::elementsPerBlockGrad() const { int elementsPerBlock; if (dim == 1) { elementsPerBlock = 32; @@ -188,31 +200,21 @@ namespace ceed { } else { elementsPerBlock = 1; } - - return buildGpuEvalKernel(gradKernelBuilder, - transpose, - elementsPerBlock); + return elementsPerBlock; } int TensorBasis::applyWeight(const CeedInt elementCount, Vector &W) { - ::occa::kernel weight = ( - usingGpuDevice() - ? 
getGpuWeightKernel() - : getCpuWeightKernel() - ); - - weight(elementCount, qWeight1D, W.getKernelArg()); + if(!weightKernel.isInitialized()) { + kernelProperties["defines/ELEMENTS_PER_BLOCK"] = elementsPerBlockWeight(); + weightKernel = buildKernel("weight"); + } + weightKernel(elementCount, qWeight1D, W.getKernelArg()); return CEED_ERROR_SUCCESS; } - ::occa::kernel TensorBasis::getCpuWeightKernel() { - return buildCpuEvalKernel(weightKernelBuilder, - false); - } - - ::occa::kernel TensorBasis::getGpuWeightKernel() { + int TensorBasis::elementsPerBlockWeight() const { int elementsPerBlock; if (dim == 1) { elementsPerBlock = 32 / Q1D; @@ -225,30 +227,7 @@ namespace ceed { } else { elementsPerBlock = Q1D; } - - return buildGpuEvalKernel(weightKernelBuilder, - false, - elementsPerBlock); - } - - ::occa::kernel TensorBasis::buildCpuEvalKernel(::occa::kernelBuilder &kernelBuilder, - const bool transpose) { - ::occa::properties kernelProps; - kernelProps["defines/TRANSPOSE"] = transpose; - - return kernelBuilder.build(getDevice(), kernelProps); - } - - ::occa::kernel TensorBasis::buildGpuEvalKernel(::occa::kernelBuilder &kernelBuilder, - const bool transpose, - const int elementsPerBlock) { - - ::occa::properties kernelProps; - kernelProps["defines/TRANSPOSE"] = transpose; - kernelProps["defines/MAX_PQ"] = Q1D > P1D ? 
Q1D : P1D; - kernelProps["defines/ELEMENTS_PER_BLOCK"] = elementsPerBlock; - - return kernelBuilder.build(getDevice(), kernelProps); + return elementsPerBlock; } int TensorBasis::apply(const CeedInt elementCount, diff --git a/backends/occa/ceed-occa-tensor-basis.hpp b/backends/occa/ceed-occa-tensor-basis.hpp index 5a9195d074..f37f916632 100644 --- a/backends/occa/ceed-occa-tensor-basis.hpp +++ b/backends/occa/ceed-occa-tensor-basis.hpp @@ -28,9 +28,13 @@ namespace ceed { ::occa::memory interp1D; ::occa::memory grad1D; ::occa::memory qWeight1D; - ::occa::kernelBuilder interpKernelBuilder; - ::occa::kernelBuilder gradKernelBuilder; - ::occa::kernelBuilder weightKernelBuilder; + + ::occa::json kernelProperties; + ::occa::kernel interpKernel; + ::occa::kernel interpTKernel; + ::occa::kernel gradKernel; + ::occa::kernel gradTKernel; + ::occa::kernel weightKernel; TensorBasis(CeedBasis basis, CeedInt dim_, @@ -46,37 +50,29 @@ namespace ceed { const char* getFunctionSource() const; - void setupKernelBuilders(); + std::string getKernelSource() const; + + void setKernelProperties(); + + int elementsPerBlockInterp() const; + int elementsPerBlockGrad() const; + int elementsPerBlockWeight() const; + + ::occa::kernel buildKernel(const std::string& kernelName); int applyInterp(const CeedInt elementCount, const bool transpose, Vector &U, Vector &V); - ::occa::kernel getCpuInterpKernel(const bool transpose); - ::occa::kernel getGpuInterpKernel(const bool transpose); - int applyGrad(const CeedInt elementCount, const bool transpose, Vector &U, Vector &V); - ::occa::kernel getCpuGradKernel(const bool transpose); - ::occa::kernel getGpuGradKernel(const bool transpose); - int applyWeight(const CeedInt elementCount, Vector &W); - ::occa::kernel getCpuWeightKernel(); - ::occa::kernel getGpuWeightKernel(); - - ::occa::kernel buildCpuEvalKernel(::occa::kernelBuilder &kernelBuilder, - const bool transpose); - - ::occa::kernel buildGpuEvalKernel(::occa::kernelBuilder &kernelBuilder, - const 
bool transpose, - const int elementsPerBlock); - int apply(const CeedInt elementCount, CeedTransposeMode tmode, CeedEvalMode emode, From 8e645d8f2180564ecf3312a231626fd6f6133e88 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Mon, 22 Aug 2022 19:43:30 +0000 Subject: [PATCH 09/39] Uses `std::memcpy`. --- backends/occa/ceed-occa-vector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/occa/ceed-occa-vector.cpp b/backends/occa/ceed-occa-vector.cpp index f40be0358a..591fe5d049 100644 --- a/backends/occa/ceed-occa-vector.cpp +++ b/backends/occa/ceed-occa-vector.cpp @@ -167,7 +167,7 @@ namespace ceed { case CEED_MEM_HOST: setCurrentHostBufferIfNeeded(); if (array) { - ::memcpy(currentHostBuffer, array, length * sizeof(CeedScalar)); + std::memcpy(currentHostBuffer, array, length * sizeof(CeedScalar)); } syncState = SyncState::host; return CEED_ERROR_SUCCESS; From eb4b9557eb7c787be6be2534064929776a3bfaee Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Mon, 22 Aug 2022 20:27:09 +0000 Subject: [PATCH 10/39] Removes deprecated calls to `occa::linalg`. 
--- backends/occa/ceed-occa-kernels.hpp | 1 + backends/occa/ceed-occa-vector.cpp | 17 ++++++++++++++++- backends/occa/ceed-occa-vector.hpp | 2 ++ backends/occa/kernels/set-value.cpp | 19 +++++++++++++++++++ backends/occa/kernels/set-value.hpp | 13 +++++++++++++ 5 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 backends/occa/kernels/set-value.cpp create mode 100644 backends/occa/kernels/set-value.hpp diff --git a/backends/occa/ceed-occa-kernels.hpp b/backends/occa/ceed-occa-kernels.hpp index db9e1f3d26..9cece9819c 100644 --- a/backends/occa/ceed-occa-kernels.hpp +++ b/backends/occa/ceed-occa-kernels.hpp @@ -18,6 +18,7 @@ #define CEED_OCCA_KERNELS_HEADER #include "./kernels/elem-restriction.hpp" +#include "./kernels/set-value.hpp" #include "./kernels/simplex-basis.hpp" #include "./kernels/tensor-basis.hpp" diff --git a/backends/occa/ceed-occa-vector.cpp b/backends/occa/ceed-occa-vector.cpp index 591fe5d049..2cfffc4325 100644 --- a/backends/occa/ceed-occa-vector.cpp +++ b/backends/occa/ceed-occa-vector.cpp @@ -14,6 +14,9 @@ // software, applications, hardware, advanced system engineering and early // testbed platforms, in support of the nation's exascale computing imperative. 
+#include + +#include "ceed-occa-kernels.hpp" #include "ceed-occa-vector.hpp" namespace ceed { @@ -107,7 +110,19 @@ namespace ceed { // Prioritize keeping data in the device if (syncState & SyncState::device) { setCurrentMemoryIfNeeded(); - ::occa::linalg::operator_eq(currentMemory, value); + // ::occa::linalg::operator_eq(currentMemory, value); + if(!setValueKernel.isInitialized()) { + ::occa::json kernelProperties; + CeedInt constexpr block_size{256}; + kernelProperties["defines/CeedInt"] = ::occa::dtype::get().name(); + kernelProperties["defines/CeedScalar"] = ::occa::dtype::get().name(); + kernelProperties["defines/BLOCK_SIZE"] = block_size; + + std::string kernelSource = occa_set_value_source; + setValueKernel = getDevice().buildKernelFromString( + kernelSource,"setValue",kernelProperties + ); + } syncState = SyncState::device; } else { setCurrentHostBufferIfNeeded(); diff --git a/backends/occa/ceed-occa-vector.hpp b/backends/occa/ceed-occa-vector.hpp index 2c94c2e7d9..ab3468194a 100644 --- a/backends/occa/ceed-occa-vector.hpp +++ b/backends/occa/ceed-occa-vector.hpp @@ -44,6 +44,8 @@ namespace ceed { CeedSize hostBufferLength; CeedScalar *hostBuffer; + ::occa::kernel setValueKernel; + // Current resources ::occa::memory currentMemory; CeedScalar *currentHostBuffer; diff --git a/backends/occa/kernels/set-value.cpp b/backends/occa/kernels/set-value.cpp new file mode 100644 index 0000000000..d2f0d60c33 --- /dev/null +++ b/backends/occa/kernels/set-value.cpp @@ -0,0 +1,19 @@ +#include "./kernel-defines.hpp" + +// Expects the following types to be defined: +// - CeedInt +// - CeedScalar +// +// Expects the following constants to be defined: +// - BLOCK_SIZE : CeedInt + +const char *occa_set_value_source = STRINGIFY_SOURCE( + + @kernel + void setValue(CeedScalar* ptr,const CeedScalar value,const CeedInt count) { + @tile(BLOCK_SIZE,@outer,@inner) + for(CeedInt i=0; i < count; ++i) { + ptr[i] = value; + } + } +); diff --git a/backends/occa/kernels/set-value.hpp 
b/backends/occa/kernels/set-value.hpp new file mode 100644 index 0000000000..df61906315 --- /dev/null +++ b/backends/occa/kernels/set-value.hpp @@ -0,0 +1,13 @@ +#ifndef CEED_OCCA_KERNELS_SETVALUE_HEADER +#define CEED_OCCA_KERNELS_SETVALUE_HEADER + +// Expects the following types to be defined: +// - CeedInt +// - CeedScalar +// +// Expects the following constants to be defined: +// - BLOCK_SIZE : CeedInt + +extern const char *occa_set_value_source; + +#endif \ No newline at end of file From 16970557af50c5f79898a1c87e6a6e337b9e0d9e Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Mon, 22 Aug 2022 20:43:59 +0000 Subject: [PATCH 11/39] Add registration and device configuration for DPC++ and OpenCL backends. --- backends/occa/ceed-occa.cpp | 42 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/backends/occa/ceed-occa.cpp b/backends/occa/ceed-occa.cpp index e5b4abbfef..a76f2cb677 100644 --- a/backends/occa/ceed-occa.cpp +++ b/backends/occa/ceed-occa.cpp @@ -57,11 +57,12 @@ namespace ceed { if (::occa::modeIsEnabled("HIP")) { return "HIP"; } - /* OpenCL not fully supported in OCCA + if (::occa::modeIsEnabled("dpcpp")) { + return "dpcpp"; + } if (::occa::modeIsEnabled("OpenCL")) { return "OpenCL"; } - */ // Metal doesn't support doubles } @@ -85,12 +86,14 @@ namespace ceed { mode = "HIP"; return CEED_ERROR_SUCCESS; } - /* OpenCL not fully supported in OCCA + if (match == "dcppp") { + mode = "dpcpp"; + return CEED_ERROR_SUCCESS; + } if (match == "opencl") { mode = "OpenCL"; return CEED_ERROR_SUCCESS; } - */ if (match == "openmp") { mode = "OpenMP"; return CEED_ERROR_SUCCESS; @@ -151,12 +154,14 @@ namespace ceed { match = "hip"; return CEED_ERROR_SUCCESS; } - /* + if (resource == "/gpu/dpcpp/occa"){ + match = "dpcpp"; + return CEED_ERROR_SUCCESS; + } if (resource == "/gpu/opencl/occa"){ match = "opencl"; return CEED_ERROR_SUCCESS; } - */ if (resource == "/cpu/openmp/occa"){ match = "openmp"; return CEED_ERROR_SUCCESS; @@ 
-224,22 +229,21 @@ namespace ceed { } // Set default device id - // Note: OpenCL not fully supported in OCCA if ((mode == "CUDA") - || (mode == "HIP")) { + || (mode == "HIP") + || (mode == "dpcpp") + || (mode == "OpenCL")) { if (!deviceProps.has("device_id")) { deviceProps["device_id"] = 0; } } // Set default platform id - /* OpenCL not fully supported in OCCA - if (mode == "OpenCL") { + if ((mode=="dpcpp") || (mode == "OpenCL")){ if (!deviceProps.has("platform_id")) { deviceProps["platform_id"] = 0; } } - */ } static int initCeed(const char *c_resource, Ceed ceed) { @@ -340,16 +344,14 @@ namespace ceed { CEED_INTERN int CeedRegister_Occa(void) { int ierr; // General mode - ierr = CeedRegister("/*/occa", ceed::occa::registerBackend, 260); CeedChkBackend(ierr); + ierr = CeedRegister("/*/occa", ceed::occa::registerBackend, 270); CeedChkBackend(ierr); // CPU Modes - ierr = CeedRegister("/cpu/self/occa", ceed::occa::registerBackend, 250); CeedChkBackend(ierr); - ierr = CeedRegister("/cpu/openmp/occa", ceed::occa::registerBackend, 240); CeedChkBackend(ierr); - // OpenCL Mode - /* OpenCL not fully supported in OCCA - ierr = CeedRegister("/gpu/opencl/occa", ceed::occa::registerBackend, 230); CeedChkBackend(ierr); - */ + ierr = CeedRegister("/cpu/self/occa",ceed::occa::registerBackend, 260); CeedChkBackend(ierr); + ierr = CeedRegister("/cpu/openmp/occa",ceed::occa::registerBackend, 250); CeedChkBackend(ierr); // GPU Modes - ierr = CeedRegister("/gpu/hip/occa", ceed::occa::registerBackend, 220); CeedChkBackend(ierr); - ierr = CeedRegister("/gpu/cuda/occa", ceed::occa::registerBackend, 210); CeedChkBackend(ierr); + ierr = CeedRegister("/gpu/dpcpp/occa",ceed::occa::registerBackend, 240); CeedChkBackend(ierr); + ierr = CeedRegister("/gpu/opencl/occa",ceed::occa::registerBackend, 230); CeedChkBackend(ierr); + ierr = CeedRegister("/gpu/hip/occa",ceed::occa::registerBackend, 220); CeedChkBackend(ierr); + ierr = CeedRegister("/gpu/cuda/occa",ceed::occa::registerBackend, 210); 
CeedChkBackend(ierr); return CEED_ERROR_SUCCESS; } From a1021531ed552ce505c05d5222212773938ffd27 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Mon, 22 Aug 2022 20:51:21 +0000 Subject: [PATCH 12/39] Add DPC++ and OpenCL backends to makefile. --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4348541bfe..6446cdcb8f 100644 --- a/Makefile +++ b/Makefile @@ -369,7 +369,8 @@ OCCA_BACKENDS = /cpu/self/occa ifneq ($(wildcard $(OCCA_DIR)/lib/libocca.*),) OCCA_MODES := $(shell $(OCCA_DIR)/bin/occa modes) OCCA_BACKENDS += $(if $(filter OpenMP,$(OCCA_MODES)),/cpu/openmp/occa) -# OCCA_BACKENDS += $(if $(filter OpenCL,$(OCCA_MODES)),/gpu/opencl/occa) + OCCA_BACKENDS += $(if $(filter dpcpp,$(OCCA_MODES)),/gpu/dpcpp/occa) + OCCA_BACKENDS += $(if $(filter OpenCL,$(OCCA_MODES)),/gpu/opencl/occa) OCCA_BACKENDS += $(if $(filter HIP,$(OCCA_MODES)),/gpu/hip/occa) OCCA_BACKENDS += $(if $(filter CUDA,$(OCCA_MODES)),/gpu/cuda/occa) From 898e8f2b207f8c1ce53d0b86e04703ff5e63bdae Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Mon, 22 Aug 2022 20:56:29 +0000 Subject: [PATCH 13/39] Spelling. --- backends/occa/ceed-occa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/occa/ceed-occa.cpp b/backends/occa/ceed-occa.cpp index a76f2cb677..1ba0bae91e 100644 --- a/backends/occa/ceed-occa.cpp +++ b/backends/occa/ceed-occa.cpp @@ -86,7 +86,7 @@ namespace ceed { mode = "HIP"; return CEED_ERROR_SUCCESS; } - if (match == "dcppp") { + if (match == "dpcpp") { mode = "dpcpp"; return CEED_ERROR_SUCCESS; } From 7bf71c77c34128f61ca5164b28640316afb040b5 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Mon, 22 Aug 2022 22:03:32 +0000 Subject: [PATCH 14/39] Configure build flags for oneAPI compilers. 
--- Makefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 6446cdcb8f..343f540062 100644 --- a/Makefile +++ b/Makefile @@ -90,28 +90,34 @@ endif AFLAGS = -fsanitize=address #-fsanitize=undefined -fno-omit-frame-pointer # Note: Intel oneAPI C/C++ compiler is now icx/icpx -CC_VENDOR := $(subst oneAPI,icc,$(firstword $(filter gcc clang icc oneAPI XL,$(subst -, ,$(shell $(CC) --version))))) -FC_VENDOR := $(if $(FC),$(firstword $(filter GNU ifort XL,$(shell $(FC) --version 2>&1 || $(FC) -qversion)))) +CC_VENDOR := $(firstword $(filter gcc clang icc oneAPI XL,$(subst -, ,$(shell $(CC) --version)))) +FC_VENDOR := $(if $(FC),$(firstword $(filter GNU ifort ifx XL,$(shell $(FC) --version 2>&1 || $(FC) -qversion)))) # Default extra flags by vendor MARCHFLAG.gcc := -march=native MARCHFLAG.clang := $(MARCHFLAG.gcc) MARCHFLAG.icc := +MARCHFLAG.oneAPI := $(MARCHFLAG.clang) OMP_SIMD_FLAG.gcc := -fopenmp-simd OMP_SIMD_FLAG.clang := $(OMP_SIMD_FLAG.gcc) OMP_SIMD_FLAG.icc := -qopenmp-simd +OMP_SIMD_FLAG.oneAPI := $(OMP_SIMD_FLAG.clang) OPT.gcc := -ffp-contract=fast OPT.clang := $(OPT.gcc) +OPT.oneAPI := $(OPT.clang) CFLAGS.gcc := -fPIC -std=c99 -Wall -Wextra -Wno-unused-parameter -MMD -MP CFLAGS.clang := $(CFLAGS.gcc) CFLAGS.icc := $(CFLAGS.gcc) +CFLAGS.oneAPI := $(CFLAGS.clang) CFLAGS.XL := -qpic -MMD CXXFLAGS.gcc := -fPIC -std=c++11 -Wall -Wextra -Wno-unused-parameter -MMD -MP CXXFLAGS.clang := $(CXXFLAGS.gcc) CXXFLAGS.icc := $(CXXFLAGS.gcc) +CXXFLAGS.oneAPI := $(CXXFLAGS.clang) CXXFLAGS.XL := -qpic -std=c++11 -MMD FFLAGS.GNU := -fPIC -cpp -Wall -Wextra -Wno-unused-parameter -Wno-unused-dummy-argument -MMD -MP FFLAGS.ifort := -fPIC -cpp +FFLAGS.ifx := $(FFLAGS.ifort) FFLAGS.XL := -qpic -ffree-form -qpreprocess -qextname -MMD # This check works with compilers that use gcc and clang.
It fails with some From ecccda7cf318e3d8788257802430507206aaad3b Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Mon, 22 Aug 2022 22:04:06 +0000 Subject: [PATCH 15/39] Correctly set mode in `occa::json` object. --- backends/occa/ceed-occa.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/backends/occa/ceed-occa.cpp b/backends/occa/ceed-occa.cpp index 1ba0bae91e..fff347adcf 100644 --- a/backends/occa/ceed-occa.cpp +++ b/backends/occa/ceed-occa.cpp @@ -225,7 +225,7 @@ namespace ceed { mode = (std::string) deviceProps["mode"]; } else { mode = defaultMode; - deviceProps["mode"] = mode; + deviceProps.set("mode",mode); } // Set default device id @@ -331,11 +331,15 @@ namespace ceed { try { ierr = ceed::occa::initCeed(resource, ceed); CeedChkBackend(ierr); + } catch (const ::occa::exception &e) { + CeedHandleOccaException(e); + } + try { ierr = ceed::occa::registerMethods(ceed); CeedChkBackend(ierr); - } catch (::occa::exception &exc) { - CeedHandleOccaException(exc); } - + catch (const ::occa::exception &e) { + CeedHandleOccaException(e); + } return CEED_ERROR_SUCCESS; } } From 89d259068c1b7d675e40f9738bf01b73f280ed5a Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 23 Aug 2022 17:41:31 +0000 Subject: [PATCH 16/39] Add missing functions to OCCA CeedVector implementation. 
--- backends/occa/ceed-occa-vector.cpp | 68 +++++++++++++++++++++++++++++- backends/occa/ceed-occa-vector.hpp | 16 +++++++ 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/backends/occa/ceed-occa-vector.cpp b/backends/occa/ceed-occa-vector.cpp index 2cfffc4325..b4066ac238 100644 --- a/backends/occa/ceed-occa-vector.cpp +++ b/backends/occa/ceed-occa-vector.cpp @@ -33,6 +33,27 @@ namespace ceed { freeHostBuffer(); } + int Vector::hasValidArray(bool* has_valid_array) { + (*has_valid_array) = (!!hostBuffer) + || (!!currentHostBuffer) + || (memory.isInitialized()) + || (currentMemory.isInitialized()); + return CEED_ERROR_SUCCESS; + } + + int Vector::hasBorrowedArrayOfType(CeedMemType mem_type, + bool *has_borrowed_array_of_type) { + switch (mem_type) { + case CEED_MEM_HOST: + (*has_borrowed_array_of_type) = !!currentHostBuffer; + break; + case CEED_MEM_DEVICE: + (*has_borrowed_array_of_type) = currentMemory.isInitialized(); + break; + } + return CEED_ERROR_SUCCESS; + } + Vector* Vector::getVector(CeedVector vec, const bool assertValid) { if (!vec || vec == CEED_VECTOR_NONE) { @@ -110,7 +131,6 @@ namespace ceed { // Prioritize keeping data in the device if (syncState & SyncState::device) { setCurrentMemoryIfNeeded(); - // ::occa::linalg::operator_eq(currentMemory, value); if(!setValueKernel.isInitialized()) { ::occa::json kernelProperties; CeedInt constexpr block_size{256}; @@ -272,6 +292,22 @@ namespace ceed { return error; } + int Vector::getWriteOnlyArray(CeedMemType mtype, + CeedScalar **array) { + // const bool willBeFullySynced = ( + // (syncState == SyncState::device && mtype == CEED_MEM_HOST) || + // (syncState == SyncState::host && mtype == CEED_MEM_DEVICE) + // ); + + const int error = getArray(mtype, const_cast(array)); + // // Take advantage the vector will be fully synced + // if (!error && willBeFullySynced) { + // syncState = SyncState::all; + // } + + return error; + } + int Vector::restoreArray(CeedScalar **array) { return 
CEED_ERROR_SUCCESS; } @@ -360,11 +396,14 @@ namespace ceed { Ceed ceed; ierr = CeedVectorGetCeed(vec, &ceed); CeedChk(ierr); + CeedOccaRegisterFunction(vec, "HasValidArray", Vector::ceedHasValidArray); + CeedOccaRegisterFunction(vec, "HasBorrowedArrayOfType",Vector::ceedHasBorrowedArrayOfType); CeedOccaRegisterFunction(vec, "SetValue", Vector::ceedSetValue); CeedOccaRegisterFunction(vec, "SetArray", Vector::ceedSetArray); CeedOccaRegisterFunction(vec, "TakeArray", Vector::ceedTakeArray); CeedOccaRegisterFunction(vec, "GetArray", Vector::ceedGetArray); CeedOccaRegisterFunction(vec, "GetArrayRead", Vector::ceedGetArrayRead); + CeedOccaRegisterFunction(vec, "GetArrayWrite", Vector::ceedGetArrayWrite); CeedOccaRegisterFunction(vec, "RestoreArray", Vector::ceedRestoreArray); CeedOccaRegisterFunction(vec, "RestoreArrayRead", Vector::ceedRestoreArrayRead); CeedOccaRegisterFunction(vec, "Destroy", Vector::ceedDestroy); @@ -375,6 +414,24 @@ namespace ceed { return CEED_ERROR_SUCCESS; } + int Vector::ceedHasValidArray(CeedVector vec, bool* has_valid_array) { + Vector *vector = Vector::from(vec); + if (!vector) { + return staticCeedError("Invalid CeedVector passed"); + } + return vector->hasValidArray(has_valid_array); + } + + int Vector::ceedHasBorrowedArrayOfType(CeedVector vec, + CeedMemType mem_type, + bool *has_borrowed_array_of_type) { + Vector *vector = Vector::from(vec); + if (!vector) { + return staticCeedError("Invalid CeedVector passed"); + } + return vector->hasBorrowedArrayOfType(mem_type,has_borrowed_array_of_type); + } + int Vector::ceedSetValue(CeedVector vec, CeedScalar value) { Vector *vector = Vector::from(vec); if (!vector) { @@ -418,6 +475,15 @@ namespace ceed { return vector->getReadOnlyArray(mtype, array); } + int Vector::ceedGetArrayWrite(CeedVector vec, CeedMemType mtype, + CeedScalar **array) { + Vector *vector = Vector::from(vec); + if (!vector) { + return staticCeedError("Invalid CeedVector passed"); + } + return vector->getWriteOnlyArray(mtype, 
array); + } + int Vector::ceedRestoreArray(CeedVector vec, CeedScalar **array) { Vector *vector = Vector::from(vec); if (!vector) { diff --git a/backends/occa/ceed-occa-vector.hpp b/backends/occa/ceed-occa-vector.hpp index ab3468194a..6baf308d87 100644 --- a/backends/occa/ceed-occa-vector.hpp +++ b/backends/occa/ceed-occa-vector.hpp @@ -57,6 +57,10 @@ namespace ceed { ~Vector(); + int hasValidArray(bool* has_valid_array); + + int hasBorrowedArrayOfType(CeedMemType mem_type,bool *has_borrowed_array_of_type); + static Vector* getVector(CeedVector vec, const bool assertValid = true); @@ -94,6 +98,9 @@ namespace ceed { int getReadOnlyArray(CeedMemType mtype, CeedScalar **array); + + int getWriteOnlyArray(CeedMemType mtype, + CeedScalar **array); int restoreArray(CeedScalar **array); @@ -111,6 +118,12 @@ namespace ceed { static int registerCeedFunction(Ceed ceed, CeedVector vec, const char *fname, ceed::occa::ceedFunction f); + static int ceedHasValidArray(CeedVector vec, bool* has_valid_array); + + static int ceedHasBorrowedArrayOfType(CeedVector vec, + CeedMemType mem_type, + bool *has_borrowed_array_of_type); + static int ceedCreate(CeedSize length, CeedVector vec); static int ceedSetValue(CeedVector vec, CeedScalar value); @@ -126,6 +139,9 @@ namespace ceed { static int ceedGetArrayRead(CeedVector vec, CeedMemType mtype, CeedScalar **array); + static int ceedGetArrayWrite(CeedVector vec, CeedMemType mtype, + CeedScalar **array); + static int ceedRestoreArray(CeedVector vec, CeedScalar **array); static int ceedRestoreArrayRead(CeedVector vec, CeedScalar **array); From 504147a5ffd9307e0f108fefb540f5756cb3b1aa Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 23 Aug 2022 18:40:09 +0000 Subject: [PATCH 17/39] Adds missing call to `setValueKernel` in OCCA CeedVector impl. 
--- backends/occa/ceed-occa-vector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backends/occa/ceed-occa-vector.cpp b/backends/occa/ceed-occa-vector.cpp index b4066ac238..23a2e601a6 100644 --- a/backends/occa/ceed-occa-vector.cpp +++ b/backends/occa/ceed-occa-vector.cpp @@ -141,7 +141,8 @@ namespace ceed { std::string kernelSource = occa_set_value_source; setValueKernel = getDevice().buildKernelFromString( kernelSource,"setValue",kernelProperties - ); + ); } + setValueKernel(currentMemory,value,length); syncState = SyncState::device; } else { From 263407ff52d1c811791339cc4c313d60f65d26bc Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Wed, 24 Aug 2022 16:47:00 +0000 Subject: [PATCH 18/39] Gets occa device function name from the CeedQFunction. --- backends/occa/ceed-occa-qfunction.cpp | 9 ++++++--- backends/occa/ceed-occa-qfunction.hpp | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/backends/occa/ceed-occa-qfunction.cpp b/backends/occa/ceed-occa-qfunction.cpp index d08100649f..07c8f70988 100644 --- a/backends/occa/ceed-occa-qfunction.cpp +++ b/backends/occa/ceed-occa-qfunction.cpp @@ -23,12 +23,13 @@ namespace ceed { namespace occa { - QFunction::QFunction(const std::string &source) : + QFunction::QFunction(const std::string &source, + const std::string& function_name) : ceedIsIdentity(false) { const size_t colonIndex = source.find(':'); filename = source.substr(0, colonIndex); - qFunctionName = source.substr(colonIndex + 1); + qFunctionName = function_name; } QFunction* QFunction::getQFunction(CeedQFunction qf, @@ -243,8 +244,10 @@ namespace ceed { ierr = CeedGetData(ceed, &context); CeedChk(ierr); char *source; ierr = CeedQFunctionGetSourcePath(qf, &source); CeedChk(ierr); + char *function_name; + ierr = CeedQFunctionGetKernelName(qf,&function_name); CeedChk(ierr); - QFunction *qFunction = new QFunction(source); + QFunction *qFunction = new QFunction(source,function_name); ierr = CeedQFunctionSetData(qf, qFunction);
CeedChk(ierr); CeedOccaRegisterFunction(qf, "Apply", QFunction::ceedApply); diff --git a/backends/occa/ceed-occa-qfunction.hpp b/backends/occa/ceed-occa-qfunction.hpp index 3c9d8aac4e..6ead23c58c 100644 --- a/backends/occa/ceed-occa-qfunction.hpp +++ b/backends/occa/ceed-occa-qfunction.hpp @@ -34,7 +34,8 @@ namespace ceed { CeedQFunctionContext qFunctionContext; QFunctionArgs args; - QFunction(const std::string &source); + QFunction(const std::string &source, + const std::string &function_name); static QFunction* getQFunction(CeedQFunction qf, const bool assertValid = true); From 0b739de1ee800ba2266085448ddd2554b051d89f Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Wed, 24 Aug 2022 20:00:38 +0000 Subject: [PATCH 19/39] Adds OpenCL and DPC++ to backends list. --- backends/ceed-backend-list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends/ceed-backend-list.h b/backends/ceed-backend-list.h index c8c2e630d7..7d452c3420 100644 --- a/backends/ceed-backend-list.h +++ b/backends/ceed-backend-list.h @@ -24,7 +24,7 @@ MACRO(CeedRegister_Magma, 2, "/gpu/cuda/magma", "/gpu/hip/magma") MACRO(CeedRegister_Magma_Det, 2, "/gpu/cuda/magma/det", "/gpu/hip/magma/det") MACRO(CeedRegister_Memcheck_Blocked, 1, "/cpu/self/memcheck/blocked") MACRO(CeedRegister_Memcheck_Serial, 1, "/cpu/self/memcheck/serial") -MACRO(CeedRegister_Occa, 4, "/cpu/self/occa", "/cpu/openmp/occa", "/gpu/hip/occa", "/gpu/cuda/occa") +MACRO(CeedRegister_Occa, 6, "/cpu/self/occa", "/cpu/openmp/occa", "/gpu/dpcpp/occa","/gpu/opencl/occa","/gpu/hip/occa", "/gpu/cuda/occa") MACRO(CeedRegister_Opt_Blocked, 1, "/cpu/self/opt/blocked") MACRO(CeedRegister_Opt_Serial, 1, "/cpu/self/opt/serial") MACRO(CeedRegister_Ref, 1, "/cpu/self/ref/serial") From 52bfdcff07fe9c17fe74c0383a07c5c9180cd440 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Wed, 24 Aug 2022 20:01:35 +0000 Subject: [PATCH 20/39] Uses unique kernel name for OCCA qFunction kernels. 
--- backends/occa/ceed-occa-qfunction.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/backends/occa/ceed-occa-qfunction.cpp b/backends/occa/ceed-occa-qfunction.cpp index 07c8f70988..6f151b2222 100644 --- a/backends/occa/ceed-occa-qfunction.cpp +++ b/backends/occa/ceed-occa-qfunction.cpp @@ -26,9 +26,7 @@ namespace ceed { QFunction::QFunction(const std::string &source, const std::string& function_name) : ceedIsIdentity(false) { - - const size_t colonIndex = source.find(':'); - filename = source.substr(0, colonIndex); + filename = source; qFunctionName = function_name; } @@ -118,7 +116,7 @@ namespace ceed { // Properties only used in the QFunction kernel source props["defines/OCCA_Q"] = Q; - const std::string kernelName = "qFunctionKernel"; + const std::string kernelName = "qf_" + qFunctionName; qFunctionKernel = ( getDevice().buildKernelFromString(getKernelSource(kernelName, Q), From 2af5622fd463355a5faa5403301b0c44900ef620 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Wed, 24 Aug 2022 20:02:52 +0000 Subject: [PATCH 21/39] Adds a dummy `ceed.h` header in include in OCCA kernels. 
--- backends/occa/include/ceed.h | 4 + backends/occa/include/ceed/ceed-f32.h | 23 + backends/occa/include/ceed/ceed-f64.h | 22 + backends/occa/include/ceed/ceed.h | 694 ++++++++++++++++++++++++++ 4 files changed, 743 insertions(+) create mode 100644 backends/occa/include/ceed.h create mode 100644 backends/occa/include/ceed/ceed-f32.h create mode 100644 backends/occa/include/ceed/ceed-f64.h create mode 100644 backends/occa/include/ceed/ceed.h diff --git a/backends/occa/include/ceed.h b/backends/occa/include/ceed.h new file mode 100644 index 0000000000..5adb15a0a8 --- /dev/null +++ b/backends/occa/include/ceed.h @@ -0,0 +1,4 @@ +#ifndef _OCCA_INCLUDE_CEED_H_ +#define _OCCA_INCLUDE_CEED_H_ +// Phony header to include when compiling OKL +#endif \ No newline at end of file diff --git a/backends/occa/include/ceed/ceed-f32.h b/backends/occa/include/ceed/ceed-f32.h new file mode 100644 index 0000000000..7ee7b43f8c --- /dev/null +++ b/backends/occa/include/ceed/ceed-f32.h @@ -0,0 +1,23 @@ +/// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +/// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +/// +/// SPDX-License-Identifier: BSD-2-Clause +/// +/// This file is part of CEED: http://github.com/ceed + +/// @file +/// Public header for definitions related to using FP32 floating point (single +/// precision) for CeedScalar. Include this header in ceed/ceed.h to use +/// float instead of double. +#ifndef _ceed_f32_h +#define _ceed_f32_h + +/// Set base scalar type to FP32. (See CeedScalarType enum in ceed/ceed.h +/// for all options.) 
+#define CEED_SCALAR_TYPE CEED_SCALAR_FP32 +typedef float CeedScalar; + +/// Machine epsilon +#define CEED_EPSILON 6e-08 + +#endif diff --git a/backends/occa/include/ceed/ceed-f64.h b/backends/occa/include/ceed/ceed-f64.h new file mode 100644 index 0000000000..fb557df17d --- /dev/null +++ b/backends/occa/include/ceed/ceed-f64.h @@ -0,0 +1,22 @@ +/// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +/// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +/// +/// SPDX-License-Identifier: BSD-2-Clause +/// +/// This file is part of CEED: http://github.com/ceed + +/// @file +/// Public header for definitions related to using FP64 floating point (double +/// precision) for CeedScalar. This is the default header included in ceed/ceed.h. +#ifndef _ceed_f64_h +#define _ceed_f64_h + +/// Set base scalar type to FP64. (See CeedScalarType enum in ceed/ceed.h +/// for all options.) +#define CEED_SCALAR_TYPE CEED_SCALAR_FP64 +typedef double CeedScalar; + +/// Machine epsilon +#define CEED_EPSILON 1e-16 + +#endif diff --git a/backends/occa/include/ceed/ceed.h b/backends/occa/include/ceed/ceed.h new file mode 100644 index 0000000000..70a5e03e5f --- /dev/null +++ b/backends/occa/include/ceed/ceed.h @@ -0,0 +1,694 @@ +/// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +/// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +/// reserved. See files LICENSE and NOTICE for details. +/// +/// This file is part of CEED, a collection of benchmarks, miniapps, software +/// libraries and APIs for efficient high-order finite element and spectral +/// element discretizations for exascale applications. For more information and +/// source code availability see http://github.com/ceed +/// +/// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +/// a collaborative effort of two U.S. 
Department of Energy organizations (Office +/// of Science and the National Nuclear Security Administration) responsible for +/// the planning and preparation of a capable exascale ecosystem, including +/// software, applications, hardware, advanced system engineering and early +/// testbed platforms, in support of the nation's exascale computing imperative. + +/// @file +/// Public header for user and utility components of libCEED +#ifndef _ceed_h +#define _ceed_h + +/// @defgroup Ceed Ceed: core components +/// @defgroup CeedVector CeedVector: storing and manipulating vectors +/// @defgroup CeedElemRestriction CeedElemRestriction: restriction from local vectors to elements +/// @defgroup CeedBasis CeedBasis: fully discrete finite element-like objects +/// @defgroup CeedQFunction CeedQFunction: independent operations at quadrature points +/// @defgroup CeedOperator CeedOperator: composed FE-type operations on vectors +/// +/// @page FunctionCategories libCEED: Types of Functions +/// libCEED provides three different header files depending upon the type of +/// functions a user requires. +/// @section Utility Utility Functions +/// These functions are intended as general utilities that may be useful to +/// libCEED developers and users. These functions can generally be found in +/// "ceed.h". +/// @section User User Functions +/// These functions are intended to be used by general users of libCEED +/// and can generally be found in "ceed.h". +/// @section Advanced Advanced Functions +/// These functions are intended to be used by advanced users of libCEED +/// and can generally be found in "ceed.h". +/// @section Backend Backend Developer Functions +/// These functions are intended to be used by backend developers of +/// libCEED and can generally be found in "ceed-backend.h". +/// @section Developer Library Developer Functions +/// These functions are intended to be used by library developers of +/// libCEED and can generally be found in "ceed-impl.h".
+ + +/// Library context created by CeedInit() +/// @ingroup CeedUser +typedef struct Ceed_private *Ceed; +/// Non-blocking Ceed interfaces return a CeedRequest. +/// To perform an operation immediately, pass \ref CEED_REQUEST_IMMEDIATE instead. +/// @ingroup CeedUser +typedef struct CeedRequest_private *CeedRequest; +/// Handle for vectors over the field \ref CeedScalar +/// @ingroup CeedVectorUser +typedef struct CeedVector_private *CeedVector; +/// Handle for object describing restriction to elements +/// @ingroup CeedElemRestrictionUser +typedef struct CeedElemRestriction_private *CeedElemRestriction; +/// Handle for object describing discrete finite element evaluations +/// @ingroup CeedBasisUser +typedef struct CeedBasis_private *CeedBasis; +/// Handle for object describing CeedQFunction fields +/// @ingroup CeedQFunctionBackend +typedef struct CeedQFunctionField_private *CeedQFunctionField; +/// Handle for object describing functions evaluated independently at quadrature points +/// @ingroup CeedQFunctionUser +typedef struct CeedQFunction_private *CeedQFunction; +/// Handle for object describing CeedOperator fields +/// @ingroup CeedOperatorBackend +typedef struct CeedOperatorField_private *CeedOperatorField; +/// Handle for object describing context data for CeedQFunctions +/// @ingroup CeedQFunctionUser +typedef struct CeedQFunctionContext_private *CeedQFunctionContext; +/// Handle for object describing registered fields for CeedQFunctionContext +/// @ingroup CeedQFunctionUser +typedef struct CeedContextFieldLabel_private *CeedContextFieldLabel; +/// Handle for object describing FE-type operators acting on vectors +/// +/// Given an element restriction \f$E\f$, basis evaluator \f$B\f$, and +/// quadrature function\f$f\f$, a CeedOperator expresses operations of the form +/// $$ E^T B^T f(B E u) $$ +/// acting on the vector \f$u\f$. 
+/// @ingroup CeedOperatorUser +typedef struct CeedOperator_private *CeedOperator; + +CEED_EXTERN int CeedRegistryGetList(size_t *n, char ***const resources, CeedInt **array); +CEED_EXTERN int CeedInit(const char *resource, Ceed *ceed); +CEED_EXTERN int CeedReferenceCopy(Ceed ceed, Ceed *ceed_copy); +CEED_EXTERN int CeedGetResource(Ceed ceed, const char **resource); +CEED_EXTERN int CeedIsDeterministic(Ceed ceed, bool *is_deterministic); +CEED_EXTERN int CeedAddJitSourceRoot(Ceed ceed, const char *jit_source_root); +CEED_EXTERN int CeedView(Ceed ceed, FILE *stream); +CEED_EXTERN int CeedDestroy(Ceed *ceed); + +CEED_EXTERN int CeedErrorImpl(Ceed, const char *, int, const char *, int, + const char *, ...); +/// Raise an error on ceed object +/// +/// @param ceed Ceed library context or NULL +/// @param ecode Error code (int) +/// @param ... printf-style format string followed by arguments as needed +/// +/// @ingroup Ceed +/// @sa CeedSetErrorHandler() +#if defined(__clang__) +/// Use nonstandard ternary to convince the compiler/clang-tidy that this +/// function never returns zero. +# define CeedError(ceed, ecode, ...) \ + (CeedErrorImpl((ceed), __FILE__, __LINE__, __func__, (ecode), __VA_ARGS__), (ecode)) +#else +# define CeedError(ceed, ecode, ...) 
\ + CeedErrorImpl((ceed), __FILE__, __LINE__, __func__, (ecode), __VA_ARGS__) ?: (ecode) +#endif + +/// Ceed error handlers +CEED_EXTERN int CeedErrorReturn(Ceed, const char *, int, const char *, int, + const char *, va_list *); +CEED_EXTERN int CeedErrorStore(Ceed, const char *, int, const char *, int, + const char *, va_list *); +CEED_EXTERN int CeedErrorAbort(Ceed, const char *, int, const char *, int, + const char *, va_list *); +CEED_EXTERN int CeedErrorExit(Ceed, const char *, int, const char *, int, + const char *, va_list *); +typedef int (*CeedErrorHandler)(Ceed, const char *, int, + const char *, int, const char *, + va_list *); +CEED_EXTERN int CeedSetErrorHandler(Ceed ceed, CeedErrorHandler eh); +CEED_EXTERN int CeedGetErrorMessage(Ceed, const char **err_msg); +CEED_EXTERN int CeedResetErrorMessage(Ceed, const char **err_msg); + +/// libCEED library version numbering +/// @ingroup Ceed +#define CEED_VERSION_MAJOR 0 +#define CEED_VERSION_MINOR 10 +#define CEED_VERSION_PATCH 1 +#define CEED_VERSION_RELEASE false + +/// Compile-time check that the current library version is at least as +/// recent as the specified version. This macro is typically used in +/// @code +/// #if CEED_VERSION_GE(0, 8, 0) +/// code path that needs at least 0.8.0 +/// #else +/// fallback code for older versions +/// #endif +/// @endcode +/// +/// A non-release version always compares as positive infinity. 
+/// +/// @param major Major version +/// @param minor Minor version +/// @param patch Patch (subminor) version +/// +/// @ingroup Ceed +/// @sa CeedGetVersion() +#define CEED_VERSION_GE(major, minor, patch) \ + (!CEED_VERSION_RELEASE || \ + (CEED_VERSION_MAJOR > major || \ + (CEED_VERSION_MAJOR == major && \ + (CEED_VERSION_MINOR > minor || \ + (CEED_VERSION_MINOR == minor && CEED_VERSION_PATCH >= patch))))) + +CEED_EXTERN int CeedGetVersion(int *major, int *minor, int *patch, + bool *release); + +CEED_EXTERN int CeedGetScalarType(CeedScalarType *scalar_type); + +CEED_EXTERN const char *const *CeedErrorTypes; + +/// Specify memory type +/// +/// Many Ceed interfaces take or return pointers to memory. This enum is used to +/// specify where the memory being provided or requested must reside. +/// @ingroup Ceed +typedef enum { + /// Memory resides on the host + CEED_MEM_HOST, + /// Memory resides on a device (corresponding to \ref Ceed resource) + CEED_MEM_DEVICE, +} CeedMemType; +CEED_EXTERN const char *const CeedMemTypes[]; + +CEED_EXTERN int CeedGetPreferredMemType(Ceed ceed, CeedMemType *type); + +/// Conveys ownership status of arrays passed to Ceed interfaces. +/// @ingroup Ceed +typedef enum { + /// Implementation will copy the values and not store the passed pointer. + CEED_COPY_VALUES, + /// Implementation can use and modify the data provided by the user, but does + /// not take ownership. + CEED_USE_POINTER, + /// Implementation takes ownership of the pointer and will free using + /// CeedFree() when done using it. The user should not assume that the + /// pointer remains valid after ownership has been transferred. Note that + /// arrays allocated using C++ operator new or other allocators cannot + /// generally be freed using CeedFree(). CeedFree() is capable of freeing any + /// memory that can be freed using free(3). 
+ CEED_OWN_POINTER, +} CeedCopyMode; +CEED_EXTERN const char *const CeedCopyModes[]; + +/// Denotes type of vector norm to be computed +/// @ingroup CeedVector +typedef enum { + /// L_1 norm: sum_i |x_i| + CEED_NORM_1, + /// L_2 norm: sqrt(sum_i |x_i|^2) + CEED_NORM_2, + /// L_Infinity norm: max_i |x_i| + CEED_NORM_MAX, +} CeedNormType; + +CEED_EXTERN int CeedVectorCreate(Ceed ceed, CeedSize len, CeedVector *vec); +CEED_EXTERN int CeedVectorReferenceCopy(CeedVector vec, CeedVector *vec_copy); +CEED_EXTERN int CeedVectorSetArray(CeedVector vec, CeedMemType mem_type, + CeedCopyMode copy_mode, CeedScalar *array); +CEED_EXTERN int CeedVectorSetValue(CeedVector vec, CeedScalar value); +CEED_EXTERN int CeedVectorSyncArray(CeedVector vec, CeedMemType mem_type); +CEED_EXTERN int CeedVectorTakeArray(CeedVector vec, CeedMemType mem_type, + CeedScalar **array); +CEED_EXTERN int CeedVectorGetArray(CeedVector vec, CeedMemType mem_type, + CeedScalar **array); +CEED_EXTERN int CeedVectorGetArrayRead(CeedVector vec, CeedMemType mem_type, + const CeedScalar **array); +CEED_EXTERN int CeedVectorGetArrayWrite(CeedVector vec, CeedMemType mem_type, + CeedScalar **array); +CEED_EXTERN int CeedVectorRestoreArray(CeedVector vec, CeedScalar **array); +CEED_EXTERN int CeedVectorRestoreArrayRead(CeedVector vec, + const CeedScalar **array); +CEED_EXTERN int CeedVectorNorm(CeedVector vec, CeedNormType type, + CeedScalar *norm); +CEED_EXTERN int CeedVectorScale(CeedVector x, CeedScalar alpha); +CEED_EXTERN int CeedVectorAXPY(CeedVector y, CeedScalar alpha, CeedVector x); +CEED_EXTERN int CeedVectorPointwiseMult(CeedVector w, CeedVector x, CeedVector y); +CEED_EXTERN int CeedVectorReciprocal(CeedVector vec); +CEED_EXTERN int CeedVectorView(CeedVector vec, const char *fp_fmt, FILE *stream); +CEED_EXTERN int CeedVectorGetCeed(CeedVector vec, Ceed *ceed); +CEED_EXTERN int CeedVectorGetLength(CeedVector vec, CeedSize *length); +CEED_EXTERN int CeedVectorDestroy(CeedVector *vec); + +CEED_EXTERN 
CeedRequest *const CEED_REQUEST_IMMEDIATE; +CEED_EXTERN CeedRequest *const CEED_REQUEST_ORDERED; +CEED_EXTERN int CeedRequestWait(CeedRequest *req); + +/// Argument for CeedOperatorSetField that vector is collocated with +/// quadrature points, only used with CeedEvalMode CEED_EVAL_NONE +/// @ingroup CeedBasis +CEED_EXTERN const CeedBasis CEED_BASIS_COLLOCATED; + +/// Argument for CeedOperatorSetField to use active input or output +/// @ingroup CeedVector +CEED_EXTERN const CeedVector CEED_VECTOR_ACTIVE; + +/// Argument for CeedOperatorSetField to use no vector, used with +/// qfunction input with eval mode CEED_EVAL_WEIGHT +/// @ingroup CeedVector +CEED_EXTERN const CeedVector CEED_VECTOR_NONE; + +/// Argument for CeedOperatorSetField to use no ElemRestriction, only used with +/// eval mode CEED_EVAL_WEIGHT. +/// @ingroup CeedElemRestriction +CEED_EXTERN const CeedElemRestriction CEED_ELEMRESTRICTION_NONE; + +/// Argument for CeedOperatorCreate that QFunction is not created by user. +/// Only used for QFunctions dqf and dqfT. If implemented, a backend may +/// attempt to provide the action of these QFunctions. +/// @ingroup CeedQFunction +CEED_EXTERN const CeedQFunction CEED_QFUNCTION_NONE; + +/// Denotes whether a linear transformation or its transpose should be applied +/// @ingroup CeedBasis +typedef enum { + /// Apply the linear transformation + CEED_NOTRANSPOSE, + /// Apply the transpose + CEED_TRANSPOSE +} CeedTransposeMode; +CEED_EXTERN const char *const CeedTransposeModes[]; + +/// Argument for CeedElemRestrictionCreateStrided that L-vector is in +/// the Ceed backend's preferred layout. This argument should only be used +/// with vectors created by a Ceed backend. 
+/// @ingroup CeedElemRestriction +CEED_EXTERN const CeedInt CEED_STRIDES_BACKEND[3]; + +CEED_EXTERN int CeedElemRestrictionCreate(Ceed ceed, CeedInt num_elem, + CeedInt elem_size, CeedInt num_comp, CeedInt comp_stride, CeedSize l_size, + CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, + CeedElemRestriction *rstr); +CEED_EXTERN int CeedElemRestrictionCreateOriented(Ceed ceed, CeedInt num_elem, + CeedInt elem_size, CeedInt num_comp, CeedInt comp_stride, CeedSize l_size, + CeedMemType mem_type, CeedCopyMode copy_mode, const CeedInt *offsets, + const bool *orient, CeedElemRestriction *rstr); +CEED_EXTERN int CeedElemRestrictionCreateStrided(Ceed ceed, + CeedInt num_elem, CeedInt elem_size, CeedInt num_comp, CeedSize l_size, + const CeedInt strides[3], CeedElemRestriction *rstr); +CEED_EXTERN int CeedElemRestrictionCreateBlocked(Ceed ceed, CeedInt num_elem, + CeedInt elem_size, CeedInt blk_size, CeedInt num_comp, CeedInt comp_stride, + CeedSize l_size, CeedMemType mem_type, CeedCopyMode copy_mode, + const CeedInt *offsets, CeedElemRestriction *rstr); +CEED_EXTERN int CeedElemRestrictionCreateBlockedStrided(Ceed ceed, + CeedInt num_elem, CeedInt elem_size, CeedInt blk_size, CeedInt num_comp, + CeedSize l_size, const CeedInt strides[3], CeedElemRestriction *rstr); +CEED_EXTERN int CeedElemRestrictionReferenceCopy(CeedElemRestriction rstr, + CeedElemRestriction *rstr_copy); +CEED_EXTERN int CeedElemRestrictionCreateVector(CeedElemRestriction rstr, + CeedVector *lvec, CeedVector *evec); +CEED_EXTERN int CeedElemRestrictionApply(CeedElemRestriction rstr, + CeedTransposeMode t_mode, CeedVector u, CeedVector ru, CeedRequest *request); +CEED_EXTERN int CeedElemRestrictionApplyBlock(CeedElemRestriction rstr, + CeedInt block, CeedTransposeMode t_mode, CeedVector u, CeedVector ru, + CeedRequest *request); +CEED_EXTERN int CeedElemRestrictionGetCeed(CeedElemRestriction rstr, + Ceed *ceed); +CEED_EXTERN int CeedElemRestrictionGetCompStride(CeedElemRestriction 
rstr, + CeedInt *comp_stride); +CEED_EXTERN int CeedElemRestrictionGetNumElements(CeedElemRestriction rstr, + CeedInt *num_elem); +CEED_EXTERN int CeedElemRestrictionGetElementSize(CeedElemRestriction rstr, + CeedInt *elem_size); +CEED_EXTERN int CeedElemRestrictionGetLVectorSize(CeedElemRestriction rstr, + CeedSize *l_size); +CEED_EXTERN int CeedElemRestrictionGetNumComponents(CeedElemRestriction rstr, + CeedInt *num_comp); +CEED_EXTERN int CeedElemRestrictionGetNumBlocks(CeedElemRestriction rstr, + CeedInt *num_blk); +CEED_EXTERN int CeedElemRestrictionGetBlockSize(CeedElemRestriction rstr, + CeedInt *blk_size); +CEED_EXTERN int CeedElemRestrictionGetMultiplicity(CeedElemRestriction rstr, + CeedVector mult); +CEED_EXTERN int CeedElemRestrictionView(CeedElemRestriction rstr, FILE *stream); +CEED_EXTERN int CeedElemRestrictionDestroy(CeedElemRestriction *rstr); + +// The formalism here is that we have the structure +// \int_\Omega v^T f_0(u, \nabla u, qdata) + (\nabla v)^T f_1(u, \nabla u, qdata) +// where gradients are with respect to the reference element. + +/// Basis evaluation mode +/// +/// Modes can be bitwise ORed when passing to most functions. 
+/// @ingroup CeedBasis +typedef enum { + /// Perform no evaluation (either because there is no data or it is already at + /// quadrature points) + CEED_EVAL_NONE = 0, + /// Interpolate from nodes to quadrature points + CEED_EVAL_INTERP = 1, + /// Evaluate gradients at quadrature points from input in a nodal basis + CEED_EVAL_GRAD = 2, + /// Evaluate divergence at quadrature points from input in a nodal basis + CEED_EVAL_DIV = 4, + /// Evaluate curl at quadrature points from input in a nodal basis + CEED_EVAL_CURL = 8, + /// Using no input, evaluate quadrature weights on the reference element + CEED_EVAL_WEIGHT = 16, +} CeedEvalMode; +CEED_EXTERN const char *const CeedEvalModes[]; + +/// Type of quadrature; also used for location of nodes +/// @ingroup CeedBasis +typedef enum { + /// Gauss-Legendre quadrature + CEED_GAUSS = 0, + /// Gauss-Legendre-Lobatto quadrature + CEED_GAUSS_LOBATTO = 1, +} CeedQuadMode; +CEED_EXTERN const char *const CeedQuadModes[]; + +/// Type of basis shape to create non-tensor H1 element basis +/// +/// The dimension is stored above the low 16 bits and can be extracted with +/// a right shift: (CeedElemTopology >> 16) == dim +/// @ingroup CeedBasis +typedef enum { + /// Line + CEED_TOPOLOGY_LINE = 1 << 16 | 0, + /// Triangle - 2D shape + CEED_TOPOLOGY_TRIANGLE = 2 << 16 | 1, + /// Quadrilateral - 2D shape + CEED_TOPOLOGY_QUAD = 2 << 16 | 2, + /// Tetrahedron - 3D shape + CEED_TOPOLOGY_TET = 3 << 16 | 3, + /// Pyramid - 3D shape + CEED_TOPOLOGY_PYRAMID = 3 << 16 | 4, + /// Prism - 3D shape + CEED_TOPOLOGY_PRISM = 3 << 16 | 5, + /// Hexahedron - 3D shape + CEED_TOPOLOGY_HEX = 3 << 16 | 6, +} CeedElemTopology; +CEED_EXTERN const char *const CeedElemTopologies[]; + +CEED_EXTERN int CeedBasisCreateTensorH1Lagrange(Ceed ceed, CeedInt dim, + CeedInt num_comp, CeedInt P, CeedInt Q, CeedQuadMode quad_mode, CeedBasis *basis); +CEED_EXTERN int CeedBasisCreateTensorH1(Ceed ceed, CeedInt dim, CeedInt num_comp, + CeedInt P_1d, CeedInt Q_1d, + const CeedScalar *interp_1d, + const 
CeedScalar *grad_1d, + const CeedScalar *q_ref_1d, + const CeedScalar *q_weight_1d, + CeedBasis *basis); +CEED_EXTERN int CeedBasisCreateH1(Ceed ceed, CeedElemTopology topo, + CeedInt num_comp, + CeedInt num_nodes, CeedInt nqpts, + const CeedScalar *interp, + const CeedScalar *grad, + const CeedScalar *q_ref, + const CeedScalar *q_weights, CeedBasis *basis); +CEED_EXTERN int CeedBasisCreateHdiv(Ceed ceed, CeedElemTopology topo, + CeedInt num_comp, + CeedInt num_nodes, CeedInt nqpts, + const CeedScalar *interp, + const CeedScalar *div, + const CeedScalar *q_ref, + const CeedScalar *q_weights, CeedBasis *basis); +CEED_EXTERN int CeedBasisCreateProjection(CeedBasis basis_from, CeedBasis basis_to, CeedBasis *basis_project); +CEED_EXTERN int CeedBasisReferenceCopy(CeedBasis basis, CeedBasis *basis_copy); +CEED_EXTERN int CeedBasisView(CeedBasis basis, FILE *stream); +CEED_EXTERN int CeedBasisApply(CeedBasis basis, CeedInt num_elem, + CeedTransposeMode t_mode, + CeedEvalMode eval_mode, CeedVector u, CeedVector v); +CEED_EXTERN int CeedBasisGetCeed(CeedBasis basis, Ceed *ceed); +CEED_EXTERN int CeedBasisGetDimension(CeedBasis basis, CeedInt *dim); +CEED_EXTERN int CeedBasisGetTopology(CeedBasis basis, CeedElemTopology *topo); +CEED_EXTERN int CeedBasisGetNumQuadratureComponents(CeedBasis basis, CeedInt *Q_comp); +CEED_EXTERN int CeedBasisGetNumComponents(CeedBasis basis, CeedInt *num_comp); +CEED_EXTERN int CeedBasisGetNumNodes(CeedBasis basis, CeedInt *P); +CEED_EXTERN int CeedBasisGetNumNodes1D(CeedBasis basis, CeedInt *P_1d); +CEED_EXTERN int CeedBasisGetNumQuadraturePoints(CeedBasis basis, CeedInt *Q); +CEED_EXTERN int CeedBasisGetNumQuadraturePoints1D(CeedBasis basis, + CeedInt *Q_1d); +CEED_EXTERN int CeedBasisGetQRef(CeedBasis basis, const CeedScalar **q_ref); +CEED_EXTERN int CeedBasisGetQWeights(CeedBasis basis, + const CeedScalar **q_weights); +CEED_EXTERN int CeedBasisGetInterp(CeedBasis basis, const CeedScalar **interp); +CEED_EXTERN int 
CeedBasisGetInterp1D(CeedBasis basis, + const CeedScalar **interp_1d); +CEED_EXTERN int CeedBasisGetGrad(CeedBasis basis, const CeedScalar **grad); +CEED_EXTERN int CeedBasisGetGrad1D(CeedBasis basis, const CeedScalar **grad_1d); +CEED_EXTERN int CeedBasisGetDiv(CeedBasis basis, const CeedScalar **div); +CEED_EXTERN int CeedBasisDestroy(CeedBasis *basis); + +CEED_EXTERN int CeedGaussQuadrature(CeedInt Q, CeedScalar *q_ref_1d, + CeedScalar *q_weight_1d); +CEED_EXTERN int CeedLobattoQuadrature(CeedInt Q, CeedScalar *q_ref_1d, + CeedScalar *q_weight_1d); +CEED_EXTERN int CeedQRFactorization(Ceed ceed, CeedScalar *mat, CeedScalar *tau, + CeedInt m, CeedInt n); +CEED_EXTERN int CeedSymmetricSchurDecomposition(Ceed ceed, CeedScalar *mat, + CeedScalar *lambda, CeedInt n); +CEED_EXTERN int CeedSimultaneousDiagonalization(Ceed ceed, CeedScalar *mat_A, + CeedScalar *mat_B, CeedScalar *x, CeedScalar *lambda, CeedInt n); + +/** Handle for the user provided CeedQFunction callback function + + @param[in,out] ctx User-defined context set using CeedQFunctionSetContext() or NULL + @param[in] Q Number of quadrature points at which to evaluate + @param[in] in Array of pointers to each input argument in the order provided + by the user in CeedQFunctionAddInput(). Each array has shape + `[dim, num_comp, Q]` where `dim` is the geometric dimension for + \ref CEED_EVAL_GRAD (`dim=1` for \ref CEED_EVAL_INTERP) and + `num_comp` is the number of field components (`num_comp=1` for + scalar fields). This results in indexing the `i`th input at + quadrature point `j` as `in[i][(d*num_comp + c)*Q + j]`. + @param[out] out Array of pointers to each output array in the order provided + using CeedQFunctionAddOutput(). The shapes are as above for + \a in. 
+ + @return An error code: 0 - success, otherwise - failure + + @ingroup CeedQFunction +**/ +typedef int (*CeedQFunctionUser)(void *ctx, const CeedInt Q, + const CeedScalar *const *in, + CeedScalar *const *out); + +CEED_EXTERN int CeedQFunctionCreateInterior(Ceed ceed, CeedInt vec_length, + CeedQFunctionUser f, const char *source, CeedQFunction *qf); +CEED_EXTERN int CeedQFunctionCreateInteriorByName(Ceed ceed, const char *name, + CeedQFunction *qf); +CEED_EXTERN int CeedQFunctionCreateIdentity(Ceed ceed, CeedInt size, + CeedEvalMode in_mode, CeedEvalMode out_mode, CeedQFunction *qf); +CEED_EXTERN int CeedQFunctionReferenceCopy(CeedQFunction qf, CeedQFunction *qf_copy); +CEED_EXTERN int CeedQFunctionAddInput(CeedQFunction qf, const char *field_name, + CeedInt size, CeedEvalMode eval_mode); +CEED_EXTERN int CeedQFunctionAddOutput(CeedQFunction qf, const char *field_name, + CeedInt size, CeedEvalMode eval_mode); +CEED_EXTERN int CeedQFunctionGetFields(CeedQFunction qf, + CeedInt *num_input_fields, + CeedQFunctionField **input_fields, + CeedInt *num_output_fields, + CeedQFunctionField **output_fields); +CEED_EXTERN int CeedQFunctionSetContext(CeedQFunction qf, + CeedQFunctionContext ctx); +CEED_EXTERN int CeedQFunctionSetContextWritable(CeedQFunction qf, bool is_writable); +CEED_EXTERN int CeedQFunctionSetUserFlopsEstimate(CeedQFunction qf, CeedSize flops); +CEED_EXTERN int CeedQFunctionView(CeedQFunction qf, FILE *stream); +CEED_EXTERN int CeedQFunctionGetCeed(CeedQFunction qf, Ceed *ceed); +CEED_EXTERN int CeedQFunctionApply(CeedQFunction qf, CeedInt Q, + CeedVector *u, CeedVector *v); +CEED_EXTERN int CeedQFunctionDestroy(CeedQFunction *qf); + +CEED_EXTERN int CeedQFunctionFieldGetName(CeedQFunctionField qf_field, + char **field_name); +CEED_EXTERN int CeedQFunctionFieldGetSize(CeedQFunctionField qf_field, + CeedInt *size); +CEED_EXTERN int CeedQFunctionFieldGetEvalMode(CeedQFunctionField qf_field, + CeedEvalMode *eval_mode); + +/// Denotes type of data stored in a 
CeedQFunctionContext field +/// @ingroup CeedQFunction +typedef enum { + /// Double precision value + CEED_CONTEXT_FIELD_DOUBLE = 1, + /// 32 bit integer value + CEED_CONTEXT_FIELD_INT32 = 2, +} CeedContextFieldType; +CEED_EXTERN const char *const CeedContextFieldTypes[]; + +/** Handle for the user provided CeedQFunctionContextDataDestroy callback function + + @param[in,out] data User-CeedQFunctionContext data + + @return An error code: 0 - success, otherwise - failure + + @ingroup CeedQFunction +**/ +typedef int (*CeedQFunctionContextDataDestroyUser)(void *data); + +CEED_EXTERN int CeedQFunctionContextCreate(Ceed ceed, + CeedQFunctionContext *ctx); +CEED_EXTERN int CeedQFunctionContextReferenceCopy(CeedQFunctionContext ctx, + CeedQFunctionContext *ctx_copy); +CEED_EXTERN int CeedQFunctionContextSetData(CeedQFunctionContext ctx, + CeedMemType mem_type, CeedCopyMode copy_mode, size_t size, void *data); +CEED_EXTERN int CeedQFunctionContextTakeData(CeedQFunctionContext ctx, + CeedMemType mem_type, void *data); +CEED_EXTERN int CeedQFunctionContextGetData(CeedQFunctionContext ctx, + CeedMemType mem_type, void *data); +CEED_EXTERN int CeedQFunctionContextGetDataRead(CeedQFunctionContext ctx, + CeedMemType mem_type, void *data); +CEED_EXTERN int CeedQFunctionContextRestoreData(CeedQFunctionContext ctx, + void *data); +CEED_EXTERN int CeedQFunctionContextRestoreDataRead(CeedQFunctionContext ctx, + void *data); +CEED_EXTERN int CeedQFunctionContextRegisterDouble(CeedQFunctionContext ctx, + const char *field_name, size_t field_offset, size_t num_values, + const char *field_description); +CEED_EXTERN int CeedQFunctionContextRegisterInt32(CeedQFunctionContext ctx, + const char *field_name, size_t field_offset, size_t num_values, + const char *field_description); +CEED_EXTERN int CeedQFunctionContextGetAllFieldLabels(CeedQFunctionContext ctx, + const CeedContextFieldLabel **field_labels, CeedInt *num_fields); +CEED_EXTERN int 
CeedContextFieldLabelGetDescription(CeedContextFieldLabel label, + const char **field_name, const char **field_description, size_t *num_values, + CeedContextFieldType *field_type); +CEED_EXTERN int CeedQFunctionContextGetContextSize(CeedQFunctionContext ctx, + size_t *ctx_size); +CEED_EXTERN int CeedQFunctionContextView(CeedQFunctionContext ctx, + FILE *stream); +CEED_EXTERN int CeedQFunctionContextSetDataDestroy(CeedQFunctionContext ctx, + CeedMemType f_mem_type, CeedQFunctionContextDataDestroyUser f); +CEED_EXTERN int CeedQFunctionContextDestroy(CeedQFunctionContext *ctx); + +CEED_EXTERN int CeedOperatorCreate(Ceed ceed, CeedQFunction qf, + CeedQFunction dqf, CeedQFunction dqfT, + CeedOperator *op); +CEED_EXTERN int CeedCompositeOperatorCreate(Ceed ceed, CeedOperator *op); +CEED_EXTERN int CeedOperatorReferenceCopy(CeedOperator op, CeedOperator *op_copy); +CEED_EXTERN int CeedOperatorSetField(CeedOperator op, const char *field_name, + CeedElemRestriction r, CeedBasis b, + CeedVector v); +CEED_EXTERN int CeedOperatorGetFields(CeedOperator op, + CeedInt *num_input_fields, + CeedOperatorField **input_fields, + CeedInt *num_output_fields, + CeedOperatorField **output_fields); +CEED_EXTERN int CeedCompositeOperatorAddSub(CeedOperator composite_op, + CeedOperator sub_op); +CEED_EXTERN int CeedOperatorCheckReady(CeedOperator op); +CEED_EXTERN int CeedOperatorGetActiveVectorLengths(CeedOperator op, CeedSize *input_size, CeedSize *output_size); +CEED_EXTERN int CeedOperatorSetQFunctionAssemblyReuse(CeedOperator op, bool reuse_assembly_data); +CEED_EXTERN int CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(CeedOperator op, bool needs_data_update); +CEED_EXTERN int CeedOperatorLinearAssembleQFunction(CeedOperator op, + CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request); +CEED_EXTERN int CeedOperatorLinearAssembleQFunctionBuildOrUpdate(CeedOperator op, + CeedVector *assembled, CeedElemRestriction *rstr, CeedRequest *request); +CEED_EXTERN int 
CeedOperatorLinearAssembleDiagonal(CeedOperator op, + CeedVector assembled, CeedRequest *request); +CEED_EXTERN int CeedOperatorLinearAssembleAddDiagonal(CeedOperator op, + CeedVector assembled, CeedRequest *request); +CEED_EXTERN int CeedOperatorLinearAssemblePointBlockDiagonal(CeedOperator op, + CeedVector assembled, CeedRequest *request); +CEED_EXTERN int CeedOperatorLinearAssembleAddPointBlockDiagonal(CeedOperator op, + CeedVector assembled, CeedRequest *request); +CEED_EXTERN int CeedOperatorLinearAssembleSymbolic(CeedOperator op, + CeedSize *num_entries, CeedInt **rows, CeedInt **cols); +CEED_EXTERN int CeedOperatorLinearAssemble(CeedOperator op, CeedVector values); +CEED_EXTERN int CeedOperatorMultigridLevelCreate(CeedOperator op_fine, + CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, + CeedOperator *op_coarse, CeedOperator *op_prolong, CeedOperator *op_restrict); +CEED_EXTERN int CeedOperatorMultigridLevelCreateTensorH1( + CeedOperator op_fine, CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, + CeedBasis basis_coarse, const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, + CeedOperator *op_prolong, CeedOperator *op_restrict); +CEED_EXTERN int CeedOperatorMultigridLevelCreateH1(CeedOperator op_fine, + CeedVector p_mult_fine, CeedElemRestriction rstr_coarse, CeedBasis basis_coarse, + const CeedScalar *interp_c_to_f, CeedOperator *op_coarse, + CeedOperator *op_prolong, CeedOperator *op_restrict); +CEED_EXTERN int CeedOperatorCreateFDMElementInverse(CeedOperator op, + CeedOperator *fdm_inv, CeedRequest *request); +CEED_EXTERN int CeedOperatorSetNumQuadraturePoints(CeedOperator op, CeedInt num_qpts); +CEED_EXTERN int CeedOperatorSetName(CeedOperator op, const char *name); +CEED_EXTERN int CeedOperatorView(CeedOperator op, FILE *stream); +CEED_EXTERN int CeedOperatorGetCeed(CeedOperator op, Ceed *ceed); +CEED_EXTERN int CeedOperatorGetNumElements(CeedOperator op, CeedInt *num_elem); +CEED_EXTERN int 
CeedOperatorGetNumQuadraturePoints(CeedOperator op, + CeedInt *num_qpts); +CEED_EXTERN int CeedOperatorGetFlopsEstimate(CeedOperator op, CeedSize *flops); +CEED_EXTERN int CeedOperatorContextGetFieldLabel(CeedOperator op, + const char *field_name, CeedContextFieldLabel *field_label); +CEED_EXTERN int CeedOperatorContextSetDouble(CeedOperator op, + CeedContextFieldLabel field_label, double *values); +CEED_EXTERN int CeedOperatorContextSetInt32(CeedOperator op, + CeedContextFieldLabel field_label, int *values); +CEED_EXTERN int CeedOperatorApply(CeedOperator op, CeedVector in, + CeedVector out, CeedRequest *request); +CEED_EXTERN int CeedOperatorApplyAdd(CeedOperator op, CeedVector in, + CeedVector out, CeedRequest *request); +CEED_EXTERN int CeedOperatorDestroy(CeedOperator *op); + +CEED_EXTERN int CeedOperatorFieldGetName(CeedOperatorField op_field, + char **field_name); +CEED_EXTERN int CeedOperatorFieldGetElemRestriction(CeedOperatorField op_field, + CeedElemRestriction *rstr); +CEED_EXTERN int CeedOperatorFieldGetBasis(CeedOperatorField op_field, + CeedBasis *basis); +CEED_EXTERN int CeedOperatorFieldGetVector(CeedOperatorField op_field, + CeedVector *vec); + +/** + @brief Return integer power + + @param[in] base The base to exponentiate + @param[in] power The power to raise the base to + + @return base^power + + @ref Utility +**/ +static inline CeedInt CeedIntPow(CeedInt base, CeedInt power) { + CeedInt result = 1; + while (power) { + if (power & 1) result *= base; + power >>= 1; + base *= base; + } + return result; +} + +/** + @brief Return minimum of two integers + + @param[in] a The first integer to compare + @param[in] b The second integer to compare + + @return The minimum of the two integers + + @ref Utility +**/ +static inline CeedInt CeedIntMin(CeedInt a, CeedInt b) { return a < b ? 
a : b; } + +/** + @brief Return maximum of two integers + + @param[in] a The first integer to compare + @param[in] b The second integer to compare + + @return The maximum of the two integers + + @ref Utility +**/ +static inline CeedInt CeedIntMax(CeedInt a, CeedInt b) { return a > b ? a : b; } + +// Used to ensure initialization before CeedInit() +CEED_EXTERN int CeedRegisterAll(void); +// Used to ensure initialization before CeedQFunctionCreate*() +CEED_EXTERN int CeedQFunctionRegisterAll(void); + +#endif From 3aa6a4ae5ffca79a8b20c8b61ce7bd4ee421aa20 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Wed, 24 Aug 2022 23:00:21 +0000 Subject: [PATCH 22/39] Rewrite arrays of structs in format that OCCA can handle. --- include/ceed/jit-source/gallery/ceed-mass2dbuild.h | 7 ++++--- include/ceed/jit-source/gallery/ceed-mass3dbuild.h | 7 ++++--- include/ceed/jit-source/gallery/ceed-poisson2dbuild.h | 8 +++++--- include/ceed/jit-source/gallery/ceed-poisson3dbuild.h | 8 +++++--- include/ceed/jit-source/gallery/ceed-vectormassapply.h | 7 ++++--- .../jit-source/gallery/ceed-vectorpoisson1dapply.h | 7 ++++--- .../jit-source/gallery/ceed-vectorpoisson2dapply.h | 10 +++++++--- .../jit-source/gallery/ceed-vectorpoisson3dapply.h | 10 +++++++--- 8 files changed, 40 insertions(+), 24 deletions(-) diff --git a/include/ceed/jit-source/gallery/ceed-mass2dbuild.h b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h index 3ba4563450..e25b71eb9b 100644 --- a/include/ceed/jit-source/gallery/ceed-mass2dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h @@ -19,10 +19,11 @@ CEED_QFUNCTION(Mass2DBuild)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is Jacobians with shape [2, nc=2, Q] // in[1] is quadrature weights, size (Q) - const CeedScalar (*J)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[0], - *w = in[1]; + typedef CeedScalar array_t[2][CEED_Q_VLA]; + const array_t* J = (const array_t*) in[0]; + const CeedScalar * const w = in[1]; // out[0] is quadrature data, 
size (Q) - CeedScalar *q_data = out[0]; + CeedScalar *q_data = out[0]; // *INDENT-ON* // Quadrature point loop diff --git a/include/ceed/jit-source/gallery/ceed-mass3dbuild.h b/include/ceed/jit-source/gallery/ceed-mass3dbuild.h index dc3ce56da5..db6429f4ef 100644 --- a/include/ceed/jit-source/gallery/ceed-mass3dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass3dbuild.h @@ -19,10 +19,11 @@ CEED_QFUNCTION(Mass3DBuild)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is Jacobians with shape [2, nc=3, Q] // in[1] is quadrature weights, size (Q) - const CeedScalar (*J)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[0], - *w = in[1]; + typedef CeedScalar array_t[3][CEED_Q_VLA]; + const array_t* J = (const array_t*) in[0]; + const CeedScalar * const w = in[1]; // out[0] is quadrature data, size (Q) - CeedScalar *q_data = out[0]; + CeedScalar *q_data = out[0]; // *INDENT-ON* // Quadrature point loop diff --git a/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h index 69d55aeac1..8c27fcae57 100644 --- a/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson2dbuild.h @@ -22,10 +22,12 @@ CEED_QFUNCTION(Poisson2DBuild)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is Jacobians with shape [2, nc=2, Q] // in[1] is quadrature weights, size (Q) - const CeedScalar (*J)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[0], - *w = in[1]; + typedef CeedScalar array_t[2][CEED_Q_VLA]; + const array_t* J = (const array_t*) in[0]; + const CeedScalar * const w = in[1]; // out[0] is qdata, size (3*Q) - CeedScalar (*q_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + vec_t* q_data = (vec_t*) out[0]; // *INDENT-ON* // Quadrature point loop diff --git a/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h index 7dbbe57167..fc598b4cf6 100644 --- 
a/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-poisson3dbuild.h @@ -23,10 +23,12 @@ CEED_QFUNCTION(Poisson3DBuild)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is Jacobians with shape [3, nc=3, Q] // in[1] is quadrature weights, size (Q) - const CeedScalar (*J)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[0], - *w = in[1]; + typedef CeedScalar array_t[3][CEED_Q_VLA]; + const array_t* J = (const array_t*) in[0]; + const CeedScalar * const w = in[1]; // out[0] is qdata, size (6*Q) - CeedScalar (*q_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + vec_t* q_data = (vec_t*) out[0]; // *INDENT-ON* const CeedInt dim = 3; diff --git a/include/ceed/jit-source/gallery/ceed-vectormassapply.h b/include/ceed/jit-source/gallery/ceed-vectormassapply.h index b20218175d..d6e458d054 100644 --- a/include/ceed/jit-source/gallery/ceed-vectormassapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectormassapply.h @@ -20,10 +20,11 @@ CEED_QFUNCTION(Vector3MassApply)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is u, size (Q) // in[1] is quadrature data, size (Q) - const CeedScalar (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - *q_data = in[1]; + typedef CeedScalar array_t[CEED_Q_VLA]; + const array_t* u = (const array_t*) in[0]; + const CeedScalar * const q_data = in[1]; // out[0] is v, size (Q) - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + array_t* v = (array_t*) out[0]; // *INDENT-ON* const CeedInt num_comp = 3; diff --git a/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h index 19d9b278fc..e7a963e202 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson1dapply.h @@ -21,10 +21,11 @@ CEED_QFUNCTION(Vector3Poisson1DApply)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is 
gradient u, shape [1, nc=3, Q] // in[1] is quadrature data, size (Q) - const CeedScalar (*ug)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data) = in[1]; + typedef CeedScalar array_t[CEED_Q_VLA]; + const array_t* ug = (const array_t*) in[0]; + const CeedScalar * const q_data = in[1]; // out[0] is output to multiply against gradient v, shape [1, nc=3, Q] - CeedScalar (*vg)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + array_t* vg = (array_t*) out[0]; // *INDENT-ON* const CeedInt num_comp = 3; diff --git a/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h index 6588e65e7f..0d3e68abf2 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson2dapply.h @@ -21,10 +21,14 @@ CEED_QFUNCTION(Vector3Poisson2DApply)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is gradient u, shape [2, nc=3, Q] // in[1] is quadrature data, size (3*Q) - const CeedScalar (*ug)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[0], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + typedef CeedScalar array_t[3][CEED_Q_VLA]; + const array_t* ug = (const array_t*) in[0]; + + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* q_data = (const vec_t*) in[1]; + // out[0] is output to multiply against gradient v, shape [2, nc=3, Q] - CeedScalar (*vg)[3][CEED_Q_VLA] = (CeedScalar(*)[3][CEED_Q_VLA])out[0]; + array_t* vg = (array_t*) out[0]; // *INDENT-ON* const CeedInt dim = 2, num_comp = 3; diff --git a/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h b/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h index 541723dec6..bcf250dfb5 100644 --- a/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h +++ b/include/ceed/jit-source/gallery/ceed-vectorpoisson3dapply.h @@ -22,10 +22,14 @@ CEED_QFUNCTION(Vector3Poisson3DApply)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is gradient u, 
shape [3, nc=3, Q] // in[1] is quadrature data, size (6*Q) - const CeedScalar (*ug)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[0], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + typedef CeedScalar array_t[3][CEED_Q_VLA]; + const array_t* ug = (const array_t*) in[0]; + + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* q_data = (const vec_t*) in[1]; + // out[0] is output to multiply against gradient v, shape [3, nc=3, Q] - CeedScalar (*vg)[3][CEED_Q_VLA] = (CeedScalar(*)[3][CEED_Q_VLA])out[0]; + array_t* vg = (array_t*) out[0]; // *INDENT-ON* const CeedInt dim = 3, num_comp = 3; From d0aae6cd1b170e087355dab1d6a42649aa876c66 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Wed, 24 Aug 2022 23:00:59 +0000 Subject: [PATCH 23/39] Adds stubs for missing functions in OCCA qfunctioncontext. --- backends/occa/ceed-occa-qfunctioncontext.cpp | 24 ++++++++++++++++++++ backends/occa/ceed-occa-qfunctioncontext.hpp | 10 ++++++++ 2 files changed, 34 insertions(+) diff --git a/backends/occa/ceed-occa-qfunctioncontext.cpp b/backends/occa/ceed-occa-qfunctioncontext.cpp index f86d31c4fe..956028d7a4 100644 --- a/backends/occa/ceed-occa-qfunctioncontext.cpp +++ b/backends/occa/ceed-occa-qfunctioncontext.cpp @@ -251,9 +251,12 @@ namespace ceed { Ceed ceed; ierr = CeedQFunctionContextGetCeed(ctx, &ceed); CeedChk(ierr); + CeedOccaRegisterFunction(ctx, "HasValidData", QFunctionContext::ceedHasValidData); + CeedOccaRegisterFunction(ctx, "HasBorrowedDataOfType", QFunctionContext::ceedHasBorrowedDataOfType); CeedOccaRegisterFunction(ctx, "SetData", QFunctionContext::ceedSetData); CeedOccaRegisterFunction(ctx, "TakeData", QFunctionContext::ceedTakeData); CeedOccaRegisterFunction(ctx, "GetData", QFunctionContext::ceedGetData); + CeedOccaRegisterFunction(ctx, "GetDataRead", QFunctionContext::ceedGetDataRead); CeedOccaRegisterFunction(ctx, "RestoreData", QFunctionContext::ceedRestoreData); CeedOccaRegisterFunction(ctx, "Destroy", QFunctionContext::ceedDestroy); 
@@ -263,6 +266,17 @@ namespace ceed { return CEED_ERROR_SUCCESS; } + int QFunctionContext::ceedHasValidData(const CeedQFunctionContext ctx, + bool *has_valid_data) { + return CEED_ERROR_SUCCESS; + } + + int QFunctionContext::ceedHasBorrowedDataOfType(const CeedQFunctionContext ctx, + CeedMemType mem_type, + bool *has_borrowed_data_of_type) { + return CEED_ERROR_SUCCESS; + } + int QFunctionContext::ceedSetData(CeedQFunctionContext ctx, CeedMemType mtype, CeedCopyMode cmode, void *data) { QFunctionContext *ctx_ = QFunctionContext::from(ctx); @@ -290,6 +304,16 @@ namespace ceed { return ctx_->getData(mtype, data); } + int QFunctionContext::ceedGetDataRead(CeedQFunctionContext ctx, + CeedMemType mtype, + void *data) { + QFunctionContext *ctx_ = QFunctionContext::from(ctx); + if (!ctx_) { + return staticCeedError("Invalid CeedQFunctionContext passed"); + } + return ctx_->getData(mtype, data); + } + int QFunctionContext::ceedRestoreData(CeedQFunctionContext ctx) { QFunctionContext *ctx_ = QFunctionContext::from(ctx); if (!ctx_) { diff --git a/backends/occa/ceed-occa-qfunctioncontext.hpp b/backends/occa/ceed-occa-qfunctioncontext.hpp index de5e59ada8..db074ab68f 100644 --- a/backends/occa/ceed-occa-qfunctioncontext.hpp +++ b/backends/occa/ceed-occa-qfunctioncontext.hpp @@ -89,6 +89,13 @@ namespace ceed { static int ceedCreate(CeedQFunctionContext ctx); + static int ceedHasValidData(const CeedQFunctionContext ctx, + bool *has_valid_data); + + static int ceedHasBorrowedDataOfType(const CeedQFunctionContext ctx, + CeedMemType mem_type, + bool *has_borrowed_data_of_type); + static int ceedSetData(CeedQFunctionContext ctx, CeedMemType mtype, CeedCopyMode cmode, void *data); @@ -98,6 +105,9 @@ namespace ceed { static int ceedGetData(CeedQFunctionContext ctx, CeedMemType mtype, void *data); + static int ceedGetDataRead(CeedQFunctionContext ctx, CeedMemType mtype, + void *data); + static int ceedRestoreData(CeedQFunctionContext ctx); static int ceedDestroy(CeedQFunctionContext 
ctx); From b109b47b5fac67e9011ad63f823cbddcb5673acb Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Wed, 24 Aug 2022 23:09:42 +0000 Subject: [PATCH 24/39] Includes the cmath header when compiling C++ code. --- tests/t406-qfunction.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/t406-qfunction.h b/tests/t406-qfunction.h index da7dd540f7..1bad1b96b1 100644 --- a/tests/t406-qfunction.h +++ b/tests/t406-qfunction.h @@ -6,7 +6,12 @@ // This file is part of CEED: http://github.com/ceed // Note: intentionally testing strange spacing in '#include's +#ifdef __cplusplus +#include +#else #include +#endif + #include #include "./t406-qfunction-scales.h" # include "t406-qfunction-helper.h" From b6ffe63401515be2474a99b2a3a48d44b6cdc145 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Thu, 25 Aug 2022 21:05:25 +0000 Subject: [PATCH 25/39] Add stubs for missing OCCA backend LinearAssembleXXX functions. --- backends/occa/ceed-occa-operator.cpp | 6 ++++++ backends/occa/ceed-occa-operator.hpp | 1 + 2 files changed, 7 insertions(+) diff --git a/backends/occa/ceed-occa-operator.cpp b/backends/occa/ceed-occa-operator.cpp index a74090650a..9aa58439ad 100644 --- a/backends/occa/ceed-occa-operator.cpp +++ b/backends/occa/ceed-occa-operator.cpp @@ -119,6 +119,7 @@ namespace ceed { ierr = CeedOperatorSetData(op, operator_); CeedChk(ierr); CeedOccaRegisterFunction(op, "LinearAssembleQFunction", Operator::ceedLinearAssembleQFunction); + CeedOccaRegisterFunction(op, "LinearAssembleQFunctionUpdate", Operator::ceedLinearAssembleQFunction); CeedOccaRegisterFunction(op, "LinearAssembleAddDiagonal", Operator::ceedLinearAssembleAddDiagonal); CeedOccaRegisterFunction(op, "LinearAssembleAddPointBlockDiagonal", Operator::ceedLinearAssembleAddPointBlockDiagonal); CeedOccaRegisterFunction(op, "CreateFDMElementInverse", Operator::ceedCreateFDMElementInverse); @@ -143,10 +144,15 @@ namespace ceed { return staticCeedError("(OCCA) Backend does not implement LinearAssembleQFunction"); } + int 
Operator::ceedLinearAssembleQFunctionUpdate(CeedOperator op) { + return staticCeedError("(OCCA) Backend does not implement LinearAssembleQFunctionUpdate"); + } + int Operator::ceedLinearAssembleAddDiagonal(CeedOperator op) { return staticCeedError("(OCCA) Backend does not implement LinearAssembleDiagonal"); } + int Operator::ceedLinearAssembleAddPointBlockDiagonal(CeedOperator op) { return staticCeedError("(OCCA) Backend does not implement LinearAssemblePointBlockDiagonal"); } diff --git a/backends/occa/ceed-occa-operator.hpp b/backends/occa/ceed-occa-operator.hpp index 295475361b..3408b1c2f3 100644 --- a/backends/occa/ceed-occa-operator.hpp +++ b/backends/occa/ceed-occa-operator.hpp @@ -70,6 +70,7 @@ namespace ceed { static int ceedCreateComposite(CeedOperator op); static int ceedLinearAssembleQFunction(CeedOperator op); + static int ceedLinearAssembleQFunctionUpdate(CeedOperator op); static int ceedLinearAssembleAddDiagonal(CeedOperator op); static int ceedLinearAssembleAddPointBlockDiagonal(CeedOperator op); static int ceedCreateFDMElementInverse(CeedOperator op); From 0fd53ddc657af433f81a9ed7ce8c01cb6256efb5 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Thu, 25 Aug 2022 21:06:08 +0000 Subject: [PATCH 26/39] Adds missing functions to OCCA implemenation of qFunctionContext. 
--- backends/occa/ceed-occa-qfunctioncontext.cpp | 36 ++++++++++++++++++-- backends/occa/ceed-occa-qfunctioncontext.hpp | 5 +++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/backends/occa/ceed-occa-qfunctioncontext.cpp b/backends/occa/ceed-occa-qfunctioncontext.cpp index 956028d7a4..ae53b810db 100644 --- a/backends/occa/ceed-occa-qfunctioncontext.cpp +++ b/backends/occa/ceed-occa-qfunctioncontext.cpp @@ -105,6 +105,28 @@ namespace ceed { } } + + int QFunctionContext::hasValidData(bool* has_valid_data) const { + (*has_valid_data) = (!!hostBuffer) + || (!!currentHostBuffer ) + || (memory.isInitialized()) + || (currentMemory.isInitialized()); + return CEED_ERROR_SUCCESS; + } + + int QFunctionContext::hasBorrowedDataOfType(CeedMemType mem_type, + bool *has_borrowed_data_of_type) const { + switch (mem_type) { + case CEED_MEM_HOST: + (*has_borrowed_data_of_type) = !!currentHostBuffer; + break; + case CEED_MEM_DEVICE: + (*has_borrowed_data_of_type) = currentMemory.isInitialized(); + break; + } + return CEED_ERROR_SUCCESS; + } + int QFunctionContext::setData(CeedMemType mtype, CeedCopyMode cmode, void *data) { switch (cmode) { @@ -268,13 +290,22 @@ namespace ceed { int QFunctionContext::ceedHasValidData(const CeedQFunctionContext ctx, bool *has_valid_data) { - return CEED_ERROR_SUCCESS; + QFunctionContext *ctx_ = QFunctionContext::from(ctx); + if (!ctx_) { + return staticCeedError("Invalid CeedQFunctionContext passed"); + } + return ctx_->hasValidData(has_valid_data); } int QFunctionContext::ceedHasBorrowedDataOfType(const CeedQFunctionContext ctx, CeedMemType mem_type, bool *has_borrowed_data_of_type) { - return CEED_ERROR_SUCCESS; + QFunctionContext *ctx_ = QFunctionContext::from(ctx); + if (!ctx_) { + return staticCeedError("Invalid CeedQFunctionContext passed"); + } + return ctx_->hasBorrowedDataOfType(mem_type, + has_borrowed_data_of_type); } int QFunctionContext::ceedSetData(CeedQFunctionContext ctx, CeedMemType mtype, @@ -311,6 +342,7 @@ namespace 
ceed { if (!ctx_) { return staticCeedError("Invalid CeedQFunctionContext passed"); } + // Todo: Determine if calling getData is sufficient return ctx_->getData(mtype, data); } diff --git a/backends/occa/ceed-occa-qfunctioncontext.hpp b/backends/occa/ceed-occa-qfunctioncontext.hpp index db074ab68f..ad5c2999c1 100644 --- a/backends/occa/ceed-occa-qfunctioncontext.hpp +++ b/backends/occa/ceed-occa-qfunctioncontext.hpp @@ -67,6 +67,11 @@ namespace ceed { void freeHostCtxBuffer(); + int hasValidData(bool* has_valid_data) const; + + int hasBorrowedDataOfType(CeedMemType mem_type, + bool *has_borrowed_data_of_type) const; + int setData(CeedMemType mtype, CeedCopyMode cmode, void *data); int copyDataValues(CeedMemType mtype, void *data); From 56911a03087ed02a7fc63d3316e3273a82234f0e Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Thu, 25 Aug 2022 21:08:13 +0000 Subject: [PATCH 27/39] Removes math function headers which were causing OCCA JIT failures. --- tests/t406-qfunction.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/t406-qfunction.h b/tests/t406-qfunction.h index 1bad1b96b1..cad64ec563 100644 --- a/tests/t406-qfunction.h +++ b/tests/t406-qfunction.h @@ -6,12 +6,6 @@ // This file is part of CEED: http://github.com/ceed // Note: intentionally testing strange spacing in '#include's -#ifdef __cplusplus -#include -#else -#include -#endif - #include #include "./t406-qfunction-scales.h" # include "t406-qfunction-helper.h" From d87084f1714b567d6ecac9ec487bffce716aad69 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Thu, 25 Aug 2022 21:09:04 +0000 Subject: [PATCH 28/39] Rewrite arrays of structs in format that OCCA can handle. 
--- tests/t566-operator.h | 13 ++++++++----- tests/t567-operator.h | 21 ++++++++++++++------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/tests/t566-operator.h b/tests/t566-operator.h index 4b00885864..f30ba0ea2a 100644 --- a/tests/t566-operator.h +++ b/tests/t566-operator.h @@ -10,7 +10,8 @@ CEED_QFUNCTION(setup)(void *ctx, const CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { - const CeedScalar *weight = in[0], *J = in[1]; + const CeedScalar *weight = in[0]; + const CeedScalar *J = in[1]; CeedScalar *rho = out[0]; for (CeedInt i=0; i Date: Fri, 26 Aug 2022 22:22:05 +0000 Subject: [PATCH 29/39] Rewrites fluids example qfunctions to be compatible with OCCA. --- examples/fluids/qfunctions/advection.h | 43 +++++++++------- examples/fluids/qfunctions/advection2d.h | 62 ++++++++++++++---------- examples/fluids/qfunctions/mass.h | 7 +-- examples/fluids/qfunctions/setupgeo2d.h | 16 +++--- 4 files changed, 76 insertions(+), 52 deletions(-) diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h index 5922446a37..bc7c555255 100644 --- a/examples/fluids/qfunctions/advection.h +++ b/examples/fluids/qfunctions/advection.h @@ -163,9 +163,11 @@ CEED_QFUNCTION(ICsAdvection)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Inputs - const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const vec_t* X = (const vec_t*) in[0]; // Outputs - CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* q0 = (vec_t*) out[0]; CeedPragmaSIMD // Quadrature Point Loop @@ -199,13 +201,15 @@ CEED_QFUNCTION(Advection)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Inputs // *INDENT-OFF* - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data)[CEED_Q_VLA] = (const 
CeedScalar(*)[CEED_Q_VLA])in[2]; - + typedef CeedScalar array_t[5][CEED_Q_VLA]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const vec_t* q = (const vec_t*) in[0]; + const array_t* dq = (const array_t*) in[1]; + const vec_t* q_data = (const vec_t*) in[2]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + vec_t* v = (vec_t*) out[0]; + array_t* dv = (array_t*) out[1]; // *INDENT-ON* // Context @@ -322,13 +326,16 @@ CEED_QFUNCTION(IFunction_Advection)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const vec_t* q = (const vec_t*) in[0]; + const array_t* dq = (const array_t*) in[1]; + const vec_t* q_dot = (const vec_t*) in[2]; + const vec_t* q_data = (const vec_t*) in[3]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + vec_t* v = (vec_t*) out[0]; + array_t* dv = (array_t*) out[1]; // *INDENT-ON* AdvectionContext context = (AdvectionContext)ctx; const CeedScalar CtauS = context->CtauS; @@ -464,10 +471,12 @@ CEED_QFUNCTION(Advection_InOutFlow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const vec_t* q= (const vec_t*) in[0]; + const vec_t* q_data_sur = (const vec_t*) in[2]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // 
*INDENT-ON* AdvectionContext context = (AdvectionContext)ctx; const CeedScalar E_wind = context->E_wind; diff --git a/examples/fluids/qfunctions/advection2d.h b/examples/fluids/qfunctions/advection2d.h index b324255454..d314238ab4 100644 --- a/examples/fluids/qfunctions/advection2d.h +++ b/examples/fluids/qfunctions/advection2d.h @@ -15,7 +15,6 @@ #include #include "utils.h" -typedef struct SetupContext_ *SetupContext; struct SetupContext_ { CeedScalar rc; CeedScalar lx; @@ -24,8 +23,8 @@ struct SetupContext_ { CeedScalar time; int wind_type; // See WindType: 0=ROTATION, 1=TRANSLATION }; +#define SetupContext struct SetupContext_* -typedef struct AdvectionContext_ *AdvectionContext; struct AdvectionContext_ { CeedScalar CtauS; CeedScalar strong_form; @@ -33,6 +32,7 @@ struct AdvectionContext_ { bool implicit; int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG }; +#define AdvectionContext struct AdvectionContext_* // ***************************************************************************** // This QFunction sets the initial conditions and the boundary conditions @@ -143,9 +143,11 @@ CEED_QFUNCTION_HELPER CeedInt Exact_Advection2d(CeedInt dim, CeedScalar time, CEED_QFUNCTION(ICsAdvection2d)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Inputs - const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const vec_t *X = (const vec_t*) in[0]; // Outputs - CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* q0 = (vec_t*) out[0]; const SetupContext context = (SetupContext)ctx; CeedPragmaSIMD @@ -181,12 +183,15 @@ CEED_QFUNCTION(Advection2d)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + typedef 
CeedScalar array_t[5][CEED_Q_VLA]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const vec_t* q = (const vec_t*) in[0]; + const array_t* dq = (const array_t*) in[1]; + const vec_t* q_data = (const vec_t*) in[2]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + vec_t* v = (vec_t*) out[0]; + array_t* dv = (array_t*) out[1]; // *INDENT-ON* AdvectionContext context = (AdvectionContext)ctx; const CeedScalar CtauS = context->CtauS; @@ -205,7 +210,7 @@ CEED_QFUNCTION(Advection2d)(void *ctx, CeedInt Q, const CeedScalar E = q[4][i]; // -- Grad in const CeedScalar drho[2] = {dq[0][0][i], - dq[1][0][i], + dq[1][0][i] }; // *INDENT-OFF* const CeedScalar du[3][2] = {{(dq[0][1][i] - drho[0]*u[0]) / rho, @@ -213,11 +218,11 @@ CEED_QFUNCTION(Advection2d)(void *ctx, CeedInt Q, {(dq[0][2][i] - drho[0]*u[1]) / rho, (dq[1][2][i] - drho[1]*u[1]) / rho}, {(dq[0][3][i] - drho[0]*u[2]) / rho, - (dq[1][3][i] - drho[1]*u[2]) / rho}, + (dq[1][3][i] - drho[1]*u[2]) / rho} }; // *INDENT-ON* const CeedScalar dE[3] = {dq[0][4][i], - dq[1][4][i], + dq[1][4][i] }; // -- Interp-to-Interp q_data const CeedScalar wdetJ = q_data[0][i]; @@ -227,7 +232,7 @@ CEED_QFUNCTION(Advection2d)(void *ctx, CeedInt Q, const CeedScalar dXdx[2][2] = {{q_data[1][i], q_data[2][i]}, {q_data[3][i], - q_data[4][i]}, + q_data[4][i]} }; // *INDENT-ON* @@ -285,13 +290,16 @@ CEED_QFUNCTION(IFunction_Advection2d)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const vec_t* q = (const vec_t*) in[0]; + const array_t* dq = (const array_t*) in[1]; + const 
vec_t* q_dot = (const vec_t*) in[2]; + const vec_t* q_data = (const vec_t*) in[3]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + vec_t* v = (vec_t*) out[0]; + array_t* dv = (array_t*) out[1]; // *INDENT-ON* AdvectionContext context = (AdvectionContext)ctx; const CeedScalar CtauS = context->CtauS; @@ -310,7 +318,7 @@ CEED_QFUNCTION(IFunction_Advection2d)(void *ctx, CeedInt Q, const CeedScalar E = q[4][i]; // -- Grad in const CeedScalar drho[2] = {dq[0][0][i], - dq[1][0][i], + dq[1][0][i] }; // *INDENT-OFF* const CeedScalar du[3][2] = {{(dq[0][1][i] - drho[0]*u[0]) / rho, @@ -318,11 +326,11 @@ CEED_QFUNCTION(IFunction_Advection2d)(void *ctx, CeedInt Q, {(dq[0][2][i] - drho[0]*u[1]) / rho, (dq[1][2][i] - drho[1]*u[1]) / rho}, {(dq[0][3][i] - drho[0]*u[2]) / rho, - (dq[1][3][i] - drho[1]*u[2]) / rho}, + (dq[1][3][i] - drho[1]*u[2]) / rho} }; // *INDENT-ON* const CeedScalar dE[3] = {dq[0][4][i], - dq[1][4][i], + dq[1][4][i] }; // -- Interp-to-Interp q_data const CeedScalar wdetJ = q_data[0][i]; @@ -332,7 +340,7 @@ CEED_QFUNCTION(IFunction_Advection2d)(void *ctx, CeedInt Q, const CeedScalar dXdx[2][2] = {{q_data[1][i], q_data[2][i]}, {q_data[3][i], - q_data[4][i]}, + q_data[4][i]} }; // *INDENT-ON* // The Physics @@ -408,10 +416,12 @@ CEED_QFUNCTION(Advection2d_InOutFlow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const vec_t* q= (const vec_t*) in[0]; + const vec_t* q_data_sur = (const vec_t*) in[2]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // *INDENT-ON* AdvectionContext context = (AdvectionContext)ctx; const CeedScalar E_wind = context->E_wind; diff --git 
a/examples/fluids/qfunctions/mass.h b/examples/fluids/qfunctions/mass.h index ab8c1d23d2..6016d49e37 100644 --- a/examples/fluids/qfunctions/mass.h +++ b/examples/fluids/qfunctions/mass.h @@ -29,11 +29,12 @@ CEED_QFUNCTION(Mass)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*u)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data) = in[1]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* u = (const vec_t*) in[0]; + const CeedScalar * const q_data = in[1]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // *INDENT-ON* CeedPragmaSIMD diff --git a/examples/fluids/qfunctions/setupgeo2d.h b/examples/fluids/qfunctions/setupgeo2d.h index f88fa135e2..b7780d2d25 100644 --- a/examples/fluids/qfunctions/setupgeo2d.h +++ b/examples/fluids/qfunctions/setupgeo2d.h @@ -52,10 +52,13 @@ CEED_QFUNCTION(Setup2d)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*J)[2][CEED_Q_VLA] = (const CeedScalar(*)[2][CEED_Q_VLA])in[0], - (*w) = in[1]; + typedef CeedScalar array_t[2][CEED_Q_VLA]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + + const array_t* J = (const array_t*) in[0]; + const CeedScalar * const w = in[1]; // Outputs - CeedScalar (*q_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* q_data = (vec_t*) out[0]; // *INDENT-ON* CeedPragmaSIMD @@ -115,10 +118,11 @@ CEED_QFUNCTION(SetupBoundary2d)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*J)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*w) = in[1]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* J = (const vec_t*) in[0]; + const CeedScalar * const w = in[1]; // Outputs - CeedScalar (*q_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t * q_data_sur = (vec_t*) out[0]; // *INDENT-ON* 
CeedPragmaSIMD From 144512c2ed1925082b7194f78cac953b777228c7 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Fri, 26 Aug 2022 22:23:14 +0000 Subject: [PATCH 30/39] Fixes array dimensions in mass2dbuild. --- include/ceed/jit-source/gallery/ceed-mass2dbuild.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ceed/jit-source/gallery/ceed-mass2dbuild.h b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h index e25b71eb9b..f076145fbf 100644 --- a/include/ceed/jit-source/gallery/ceed-mass2dbuild.h +++ b/include/ceed/jit-source/gallery/ceed-mass2dbuild.h @@ -19,7 +19,7 @@ CEED_QFUNCTION(Mass2DBuild)(void *ctx, const CeedInt Q, // *INDENT-OFF* // in[0] is Jacobians with shape [2, nc=2, Q] // in[1] is quadrature weights, size (Q) - typedef CeedScalar array_t[3][CEED_Q_VLA]; + typedef CeedScalar array_t[2][CEED_Q_VLA]; const array_t* J = (const array_t*) in[0]; const CeedScalar * const w = in[1]; // out[0] is quadrature data, size (Q) From 2f14ce03d84113a724b2339dba2772d39184dc0f Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 30 Aug 2022 16:29:27 +0000 Subject: [PATCH 31/39] Rewrites advection problem kernels to work with OCCA. 
--- examples/fluids/qfunctions/advection.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/fluids/qfunctions/advection.h b/examples/fluids/qfunctions/advection.h index bc7c555255..a8e21efca6 100644 --- a/examples/fluids/qfunctions/advection.h +++ b/examples/fluids/qfunctions/advection.h @@ -14,7 +14,6 @@ #include #include -typedef struct SetupContext_ *SetupContext; struct SetupContext_ { CeedScalar rc; CeedScalar lx; @@ -26,8 +25,8 @@ struct SetupContext_ { int bubble_type; // See BubbleType: 0=SPHERE, 1=CYLINDER int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK }; +#define SetupContext struct SetupContext_* -typedef struct AdvectionContext_ *AdvectionContext; struct AdvectionContext_ { CeedScalar CtauS; CeedScalar strong_form; @@ -35,6 +34,7 @@ struct AdvectionContext_ { bool implicit; int stabilization; // See StabilizationType: 0=none, 1=SU, 2=SUPG }; +#define AdvectionContext struct AdvectionContext_* CEED_QFUNCTION_HELPER CeedScalar Square(CeedScalar x) { return x*x; } From aa2058552d6131f9e6696f4e3943e6db7013dc26 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 30 Aug 2022 16:31:19 +0000 Subject: [PATCH 32/39] Rewrites blasius problem kernels to work with OCCA. 
--- examples/fluids/qfunctions/blasius.h | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/examples/fluids/qfunctions/blasius.h b/examples/fluids/qfunctions/blasius.h index 6fb44bc529..8ec4797618 100644 --- a/examples/fluids/qfunctions/blasius.h +++ b/examples/fluids/qfunctions/blasius.h @@ -17,7 +17,6 @@ #include "newtonian_types.h" #include "utils.h" -typedef struct BlasiusContext_ *BlasiusContext; struct BlasiusContext_ { bool implicit; // !< Using implicit timesteping or not bool weakT; // !< flag to set Temperature weakly at inflow @@ -28,6 +27,7 @@ struct BlasiusContext_ { CeedScalar x_inflow; // !< Location of inflow in x struct NewtonianIdealGasContext_ newtonian_ctx; }; +#define BlasiusContext struct BlasiusContext_* void CEED_QFUNCTION_HELPER(BlasiusSolution)(const CeedScalar y, const CeedScalar Uinf, const CeedScalar x0, const CeedScalar x, @@ -122,10 +122,11 @@ void CEED_QFUNCTION_HELPER(BlasiusSolution)(const CeedScalar y, CEED_QFUNCTION(ICsBlasius)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Inputs - const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* X = (const vec_t*) in[0]; // Outputs - CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* q0 = (vec_t*) out[0]; const BlasiusContext context = (BlasiusContext)ctx; const CeedScalar cv = context->newtonian_ctx.cv; @@ -167,12 +168,13 @@ CEED_QFUNCTION(Blasius_Inflow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* q = (const vec_t*) in[0]; + const vec_t* q_data_sur = (const vec_t*) in[2]; + const vec_t* X = (const vec_t*) in[3]; // Outputs - 
CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // *INDENT-ON* const BlasiusContext context = (BlasiusContext)ctx; const bool implicit = context->implicit; @@ -261,12 +263,13 @@ CEED_QFUNCTION(Blasius_Inflow_Jacobian)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* dq = (const vec_t*) in[0]; + const vec_t* q_data_sur = (const vec_t*) in[2]; + const vec_t* X = (const vec_t*) in[3]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // *INDENT-ON* const BlasiusContext context = (BlasiusContext)ctx; const bool implicit = context->implicit; From 01dab83217e804e9a9d026f41b3143c580415cfc Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 30 Aug 2022 16:31:40 +0000 Subject: [PATCH 33/39] Rewrites channel problem kernels to work with OCCA. 
--- examples/fluids/qfunctions/channel.h | 33 +++++++++++++++------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/examples/fluids/qfunctions/channel.h b/examples/fluids/qfunctions/channel.h index 443f18da30..e1ed823f73 100644 --- a/examples/fluids/qfunctions/channel.h +++ b/examples/fluids/qfunctions/channel.h @@ -18,7 +18,6 @@ #include "newtonian_types.h" #include "utils.h" -typedef struct ChannelContext_ *ChannelContext; struct ChannelContext_ { bool implicit; // !< Using implicit timesteping or not CeedScalar theta0; // !< Reference temperature @@ -29,11 +28,12 @@ struct ChannelContext_ { CeedScalar B; // !< Body-force driving the flow struct NewtonianIdealGasContext_ newtonian_ctx; }; +#define ChannelContext struct ChannelContext_*; CEED_QFUNCTION_HELPER State Exact_Channel(CeedInt dim, CeedScalar time, const CeedScalar X[], CeedInt Nf, void *ctx) { - const ChannelContext context = (ChannelContext)ctx; + struct ChannelContext_* context = (struct ChannelContext_*) ctx; const CeedScalar theta0 = context->theta0; const CeedScalar P0 = context->P0; const CeedScalar umax = context->umax; @@ -69,14 +69,15 @@ CEED_QFUNCTION_HELPER State Exact_Channel(CeedInt dim, CeedScalar time, // ***************************************************************************** CEED_QFUNCTION(ICsChannel)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { - // Inputs - const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + // Inputs + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* X = (const vec_t*) in[0]; // Outputs - CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* q0 = (vec_t*) out[0]; // Context - const ChannelContext context = (ChannelContext)ctx; + struct ChannelContext_* context = (struct ChannelContext_*) ctx; // Quadrature Point Loop CeedPragmaSIMD @@ -104,14 +105,15 @@ CEED_QFUNCTION(Channel_Inflow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - 
const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* q = (const vec_t*) in[0]; + const vec_t* q_data_sur = (const vec_t*) in[2]; + const vec_t* X = (const vec_t*) in[3]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // *INDENT-ON* - const ChannelContext context = (ChannelContext)ctx; + struct ChannelContext_* context = (struct ChannelContext_*) ctx; const bool implicit = context->implicit; NewtonianIdealGasContext gas = &context->newtonian_ctx; const CeedScalar cv = gas->cv; @@ -189,14 +191,15 @@ CEED_QFUNCTION(Channel_Outflow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* q = (const vec_t*) in[0]; + const vec_t* q_data_sur = (const vec_t*) in[2]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // *INDENT-ON* - const ChannelContext context = (ChannelContext)ctx; + struct ChannelContext_* context = (struct ChannelContext_*) ctx; const bool implicit = context->implicit; const CeedScalar P0 = context->P0; From 0a0a697f16a2a09488bad2b5aa242c82e3be7b95 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 30 Aug 2022 16:32:11 +0000 Subject: [PATCH 34/39] Rewrites dirichlet bc kernels to work with OCCA. 
--- examples/fluids/qfunctions/dirichlet_boundary.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/examples/fluids/qfunctions/dirichlet_boundary.h b/examples/fluids/qfunctions/dirichlet_boundary.h index 0e87d3731d..3c673d0566 100644 --- a/examples/fluids/qfunctions/dirichlet_boundary.h +++ b/examples/fluids/qfunctions/dirichlet_boundary.h @@ -15,13 +15,14 @@ CEED_QFUNCTION(SetupDirichletBC)(void *ctx, CeedInt Q, CeedScalar *const *out) { // Inputs // *INDENT-OFF* - const CeedScalar (*coords)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; - const CeedScalar (*multiplicity)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[1]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* coords = (const vec_t*) in[0]; + const vec_t* multiplicity = (const vec_t*) in[1]; // *INDENT-ON* // Outputs - CeedScalar (*coords_stored)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; - CeedScalar (*scale_stored) = (CeedScalar(*))out[1]; + vec_t* coords_stored = (vec_t*) out[0]; + CeedScalar * const scale_stored = out[1]; CeedPragmaSIMD for(CeedInt i=0; i Date: Tue, 30 Aug 2022 16:32:53 +0000 Subject: [PATCH 35/39] Rewrites newtonian kernels to work with OCCA. 
--- examples/fluids/qfunctions/newtonian.h | 274 +++++++++++-------- examples/fluids/qfunctions/newtonian_types.h | 4 +- 2 files changed, 162 insertions(+), 116 deletions(-) diff --git a/examples/fluids/qfunctions/newtonian.h b/examples/fluids/qfunctions/newtonian.h index cbe455a6c5..283c816e5d 100644 --- a/examples/fluids/qfunctions/newtonian.h +++ b/examples/fluids/qfunctions/newtonian.h @@ -25,10 +25,11 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Inputs - const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* X = (const vec_t*) in[0]; // Outputs - CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t *q0 = (vec_t*) out[0]; // Context const SetupContext context = (SetupContext)ctx; @@ -73,7 +74,8 @@ CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q, CEED_QFUNCTION(ICsNewtonianIG_Prim)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Outputs - CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + vec_t *q0 = (vec_t*) out[0]; // Context const SetupContext context = (SetupContext)ctx; @@ -152,13 +154,17 @@ CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + + const vec_t* q = (const vec_t*) in[0]; + const array_t* Grad_q = (const array_t*) in[1]; + const vec_t* q_data= (const vec_t*) in[2]; + const vec_t* x = (const vec_t*) in[3]; + // Outputs - CeedScalar (*v)[CEED_Q_VLA] 
= (CeedScalar(*)[CEED_Q_VLA])out[0], - (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + vec_t* v = (vec_t*) out[0]; + array_t* Grad_v = (array_t*) out[1]; // *INDENT-ON* // Context @@ -254,15 +260,17 @@ CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + const vec_t* q = (const vec_t*) in[0]; + const array_t* Grad_q = (const array_t*) in[1]; + const vec_t* q_dot = (const vec_t*) in[2]; + const vec_t* q_data = (const vec_t*) in[3]; + const vec_t* x = (const vec_t*) in[4]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1], - (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; + vec_t* v = (vec_t*) out[0]; + array_t* Grad_v = (array_t*) out[1]; + vec_t* jac_data = (vec_t*) out[2]; // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; @@ -359,14 +367,16 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + const vec_t* dq = 
(const vec_t*) in[0]; + const array_t* Grad_dq = (const array_t*) in[1]; + const vec_t* q_data = (const vec_t*) in[2]; + const vec_t* x = (const vec_t*)in[3]; + const vec_t* jac_data= (const vec_t*) in[4]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + vec_t* v = (vec_t*) out[0]; + array_t* Grad_v = (array_t*) out[1]; // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; @@ -392,7 +402,7 @@ CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q, }; // *INDENT-ON* - CeedScalar U[5], kmstress[6], Tau_d[3] __attribute((unused)); + CeedScalar U[5], kmstress[6], Tau_d[3]; for (int j=0; j<5; j++) U[j] = jac_data[j][i]; for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i]; for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i]; @@ -460,32 +470,31 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, CeedScalar *const *out) { //*INDENT-OFF* - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + const vec_t* q = (const vec_t*) in[0]; + const array_t* Grad_q = (const array_t*) in[1]; + const vec_t* q_data_sur = (const vec_t*) in[2]; + const vec_t* x = (const vec_t*) in[3]; - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0], - (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[1]; + vec_t* v = (vec_t*) out[0]; + vec_t* jac_data_sur = (vec_t*) out[1]; //*INDENT-ON* - const NewtonianIdealGasContext context = (NewtonianIdealGasContext) ctx; + NewtonianIdealGasContext context = (NewtonianIdealGasContext) ctx; const bool is_implicit = context->is_implicit; - State (*StateFromQi)(NewtonianIdealGasContext 
gas, - const CeedScalar qi[5], const CeedScalar x[3]); - State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, - State s, const CeedScalar dqi[5], - const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd; - CeedPragmaSIMD for(CeedInt i=0; iis_primitive) { + s = StateFromY(context, qi, x_i); + } else { + s = StateFromU(context, qi, x_i); + } const CeedScalar wdetJb = (is_implicit ? -1. : 1.) * q_data_sur[0][i]; // ---- Normal vect @@ -506,7 +515,11 @@ CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q, dqi[k] = Grad_q[0][k][i] * dXdx[0][j] + Grad_q[1][k][i] * dXdx[1][j]; dx_i[j] = 1.; - grad_s[j] = StateFromQi_fwd(context, s, dqi, x_i, dx_i); + if(context->is_primitive) { + grad_s[j] = StateFromY_fwd(context, s, dqi, x_i, dx_i); + } else { + grad_s[j] = StateFromU_fwd(context, s, dqi, x_i, dx_i); + } } CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3]; @@ -558,25 +571,20 @@ CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + + const vec_t* dq= (const vec_t*) in[0]; + const array_t* Grad_dq = (const array_t*) in[1]; + const vec_t* q_data_sur = (const vec_t*) in[2]; + const vec_t* x = (const vec_t*) in[3]; + const vec_t* jac_data_sur = (const vec_t*) in[4]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // *INDENT-ON* - const NewtonianIdealGasContext context = 
(NewtonianIdealGasContext)ctx; + NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; const bool implicit = context->is_implicit; - State (*StateFromQi)(NewtonianIdealGasContext gas, - const CeedScalar qi[5], const CeedScalar x[3]); - State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, - State s, const CeedScalar dqi[5], - const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd; - CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iis_primitive) { + s = StateFromY(context, qi, x_i); + } else { + s = StateFromU(context, qi, x_i); + } + State ds; + if(context->is_primitive) { + ds = StateFromY_fwd(context, s, dqi, x_i, dx_i); + } else { + ds = StateFromU_fwd(context, s, dqi, x_i, dx_i); + } State grad_ds[3]; for (CeedInt j=0; j<3; j++) { CeedScalar dx_i[3] = {0}, dqi_j[5]; for (CeedInt k=0; k<5; k++) dqi_j[k] = Grad_dq[0][k][i] * dXdx[0][j] + Grad_dq[1][k][i] * dXdx[1][j]; - dx_i[j] = 1.; - grad_ds[j] = StateFromQi_fwd(context, s, dqi_j, x_i, dx_i); + dx_i[j] = 1.0; + if(context->is_primitive) { + grad_ds[j] = StateFromY_fwd(context, s, dqi_j, x_i, dx_i); + } else { + grad_ds[j] = StateFromU_fwd(context, s, dqi_j, x_i, dx_i); + } } CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3]; @@ -641,27 +663,22 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + + const vec_t* q = (const vec_t*) in[0]; + const array_t* Grad_q = (const array_t*) in[1]; + const vec_t* q_data_sur 
= (const vec_t*) in[2]; + const vec_t* x = (const vec_t*) in[3]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[1]; + vec_t* v = (vec_t*) out[0]; + vec_t* jac_data_sur = (vec_t*) out[1]; // *INDENT-ON* - const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; + NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; const bool implicit = context->is_implicit; const CeedScalar P0 = context->P0; - State (*StateFromQi)(NewtonianIdealGasContext gas, - const CeedScalar qi[5], const CeedScalar x[3]); - State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, - State s, const CeedScalar dqi[5], - const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd; - CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iis_primitive) { + s= StateFromY(context, qi, x_i); + } else { + s= StateFromU(context, qi, x_i); + } s.Y.pressure = P0; // -- Interp-to-Interp q_data @@ -695,8 +717,12 @@ CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q, for (CeedInt k=0; k<5; k++) dqi[k] = Grad_q[0][k][i] * dXdx[0][j] + Grad_q[1][k][i] * dXdx[1][j]; - dx_i[j] = 1.; - grad_s[j] = StateFromQi_fwd(context, s, dqi, x_i, dx_i); + dx_i[j] = 1.0; + if(context->is_primitive) { + grad_s[j] = StateFromY_fwd(context, s, dqi, x_i, dx_i); + } else { + grad_s[j] = StateFromU_fwd(context, s, dqi, x_i, dx_i); + } } CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3]; @@ -748,26 +774,23 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - 
(*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + + const vec_t* dq= (const vec_t*) in[0]; + const array_t* Grad_dq = (const array_t*) in[1]; + const vec_t* q_data_sur = (const vec_t*) in[2]; + const vec_t* x = (const vec_t*) in[3]; + const vec_t* jac_data_sur = (const vec_t*) in[4]; + // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; + // *INDENT-ON* - const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; + NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; const bool implicit = context->is_implicit; - State (*StateFromQi)(NewtonianIdealGasContext gas, - const CeedScalar qi[5], const CeedScalar x[3]); - State (*StateFromQi_fwd)(NewtonianIdealGasContext gas, - State s, const CeedScalar dQi[5], - const CeedScalar x[3], const CeedScalar dx[3]); - StateFromQi = context->is_primitive ? &StateFromY : &StateFromU; - StateFromQi_fwd = context->is_primitive ? 
&StateFromY_fwd : &StateFromU_fwd; - CeedPragmaSIMD // Quadrature Point Loop for (CeedInt i=0; iis_primitive) { + s = StateFromY(context, qi, x_i); + } else { + s = StateFromU(context, qi, x_i); + } + + State ds; + if(context->is_primitive) { + ds = StateFromY_fwd(context, s, dqi, x_i, dx_i); + } else { + ds = StateFromU_fwd(context, s, dqi, x_i, dx_i); + } + s.Y.pressure = context->P0; ds.Y.pressure = 0.; @@ -798,8 +833,12 @@ CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q, for (CeedInt k=0; k<5; k++) dqi_j[k] = Grad_dq[0][k][i] * dXdx[0][j] + Grad_dq[1][k][i] * dXdx[1][j]; - dx_i[j] = 1.; - grad_ds[j] = StateFromQi_fwd(context, s, dqi_j, x_i, dx_i); + dx_i[j] = 1.0; + if(context->is_primitive) { + grad_ds[j] = StateFromY_fwd(context, s, dqi_j, x_i, dx_i); + } else { + grad_ds[j] = StateFromU_fwd(context, s, dqi_j, x_i, dx_i); + } } CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3]; @@ -835,15 +874,18 @@ CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + const vec_t* q= (const vec_t*) in[0]; + const array_t* Grad_q = (const array_t*) in[1]; + const vec_t* q_dot = (const vec_t*) in[2]; + const vec_t* q_data = (const vec_t*) in[3]; + const vec_t* x = (const vec_t*) in[4]; + // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1], - (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2]; + vec_t* v = (vec_t*) out[0]; + 
array_t* Grad_v = (array_t*) out[1]; + vec_t* jac_data = (vec_t*) out[2]; // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; @@ -946,14 +988,18 @@ CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1], - (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3], - (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + typedef CeedScalar array_t[5][CEED_Q_VLA]; + + const vec_t* dq = (const vec_t*) in[0]; + const array_t* Grad_dq = (const array_t*) in[1]; + const vec_t* q_data = (const vec_t*) in[2]; + const vec_t* x = (const vec_t*) in[3]; + const vec_t* jac_data = (vec_t*) in[4]; + // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0], - (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1]; + vec_t* v = (vec_t*) out[0]; + array_t* Grad_v = (array_t*) in[1]; // *INDENT-ON* // Context NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx; @@ -979,7 +1025,7 @@ CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q, }; // *INDENT-ON* - CeedScalar Y[5], kmstress[6], Tau_d[3] __attribute((unused)); + CeedScalar Y[5], kmstress[6], Tau_d[3]; for (int j=0; j<5; j++) Y[j] = jac_data[j][i]; for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i]; for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i]; diff --git a/examples/fluids/qfunctions/newtonian_types.h b/examples/fluids/qfunctions/newtonian_types.h index 879add7d13..e1a59b3856 100644 --- a/examples/fluids/qfunctions/newtonian_types.h +++ b/examples/fluids/qfunctions/newtonian_types.h @@ -11,7 +11,6 @@ #include #include "stabilization_types.h" -typedef struct SetupContext_ 
*SetupContext; struct SetupContext_ { CeedScalar theta0; CeedScalar thetaC; @@ -31,8 +30,8 @@ struct SetupContext_ { int bubble_type; // See BubbleType: 0=SPHERE, 1=CYLINDER int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK }; +#define SetupContext struct SetupContext_* -typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext; struct NewtonianIdealGasContext_ { CeedScalar lambda; CeedScalar mu; @@ -53,5 +52,6 @@ struct NewtonianIdealGasContext_ { bool is_primitive; StabilizationType stabilization; }; +#define NewtonianIdealGasContext struct NewtonianIdealGasContext_* #endif // newtonian_types_h From 4ba91afcf05fcbec3bdfe9c6523b292bebe84718 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 30 Aug 2022 16:33:21 +0000 Subject: [PATCH 36/39] Rewrites setupgeo kernels to be compatible with OCCA. --- examples/fluids/qfunctions/setupgeo.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/fluids/qfunctions/setupgeo.h b/examples/fluids/qfunctions/setupgeo.h index c45c094a94..4e35430016 100644 --- a/examples/fluids/qfunctions/setupgeo.h +++ b/examples/fluids/qfunctions/setupgeo.h @@ -53,11 +53,13 @@ CEED_QFUNCTION(Setup)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*J)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[0], - (*w) = in[1]; + typedef CeedScalar array_t[3][CEED_Q_VLA]; + const array_t* J = (const array_t*) in[0]; + const CeedScalar* const w = in[1]; // Outputs - CeedScalar (*q_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + vec_t* q_data = (vec_t*) out[0]; // *INDENT-ON* CeedPragmaSIMD @@ -152,10 +154,13 @@ CEED_QFUNCTION(SetupBoundary)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*J)[3][CEED_Q_VLA] = (const CeedScalar(*)[3][CEED_Q_VLA])in[0], - (*w) = in[1]; + 
typedef CeedScalar array_t[3][CEED_Q_VLA]; + const array_t* J = (const array_t*) in[0]; + const CeedScalar* const w = in[1]; + // Outputs - CeedScalar (*q_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + vec_t* q_data_sur = (vec_t*) out[0]; CeedPragmaSIMD // Quadrature Point Loop From 40d190722cdaf56e3db106c38cb8c701d1ea6589 Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 30 Aug 2022 16:33:50 +0000 Subject: [PATCH 37/39] Rewrites stabilization kernels to be compatible with OCCA. --- examples/fluids/qfunctions/stabilization.h | 15 ++++++++------- examples/fluids/qfunctions/stabilization_types.h | 15 ++++++++++----- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/examples/fluids/qfunctions/stabilization.h b/examples/fluids/qfunctions/stabilization.h index 0299612246..d87f3463be 100644 --- a/examples/fluids/qfunctions/stabilization.h +++ b/examples/fluids/qfunctions/stabilization.h @@ -126,13 +126,14 @@ CEED_QFUNCTION_HELPER void Tau_diagPrim(NewtonianIdealGasContext gas, State s, dts = Ctau_t / dt ; - tau = rho*rho*((4. * dts * dts) - + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3])) - + u[1] * ( u[1] * gijd[2] + 2. * u[2] * gijd[4]) - + u[2] * u[2] * gijd[5]) - + Ctau_v* mu * mu * - (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] + - + 2. 
* (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4])); + tau = rho* rho * ( + (4.0 * dts * dts) + + u[0] * ( u[0] * gijd[0] + 2.0 * ( u[1] * gijd[1] + u[2] * gijd[3]) ) + + u[1] * ( u[1] * gijd[2] + 2.0 * u[2] * gijd[4]) + + u[2] * u[2] * gijd[5]) + + Ctau_v* mu * mu * ( + gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] + + 2.0 * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]) ); fact = sqrt(tau); diff --git a/examples/fluids/qfunctions/stabilization_types.h b/examples/fluids/qfunctions/stabilization_types.h index 7e484df200..68db31754a 100644 --- a/examples/fluids/qfunctions/stabilization_types.h +++ b/examples/fluids/qfunctions/stabilization_types.h @@ -1,10 +1,15 @@ #ifndef stabilization_types_h #define stabilization_types_h -typedef enum { - STAB_NONE = 0, - STAB_SU = 1, // Streamline Upwind - STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin -} StabilizationType; +// typedef enum { + // STAB_NONE = 0, + // STAB_SU = 1, // Streamline Upwind + // STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin +// } StabilizationType; + +typedef int StabilizationType; +static const int STAB_NONE=0; +static const int STAB_SU=1; +static const int STAB_SUPG=2; #endif // stabilization_types_h From 3d396e1f8286d27304ed122e7756a08c07dc761b Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 30 Aug 2022 16:34:21 +0000 Subject: [PATCH 38/39] Rewrites stg kernels to be compatible with OCCA. 
--- examples/fluids/qfunctions/stg_shur14.h | 53 +++++++++++--------- examples/fluids/qfunctions/stg_shur14_type.h | 26 +++++----- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/examples/fluids/qfunctions/stg_shur14.h b/examples/fluids/qfunctions/stg_shur14.h index 043328f55f..7be2143b39 100644 --- a/examples/fluids/qfunctions/stg_shur14.h +++ b/examples/fluids/qfunctions/stg_shur14.h @@ -100,7 +100,7 @@ CEED_QFUNCTION_HELPER void InterpolateProfile(const CeedScalar dw, * @param[in] Ektot Total turbulent kinetic energy of spectrum * @returns qn Spectrum coefficient */ -CeedScalar CEED_QFUNCTION_HELPER(Calc_qn)(const CeedScalar kappa, +CEED_QFUNCTION_HELPER CeedScalar Calc_qn(const CeedScalar kappa, const CeedScalar dkappa, const CeedScalar keta, const CeedScalar kcut, const CeedScalar ke, const CeedScalar Ektot_inv) { const CeedScalar feta_x_fcut = exp(-Square(12*kappa/keta) @@ -110,7 +110,7 @@ CeedScalar CEED_QFUNCTION_HELPER(Calc_qn)(const CeedScalar kappa, } // Calculate hmax, ke, keta, and kcut -void CEED_QFUNCTION_HELPER(SpectrumConstants)(const CeedScalar dw, +CEED_QFUNCTION_HELPER void SpectrumConstants(const CeedScalar dw, const CeedScalar eps, const CeedScalar lt, const CeedScalar h[3], const CeedScalar nu, CeedScalar *hmax, CeedScalar *ke, CeedScalar *keta, CeedScalar *kcut) { @@ -133,7 +133,7 @@ void CEED_QFUNCTION_HELPER(SpectrumConstants)(const CeedScalar dw, * @param[in] stg_ctx STGShur14Context for the problem * @param[out] qn Spectrum coefficients, [nmodes] */ -void CEED_QFUNCTION_HELPER(CalcSpectrum)(const CeedScalar dw, +CEED_QFUNCTION_HELPER void CalcSpectrum(const CeedScalar dw, const CeedScalar eps, const CeedScalar lt, const CeedScalar h[3], const CeedScalar nu, CeedScalar qn[], const STGShur14Context stg_ctx) { @@ -163,7 +163,7 @@ void CEED_QFUNCTION_HELPER(CalcSpectrum)(const CeedScalar dw, * @param[out] u Velocity at X and t * @param[in] stg_ctx STGShur14Context for the problem */ -void 
CEED_QFUNCTION_HELPER(STGShur14_Calc)(const CeedScalar X[3], +CEED_QFUNCTION_HELPER void STGShur14_Calc(const CeedScalar X[3], const CeedScalar t, const CeedScalar ubar[3], const CeedScalar cij[6], const CeedScalar qn[], CeedScalar u[3], const STGShur14Context stg_ctx) { @@ -206,7 +206,7 @@ void CEED_QFUNCTION_HELPER(STGShur14_Calc)(const CeedScalar X[3], * @param[out] u Velocity at X and t * @param[in] stg_ctx STGShur14Context for the problem */ -void CEED_QFUNCTION_HELPER(STGShur14_Calc_PrecompEktot)(const CeedScalar X[3], +CEED_QFUNCTION_HELPER void STGShur14_Calc_PrecompEktot(const CeedScalar X[3], const CeedScalar t, const CeedScalar ubar[3], const CeedScalar cij[6], const CeedScalar Ektot, const CeedScalar h[3], const CeedScalar dw, const CeedScalar eps, const CeedScalar lt, const CeedScalar nu, CeedScalar u[3], @@ -249,10 +249,11 @@ void CEED_QFUNCTION_HELPER(STGShur14_Calc_PrecompEktot)(const CeedScalar X[3], CEED_QFUNCTION(Preprocess_STGShur14)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { //*INDENT-OFF* - const CeedScalar (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], - (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* q_data_sur = (const vec_t*) in[0]; + const vec_t* x = (const vec_t*) in[1]; - CeedScalar (*stg_data) = (CeedScalar(*)) out[0]; + CeedScalar* stg_data = (CeedScalar*) out[0]; //*INDENT-ON* CeedScalar ubar[3], cij[6], eps, lt; @@ -303,10 +304,11 @@ CEED_QFUNCTION(Preprocess_STGShur14)(void *ctx, CeedInt Q, CEED_QFUNCTION(ICsSTG)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { // Inputs - const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* X = (const vec_t*) in[0]; // Outputs - CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* q0 = (vec_t*) out[0]; const STGShur14Context stg_ctx = 
(STGShur14Context) ctx; CeedScalar u[3], cij[6], eps, lt; @@ -349,12 +351,13 @@ CEED_QFUNCTION(STGShur14_Inflow)(void *ctx, CeedInt Q, CeedScalar *const *out) { //*INDENT-OFF* - const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[2], - (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[3]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* q = (const vec_t*) in[0]; + const vec_t* q_data_sur = (const vec_t*) in[2]; + const vec_t* X = (const vec_t*) in[3]; - CeedScalar(*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0], - (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[1]; + vec_t* v = (vec_t*) out[0]; + vec_t *jac_data_sur = (vec_t*) out[1]; //*INDENT-ON* @@ -450,11 +453,12 @@ CEED_QFUNCTION(STGShur14_Inflow_Jacobian)(void *ctx, CeedInt Q, CeedScalar *const *out) { // *INDENT-OFF* // Inputs - const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0], - (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2], - (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4]; + typedef CeedScalar vec_t[CEED_Q_VLA]; + const vec_t* dq = (const vec_t*) in[0]; + const vec_t* q_data_sur = (const vec_t*) in[2]; + const vec_t* jac_data_sur = (const vec_t*) in[4]; // Outputs - CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0]; + vec_t* v = (vec_t*) out[0]; // *INDENT-ON* const STGShur14Context stg_ctx = (STGShur14Context)ctx; const bool implicit = stg_ctx->is_implicit; @@ -520,12 +524,13 @@ CEED_QFUNCTION(STGShur14_Inflow_StrongQF)(void *ctx, CeedInt Q, const CeedScalar *const *in, CeedScalar *const *out) { //*INDENT-OFF* - const CeedScalar (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[0], - (*coords)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[1], - (*scale) = (const CeedScalar(*)) in[2], - (*stg_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA]) in[3]; + typedef CeedScalar 
vec_t[CEED_Q_VLA]; + const vec_t* q_data_sur = (const vec_t*) in[0]; + const vec_t* coords = (const vec_t*) in[1]; + const CeedScalar * const scale = in[2]; + const vec_t* stg_data = (const vec_t*) in[3]; - CeedScalar(*bcval)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[0]; + vec_t* bcval = (vec_t*) out[0]; //*INDENT-ON* const STGShur14Context stg_ctx = (STGShur14Context) ctx; diff --git a/examples/fluids/qfunctions/stg_shur14_type.h b/examples/fluids/qfunctions/stg_shur14_type.h index 25731d2d1c..25e5247a10 100644 --- a/examples/fluids/qfunctions/stg_shur14_type.h +++ b/examples/fluids/qfunctions/stg_shur14_type.h @@ -11,10 +11,20 @@ #include #include "newtonian_types.h" +struct offsets_{ + size_t sigma, d, phi; // !< Random number set, [nmodes,3], [nmodes,3], [nmodes] + size_t kappa; // !< Wavemode frequencies in increasing order, [nmodes] + size_t prof_dw; // !< Distance to wall for Inflow Profie, [nprof] + size_t ubar; // !< Mean velocity, [nprof, 3] + size_t cij; // !< Cholesky decomposition [nprof, 6] + size_t eps; // !< Turbulent Disspation [nprof, 6] + size_t lt; // !< Tubulent Length Scale [nprof, 6] + size_t ynodes; // !< Locations of nodes in y direction [nynodes] +} ; // !< Holds offsets for each array in data + /* Access data arrays via: * CeedScalar (*sigma)[ctx->nmodes] = (CeedScalar (*)[ctx->nmodes])&ctx->data[ctx->offsets.sigma]; * CeedScalar *eps = &ctx->data[ctx->offsets.eps]; */ -typedef struct STGShur14Context_ *STGShur14Context; struct STGShur14Context_ { CeedInt nmodes; // !< Number of wavemodes CeedInt nprofs; // !< Number of profile points in STGInflow.dat @@ -30,19 +40,11 @@ struct STGShur14Context_ { CeedScalar dz; // !< dz used for h calculation bool prescribe_T; // !< Prescribe temperature weakly struct NewtonianIdealGasContext_ newtonian_ctx; - - struct { - size_t sigma, d, phi; // !< Random number set, [nmodes,3], [nmodes,3], [nmodes] - size_t kappa; // !< Wavemode frequencies in increasing order, [nmodes] - size_t prof_dw; // !< 
Distance to wall for Inflow Profie, [nprof] - size_t ubar; // !< Mean velocity, [nprof, 3] - size_t cij; // !< Cholesky decomposition [nprof, 6] - size_t eps; // !< Turbulent Disspation [nprof, 6] - size_t lt; // !< Tubulent Length Scale [nprof, 6] - size_t ynodes; // !< Locations of nodes in y direction [nynodes] - } offsets; // !< Holds offsets for each array in data + struct offsets_ offsets; size_t total_bytes; // !< Total size of struct plus array CeedScalar data[1]; // !< Holds concatenated scalar array data }; +#define STGShur14Context struct STGShur14Context_* + #endif From 8801fe348cc1b00474d645bb3c836fc9c2940e2b Mon Sep 17 00:00:00 2001 From: Kris Rowe Date: Tue, 30 Aug 2022 16:34:43 +0000 Subject: [PATCH 39/39] Adds occa backends to tests for the fluids example. --- tests/tap.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/tap.sh b/tests/tap.sh index f085cb8cf0..0ee3b8ee44 100755 --- a/tests/tap.sh +++ b/tests/tap.sh @@ -82,13 +82,13 @@ for ((i=0;i<${#backends[@]};++i)); do backend=${backends[$i]} # Fluids and Solids QFunctions use VLA; not currently supported in OCCA - if [[ "$backend" = *occa* && \ - ( "$1" = fluids-* || "$1" = solids-* || "$1" = t507* ) ]]; then - printf "ok $i0 # SKIP - no support for VLA with $backend\n" - printf "ok $i1 # SKIP - no support for VLA with $backend stdout\n" - printf "ok $i2 # SKIP - no support for VLA with $backend stderr\n" - continue; - fi + # if [[ "$backend" = *occa* && \ + # ( "$1" = fluids-* || "$1" = solids-* || "$1" = t507* ) ]]; then + # printf "ok $i0 # SKIP - no support for VLA with $backend\n" + # printf "ok $i1 # SKIP - no support for VLA with $backend stdout\n" + # printf "ok $i2 # SKIP - no support for VLA with $backend stderr\n" + # continue; + # fi # Nek5000 integration not currently supported in OCCA if [[ "$backend" = *occa* && \