Merge pull request #3825 from nilsvu/blaze_padding

Blaze: restore support for padding
sxs-collaboration · Aug 21, 2024 · c011a22 · c011a22
2 parents e078510 + ff40b91
commit c011a22
Show file tree

Hide file tree

Showing 5 changed files with 24 additions and 14 deletions.
diff --git a/cmake/SetupBlaze.cmake b/cmake/SetupBlaze.cmake
@@ -130,6 +130,13 @@ target_compile_definitions(Blaze
   #   by the Blaze CMake configuration for the machine we are running on. We
   #   could override it here explicitly to tune performance.
   # BLAZE_CACHE_SIZE
+  # - Disable padding for dynamic matrices.
+  #   Blaze warns that this may decrease performance:
+  #   https://bitbucket.org/blaze-lib/blaze/src/c4d9e85414370e880e5e79c86e3c8d4d38dcde7a/blaze/config/Optimizations.h#lines-52
+  #   We haven't tested this much, so we may want to try enabling padding again.
+  #   To support padding, explicit calls to LAPACK functions need to pass
+  #   `.spacing()` instead of `.rows()/.columns()` to the `LDA`, `LDB`, etc.
+  #   parameters (see `[matrix_spacing]` in `Test_Spectral.cpp`).
   BLAZE_USE_PADDING=0
   # - Always enable non-temporal stores for cache optimization of large data
   #   structures: https://bitbucket.org/blaze-lib/blaze/wiki/Configuration%20Files#!streaming-non-temporal-stores

diff --git a/src/Evolution/DgSubcell/Matrices.cpp b/src/Evolution/DgSubcell/Matrices.cpp
@@ -309,13 +309,13 @@ Matrix reconstruction_matrix_cache_impl_helper(
   // We use rhs_recons_matrix here as a temp buffer, we will fill it later.
   Matrix rhs_recons_matrix(num_pts + 1, num_subcells);
   dgemm_<true>('T', 'N', proj_matrix.columns(), proj_matrix.columns(),
-               proj_matrix.rows(), 2.0, proj_matrix.data(), proj_matrix.rows(),
-               proj_matrix.data(), proj_matrix.rows(), 0.0,
-               rhs_recons_matrix.data(), proj_matrix.columns());
+               proj_matrix.rows(), 2.0, proj_matrix.data(),
+               proj_matrix.spacing(), proj_matrix.data(), proj_matrix.spacing(),
+               0.0, rhs_recons_matrix.data(), rhs_recons_matrix.spacing());
 
   for (size_t l = 0; l < num_pts; ++l) {
     for (size_t j = 0; j < num_pts; ++j) {
-      lhs_recons_matrix(l, j) = *(rhs_recons_matrix.data() + (j + l * num_pts));
+      lhs_recons_matrix(l, j) = rhs_recons_matrix(j, l);
     }
   }
 
@@ -363,9 +363,10 @@ Matrix reconstruction_matrix_cache_impl_helper(
   // than Blaze.
   dgemm_<true>('N', 'N', inv_lhs_recons_matrix.rows(),
                rhs_recons_matrix.columns(), inv_lhs_recons_matrix.columns(),
-               1.0, inv_lhs_recons_matrix.data(), inv_lhs_recons_matrix.rows(),
-               rhs_recons_matrix.data(), inv_lhs_recons_matrix.columns(), 0.0,
-               full_recons_matrix.data(), inv_lhs_recons_matrix.rows());
+               1.0, inv_lhs_recons_matrix.data(),
+               inv_lhs_recons_matrix.spacing(), rhs_recons_matrix.data(),
+               rhs_recons_matrix.spacing(), 0.0, full_recons_matrix.data(),
+               full_recons_matrix.spacing());
   // exclude bottom row for Lagrange multiplier.
   Matrix reduced_recons_matrix(num_pts, num_subcells);
   for (size_t i = 0; i < num_pts; ++i) {

diff --git a/src/Evolution/DgSubcell/Reconstruction.cpp b/src/Evolution/DgSubcell/Reconstruction.cpp
@@ -32,11 +32,11 @@ void reconstruct_impl(
   if (reconstruction_method == ReconstructionMethod::AllDimsAtOnce) {
     const Matrix& recons_matrix =
         reconstruction_matrix(dg_mesh, subcell_extents);
-    dgemm_<true>('N', 'N', recons_matrix.rows(), number_of_components,
-                 recons_matrix.columns(), 1.0, recons_matrix.data(),
-                 recons_matrix.rows(), subcell_u_times_projected_det_jac.data(),
-                 recons_matrix.columns(), 0.0, dg_u.data(),
-                 recons_matrix.rows());
+    dgemm_<true>(
+        'N', 'N', recons_matrix.rows(), number_of_components,
+        recons_matrix.columns(), 1.0, recons_matrix.data(),
+        recons_matrix.spacing(), subcell_u_times_projected_det_jac.data(),
+        recons_matrix.columns(), 0.0, dg_u.data(), recons_matrix.rows());
   } else {
     ASSERT(reconstruction_method == ReconstructionMethod::DimByDim,
            "reconstruction_method must be either DimByDim or AllDimsAtOnce");

diff --git a/tests/Unit/Evolution/DgSubcell/Test_Matrices.cpp b/tests/Unit/Evolution/DgSubcell/Test_Matrices.cpp
@@ -221,8 +221,8 @@ void reconstruction_matrix(const double eps) {
 
     DataVector reconstructed_nodal_coeffs(num_pts);
     dgemv_('N', single_recons.rows(), single_recons.columns(), 1.0,
-           single_recons.data(), single_recons.rows(), subcell_values.data(), 1,
-           0.0, reconstructed_nodal_coeffs.data(), 1);
+           single_recons.data(), single_recons.spacing(), subcell_values.data(),
+           1, 0.0, reconstructed_nodal_coeffs.data(), 1);
 
     CHECK_ITERABLE_CUSTOM_APPROX(expected_nodal_coeffs,
                                  reconstructed_nodal_coeffs, local_approx);

diff --git a/tests/Unit/NumericalAlgorithms/Spectral/Test_Spectral.cpp b/tests/Unit/NumericalAlgorithms/Spectral/Test_Spectral.cpp
@@ -62,8 +62,10 @@ void test_exact_differentiation_impl(const Function& max_poly_deg) {
           Spectral::differentiation_matrix<BasisType, QuadratureType>(n);
       const auto u = unit_polynomial(p, collocation_pts);
       DataVector numeric_derivative{n};
+      // [matrix_spacing]
       dgemv_('N', n, n, 1., diff_matrix.data(), diff_matrix.spacing(), u.data(),
              1, 0.0, numeric_derivative.data(), 1);
+      // [matrix_spacing]
       const auto analytic_derivative =
           unit_polynomial_derivative(p, collocation_pts);
       if (BasisType == Spectral::Basis::FiniteDifference and n > 20) {