Laghos GPU variants (#539)

* Initial push for Laghos GPU implementation
* lint
* lint
* lint
* Removing unused import
* Updating dry runs

---------

Co-authored-by: august-knox <[email protected]>
LLNL · Jan 9, 2025 · b5384ff · b5384ff
1 parent 757b8da
commit b5384ff
Show file tree

Hide file tree

Showing 4 changed files with 83 additions and 50 deletions.
diff --git a/.github/workflows/run.yml b/.github/workflows/run.yml
@@ -261,27 +261,6 @@ jobs:
             --disable-logger \
             workspace setup --dry-run
 
-      - name: Dry run dynamic laghos/mpi-only on LLNL-Magma-Penguin-icelake-OmniPath with allocation modifier
-        run: |
-          ./bin/benchpark experiment init --dest=laghos-mpi-only laghos
-          ./bin/benchpark setup ./laghos-mpi-only LLNL-Magma-Penguin-icelake-OmniPath workspace/
-          . workspace/setup.sh
-          ramble \
-            --workspace-dir workspace/laghos-mpi-only/LLNL-Magma-Penguin-icelake-OmniPath/workspace \
-            --disable-progress-bar \
-            --disable-logger \
-            workspace setup --dry-run
-
-      - name: Dry run laghos/mpi-only on LLNL-Ruby-icelake-OmniPath with allocation modifier
-        run: |
-          ./bin/benchpark setup laghos-mpi-only LLNL-Ruby-icelake-OmniPath workspace/
-          . workspace/setup.sh
-          ramble \
-            --workspace-dir workspace/laghos-mpi-only/LLNL-Ruby-icelake-OmniPath/workspace \
-            --disable-progress-bar \
-            --disable-logger \
-            workspace setup --dry-run
-
       - name: Dry run lammps/openmp with static Ruby
         run: |
           ./bin/benchpark setup lammps/openmp LLNL-Ruby-icelake-OmniPath workspace/
@@ -614,3 +593,26 @@ jobs:
             --disable-progress-bar \
             --disable-logger \
             workspace setup --dry-run
+
+      - name: Dry run dynamic laghos/mpi-only with dynamic CTS magma
+        run: |
+          ./bin/benchpark experiment init --dest=laghos-mpi-only laghos
+          ./bin/benchpark setup ./laghos-mpi-only magma-system workspace/
+          system_id=$(./bin/benchpark system id ./magma-system)
+          . workspace/setup.sh
+          ramble \
+            --workspace-dir "workspace/laghos-mpi-only/$system_id/workspace" \
+            --disable-progress-bar \
+            --disable-logger \
+            workspace setup --dry-run
+
+      - name: Dry run laghos/mpi-only with dynamic CTS ruby
+        run: |
+          ./bin/benchpark setup laghos-mpi-only ruby-system workspace/
+          system_id=$(./bin/benchpark system id ./ruby-system)
+          . workspace/setup.sh
+          ramble \
+            --workspace-dir "workspace/laghos-mpi-only/$system_id/workspace" \
+            --disable-progress-bar \
+            --disable-logger \
+            workspace setup --dry-run
diff --git a/experiments/laghos/experiment.py b/experiments/laghos/experiment.py
@@ -3,17 +3,20 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from benchpark.error import BenchparkError
 from benchpark.directives import variant
 from benchpark.experiment import Experiment
 from benchpark.scaling import StrongScaling
 from benchpark.expr.builtin.caliper import Caliper
+from benchpark.cuda import CudaExperiment
+from benchpark.rocm import ROCmExperiment
 
 
 class Laghos(
     Experiment,
     StrongScaling,
     Caliper,
+    CudaExperiment,
+    ROCmExperiment,
 ):
 
     variant(
@@ -29,36 +32,30 @@ class Laghos(
     )
 
     def compute_applications_section(self):
-        # TODO: Replace with conflicts clause
-        scaling_modes = {
-            "strong": self.spec.satisfies("+strong"),
-            "single_node": self.spec.satisfies("+single_node"),
-        }
-
-        scaling_mode_enabled = [key for key, value in scaling_modes.items() if value]
-        if len(scaling_mode_enabled) != 1:
-            raise BenchparkError(
-                f"Only one type of scaling per experiment is allowed for application package {self.name}"
-            )
 
         # Number of initial nodes
-        num_nodes = {"n_nodes": 1}
+        n_resources = {"n_nodes": 1}
+        device = "n_ranks"
+        if self.spec.satisfies("+cuda"):
+            self.add_experiment_variable("device", "cuda", True)
+        elif self.spec.satisfies("+rocm"):
+            self.add_experiment_variable("device", "hip", True)
 
+        if self.spec.satisfies("+cuda") or self.spec.satisfies("+rocm"):
+            device = "n_gpus"
         if self.spec.satisfies("+single_node"):
-            for pk, pv in num_nodes.items():
-                self.add_experiment_variable(pk, pv, True)
+            for pk, pv in n_resources.items():
+                self.add_experiment_variable(device, pv, True)
         elif self.spec.satisfies("+strong"):
             scaled_variables = self.generate_strong_scaling_params(
-                {tuple(num_nodes.keys()): list(num_nodes.values())},
+                {tuple(n_resources.keys()): list(n_resources.values())},
                 int(self.spec.variants["scaling-factor"][0]),
                 int(self.spec.variants["scaling-iterations"][0]),
             )
             for pk, pv in scaled_variables.items():
                 self.add_experiment_variable(pk, pv, True)
-
-        self.add_experiment_variable(
-            "n_ranks", "{sys_cores_per_node} * {n_nodes}", True
-        )
+            num_resources = scaled_variables["n_nodes"]
+            self.add_experiment_variable(device, num_resources, True)
 
     def compute_spack_section(self):
         # get package version
@@ -69,11 +66,23 @@ def compute_spack_section(self):
         system_specs = {}
         system_specs["compiler"] = "default-compiler"
         system_specs["mpi"] = "default-mpi"
+        system_specs["lapack"] = "default-lapack"
+        system_specs["blas"] = "default-blas"
 
         # set package spack specs
         # empty package_specs value implies external package
         self.add_spack_spec(system_specs["mpi"])
-        # self.add_spack_spec(system_specs["blas"])
+
+        if self.spec.satisfies("+cuda"):
+            system_specs["cuda_version"] = "{default_cuda_version}"
+            system_specs["cuda_arch"] = "{cuda_arch}"
+        elif self.spec.satisfies("+rocm"):
+            system_specs["rocm_arch"] = "{rocm_arch}"
+
+        # empty package_specs value implies external package
+        self.add_spack_spec(system_specs["blas"])
+        self.add_spack_spec(system_specs["lapack"])
+        self.add_spack_spec(system_specs["mpi"])
 
         self.add_spack_spec(
             self.name, [f"laghos@{app_version} +metis", system_specs["compiler"]]

diff --git a/repo/laghos/application.py b/repo/laghos/application.py
@@ -19,7 +19,7 @@ class Laghos(ExecutableApplication):
             'lagrangian','spatial-discretization','unstructured-grid',
             'network-latency-bound','network-collectives','unstructured-grid']
 
-    executable('prob', 'laghos -p {problem} -m {mesh} -rs {rs} -rp {rp} -ms {ms}', use_mpi=True)
+    executable('prob', 'laghos -p {problem} -m {mesh} -rs {rs} -rp {rp} -ms {ms} -d {device}', use_mpi=True)
 
     workload('triplept', executables=['prob'])
 
@@ -31,17 +31,20 @@ class Laghos(ExecutableApplication):
             description='problem number',
             workloads=['triplept'])
 
-    workload_variable('rs', default='5',
+    workload_variable('rs', default='2',
             description='number of serial refinements',
             workloads=['triplept'])
 
     workload_variable('rp', default='0',
             description='number of parallel refinements',
             workloads=['triplept'])
 
-    workload_variable('ms', default='500',
+    workload_variable('ms', default='250',
             description='max number of steps',
             workloads=['triplept'])
+    workload_variable('device', default='cpu',
+            description='device backend passed to laghos -d (cpu, cuda, or hip)',
+            workloads=['triplept'])
 
     figure_of_merit('Major kernels total time',
                     log_file='{experiment_run_dir}/{experiment_name}.out',

diff --git a/repo/laghos/package.py b/repo/laghos/package.py
@@ -6,7 +6,7 @@
 from spack.package import *
 
 
-class Laghos(MakefilePackage):
+class Laghos(MakefilePackage, CudaPackage, ROCmPackage):
     """Laghos (LAGrangian High-Order Solver) is a CEED miniapp that solves the
     time-dependent Euler equations of compressible gas dynamics in a moving
     Lagrangian frame using unstructured high-order finite element spatial
@@ -33,16 +33,35 @@ class Laghos(MakefilePackage):
     depends_on("caliper", when="+caliper")
     depends_on("adiak", when="+caliper")
 
-    depends_on("hypre~fortran", when="@develop")
-    depends_on("[email protected]+optimize+pic+shared", when="@develop")
-    depends_on("mfem@develop^[email protected]+optimize+pic+shared", when="@develop")
+    depends_on("[email protected]+optimize+pic+shared")
+    #depends_on("[email protected]+optimize+pic+shared", when="@develop")
+    #depends_on("mfem@develop^[email protected]+optimize+pic+shared", when="@develop")
     depends_on("[email protected]:", when="@3.1")
     depends_on("[email protected]:4.1", when="@3.0")
     # Recommended mfem version for laghos v2.0 is: ^[email protected]
     depends_on("[email protected]", when="@2.0")
     # Recommended mfem version for laghos v1.x is: ^[email protected]
     depends_on("[email protected]", when="@1.0,1.1")
-
+    depends_on("[email protected]_comm_cali")
+    depends_on("mfem cxxstd=14")
+
+
+    depends_on("mpi")
+    depends_on("hypre+mpi")
+    depends_on("hypre+cuda+mpi", when="+cuda")
+    depends_on("[email protected]+mixedint~fortran", when="@develop")
+
+    requires("+cuda", when="^hypre+cuda")
+    for arch in ("none", "50", "60", "70", "80"):
+        depends_on(f"hypre cuda_arch={arch}", when=f"cuda_arch={arch}")
+        depends_on(f"mfem cuda_arch={arch}", when=f"cuda_arch={arch}")
+    depends_on("mfem +cuda+mpi", when="+cuda")
+    depends_on("mfem +rocm+mpi", when="+rocm")
+    depends_on("hypre +rocm +mpi", when="+rocm")
+    requires("+rocm", when="^hypre+rocm")
+    for target in ("none", "gfx803", "gfx900", "gfx906", "gfx908", "gfx90a", "gfx942"):
+        depends_on(f"hypre amdgpu_target={target}", when=f"amdgpu_target={target}")
+        depends_on(f"mfem amdgpu_target={target}", when=f"amdgpu_target={target}")
     # Replace MPI_Session
     patch(
         "https://github.com/CEED/Laghos/commit/c800883ab2741c8c3b99486e7d8ddd8e53a7cb95.patch?full_index=1",