[CI] Add CI workflow to run compute-benchmarks on incoming syclos PRs #14454
.github/workflows/sycl-benchmark-aggregate.yml (new file)
@@ -0,0 +1,105 @@
name: Aggregate compute-benchmark averages from historical data

# The benchmarking workflow in sycl-linux-run-tests.yml passes or fails based on
# how the benchmark results compare to a historical average: This historical
# average is calculated in this workflow, which aggregates historical data and
# produces measures of central tendency (median in this case) used for this
# purpose.

on:
  workflow_dispatch:
    inputs:
      cutoff_timestamp:
        description: |
          Timestamp indicating the age limit of data used in average calculation:
          Any benchmark results created before this timestamp are excluded from
          being aggregated.

          Any valid date string supported by GNU coreutils is valid here:
          https://www.gnu.org/software/coreutils/manual/html_node/Date-input-formats.html
        type: string
        required: false
  workflow_call:
    inputs:
      cutoff_timestamp:
        type: string
        required: false

permissions:
  contents: read

jobs:
  aggregate:
    name: Aggregate average (median) value for all metrics
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: llvm
          sparse-checkout: |
            devops/scripts/benchmarking
      - name: Load benchmarking configuration
        run: |
          CONFIG_FILE="$GITHUB_WORKSPACE/llvm/devops/scripts/benchmarking/benchmark-ci.conf"

          # Load default values from configuration file
          . "$GITHUB_WORKSPACE/llvm/devops/scripts/benchmarking/utils.sh"
          # utils.sh contains functions to sanitize config file settings
          load_single_config $CONFIG_FILE PERF_RES_GIT_REPO
          load_single_config $CONFIG_FILE PERF_RES_BRANCH
          load_single_config $CONFIG_FILE PERF_RES_PATH
          echo "PERF_RES_GIT_REPO=$PERF_RES_GIT_REPO" >> $GITHUB_ENV
          echo "PERF_RES_BRANCH=$PERF_RES_BRANCH" >> $GITHUB_ENV
          echo "PERF_RES_PATH=$PERF_RES_PATH" >> $GITHUB_ENV

          # Determine a "cutoff timestamp" used by the aggregator script
          #
          # This timestamp controls which historical results are used to compute
          # measures of central tendency: Any files timestamped *before* this time
          # will be *excluded* from the central tendency calculation.

          load_single_config $CONFIG_FILE TIMESTAMP_FORMAT
          echo "TIMESTAMP_FORMAT=$TIMESTAMP_FORMAT" >> $GITHUB_ENV
          if [ -z '${{ inputs.cutoff_timestamp }}' ]; then
            # No time given, use default time period from config file:
            load_single_config $CONFIG_FILE AVERAGE_CUTOFF_RANGE
            echo "CUTOFF_TIMESTAMP=$(date --date="$AVERAGE_CUTOFF_RANGE" +"$TIMESTAMP_FORMAT")" >> $GITHUB_ENV
          else
            # If the provided time is a valid GNU coreutils date string, convert
            # the time to our format:
            _converted_timestamp="$(date --date '${{ inputs.cutoff_timestamp }}' +"$TIMESTAMP_FORMAT" 2> /dev/null)"
            if [ -n "$_converted_timestamp" ]; then
              echo "CUTOFF_TIMESTAMP=$_converted_timestamp" >> $GITHUB_ENV
            else
              # If not a valid GNU date string, it could be in our timestamp format already.
              # aggregate.py will ensure the timestamp is in the proper format, so we can pass the
              # time forward regardless:
              echo 'CUTOFF_TIMESTAMP=${{ inputs.cutoff_timestamp }}' >> $GITHUB_ENV
            fi
          fi
      - name: Checkout historical performance results repository
        run: |
          git clone -b $PERF_RES_BRANCH https://github.com/$PERF_RES_GIT_REPO $PERF_RES_PATH
      - name: Run aggregator on historical results
        run: |
          # The current format of the historical results repository is:
          #   /<runner type>/<test case name>
          # Thus, a min/max depth of 2 is used to enumerate all test cases in the
          # repository. Runner type and test case name are also extracted from
          # this path.
          for dir in $(find "$PERF_RES_PATH" -mindepth 2 -maxdepth 2 -type d ! -path '*.git*'); do
            _runner="$(basename $(dirname $dir))"
            _testcase="$(basename $dir)"
            python llvm/devops/scripts/benchmarking/aggregate.py "$_runner" "$_testcase" "$CUTOFF_TIMESTAMP"
          done
      - name: Upload average to the repo
        env:
          GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
        run: |
          # TODO -- waiting on security clearance
          cd "$PERF_RES_PATH"
          git config user.name "SYCL Benchmarking Bot"
          git config user.email "[email protected]"
          git add .
          git commit -m "[GHA] Aggregate median data from $CUTOFF_TIMESTAMP to $(date +"$TIMESTAMP_FORMAT")"
          git push "https://[email protected]/$PERF_RES_GIT_REPO.git" "$PERF_RES_BRANCH"
.github/workflows/sycl-linux-run-tests.yml (modified)
@@ -25,7 +25,7 @@ on:
       required: False
     tests_selector:
       description: |
-        Two possible options: "e2e" and "cts".
+        Three possible options: "e2e", "cts", and "benchmark".
       type: string
       default: "e2e"

@@ -153,6 +153,7 @@ on:
         options:
           - e2e
           - cts
+          - benchmark

       env:
         description: |

@@ -192,8 +193,14 @@ permissions:
   packages: read

 jobs:
+  benchmark_aggregate:
+    if: ${{ inputs.tests_selector == 'benchmark' }}
+    name: (Benchmark only) Aggregate benchmark data
+    uses: ./.github/workflows/sycl-benchmark-aggregate.yml
+
   run:
-    if: github.event_name == 'workflow_dispatch' || inputs.skip_run == 'false'
+    if: ${{ always() && ( github.event_name == 'workflow_dispatch' || inputs.skip_run == 'false' ) }}
+    needs: benchmark_aggregate
     name: ${{ inputs.name }}
     runs-on: ${{ fromJSON(inputs.runner) }}
     container:

@@ -316,12 +323,12 @@ jobs:
           fi

       - name: Download E2E Binaries
-        if: inputs.e2e_binaries_artifact != ''
+        if: inputs.tests_selector == 'e2e' && inputs.e2e_binaries_artifact != ''
         uses: actions/download-artifact@v4
         with:
           name: ${{ inputs.e2e_binaries_artifact }}
       - name: Extract E2E Binaries
-        if: inputs.e2e_binaries_artifact != ''
+        if: inputs.tests_selector == 'e2e' && inputs.e2e_binaries_artifact != ''
         run: |
           mkdir build-e2e
           tar -I 'zstd' -xf e2e_binaries.tar.zst -C build-e2e

@@ -389,25 +396,25 @@ jobs:
           ninja -C build-cts -k0 $( [ -n "$CTS_TESTS_TO_BUILD" ] && echo "$CTS_TESTS_TO_BUILD" || echo "test_conformance")

       - name: Pack SYCL-CTS binaries
-        if: always() && !cancelled() && inputs.cts_testing_mode == 'build-only'
+        if: inputs.tests_selector == 'cts' && always() && !cancelled() && inputs.cts_testing_mode == 'build-only'
         run: tar -I 'zstd -9' -cf sycl_cts_bin.tar.zst -C ./build-cts/bin .

       - name: Upload SYCL-CTS binaries
-        if: always() && !cancelled() && inputs.cts_testing_mode == 'build-only'
+        if: inputs.tests_selector == 'cts' && always() && !cancelled() && inputs.cts_testing_mode == 'build-only'
         uses: actions/upload-artifact@v4
         with:
           name: sycl_cts_bin
           path: sycl_cts_bin.tar.zst
           retention-days: ${{ inputs.retention-days }}

       - name: Download SYCL-CTS binaries
-        if: inputs.sycl_cts_artifact != ''
+        if: inputs.tests_selector == 'cts' && inputs.sycl_cts_artifact != ''
         uses: actions/download-artifact@v4
         with:
           name: ${{ inputs.sycl_cts_artifact }}

       - name: Extract SYCL-CTS binaries
-        if: inputs.sycl_cts_artifact != ''
+        if: inputs.tests_selector == 'cts' && inputs.sycl_cts_artifact != ''
         run: |
           mkdir -p build-cts/bin
           tar -I 'zstd' -xf sycl_cts_bin.tar.zst -C build-cts/bin

@@ -427,7 +434,7 @@ jobs:
       # these files may differ from each other, so when there is a pre-built set of
       # tests, we need to filter it according to the filter-file.
       - name: Filter SYCL CTS test categories
-        if: inputs.sycl_cts_artifact != ''
+        if: inputs.tests_selector == 'cts' && inputs.sycl_cts_artifact != ''
         shell: bash
         run: |
           cts_exclude_filter=""

@@ -481,12 +488,40 @@ jobs:

           exit $ret
       - name: Pack E2E binaries
-        if: ${{ always() && !cancelled() && inputs.e2e_testing_mode == 'build-only'}}
+        if: inputs.tests_selector == 'e2e' && always() && !cancelled() && inputs.e2e_testing_mode == 'build-only'
         run: tar -I 'zstd -9' -cf e2e_binaries.tar.zst -C ./build-e2e .
       - name: Upload E2E binaries
-        if: ${{ always() && !cancelled() && inputs.e2e_testing_mode == 'build-only'}}
+        if: inputs.tests_selector == 'e2e' && always() && !cancelled() && inputs.e2e_testing_mode == 'build-only'
         uses: actions/upload-artifact@v4
         with:
           name: sycl_e2e_bin_${{ inputs.artifact_suffix }}
           path: e2e_binaries.tar.zst
           retention-days: ${{ inputs.retention-days }}

+      - name: Run compute-benchmarks
+        if: inputs.tests_selector == 'benchmark'
+        run: |
+          export ONEAPI_DEVICE_SELECTOR="${{ inputs.target_devices }}"
+          export CMPLR_ROOT=$PWD/toolchain
+          sycl-ls
+          ./devops/scripts/benchmarking/benchmark.sh -t '${{ inputs.runner }}' -s
+      - name: Push compute-benchmarks results
+        if: inputs.tests_selector == 'benchmark'
+        env:
+          GITHUB_TOKEN: ${{ secrets.LLVM_SYCL_BENCHMARK_TOKEN }}
+        run: |
+          # TODO -- waiting on security clearance
+
+          # Load configuration values
+          . "./devops/scripts/benchmarking/utils.sh"
+          CONFIG_FILE="./devops/scripts/benchmarking/benchmark-ci.conf"
+          load_single_config "$CONFIG_FILE" PERF_RES_PATH
+          load_single_config "$CONFIG_FILE" PERF_RES_GIT_REPO
+          load_single_config "$CONFIG_FILE" PERF_RES_BRANCH
+
+          cd "$PERF_RES_PATH"
+          git config user.name "SYCL Benchmarking Bot"
+          git config user.email "[email protected]"
+          git add .
+          git commit -m "[GHA] Upload compute-benchmarks results from ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+          git push "https://[email protected]/$PERF_RES_GIT_REPO.git" "$PERF_RES_BRANCH"
devops/scripts/benchmarking/aggregate.py (new file)
@@ -0,0 +1,126 @@
import csv
import sys
from pathlib import Path
import heapq
import statistics

import common


# Simple median calculation
class SimpleMedian:

    def __init__(self):
        self.elements = []

    def add(self, n: float):
        self.elements.append(n)

    def get_median(self) -> float:
        return statistics.median(self.elements)
[Review thread anchored on the class below]

Reviewer: I didn't look at these scripts in depth, but my initial reaction is that they seem really complicated, and it seems like it would be really difficult for anyone else to debug or extend them. Is there no prebuilt tool, either a Linux program or a GitHub Action, that we can rely on?

Author: Would more documentation help? A lot of the complexity, specifically in aggregate.py, can be removed by simply removing StreamingMedian. However, the median would then have to be calculated in a less efficient manner, and I'm not sure how that scales with a ton of historical data. Although my short-term goal is for this to run nightly, a less efficient algorithm might not be feasible if e.g. this is put into precommit eventually. As for the complexity of the general project, I'm actually not sure if this is too simple -- the Unified Runtime team has a whole proper Python program written for this here: https://github.com/oneapi-src/unified-runtime/tree/main/scripts/benchmarks. There are plans for me to integrate my solution into theirs when the UR pulldown happens, but the deadline for this workflow was unexpectedly moved up to next week: I simply cannot wait any longer.

pbalcer: If it would help, one option may be to use UR scripts in an intel/llvm workflow like this: Reach out to me if you want help.

pbalcer: Also, if there's any urgent need to run those benchmarks, you can rely on the already set-up unified-runtime infrastructure. It's already possible to run the benchmark workflow with a specific intel/llvm commit and fork. We will just need to grant access to anyone who needs to run those workflows (described here: https://github.com/oneapi-src/unified-runtime/tree/main/scripts/benchmarks#running-in-ci).

Author: Hey, thanks for reaching out @pbalcer! Currently it is possible to run this workflow manually to gather the required data, and that's what I've been relying on. With regards to merging this with the benchmarking infrastructure in UR, the scope of this workflow has changed: beyond running the benchmarks, we are also looking to instrument the benchmarks with a tracing system. That would add extra complexity on top of simply running the benchmarks, and I haven't really mulled over how a merge with the UR infrastructure would work just yet, never mind whether you are open to having this in your infrastructure in the first place. I'll reach out to you in private once I have a better picture of how we are going to proceed.

# Calculate medians incrementally using a heap: Useful for when dealing with a
# large number of samples.
#
# TODO how many samples are we going to realistically get? I had written this
# with precommit in mind, but if this only runs nightly, it would actually be
# faster to do a normal median calculation.
class StreamingMedian:

    def __init__(self):
        # Gist: we keep a maxheap of the smaller half of the samples and a
        # minheap of the larger half, and read the median off the top of the
        # maxheap (or average both tops when the halves are equal in size).
        # When a new element comes in, it is pushed onto a heap based on how
        # it compares against the current median, and the heaps are then
        # rebalanced so their sizes never differ by more than one.
        self.minheap_larger = []
        self.maxheap_smaller = []

        # Note: numbers on the maxheap should be negative, as heapq
        # is a minheap by default

    def add(self, n: float):
        if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n:
            heapq.heappush(self.maxheap_smaller, -n)
        else:
            heapq.heappush(self.minheap_larger, n)

        # Rebalance: ensure maxheap_smaller holds either as many elements as
        # minheap_larger, or exactly one more
        if len(self.maxheap_smaller) > len(self.minheap_larger) + 1:
            heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller))
        elif len(self.maxheap_smaller) < len(self.minheap_larger):
            heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger))

    def get_median(self) -> float:
        if len(self.maxheap_smaller) == len(self.minheap_larger):
            # Equal number of elements smaller and larger than the "median":
            # thus, there are two middle values, and the median becomes
            # their average.
            return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0
        else:
            # Otherwise, the median is always the top of maxheap_smaller,
            # which holds exactly one extra element
            return -self.maxheap_smaller[0]
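A quick way to sanity-check the two-heap scheme above (an illustrative snippet for review purposes, not part of this file): fed the same samples, the streaming median must agree with statistics.median, while each add() costs only O(log n) instead of re-sorting on every query.

import random

data = [random.uniform(0, 100) for _ in range(999)]
sm = StreamingMedian()
for x in data:
    sm.add(x)  # O(log n) per insertion
# statistics is already imported at the top of this file
assert abs(sm.get_median() - statistics.median(data)) < 1e-9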
def aggregate_median(runner: str, benchmark: str, cutoff: str):

    # Get all .csv benchmark samples for the requested runner + benchmark
    # that are timestamped after the cutoff:
    def csv_samples() -> list[Path]:
        # TODO check that the path below is a valid directory
        cache_dir = Path(f"{common.PERF_RES_PATH}/{runner}/{benchmark}")
        # TODO check for time range; What time range do I want?
        return [
            f
            for f in cache_dir.glob(f"{benchmark}-*_*.csv")
            if f.is_file()
            and common.valid_timestamp(str(f)[-19:-4])
            and str(f)[-19:-4] > cutoff
        ]
    # Calculate the median of every desired metric:
    aggregate_s = dict()
    for sample_path in csv_samples():
        with open(sample_path, "r") as sample_file:
            for s in csv.DictReader(sample_file):
                test_case = s["TestCase"]
                # Construct an entry in aggregate_s for the test case if it
                # does not exist already:
                if test_case not in aggregate_s:
                    aggregate_s[test_case] = {
                        metric: SimpleMedian() for metric in common.metrics_variance
                    }

                for metric in common.metrics_variance:
                    aggregate_s[test_case][metric].add(common.sanitize(s[metric]))

    # Write the calculated medians (aggregate_s) as a new .csv file:
    with open(
        f"{common.PERF_RES_PATH}/{runner}/{benchmark}/{benchmark}-median.csv", "w"
    ) as output_csv:
        writer = csv.DictWriter(
            output_csv, fieldnames=["TestCase", *common.metrics_variance.keys()]
        )
        writer.writeheader()
        for test_case in aggregate_s:
            writer.writerow(
                {"TestCase": test_case}
                | {
                    metric: aggregate_s[test_case][metric].get_median()
                    for metric in common.metrics_variance
                }
            )

if __name__ == "__main__":
    if len(sys.argv) < 4:
        print(
            f"Usage: {sys.argv[0]} <runner name> <test case name> <cutoff date YYYYMMDD_HHMMSS>"
        )
        exit(1)
    if not common.valid_timestamp(sys.argv[3]):
        print(sys.argv)
        print("Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.")
        exit(1)
    common.load_configs()
    # <runner>, <test case>, <cutoff>
    aggregate_median(sys.argv[1], sys.argv[2], sys.argv[3])
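aggregate.py leans on a common module that does not appear in this diff. Judging purely from the call sites above (valid_timestamp, sanitize, metrics_variance, PERF_RES_PATH, load_configs), it might look roughly like the sketch below; every definition here, including the path, metric names, and tolerances, is an assumption rather than the module's real contents.

# Hypothetical common.py, inferred from how aggregate.py calls it; the real
# module under devops/scripts/benchmarking may differ substantially.
import re

PERF_RES_PATH = "./llvm-ci-perf-results"  # placeholder; really set by load_configs()

# Metric column -> allowed variance; the keys double as the CSV columns that
# aggregate.py aggregates. The names and values here are invented.
metrics_variance = {"Median": 0.1, "StdDev": 0.2}

def valid_timestamp(ts: str) -> bool:
    # Matches the YYYYMMDD_HHMMSS format embedded in result file names.
    return re.fullmatch(r"\d{8}_\d{6}", ts) is not None

def sanitize(value: str) -> float:
    # Strip units/whitespace so CSV cells can be treated as numbers.
    return float(re.sub(r"[^0-9.eE+-]", "", value))

def load_configs():
    # In the real module this would read benchmark-ci.conf and populate
    # globals such as PERF_RES_PATH.
    pass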
[Review thread on the "Load benchmarking configuration" step]

Reviewer: Am I correct to assume that all these lines do is read variables from the .conf file and add them to GITHUB_ENV? If so, can we have a function in utils.sh (say, read_conf_and_populate_github_env) to encapsulate all this? The same goes for the TIMESTAMP_FORMAT and AVERAGE_CUTOFF_RANGE env vars too.

Author: That sounds like it'd make life easier; I can add a smaller one next to load_all_configs for GITHUB_ENV only.