Merge branch 'nod-ai:main' into fix_worker_idx
IanNod authored Jan 31, 2025
2 parents 0257e7e + 7671d57 commit fd8ea7f
Showing 37 changed files with 619 additions and 545 deletions.
23 changes: 4 additions & 19 deletions .github/workflows/pkgci_shark_ai.yml
@@ -26,7 +26,7 @@ jobs:
matrix:
version: [3.11]
fail-fast: false
runs-on: mi300x-3
runs-on: azure-cpubuilder-linux-scale
# runs-on: ubuntu-latest # everything else works but this throws an "out of resources" during model loading
# TODO: make a copy of this that runs on standard runners with tiny llama instead of a 8b model
defaults:
@@ -44,10 +44,6 @@ jobs:
with:
python-version: ${{matrix.version}}

- name: Set Python version without dot
run: |
echo "PY_VERSION_NO_DOT=$(echo ${{ matrix.version }} | tr -d '.')" >> $GITHUB_ENV
- name: Setup UV caching
run: |
CACHE_DIR="${GITHUB_WORKSPACE}/.uv-cache"
@@ -60,23 +56,12 @@ jobs:
path: .uv-cache
key: ${{ runner.os }}-uv-py${{ matrix.version }}-${{ hashFiles('requirements-iree-pinned.txt', 'pytorch-cpu-requirements.txt', 'sharktank/requirements.txt', 'sharktank/requirements-tests.txt', 'shortfin/requirements-tests.txt') }}

- name: Download sharktank artifacts
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: snapshot-sharktank-linux-x86_64-cp${{ env.PY_VERSION_NO_DOT }}-cp${{ env.PY_VERSION_NO_DOT }}
path: ${{ env.PACKAGE_DOWNLOAD_DIR }}

- name: Download shortfin artifacts
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: snapshot-shortfin-linux-x86_64-cp${{ env.PY_VERSION_NO_DOT }}-cp${{ env.PY_VERSION_NO_DOT }}
path: ${{ env.PACKAGE_DOWNLOAD_DIR }}

- name: Download shark-ai artifacts
- name: Download package artifacts
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: snapshot-shark-ai-linux-x86_64-cp${{ env.PY_VERSION_NO_DOT }}-cp${{ env.PY_VERSION_NO_DOT }}
pattern: snapshot-*-linux-x86_64-*
path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
merge-multiple: true

- name: Setup venv
run: |
27 changes: 25 additions & 2 deletions .github/workflows/update_iree_requirement_pins.yml
@@ -25,6 +25,25 @@ env:
GIT_BRANCH_NAME: ${{ inputs.branch-name || 'integrates/iree' }}

jobs:
determine-duty-engineer:
runs-on: ubuntu-24.04
outputs:
duty-engineer: ${{ steps.set-duty.outputs.engineer }}
steps:
- name: Determine IREE bump duty engineer
id: set-duty
run: |
# rotation schedule (GitHub usernames); add yourself into this schedule to get notified when it's your turn.
ENGINEERS=('renxida')
# current week number (1-53)
WEEK=$(date +%V)
INDEX=$(( WEEK % ${#ENGINEERS[@]} ))
DUTY_ENGINEER=${ENGINEERS[$INDEX]}
echo "engineer=$DUTY_ENGINEER" >> $GITHUB_OUTPUT
echo "This week's IREE bump duty engineer: $DUTY_ENGINEER"
check-for-existing-branch:
if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
runs-on: ubuntu-24.04
@@ -43,8 +62,8 @@ jobs:
fi
update-iree:
needs: check-for-existing-branch
runs-on: ubuntu-22.04
needs: [check-for-existing-branch, determine-duty-engineer]
runs-on: ubuntu-24.04
if: ${{ needs.check-for-existing-branch.outputs.branch-exists == 0 }}

steps:
@@ -87,10 +106,14 @@ jobs:
body: |
Diff: https://github.com/iree-org/iree/compare/iree-${{ env.CURRENT_IREE_BASE_COMPILER_VERSION }}...iree-${{ env.LATEST_IREE_BASE_COMPILER_VERSION }}
IREE bump duty engineer this week: @${{ needs.determine-duty-engineer.outputs.duty-engineer }}
Auto-generated by GitHub Actions using [`.github/workflows/update_iree_requirement_pins.yml`](https://github.com/${{ github.repository }}/blob/main/.github/workflows/update_iree_requirement_pins.yml).
commit-message: "Bump IREE to ${{ env.LATEST_IREE_BASE_COMPILER_VERSION }}."
assignees: ${{ needs.determine-duty-engineer.outputs.duty-engineer }}

- name: Write summary
if: ${{ steps.cpr.outputs.pull-request-number }}
run: |
echo "Pull Request URL: ${{ steps.cpr.outputs.pull-request-url }}" >> ${GITHUB_STEP_SUMMARY}
echo "IREE bump duty engineer: @${{ needs.determine-duty-engineer.outputs.duty-engineer }}" >> ${GITHUB_STEP_SUMMARY}
32 changes: 17 additions & 15 deletions app_tests/benchmark_tests/llm/sglang_benchmarks/conftest.py
@@ -14,13 +14,12 @@
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
)
from integration_tests.llm.utils import (
compile_model,
end_log_group,
export_paged_llm_v1,
download_with_hf_datasets,
start_log_group,
from integration_tests.llm.model_management import (
ModelConfig,
ModelProcessor,
ModelSource,
)
from integration_tests.llm.logging_utils import start_log_group, end_log_group

logger = logging.getLogger(__name__)

@@ -47,16 +46,19 @@ def pre_process_model(request, tmp_path_factory):
settings = request.param["settings"]
batch_sizes = request.param["batch_sizes"]

mlir_path = tmp_dir / "model.mlir"
config_path = tmp_dir / "config.json"
vmfb_path = tmp_dir / "model.vmfb"

model_path = tmp_dir / model_param_file_name
download_with_hf_datasets(tmp_dir, model_name)

export_paged_llm_v1(mlir_path, config_path, model_path, batch_sizes)
# Configure model
config = ModelConfig(
model_file=model_param_file_name,
tokenizer_id=model_name, # Using model_name as tokenizer_id, adjust if needed
batch_sizes=batch_sizes,
device_settings=settings,
source=ModelSource.HUGGINGFACE,
repo_id=model_name, # Using model_name as repo_id, adjust if needed
)

compile_model(mlir_path, vmfb_path, settings)
# Process model through all stages
processor = ModelProcessor(tmp_dir)
artifacts = processor.process_model(config)

logger.info("Model artifacts setup successfully" + end_log_group())
MODEL_DIR_CACHE[param_key] = tmp_dir
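Pulled out of the fixture above as a standalone sketch, the new model-management flow looks roughly like this; the file and repo names are placeholders, and anything about `ModelConfig` or `process_model` beyond what this diff shows is an assumption:

```python
from pathlib import Path

from integration_tests.llm.model_management import (
    ModelConfig,
    ModelProcessor,
    ModelSource,
)

tmp_dir = Path("/tmp/model_artifacts")  # placeholder; the tests use a pytest tmp_path

config = ModelConfig(
    model_file="model.gguf",         # placeholder parameter-file name
    tokenizer_id="org/model-repo",   # placeholder Hugging Face repo id
    batch_sizes=(1, 4),
    device_settings=None,            # supply the test's device settings here
    source=ModelSource.HUGGINGFACE,
    repo_id="org/model-repo",        # placeholder Hugging Face repo id
)

# Runs all stages (download, export, compile) and returns the resulting artifacts.
processor = ModelProcessor(tmp_dir)
artifacts = processor.process_model(config)
```

This single call replaces the separate `download_with_hf_datasets`, `export_paged_llm_v1`, and `compile_model` helpers previously imported from `integration_tests.llm.utils`.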
Additional changed file (filename not shown in this view):
@@ -13,8 +13,12 @@
from sglang import bench_serving

from .utils import SGLangBenchmarkArgs, log_jsonl_result

from integration_tests.llm.utils import download_tokenizer, wait_for_server
from integration_tests.llm.model_management import (
ModelConfig,
ModelProcessor,
ModelSource,
)
from integration_tests.llm.server_management import ServerInstance

logger = logging.getLogger(__name__)

@@ -26,17 +30,31 @@
def test_sglang_benchmark(request_rate, tokenizer_id, sglang_args, tmp_path_factory):
tmp_dir = tmp_path_factory.mktemp("sglang_benchmark_test")

# Download tokenizer for llama3_8B_fp16
download_tokenizer(tmp_dir, tokenizer_id)
# Download tokenizer using ModelProcessor
config = ModelConfig(
model_file="tokenizer.json", # Only need tokenizer
tokenizer_id=tokenizer_id,
batch_sizes=(1,), # Not relevant for tokenizer only
device_settings=None, # Not relevant for tokenizer only
source=ModelSource.HUGGINGFACE,
repo_id=tokenizer_id,
)
processor = ModelProcessor(tmp_dir)
artifacts = processor.process_model(config)

logger.info("Beginning SGLang benchmark test...")

port = sglang_args
base_url = f"http://localhost:{port}"

# Setting a high timeout gives enough time for downloading model artifacts
# and starting up server... Takes a little longer than shortfin.
wait_for_server(base_url, timeout=600)
# Wait for server using ServerInstance's method
server = ServerInstance(
None
) # We don't need config since we're just using wait_for_ready
server.port = int(port) # Set port manually since we didn't start the server
server.wait_for_ready(
timeout=600
) # High timeout for model artifacts download and server startup

benchmark_args = SGLangBenchmarkArgs(
backend="sglang",
Additional changed file (filename not shown in this view):
@@ -19,12 +19,9 @@
log_jsonl_result,
)

from integration_tests.llm.utils import (
end_log_group,
find_available_port,
start_llm_server,
start_log_group,
)
from integration_tests.llm.logging_utils import end_log_group, start_log_group
from integration_tests.llm.server_management import ServerConfig, ServerInstance
from integration_tests.llm.model_management import ModelArtifacts

logger = logging.getLogger(__name__)

@@ -83,20 +80,24 @@ def test_shortfin_benchmark(
model_path = tmp_dir / model_param_file_name

# Start shortfin llm server
server_process, port = start_llm_server(
tokenizer_path,
config_path,
vmfb_path,
model_path,
device_settings,
timeout=30,
server_config = ServerConfig(
artifacts=ModelArtifacts(
weights_path=model_path,
tokenizer_path=tokenizer_path,
mlir_path=tmp_dir / "model.mlir",
vmfb_path=vmfb_path,
config_path=config_path,
),
device_settings=device_settings,
)
server = ServerInstance(server_config)
server.start()

# Run and collect SGLang Serving Benchmark
benchmark_args = SGLangBenchmarkArgs(
backend="shortfin",
num_prompt=10,
base_url=f"http://localhost:{port}",
base_url=f"http://localhost:{server.port}",
tokenizer=tmp_dir,
request_rate=request_rate,
)
@@ -130,5 +131,4 @@ def test_shortfin_benchmark(
except Exception as e:
logger.error(e)

server_process.terminate()
server_process.wait()
server.stop()
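The server lifecycle introduced here, shown as a minimal standalone sketch; the artifact paths are placeholders, and the internals of `ServerInstance.start()` (beyond it exposing `server.port`) are assumed from the diff:

```python
from pathlib import Path

from integration_tests.llm.model_management import ModelArtifacts
from integration_tests.llm.server_management import ServerConfig, ServerInstance

tmp_dir = Path("/tmp/shortfin_benchmark")  # placeholder working directory

server_config = ServerConfig(
    artifacts=ModelArtifacts(
        weights_path=tmp_dir / "model.gguf",       # placeholder weights file
        tokenizer_path=tmp_dir / "tokenizer.json",
        mlir_path=tmp_dir / "model.mlir",
        vmfb_path=tmp_dir / "model.vmfb",
        config_path=tmp_dir / "config.json",
    ),
    device_settings=None,  # supply the test's device settings here
)

server = ServerInstance(server_config)
server.start()  # assumed to pick a free port, exposed as server.port
try:
    base_url = f"http://localhost:{server.port}"
    # ... run the benchmark against base_url ...
finally:
    server.stop()
```

In the test itself, `server.stop()` replaces the old `server_process.terminate()` / `server_process.wait()` pair.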
21 changes: 21 additions & 0 deletions app_tests/integration_tests/llm/logging_utils.py
@@ -0,0 +1,21 @@
# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os


def start_log_group(headline):
"""Start a collapsible log group in GitHub Actions."""
if os.environ.get("GITHUB_ACTIONS") == "true":
return f"\n::group::{headline}"
return ""


def end_log_group():
"""End a collapsible log group in GitHub Actions."""
if os.environ.get("GITHUB_ACTIONS") == "true":
return "\n::endgroup::"
return ""