Handle performance reports #9

Merged 1 commit on Dec 19, 2024
4 changes: 3 additions & 1 deletion .github/actions/collect_data/action.yml
@@ -52,7 +52,8 @@ runs:
      run: |
        python3 ${GITHUB_ACTION_PATH}/src/generate_data.py --run_id ${{ inputs.run_id }}
        # Workaround: Copy file to avoid GH upload filename limitations
-       cp pipeline_${{ inputs.run_id }}*.json pipelinecopy_${{ inputs.run_id }}.json
+       cp pipeline_*.json pipelinecopy_${{ inputs.run_id }}.json
+       cp benchmark_*.json benchmarkcopy_${{ inputs.run_id }}.json

    - name: Create key file
      if: ${{ inputs.ssh-private-key != '' }}
@@ -74,6 +75,7 @@ runs:
        if-no-files-found: warn
        path: |
+         benchmarkcopy_${{ inputs.run_id }}.json
          pipelinecopy_${{ inputs.run_id }}.json
          generated/cicd/${{ inputs.run_id }}/workflow.json
          generated/cicd/${{ inputs.run_id }}/workflow_jobs.json
2 changes: 0 additions & 2 deletions .github/actions/collect_data/sftp-csv.txt

This file was deleted.

1 change: 1 addition & 0 deletions .github/actions/collect_data/sftp-json.txt
@@ -1,2 +1,3 @@
put -r pipeline_*.json
+put -r benchmark_*.json
ls -hal
117 changes: 117 additions & 0 deletions .github/actions/collect_data/src/benchmark.py
@@ -0,0 +1,117 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import os
import pathlib
import json
from loguru import logger
from pydantic_models import BenchmarkMeasurement, CompleteBenchmarkRun


def create_json_from_report(pipeline, workflow_outputs_dir):
    results = []
    reports = _get_model_reports(workflow_outputs_dir, pipeline.github_pipeline_id)

    for job_id, report_paths in reports.items():
        for report_path in report_paths:
            with open(report_path) as report_file:
                report_data = json.load(report_file)
                results.append(_map_benchmark_data(pipeline, job_id, report_data))
                logger.info(f"Created benchmark data for job: {job_id} model: {report_data['model']}")
    return results


def get_benchmark_filename(report):
    ts = report.run_start_ts.strftime("%Y-%m-%dT%H:%M:%S%z")
    return f"benchmark_{report.github_job_id}_{ts}.json"


def _get_model_reports(workflow_outputs_dir, workflow_run_id: int):
    """
    Search for perf reports in the artifacts directory and return a mapping
    of job IDs to the paths of the perf reports.
    The report filename is expected to be in the format `<report_name>_<job_id>.json`.
    """
    job_paths_map = {}
    artifacts_dir = f"{workflow_outputs_dir}/{workflow_run_id}/artifacts"

    logger.info(f"Searching for perf reports in {artifacts_dir}")

    for root, _, files in os.walk(artifacts_dir):
        for file in files:
            if file.endswith(".json"):
                logger.debug(f"Found perf report {file}")
                file_path = pathlib.Path(root) / file
                filename = file_path.name
                try:
                    job_id = int(filename.split(".")[-2].split("_")[-1])
                except ValueError:
                    logger.warning(f"Could not extract job ID from {filename}")
                    continue
                report_paths = job_paths_map.get(job_id, [])
                report_paths.append(file_path)
                job_paths_map[job_id] = report_paths
    return job_paths_map


def _map_benchmark_data(pipeline, job_id, report_data):
    # Get job information from the pipeline.
    job = next(job for job in pipeline.jobs if job.github_job_id == job_id)

    return CompleteBenchmarkRun(
        run_start_ts=pipeline.pipeline_start_ts,
        run_end_ts=pipeline.pipeline_end_ts,
        run_type="",
        git_repo_name=None,
        git_commit_hash=pipeline.git_commit_hash,
        git_commit_ts=None,
        git_branch_name=pipeline.git_branch_name,
        github_pipeline_id=pipeline.github_pipeline_id,
        github_pipeline_link=pipeline.github_pipeline_link,
        github_job_id=job.github_job_id,
        user_name=pipeline.git_author,
        docker_image=job.docker_image,
        device_hostname=job.host_name,
        device_ip=None,
        device_info=None,
        ml_model_name=report_data["model"],
        ml_model_type=None,
        num_layers=None,
        batch_size=report_data.get("batch_size", None),
        config_params={},
        precision=None,
        dataset_name=None,
        profiler_name=None,
        input_sequence_length=None,
        output_sequence_length=None,
        image_dimension=None,
        perf_analysis=None,
        training=report_data.get("training", False),
        measurements=[
            BenchmarkMeasurement(
                step_start_ts=job.job_start_ts,
                step_end_ts=job.job_end_ts,
                iteration=0,
                step_name="",
                step_warm_up_num_iterations=None,
                name="samples_per_sec",
                value=report_data["samples_per_sec"],
                target=None,
                device_power=None,
                device_temperature=None,
            ),
            BenchmarkMeasurement(
                step_start_ts=job.job_start_ts,
                step_end_ts=job.job_end_ts,
                iteration=0,
                step_name="",
                step_warm_up_num_iterations=None,
                name="total_time",
                value=report_data["total_time"],
                target=None,
                device_power=None,
                device_temperature=None,
            ),
        ],
    )
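Note on the filename convention: `_get_model_reports` relies on reports being named `<report_name>_<job_id>.json`. A minimal, self-contained sketch of that parsing; the first sample filename comes from the test fixture below, while `notes.json` is a made-up counterexample:

# Sketch of the job-ID extraction used in _get_model_reports above.
# "notes.json" is a hypothetical file with no trailing _<job_id>.
samples = [
    "forge-benchmark-e2e-mnist_33854708624.json",
    "notes.json",
]

for filename in samples:
    try:
        job_id = int(filename.split(".")[-2].split("_")[-1])
        print(f"{filename} -> job {job_id}")
    except ValueError:
        print(f"{filename} -> no job ID, skipped with a warning")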
23 changes: 22 additions & 1 deletion .github/actions/collect_data/src/generate_data.py
@@ -2,10 +2,12 @@
#
# SPDX-License-Identifier: Apache-2.0

import os
import argparse
from loguru import logger
from utils import get_github_runner_environment
from cicd import create_cicd_json_for_data_analysis, get_cicd_json_filename
from benchmark import create_json_from_report, get_benchmark_filename


def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_outputs_dir):
@@ -27,6 +29,20 @@ def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_outputs_dir):
    return pipeline, report_filename


def create_benchmark_jsons(pipeline, workflow_outputs_dir):
    results = []
    reports = create_json_from_report(pipeline, workflow_outputs_dir)
    for report in reports:
        # e.g. benchmark_<github_job_id>_<run_start_ts>.json
        report_filename = get_benchmark_filename(report)
        logger.info(f"Writing benchmark JSON to {report_filename}")
        with open(report_filename, "w") as f:
            f.write(report.model_dump_json())
        results.append((report, report_filename))
    return results


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
@@ -41,8 +57,13 @@ def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_outputs_dir):
    args = parser.parse_args()

    logger.info(f"Creating pipeline JSON for workflow run ID {args.run_id}")
-   create_pipeline_json(
+   pipeline, _ = create_pipeline_json(
        workflow_filename=f"{args.output_dir}/{args.run_id}/workflow.json",
        jobs_filename=f"{args.output_dir}/{args.run_id}/workflow_jobs.json",
        workflow_outputs_dir=args.output_dir,
    )

    create_benchmark_jsons(
        pipeline=pipeline,
        workflow_outputs_dir=args.output_dir,
    )
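Since `create_benchmark_jsons` serializes with `model_dump_json`, the emitted files can be round-tripped through the schema as a sanity check. A minimal sketch, assuming pydantic v2 (which the code above already uses) and a hypothetical output filename:

from pydantic_models import CompleteBenchmarkRun

# Hypothetical filename; real names come from get_benchmark_filename().
with open("benchmark_12345_2024-12-19T12:00:00.json") as f:
    run = CompleteBenchmarkRun.model_validate_json(f.read())

assert run.measurements, "expected at least one measurement"
print(run.ml_model_name, [m.name for m in run.measurements])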
114 changes: 114 additions & 0 deletions .github/actions/collect_data/src/pydantic_models.py
@@ -114,3 +114,117 @@ class Pipeline(BaseModel):
    git_author: str = Field(description="Author of the Git commit.")
    orchestrator: Optional[str] = Field(None, description="CI/CD pipeline orchestration platform.")
    jobs: List[Job] = []


class BenchmarkMeasurement(BaseModel):
    """
    Contains measurements for each benchmark run, iteration, and step.

    A run can have multiple iterations, each iteration can have multiple steps,
    and each step can execute multiple measurements.
    """

    step_start_ts: datetime = Field(description="Timestamp with time zone when the step started.")
    step_end_ts: datetime = Field(description="Timestamp with time zone when the step ended.")
    iteration: int = Field(
        description="A benchmark run can comprise a loop that repeats the same sequence "
        "of steps and measurements with the same parameters. "
        "This integer is the repetition number."
    )
    step_name: str = Field(description="Name of the benchmark step within the run.")
    step_warm_up_num_iterations: Optional[int] = Field(
        None, description="Number of iterations for device warm-up at each step."
    )
    name: str = Field(
        description="Name of the measurement performed, e.g. tokens_per_sec_per_user, "
        "tokens_per_sec, images_per_sec, pearson_correlation, top1/top5 ratios."
    )
    value: float = Field(description="Measured value.")
    target: Optional[float] = Field(None, description="Target value.")
    device_power: Optional[float] = Field(
        None, description="Average power consumption in Watts during the benchmark step."
    )
    device_temperature: Optional[float] = Field(
        None, description="Average temperature of the device during the benchmark."
    )

class CompleteBenchmarkRun(BaseModel):
    """
    Contains information about each execution of an AI model benchmark (a
    benchmark run), composed of steps, each of which performs a set of
    measurements. The sequence of steps in a run can be iterated in a loop.
    """

    run_start_ts: datetime = Field(description="Timestamp with time zone when the benchmark run started.")
    run_end_ts: datetime = Field(description="Timestamp with time zone when the benchmark run ended.")
    run_type: str = Field(description="Description of the benchmark run, e.g. a100_fp16_experiments.")
    git_repo_name: Optional[str] = Field(
        None, description="Name of the Git repository containing the code that executes the benchmark."
    )
    git_commit_hash: Optional[str] = Field(
        None, description="Git commit hash of the code used to run the benchmark (software version info)."
    )
    git_commit_ts: Optional[datetime] = Field(None, description="Timestamp with time zone of the Git commit.")
    git_branch_name: Optional[str] = Field(
        None, description="Name of the Git branch associated with the benchmark run."
    )
    github_pipeline_id: Optional[int] = Field(
        None, description="Unique identifier for the pipeline record from GitHub Actions."
    )
    github_pipeline_link: Optional[str] = Field(
        None, description="Link to the GitHub job run associated with the benchmark run."
    )
    github_job_id: Optional[int] = Field(None, description="Unique GitHub Actions CI job ID.")
    user_name: Optional[str] = Field(None, description="Name of the person who executed the benchmark run.")
    docker_image: Optional[str] = Field(
        None,
        description="Name or ID of the Docker image used for benchmarking "
        "(software version info), e.g. trt-llm-v080.",
    )
    device_hostname: str = Field(description="Host name of the device on which the benchmark is performed.")
    device_ip: Optional[str] = Field(None, description="Host IP address.")
    device_info: Optional[dict] = Field(
        None,
        description="Device information as JSON, such as manufacturer, card_type, "
        "dram_size, num_cores, price, bus_interface, optimal_clock_speed.",
    )
    ml_model_name: str = Field(description="Name of the benchmarked neural network model.")
    ml_model_type: Optional[str] = Field(
        None, description="Model type, such as text generation, classification, question answering, etc."
    )
    num_layers: Optional[int] = Field(None, description="Number of layers of the model.")
    batch_size: Optional[int] = Field(None, description="Batch size.")
    config_params: Optional[dict] = Field(None, description="Additional training/inference parameters.")
    precision: Optional[str] = Field(
        None, description="Numerical precision, such as bfp8, fp16, or a mix such as fp16_act_bfp8_weights."
    )
    dataset_name: Optional[str] = Field(None, description="Name of the dataset used for the benchmark.")
    profiler_name: Optional[str] = Field(None, description="Name of the profiler used to time the benchmark.")
    input_sequence_length: Optional[int] = Field(
        None, description="Length of the sequence used as input to the model, applicable to sequence models."
    )
    output_sequence_length: Optional[int] = Field(
        None, description="Length of the sequence produced as output by the model, applicable to sequence models."
    )
    image_dimension: Optional[str] = Field(
        None, description="Dimension of the image, e.g. 224x224x3, applicable to computer vision models."
    )
    perf_analysis: Optional[bool] = Field(
        None, description="Whether the model was run in perf analysis (kernel/operation execution) mode."
    )
    training: Optional[bool] = Field(
        None, description="True if the benchmark is for training, False for inference."
    )
    measurements: List[BenchmarkMeasurement] = Field(description="List of benchmark measurements.")
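To make the required/optional split concrete: only the non-Optional fields must be supplied, so a minimal `BenchmarkMeasurement` looks like the sketch below (the timestamps and values are made up):

from datetime import datetime, timezone

from pydantic_models import BenchmarkMeasurement

measurement = BenchmarkMeasurement(
    step_start_ts=datetime(2024, 12, 19, 12, 0, tzinfo=timezone.utc),
    step_end_ts=datetime(2024, 12, 19, 12, 1, tzinfo=timezone.utc),
    iteration=0,
    step_name="e2e",
    name="samples_per_sec",
    value=0.24,
)
# Optional fields (target, device_power, device_temperature, ...) default to None.
print(measurement.model_dump_json())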
@@ -0,0 +1,22 @@
{
  "model": "MNIST Linear",
  "config": "",
  "date": "24-12-03",
  "hash": "c47f41a",
  "machine_name": "8cb186cee6d2",
  "samples_per_sec": 0.23979727678872859,
  "total_samples": 1,
  "total_time": 4.170189142227173,
  "training": false,
  "batch_size": 1,
  "output": "forge-benchmark-e2e-mnist_33854708624.json",
  "arch": "",
  "chips": "",
  "device": "",
  "galaxy": "",
  "perf_analysis": "",
  "load_tti": "",
  "save_tti": "",
  "task": "",
  "evaluation_score": ""
}
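Tying this fixture back to the code above: assuming the file on disk is named as in its `output` field, `_get_model_reports` takes the job ID from the `_33854708624` suffix, and `_map_benchmark_data` consumes only a handful of the keys. A small sketch of that subset (values copied from the fixture):

report_data = {
    "model": "MNIST Linear",
    "samples_per_sec": 0.23979727678872859,
    "total_time": 4.170189142227173,
    "training": False,
    "batch_size": 1,
}

# _map_benchmark_data emits one BenchmarkMeasurement per entry here.
for name in ("samples_per_sec", "total_time"):
    print(name, report_data[name])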