diff --git a/.github/actions/collect_data/action.yml b/.github/actions/collect_data/action.yml index 41af4d7..ef783de 100644 --- a/.github/actions/collect_data/action.yml +++ b/.github/actions/collect_data/action.yml @@ -52,7 +52,8 @@ runs: run: | python3 ${GITHUB_ACTION_PATH}/src/generate_data.py --run_id ${{ inputs.run_id }} # Workaround: Copy file to avoid GH upload filename limitations - cp pipeline_${{ inputs.run_id }}*.json pipelinecopy_${{ inputs.run_id }}.json + cp pipeline_*.json pipelinecopy_${{ inputs.run_id }}.json + cp benchmark_*.json benchmarkcopy_${{ inputs.run_id }}.json - name: Create key file if: ${{ inputs.ssh-private-key != '' }} @@ -74,6 +75,7 @@ runs: path: | if-no-files-found: warn path: | + benchmarkcopy_${{ inputs.run_id }}.json pipelinecopy_${{ inputs.run_id }}.json generated/cicd/${{ inputs.run_id }}/workflow.json generated/cicd/${{ inputs.run_id }}/workflow_jobs.json diff --git a/.github/actions/collect_data/sftp-csv.txt b/.github/actions/collect_data/sftp-csv.txt deleted file mode 100644 index 90e9c53..0000000 --- a/.github/actions/collect_data/sftp-csv.txt +++ /dev/null @@ -1,2 +0,0 @@ -put -r generated/benchmark_data/*.csv -ls -hal diff --git a/.github/actions/collect_data/sftp-json.txt b/.github/actions/collect_data/sftp-json.txt index 8bb0486..4c415f3 100644 --- a/.github/actions/collect_data/sftp-json.txt +++ b/.github/actions/collect_data/sftp-json.txt @@ -1,2 +1,3 @@ put -r pipeline_*.json +put -r benchmark_*.json ls -hal diff --git a/.github/actions/collect_data/src/benchmark.py b/.github/actions/collect_data/src/benchmark.py new file mode 100644 index 0000000..02d6e22 --- /dev/null +++ b/.github/actions/collect_data/src/benchmark.py @@ -0,0 +1,117 @@ +# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +# +# SPDX-License-Identifier: Apache-2.0 +import os +import pathlib +import json +from loguru import logger +from pydantic_models import BenchmarkMeasurement, CompleteBenchmarkRun + + +def create_json_from_report(pipeline, 
workflow_outputs_dir): + results = [] + reports = _get_model_reports(workflow_outputs_dir, pipeline.github_pipeline_id) + + for job_id, report_paths in reports.items(): + for report_path in report_paths: + with open(report_path) as report_file: + report_data = json.load(report_file) + results.append(_map_benchmark_data(pipeline, job_id, report_data)) + logger.info(f"Created benchmark data for job: {job_id} model: {report_data['model']}") + return results + + +def get_benchmark_filename(report): + ts = report.run_start_ts.strftime("%Y-%m-%dT%H:%M:%S%z") + return f"benchmark_{report.github_job_id}_{ts}.json" + + +def _get_model_reports(workflow_outputs_dir, workflow_run_id: int): + """ + This function searches for perf reports in the artifacts directory + and returns a mapping of job IDs to the paths of the perf reports. + We expect that report filename is in format `<report_name>_<job_id>.json`. + """ + job_paths_map = {} + artifacts_dir = f"{workflow_outputs_dir}/{workflow_run_id}/artifacts" + + logger.info(f"Searching for perf reports in {artifacts_dir}") + + for root, _, files in os.walk(artifacts_dir): + for file in files: + if file.endswith(".json"): + logger.debug(f"Found perf report {file}") + file_path = pathlib.Path(root) / file + filename = file_path.name + try: + job_id = int(filename.split(".")[-2].split("_")[-1]) + except ValueError: + logger.warning(f"Could not extract job ID from {filename}") + continue + report_paths = job_paths_map.get(job_id, []) + report_paths.append(file_path) + job_paths_map[job_id] = report_paths + return job_paths_map + + +def _map_benchmark_data(pipeline, job_id, report_data): + + # get job information from pipeline + job = next(job for job in pipeline.jobs if job.github_job_id == job_id) + + return CompleteBenchmarkRun( + run_start_ts=pipeline.pipeline_start_ts, + run_end_ts=pipeline.pipeline_end_ts, + run_type="", + git_repo_name=None, + git_commit_hash=pipeline.git_commit_hash, + git_commit_ts=None, + git_branch_name=pipeline.git_branch_name,
+ github_pipeline_id=pipeline.github_pipeline_id, + github_pipeline_link=pipeline.github_pipeline_link, + github_job_id=job.github_job_id, + user_name=pipeline.git_author, + docker_image=job.docker_image, + device_hostname=job.host_name, + device_ip=None, + device_info=None, + ml_model_name=report_data["model"], + ml_model_type=None, + num_layers=None, + batch_size=report_data.get("batch_size", None), + config_params={}, + precision=None, + dataset_name=None, + profiler_name=None, + input_sequence_length=None, + output_sequence_length=None, + image_dimension=None, + perf_analysis=None, + training=report_data.get("training", False), + measurements=[ + BenchmarkMeasurement( + step_start_ts=job.job_start_ts, + step_end_ts=job.job_end_ts, + iteration=0, + step_name="", + step_warm_up_num_iterations=None, + name="samples_per_sec", + value=report_data["samples_per_sec"], + target=None, + device_power=None, + device_temperature=None, + ), + BenchmarkMeasurement( + step_start_ts=job.job_start_ts, + step_end_ts=job.job_end_ts, + iteration=0, + step_name="", + step_warm_up_num_iterations=None, + name="total_time", + value=report_data["total_time"], + target=None, + device_power=None, + device_temperature=None, + ), + ], + ) diff --git a/.github/actions/collect_data/src/generate_data.py b/.github/actions/collect_data/src/generate_data.py index 339b428..a0146c1 100644 --- a/.github/actions/collect_data/src/generate_data.py +++ b/.github/actions/collect_data/src/generate_data.py @@ -2,10 +2,12 @@ # # SPDX-License-Identifier: Apache-2.0 +import os import argparse from loguru import logger from utils import get_github_runner_environment from cicd import create_cicd_json_for_data_analysis, get_cicd_json_filename +from benchmark import create_json_from_report, get_benchmark_filename def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_outputs_dir): @@ -27,6 +29,20 @@ def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_ou return 
pipeline, report_filename +def create_benchmark_jsons(pipeline, workflow_outputs_dir): + results = [] + reports = create_json_from_report(pipeline, workflow_outputs_dir) + for report in reports: + report_filename = get_benchmark_filename( + report + ) # f"benchmark_{report.github_job_id}_{report.run_start_ts}.json" + logger.info(f"Writing benchmark JSON to {report_filename}") + with open(report_filename, "w") as f: + f.write(report.model_dump_json()) + results.append((report, report_filename)) + return results + + if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -41,8 +57,13 @@ def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_ou args = parser.parse_args() logger.info(f"Creating pipeline JSON for workflow run ID {args.run_id}") - create_pipeline_json( + pipeline, _ = create_pipeline_json( workflow_filename=f"{args.output_dir}/{args.run_id}/workflow.json", jobs_filename=f"{args.output_dir}/{args.run_id}/workflow_jobs.json", workflow_outputs_dir=args.output_dir, ) + + create_benchmark_jsons( + pipeline=pipeline, + workflow_outputs_dir=args.output_dir, + ) diff --git a/.github/actions/collect_data/src/pydantic_models.py b/.github/actions/collect_data/src/pydantic_models.py index 30c7da5..7f8fa2b 100644 --- a/.github/actions/collect_data/src/pydantic_models.py +++ b/.github/actions/collect_data/src/pydantic_models.py @@ -114,3 +114,117 @@ class Pipeline(BaseModel): git_author: str = Field(description="Author of the Git commit.") orchestrator: Optional[str] = Field(None, description="CI/CD pipeline orchestration platform.") jobs: List[Job] = [] + + +class BenchmarkMeasurement(BaseModel): + """ + Contains measurements for each benchmark run, iteration and step. + + A run can have multiple iterations, each iteration can have multiple steps and each + step can execute multiple measurements. 
+ """ + + step_start_ts: datetime = Field(description="Timestamp with time zone when the step started.") + step_end_ts: datetime = Field(description="Timestamp with time zone when the step ended.") + iteration: int = Field( + description="A benchmark run can comprise a loop that repeats with the same " + "parameters the same sequence of steps and measurements for each. " + "This integer is the repetition number." + ) + step_name: str = Field(description="Name of the benchmark step within the run.") + step_warm_up_num_iterations: Optional[int] = Field( + None, description="Number of iterations for device warm-up at each step." + ) + name: str = Field( + description="Name of the measurement performed, e.g. tokens_per_sec_per_user, " + "tokens_per_sec, images_per_sec, pearson_correlation, " + "top1/top5 ratios." + ) + value: float = Field(description="Measured value.") + target: Optional[float] = Field(None, description="Target value.") + device_power: Optional[float] = Field( + None, + description="Average power consumption in Watts during the benchmark step.", + ) + device_temperature: Optional[float] = Field( + None, description="Average temperature of the device during the benchmark." + ) + + +class CompleteBenchmarkRun(BaseModel): + """ + Contains information about each execution of an AI model benchmark, called benchmark + run, composed of steps each of which performs a set of measurements. + + The sequence of steps in a run can be iterated in a loop. + """ + + run_start_ts: datetime = Field(description="Timestamp with time zone when the benchmark run started.") + run_end_ts: datetime = Field(description="Timestamp with time zone when the benchmark run ended.") + run_type: str = Field(description="Description of the benchmark run, e.g. 
a100_fp16_experiments.") + git_repo_name: Optional[str] = Field( + None, + description="Name of the Git repository containing the code that executes " "the benchmark.", + ) + git_commit_hash: Optional[str] = Field( + None, + description="Git commit hash of the code used to run the benchmark (software " "version info).", + ) + git_commit_ts: Optional[datetime] = Field(None, description="Timestamp with timezone of the git commit.") + git_branch_name: Optional[str] = Field( + None, description="Name of the Git branch associated with the benchmark run." + ) + github_pipeline_id: Optional[int] = Field( + None, + description="Unique identifier for the pipeline record from GitHub Actions.", + ) + github_pipeline_link: Optional[str] = Field( + None, + description="Link to the GitHub job run associated with the benchmark run.", + ) + github_job_id: Optional[int] = Field(None, description="Unique GitHub Actions CI job ID.") + user_name: Optional[str] = Field(None, description="Name of the person that executed the benchmark run.") + docker_image: Optional[str] = Field( + None, + description="Name or ID of the Docker image used for benchmarking (software " + "version info), e.g., trt-llm-v080.", + ) + device_hostname: str = Field(description="Host name of the device on which the benchmark is performed.") + device_ip: Optional[str] = Field(None, description="Host IP address.") + device_info: Optional[dict] = Field( + None, + description="Device information as JSON, such as manufacturer, card_type, " + "dram_size, num_cores, price, bus_interface, optimal_clock_speed.", + ) + ml_model_name: str = Field(description="Name of the benchmarked neural network model.") + ml_model_type: Optional[str] = Field( + None, + description="Model type, such as text generation, classification, question " "answering, etc.", + ) + num_layers: Optional[int] = Field(None, description="Number of layers of the model.") + batch_size: Optional[int] = Field(None, description="Batch size.") + config_params: 
Optional[dict] = Field(None, description="Additional training/inference parameters.") + precision: Optional[str] = Field( + None, + description="Numerical precision, such as bfp8, fp16, or a mix such as " "fp16_act_bfp8_weights, etc.", + ) + dataset_name: Optional[str] = Field(None, description="Name of the dataset used for the benchmark.") + profiler_name: Optional[str] = Field(None, description="Profiler to time the benchmark.") + input_sequence_length: Optional[int] = Field( + None, + description="Length of the sequence used as input to the model, applicable " "to sequence models.", + ) + output_sequence_length: Optional[int] = Field( + None, + description="Length of the sequence used as output by the model, applicable " "to sequence models.", + ) + image_dimension: Optional[str] = Field( + None, + description="Dimension of the image, e.g. 224x224x3, applicable to computer " "vision models.", + ) + perf_analysis: Optional[bool] = Field( + None, + description="If the model was run in perf analysis mode. 
This is " "kernel/operation execution mode.", + ) + training: Optional[bool] = Field(None, description="ML model benchmarks for training or inference.") + measurements: List[BenchmarkMeasurement] = Field(description="List of benchmark measurements.") diff --git a/.github/actions/collect_data/test/data/12141788622/artifacts/forge-benchmark-e2e-mnist_33854708624.json b/.github/actions/collect_data/test/data/12141788622/artifacts/forge-benchmark-e2e-mnist_33854708624.json new file mode 100644 index 0000000..c176c63 --- /dev/null +++ b/.github/actions/collect_data/test/data/12141788622/artifacts/forge-benchmark-e2e-mnist_33854708624.json @@ -0,0 +1,22 @@ +{ + "model": "MNIST Linear", + "config": "", + "date": "24-12-03", + "hash": "c47f41a", + "machine_name": "8cb186cee6d2", + "samples_per_sec": 0.23979727678872859, + "total_samples": 1, + "total_time": 4.170189142227173, + "training": false, + "batch_size": 1, + "output": "forge-benchmark-e2e-mnist_33854708624.json", + "arch": "", + "chips": "", + "device": "", + "galaxy": "", + "perf_analysis": "", + "load_tti": "", + "save_tti": "", + "task": "", + "evaluation_score": "" +} diff --git a/.github/actions/collect_data/test/data/12141788622/workflow.json b/.github/actions/collect_data/test/data/12141788622/workflow.json new file mode 100644 index 0000000..bae4e6a --- /dev/null +++ b/.github/actions/collect_data/test/data/12141788622/workflow.json @@ -0,0 +1,269 @@ +{ + "id": 12141788622, + "name": "On PR", + "node_id": "WFR_kwLOMYlQT88AAAAC07T9zg", + "head_branch": "vmilosevic/upload_perf_report", + "head_sha": "470c66d36e9392947e0e336923e67de5c8ce5b83", + "path": ".github/workflows/on-pr.yml", + "display_title": "Upload perf report", + "run_number": 1402, + "event": "pull_request", + "status": "in_progress", + "conclusion": null, + "workflow_id": 112131564, + "check_suite_id": 31606539821, + "check_suite_node_id": "CS_kwDOMYlQT88AAAAHW-WGLQ", + "url": 
"https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/runs/12141788622", + "html_url": "https://github.com/tenstorrent/tt-forge-fe/actions/runs/12141788622", + "pull_requests": [ + { + "url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/pulls/569", + "id": 2154682397, + "number": 569, + "head": { + "ref": "vmilosevic/upload_perf_report", + "sha": "470c66d36e9392947e0e336923e67de5c8ce5b83", + "repo": { + "id": 831082575, + "url": "https://api.github.com/repos/tenstorrent/tt-forge-fe", + "name": "tt-forge-fe" + } + }, + "base": { + "ref": "main", + "sha": "85d8575678cd0fc899d1de1895cd14c86b9068ac", + "repo": { + "id": 831082575, + "url": "https://api.github.com/repos/tenstorrent/tt-forge-fe", + "name": "tt-forge-fe" + } + } + } + ], + "created_at": "2024-12-03T14:29:46Z", + "updated_at": "2024-12-03T14:30:20Z", + "actor": { + "login": "vmilosevic", + "id": 157983820, + "node_id": "U_kgDOCWqkTA", + "avatar_url": "https://avatars.githubusercontent.com/u/157983820?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/vmilosevic", + "html_url": "https://github.com/vmilosevic", + "followers_url": "https://api.github.com/users/vmilosevic/followers", + "following_url": "https://api.github.com/users/vmilosevic/following{/other_user}", + "gists_url": "https://api.github.com/users/vmilosevic/gists{/gist_id}", + "starred_url": "https://api.github.com/users/vmilosevic/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/vmilosevic/subscriptions", + "organizations_url": "https://api.github.com/users/vmilosevic/orgs", + "repos_url": "https://api.github.com/users/vmilosevic/repos", + "events_url": "https://api.github.com/users/vmilosevic/events{/privacy}", + "received_events_url": "https://api.github.com/users/vmilosevic/received_events", + "type": "User", + "user_view_type": "public", + "site_admin": false + }, + "run_attempt": 1, + "referenced_workflows": [ + { + "path": 
"tenstorrent/tt-forge-fe/.github/workflows/build-and-test.yml@c47f41abf6fd9eb51f218bc9d04215421edc46c2", + "sha": "c47f41abf6fd9eb51f218bc9d04215421edc46c2", + "ref": "refs/pull/569/merge" + }, + { + "path": "tenstorrent/tt-forge-fe/.github/workflows/spdx.yml@c47f41abf6fd9eb51f218bc9d04215421edc46c2", + "sha": "c47f41abf6fd9eb51f218bc9d04215421edc46c2", + "ref": "refs/pull/569/merge" + }, + { + "path": "tenstorrent/tt-forge-fe/.github/workflows/perf-benchmark.yml@c47f41abf6fd9eb51f218bc9d04215421edc46c2", + "sha": "c47f41abf6fd9eb51f218bc9d04215421edc46c2", + "ref": "refs/pull/569/merge" + }, + { + "path": "tenstorrent/tt-forge-fe/.github/workflows/pre-commit.yml@c47f41abf6fd9eb51f218bc9d04215421edc46c2", + "sha": "c47f41abf6fd9eb51f218bc9d04215421edc46c2", + "ref": "refs/pull/569/merge" + } + ], + "run_started_at": "2024-12-03T14:29:46Z", + "triggering_actor": { + "login": "vmilosevic", + "id": 157983820, + "node_id": "U_kgDOCWqkTA", + "avatar_url": "https://avatars.githubusercontent.com/u/157983820?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/vmilosevic", + "html_url": "https://github.com/vmilosevic", + "followers_url": "https://api.github.com/users/vmilosevic/followers", + "following_url": "https://api.github.com/users/vmilosevic/following{/other_user}", + "gists_url": "https://api.github.com/users/vmilosevic/gists{/gist_id}", + "starred_url": "https://api.github.com/users/vmilosevic/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/vmilosevic/subscriptions", + "organizations_url": "https://api.github.com/users/vmilosevic/orgs", + "repos_url": "https://api.github.com/users/vmilosevic/repos", + "events_url": "https://api.github.com/users/vmilosevic/events{/privacy}", + "received_events_url": "https://api.github.com/users/vmilosevic/received_events", + "type": "User", + "user_view_type": "public", + "site_admin": false + }, + "jobs_url": 
"https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/runs/12141788622/attempts/1/jobs", + "logs_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/runs/12141788622/attempts/1/logs", + "check_suite_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/check-suites/31606539821", + "artifacts_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/runs/12141788622/artifacts", + "cancel_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/runs/12141788622/cancel", + "rerun_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/runs/12141788622/rerun", + "previous_attempt_url": null, + "workflow_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/workflows/112131564", + "head_commit": { + "id": "470c66d36e9392947e0e336923e67de5c8ce5b83", + "tree_id": "681fa454c02bcf9c49aee9d9820fa13ffcee262f", + "message": "Fix file path", + "timestamp": "2024-12-03T14:29:38Z", + "author": { + "name": "Vladimir Milosevic", + "email": "vmilosevic@tenstorrent.com" + }, + "committer": { + "name": "Vladimir Milosevic", + "email": "vmilosevic@tenstorrent.com" + } + }, + "repository": { + "id": 831082575, + "node_id": "R_kgDOMYlQTw", + "name": "tt-forge-fe", + "full_name": "tenstorrent/tt-forge-fe", + "private": false, + "owner": { + "login": "tenstorrent", + "id": 64161552, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjY0MTYxNTUy", + "avatar_url": "https://avatars.githubusercontent.com/u/64161552?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/tenstorrent", + "html_url": "https://github.com/tenstorrent", + "followers_url": "https://api.github.com/users/tenstorrent/followers", + "following_url": "https://api.github.com/users/tenstorrent/following{/other_user}", + "gists_url": "https://api.github.com/users/tenstorrent/gists{/gist_id}", + "starred_url": "https://api.github.com/users/tenstorrent/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/tenstorrent/subscriptions", + 
"organizations_url": "https://api.github.com/users/tenstorrent/orgs", + "repos_url": "https://api.github.com/users/tenstorrent/repos", + "events_url": "https://api.github.com/users/tenstorrent/events{/privacy}", + "received_events_url": "https://api.github.com/users/tenstorrent/received_events", + "type": "Organization", + "user_view_type": "public", + "site_admin": false + }, + "html_url": "https://github.com/tenstorrent/tt-forge-fe", + "description": "The TT-Forge FE is a graph compiler designed to optimize and transform computational graphs for deep learning models, enhancing their performance and efficiency.", + "fork": false, + "url": "https://api.github.com/repos/tenstorrent/tt-forge-fe", + "forks_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/forks", + "keys_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/teams", + "hooks_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/hooks", + "issue_events_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/issues/events{/number}", + "events_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/events", + "assignees_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/assignees{/user}", + "branches_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/branches{/branch}", + "tags_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/tags", + "blobs_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/trees{/sha}", + "statuses_url": 
"https://api.github.com/repos/tenstorrent/tt-forge-fe/statuses/{sha}", + "languages_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/languages", + "stargazers_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/stargazers", + "contributors_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/contributors", + "subscribers_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/subscribers", + "subscription_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/subscription", + "commits_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/contents/{+path}", + "compare_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/merges", + "archive_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/downloads", + "issues_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/issues{/number}", + "pulls_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/pulls{/number}", + "milestones_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/milestones{/number}", + "notifications_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/labels{/name}", + "releases_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/releases{/id}", + "deployments_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/deployments" + }, + 
"head_repository": { + "id": 831082575, + "node_id": "R_kgDOMYlQTw", + "name": "tt-forge-fe", + "full_name": "tenstorrent/tt-forge-fe", + "private": false, + "owner": { + "login": "tenstorrent", + "id": 64161552, + "node_id": "MDEyOk9yZ2FuaXphdGlvbjY0MTYxNTUy", + "avatar_url": "https://avatars.githubusercontent.com/u/64161552?v=4", + "gravatar_id": "", + "url": "https://api.github.com/users/tenstorrent", + "html_url": "https://github.com/tenstorrent", + "followers_url": "https://api.github.com/users/tenstorrent/followers", + "following_url": "https://api.github.com/users/tenstorrent/following{/other_user}", + "gists_url": "https://api.github.com/users/tenstorrent/gists{/gist_id}", + "starred_url": "https://api.github.com/users/tenstorrent/starred{/owner}{/repo}", + "subscriptions_url": "https://api.github.com/users/tenstorrent/subscriptions", + "organizations_url": "https://api.github.com/users/tenstorrent/orgs", + "repos_url": "https://api.github.com/users/tenstorrent/repos", + "events_url": "https://api.github.com/users/tenstorrent/events{/privacy}", + "received_events_url": "https://api.github.com/users/tenstorrent/received_events", + "type": "Organization", + "user_view_type": "public", + "site_admin": false + }, + "html_url": "https://github.com/tenstorrent/tt-forge-fe", + "description": "The TT-Forge FE is a graph compiler designed to optimize and transform computational graphs for deep learning models, enhancing their performance and efficiency.", + "fork": false, + "url": "https://api.github.com/repos/tenstorrent/tt-forge-fe", + "forks_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/forks", + "keys_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/keys{/key_id}", + "collaborators_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/collaborators{/collaborator}", + "teams_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/teams", + "hooks_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/hooks", + 
"issue_events_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/issues/events{/number}", + "events_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/events", + "assignees_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/assignees{/user}", + "branches_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/branches{/branch}", + "tags_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/tags", + "blobs_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/blobs{/sha}", + "git_tags_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/tags{/sha}", + "git_refs_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/refs{/sha}", + "trees_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/trees{/sha}", + "statuses_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/statuses/{sha}", + "languages_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/languages", + "stargazers_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/stargazers", + "contributors_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/contributors", + "subscribers_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/subscribers", + "subscription_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/subscription", + "commits_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/commits{/sha}", + "git_commits_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/git/commits{/sha}", + "comments_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/comments{/number}", + "issue_comment_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/issues/comments{/number}", + "contents_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/contents/{+path}", + "compare_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/compare/{base}...{head}", + "merges_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/merges", + "archive_url": 
"https://api.github.com/repos/tenstorrent/tt-forge-fe/{archive_format}{/ref}", + "downloads_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/downloads", + "issues_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/issues{/number}", + "pulls_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/pulls{/number}", + "milestones_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/milestones{/number}", + "notifications_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/notifications{?since,all,participating}", + "labels_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/labels{/name}", + "releases_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/releases{/id}", + "deployments_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/deployments" + } +} diff --git a/.github/actions/collect_data/test/data/12141788622/workflow_jobs.json b/.github/actions/collect_data/test/data/12141788622/workflow_jobs.json new file mode 100644 index 0000000..a6b2cc2 --- /dev/null +++ b/.github/actions/collect_data/test/data/12141788622/workflow_jobs.json @@ -0,0 +1,43 @@ +{ + "total_count": 8, + "jobs": [ + { + "id": 33854708624, + "run_id": 12141788622, + "workflow_name": "On PR", + "head_branch": "vmilosevic/upload_perf_report", + "run_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/runs/12141788622", + "run_attempt": 1, + "node_id": "CR_kwDOMYlQT88AAAAH4eXbkA", + "head_sha": "470c66d36e9392947e0e336923e67de5c8ce5b83", + "url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/actions/jobs/33854708624", + "html_url": "https://github.com/tenstorrent/tt-forge-fe/actions/runs/12141788622/job/33854708624", + "status": "completed", + "conclusion": "success", + "created_at": "2024-12-03T14:30:09Z", + "started_at": "2024-12-03T14:30:15Z", + "completed_at": "2024-12-03T14:39:31Z", + "name": "perf-benchmark / build-and-run-perf", + "steps": [ + { + "name": "Run Perf Benchmark", + "status": "completed", + 
"conclusion": "success", + "number": 10, + "started_at": "2024-12-03T14:38:42Z", + "completed_at": "2024-12-03T14:38:57Z" + } + ], + "check_run_url": "https://api.github.com/repos/tenstorrent/tt-forge-fe/check-runs/33854708624", + "labels": [ + "in-service", + "n150", + "performance" + ], + "runner_id": 33, + "runner_name": "f10cs11-n150", + "runner_group_id": 1, + "runner_group_name": "Default" + } + ] +} diff --git a/.github/actions/collect_data/test/test_generate_data.py b/.github/actions/collect_data/test/test_generate_data.py index 0dd01a4..44ec6d6 100644 --- a/.github/actions/collect_data/test/test_generate_data.py +++ b/.github/actions/collect_data/test/test_generate_data.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from generate_data import create_pipeline_json +from generate_data import create_pipeline_json, create_benchmark_jsons import os import json import pytest @@ -50,6 +50,33 @@ def test_create_pipeline_json(run_id, expected): assert check_constraint(pipeline) +@pytest.mark.parametrize( + "run_id, expected", + [ + ("12141788622", {"ml_model_name": "MNIST Linear", "measurements_cnt": 2}), + ], +) +def test_create_benchmark_json(run_id, expected): + """ + End-to-end test for create_pipeline_json function + Calling this will generate a pipeline json file + """ + os.environ["GITHUB_EVENT_NAME"] = "test" + + pipeline, filename = create_pipeline_json( + workflow_filename=f"test/data/{run_id}/workflow.json", + jobs_filename=f"test/data/{run_id}/workflow_jobs.json", + workflow_outputs_dir="test/data", + ) + reports = create_benchmark_jsons(pipeline, "test/data") + for report, report_filename in reports: + assert os.path.exists(report_filename) + with open(report_filename, "r") as file: + report_json = json.load(file) + assert report_json["ml_model_name"] == expected["ml_model_name"] + assert len(report_json["measurements"]) == expected["measurements_cnt"] + + def check_constraint(pipeline): # check if the pipeline has the correct constraints # 
unique cicd_job_id, full_test_name, test_start_ts