Handle performance reports
vmilosevic committed Dec 4, 2024
1 parent f18e9a2 commit d541806
Showing 10 changed files with 619 additions and 5 deletions.
4 changes: 3 additions & 1 deletion .github/actions/collect_data/action.yml
@@ -52,7 +52,8 @@ runs:
run: |
python3 ${GITHUB_ACTION_PATH}/src/generate_data.py --run_id ${{ inputs.run_id }}
# Workaround: Copy file to avoid GH upload filename limitations
cp pipeline_${{ inputs.run_id }}*.json pipelinecopy_${{ inputs.run_id }}.json
cp pipeline_*.json pipelinecopy_${{ inputs.run_id }}.json
cp benchmark_*.json benchmarkcopy_${{ inputs.run_id }}.json
- name: Create key file
if: ${{ inputs.ssh-private-key != '' }}
@@ -74,6 +75,7 @@ runs:
path: |
if-no-files-found: warn
path: |
benchmarkcopy_${{ inputs.run_id }}.json
pipelinecopy_${{ inputs.run_id }}.json
generated/cicd/${{ inputs.run_id }}/workflow.json
generated/cicd/${{ inputs.run_id }}/workflow_jobs.json
2 changes: 0 additions & 2 deletions .github/actions/collect_data/sftp-csv.txt

This file was deleted.

1 change: 1 addition & 0 deletions .github/actions/collect_data/sftp-json.txt
@@ -1,2 +1,3 @@
put -r pipeline_*.json
put -r benchmark_*.json
ls -hal
117 changes: 117 additions & 0 deletions .github/actions/collect_data/src/benchmark.py
@@ -0,0 +1,117 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import os
import pathlib
import json
from loguru import logger
from pydantic_models import BenchmarkMeasurement, CompleteBenchmarkRun


def create_json_from_report(pipeline, workflow_outputs_dir):
results = []
reports = _get_model_reports(workflow_outputs_dir, pipeline.github_pipeline_id)

for job_id, report_paths in reports.items():
for report_path in report_paths:
with open(report_path) as report_file:
report_data = json.load(report_file)
results.append(_map_benchmark_data(pipeline, job_id, report_data))
logger.info(f"Created benchmark data for job: {job_id} model: {report_data['model']}")
return results


def get_benchmark_filename(report):
ts = report.run_start_ts.strftime("%Y-%m-%dT%H:%M:%S%z")
return f"benchmark_{report.github_job_id}_{ts}.json"


def _get_model_reports(workflow_outputs_dir, workflow_run_id: int):
"""
This function searches for perf reports in the artifacts directory
and returns a mapping of job IDs to the paths of the perf reports.
We expect the report filename to be in the format `<report_name>_<job_id>.json`.
"""
job_paths_map = {}
artifacts_dir = f"{workflow_outputs_dir}/{workflow_run_id}/artifacts"

logger.info(f"Searching for perf reports in {artifacts_dir}")

for root, _, files in os.walk(artifacts_dir):
for file in files:
if file.endswith(".json"):
logger.debug(f"Found perf report {file}")
file_path = pathlib.Path(root) / file
filename = file_path.name
try:
job_id = int(filename.split(".")[-2].split("_")[-1])
except ValueError:
logger.warning(f"Could not extract job ID from {filename}")
continue
report_paths = job_paths_map.get(job_id, [])
report_paths.append(file_path)
job_paths_map[job_id] = report_paths
return job_paths_map


def _map_benchmark_data(pipeline, job_id, report_data):

# get job information from pipeline
job = next(job for job in pipeline.jobs if job.github_job_id == job_id)

return CompleteBenchmarkRun(
run_start_ts=pipeline.pipeline_start_ts,
run_end_ts=pipeline.pipeline_end_ts,
run_type="",
git_repo_name=None,
git_commit_hash=pipeline.git_commit_hash,
git_commit_ts=None,
git_branch_name=pipeline.git_branch_name,
github_pipeline_id=pipeline.github_pipeline_id,
github_pipeline_link=pipeline.github_pipeline_link,
github_job_id=job.github_job_id,
user_name=pipeline.git_author,
docker_image=job.docker_image,
device_hostname=job.host_name,
device_ip=None,
device_info=None,
ml_model_name=report_data["model"],
ml_model_type=None,
num_layers=None,
batch_size=report_data.get("batch_size", None),
config_params={},
precision=None,
dataset_name=None,
profiler_name=None,
input_sequence_length=None,
output_sequence_length=None,
image_dimension=None,
perf_analysis=None,
training=report_data.get("training", False),
measurements=[
BenchmarkMeasurement(
step_start_ts=job.job_start_ts,
step_end_ts=job.job_end_ts,
iteration=0,
step_name="",
step_warm_up_num_iterations=None,
name="samples_per_sec",
value=report_data["samples_per_sec"],
target=None,
device_power=None,
device_temperature=None,
),
BenchmarkMeasurement(
step_start_ts=job.job_start_ts,
step_end_ts=job.job_end_ts,
iteration=0,
step_name="",
step_warm_up_num_iterations=None,
name="total_time",
value=report_data["total_time"],
target=None,
device_power=None,
device_temperature=None,
),
],
)
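
For reference, here is a minimal sketch of the job-ID extraction that _get_model_reports performs on each discovered report. The filename used below is the one referenced by the sample fixture added in this commit; any other name would be purely illustrative.

# Sketch: how _get_model_reports derives the job ID from a report filename.
# The filename is taken from the sample report fixture in this commit.
filename = "forge-benchmark-e2e-mnist_33854708624.json"
job_id = int(filename.split(".")[-2].split("_")[-1])
print(job_id)  # 33854708624

If the trailing token before ".json" is not an integer, the function logs a warning and skips the file, so reports that do not follow this naming scheme are simply ignored.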
23 changes: 22 additions & 1 deletion .github/actions/collect_data/src/generate_data.py
@@ -2,10 +2,12 @@
#
# SPDX-License-Identifier: Apache-2.0

import os
import argparse
from loguru import logger
from utils import get_github_runner_environment
from cicd import create_cicd_json_for_data_analysis, get_cicd_json_filename
from benchmark import create_json_from_report, get_benchmark_filename


def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_outputs_dir):
@@ -27,6 +29,20 @@ def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_ou
return pipeline, report_filename


def create_benchmark_jsons(pipeline, workflow_outputs_dir):
results = []
reports = create_json_from_report(pipeline, workflow_outputs_dir)
for report in reports:
report_filename = get_benchmark_filename(
report
) # f"benchmark_{report.github_job_id}_{report.run_start_ts}.json"
logger.info(f"Writing benchmark JSON to {report_filename}")
with open(report_filename, "w") as f:
f.write(report.model_dump_json())
results.append((report, report_filename))
return results


if __name__ == "__main__":

parser = argparse.ArgumentParser()
@@ -41,8 +57,13 @@ def create_pipeline_json(workflow_filename: str, jobs_filename: str, workflow_ou
args = parser.parse_args()

logger.info(f"Creating pipeline JSON for workflow run ID {args.run_id}")
create_pipeline_json(
pipeline, _ = create_pipeline_json(
workflow_filename=f"{args.output_dir}/{args.run_id}/workflow.json",
jobs_filename=f"{args.output_dir}/{args.run_id}/workflow_jobs.json",
workflow_outputs_dir=args.output_dir,
)

create_benchmark_jsons(
pipeline=pipeline,
workflow_outputs_dir=args.output_dir,
)
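
For orientation, a small sketch of the filename that create_benchmark_jsons ends up writing via get_benchmark_filename. The job ID and timestamp below are placeholder values for illustration, not taken from a real run.

from datetime import datetime, timezone

# Hypothetical values, for illustration only.
job_id = 33854708624
run_start_ts = datetime(2024, 12, 3, 12, 0, tzinfo=timezone.utc)

ts = run_start_ts.strftime("%Y-%m-%dT%H:%M:%S%z")
print(f"benchmark_{job_id}_{ts}.json")
# benchmark_33854708624_2024-12-03T12:00:00+0000.json

These files are what the sftp-json.txt batch file picks up with `put -r benchmark_*.json`, and what the action copies to benchmarkcopy_<run_id>.json before the artifact upload.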
114 changes: 114 additions & 0 deletions .github/actions/collect_data/src/pydantic_models.py
@@ -114,3 +114,117 @@ class Pipeline(BaseModel):
git_author: str = Field(description="Author of the Git commit.")
orchestrator: Optional[str] = Field(None, description="CI/CD pipeline orchestration platform.")
jobs: List[Job] = []


class BenchmarkMeasurement(BaseModel):
"""
Contains measurements for each benchmark run, iteration and step.
A run can have multiple iterations, each iteration can have multiple steps and each
step can execute multiple measurements.
"""

step_start_ts: datetime = Field(description="Timestamp with time zone when the step started.")
step_end_ts: datetime = Field(description="Timestamp with time zone when the step ended.")
iteration: int = Field(
description="A benchmark run can comprise a loop that repeats with the same "
"parameters the same sequence of steps and measurements for each. "
"This integer is the repetition number."
)
step_name: str = Field(description="Name of the benchmark step within the run.")
step_warm_up_num_iterations: Optional[int] = Field(
None, description="Number of iterations for device warm-up at each step."
)
name: str = Field(
description="Name of the measurement performed, e.g. tokens_per_sec_per_user, "
"tokens_per_sec, images_per_sec, pearson_correlation, "
"top1/top5 ratios."
)
value: float = Field(description="Measured value.")
target: Optional[float] = Field(None, description="Target value.")
device_power: Optional[float] = Field(
None,
description="Average power consumption in Watts during the benchmark step.",
)
device_temperature: Optional[float] = Field(
None, description="Average temperature of the device during the benchmark."
)


class CompleteBenchmarkRun(BaseModel):
"""
Contains information about each execution of an AI model benchmark, called benchmark
run, composed of steps each of which performs a set of measurements.
The sequence of steps in a run can be iterated in a loop.
"""

run_start_ts: datetime = Field(description="Timestamp with time zone when the benchmark run started.")
run_end_ts: datetime = Field(description="Timestamp with time zone when the benchmark run ended.")
run_type: str = Field(description="Description of the benchmark run, e.g. a100_fp16_experiments.")
git_repo_name: Optional[str] = Field(
None,
description="Name of the Git repository containing the code that executes " "the benchmark.",
)
git_commit_hash: Optional[str] = Field(
None,
description="Git commit hash of the code used to run the benchmark (software " "version info).",
)
git_commit_ts: Optional[datetime] = Field(None, description="Timestamp with timezone of the git commit.")
git_branch_name: Optional[str] = Field(
None, description="Name of the Git branch associated with the benchmark run."
)
github_pipeline_id: Optional[int] = Field(
None,
description="Unique identifier for the pipeline record from GitHub Actions.",
)
github_pipeline_link: Optional[str] = Field(
None,
description="Link to the GitHub job run associated with the benchmark run.",
)
github_job_id: Optional[int] = Field(None, description="Unique GitHub Actions CI job ID.")
user_name: Optional[str] = Field(None, description="Name of the person that executed the benchmark run.")
docker_image: Optional[str] = Field(
None,
description="Name or ID of the Docker image used for benchmarking (software "
"version info), e.g., trt-llm-v080.",
)
device_hostname: str = Field(description="Host name of the device on which the benchmark is performed.")
device_ip: Optional[str] = Field(None, description="Host IP address.")
device_info: Optional[dict] = Field(
None,
description="Device information as JSON, such as manufacturer, card_type, "
"dram_size, num_cores, price, bus_interface, optimal_clock_speed.",
)
ml_model_name: str = Field(description="Name of the benchmarked neural network model.")
ml_model_type: Optional[str] = Field(
None,
description="Model type, such as text generation, classification, question " "answering, etc.",
)
num_layers: Optional[int] = Field(None, description="Number of layers of the model.")
batch_size: Optional[int] = Field(None, description="Batch size.")
config_params: Optional[dict] = Field(None, description="Additional training/inference parameters.")
precision: Optional[str] = Field(
None,
description="Numerical precision, such as bfp8, fp16, or a mix such as " "fp16_act_bfp8_weights, etc.",
)
dataset_name: Optional[str] = Field(None, description="Name of the dataset used for the benchmark.")
profiler_name: Optional[str] = Field(None, description="Profiler to time the benchmark.")
input_sequence_length: Optional[int] = Field(
None,
description="Length of the sequence used as input to the model, applicable " "to sequence models.",
)
output_sequence_length: Optional[int] = Field(
None,
description="Length of the sequence used as output by the model, applicable " "to sequence models.",
)
image_dimension: Optional[str] = Field(
None,
description="Dimension of the image, e.g. 224x224x3, applicable to computer " "vision models.",
)
perf_analysis: Optional[bool] = Field(
None,
description="If the model was run in perf analysis mode. This is " "kernel/operation execution mode.",
)
training: Optional[bool] = Field(None, description="ML model benchmarks for training or inference.")
measurements: List[BenchmarkMeasurement] = Field(description="List of benchmark measurements.")
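
As a quick illustration of the new models, a minimal sketch that builds one BenchmarkMeasurement and serializes it the same way generate_data.py serializes reports. All field values are made up for the example, the class is assumed to be imported from pydantic_models, and every optional field falls back to its None default.

from datetime import datetime, timezone

from pydantic_models import BenchmarkMeasurement

# Illustrative values only; a real measurement is built in _map_benchmark_data.
measurement = BenchmarkMeasurement(
    step_start_ts=datetime(2024, 12, 3, 12, 0, tzinfo=timezone.utc),
    step_end_ts=datetime(2024, 12, 3, 12, 5, tzinfo=timezone.utc),
    iteration=0,
    step_name="",
    name="samples_per_sec",
    value=0.2398,
)
print(measurement.model_dump_json())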
@@ -0,0 +1,22 @@
{
"model": "MNIST Linear",
"config": "",
"date": "24-12-03",
"hash": "c47f41a",
"machine_name": "8cb186cee6d2",
"samples_per_sec": 0.23979727678872859,
"total_samples": 1,
"total_time": 4.170189142227173,
"training": false,
"batch_size": 1,
"output": "forge-benchmark-e2e-mnist_33854708624.json",
"arch": "",
"chips": "",
"device": "",
"galaxy": "",
"perf_analysis": "",
"load_tti": "",
"save_tti": "",
"task": "",
"evaluation_score": ""
}
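
To connect this sample report with _map_benchmark_data above: only a handful of keys are currently consumed, and the remaining fields in the fixture are ignored by the mapping. A minimal sketch of the reads involved, with the dict abridged from the fixture:

# Abridged from the sample report; unused fixture keys omitted.
report_data = {
    "model": "MNIST Linear",
    "samples_per_sec": 0.23979727678872859,
    "total_time": 4.170189142227173,
    "training": False,
    "batch_size": 1,
}

ml_model_name = report_data["model"]               # required key
samples_per_sec = report_data["samples_per_sec"]   # required key
total_time = report_data["total_time"]             # required key
batch_size = report_data.get("batch_size", None)   # optional, defaults to None
training = report_data.get("training", False)      # optional, defaults to False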