Skip to content

Commit

Permalink
Minor change in imports
Browse files Browse the repository at this point in the history
  • Loading branch information
pablomlago committed Feb 7, 2025
1 parent 42777f2 commit 55861af
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions src/brevitas_examples/llm/benchmark/llm_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ def run_args_bucket_process(
print(
f"Process: {id}, remaining combinations: {args_queue.qsize()}, remaining time: {'unknown' if num_runs == 0 else str(datetime.timedelta(seconds=int((args_queue.qsize() / num_processes + 1)*mean_running_time)))}"
)
# TODO: Change so each process has an unique name, without relying
# on the process id suffix
job_name = f"{rn.get_name()}"
job_folder = f"{results_folder}/{job_name}"
# Create folder to store the results of the experiment
Expand All @@ -79,7 +77,7 @@ def run_args_bucket_process(
# Redirect output to files
sys.stdout = stdout_file
sys.stderr = stderr_file
# Wait before starting a new process to prevent using the same GPUs
# Record the wall-clock elapsed time when running the LLM entrypoint
start_time = time.time()
try:
results, _ = quantize_llm(args, extra_args)
Expand All @@ -97,7 +95,7 @@ def run_args_bucket_process(
stdout_file.close()
stderr_file.close()
num_retries += 1
# Dump information regarding the state of the run
# Dump information with the state and results of the run
with open(f"{job_folder}/run_results.yaml", 'w') as f:
yaml.dump({
"elapsed_time": running_time,
Expand All @@ -106,7 +104,7 @@ def run_args_bucket_process(
**(results if results is not None else {})},
f)
if results is not None:
# Update mean running time
# Update mean running time and move to next combination
num_runs += 1
mean_running_time = mean_running_time * (
num_runs - 1) / num_runs + running_time / num_runs
Expand Down Expand Up @@ -192,7 +190,8 @@ def parse_results(results_folder: str) -> pd.DataFrame:
row_data = {"job_id": job_name, **job_config, **job_results}
row_data_list.append(row_data)
if job_config is not None:
# Columns are obtained by computing the union of the sets of keys in row_data_list
# Columns are obtained by computing the union of the sets of keys in row_data_list, since,
# for instance, some jobs might have crashed before completing the LM eval
common_keys = ["job_id"] + list(job_config.keys()) + [
"elapsed_time", "status", "retry_number", "float_ppl", "quant_ppl"]
common_keys_set = set(common_keys)
Expand Down Expand Up @@ -251,15 +250,17 @@ def parse_results(results_folder: str) -> pd.DataFrame:
else:
extra_args += [f"--{key.replace('_', '-')}", str(value)]
args = SimpleNamespace(**args)
# Only keep valid configurations
validate(args, extra_args)
q.put((args, extra_args, args_dict))
except AssertionError as e:
# Invalid configuration
pass
# Map the comma-separated string of GPU ids to a list
cuda_available_devices = list(map(int, script_args.gpus.split(",")))
# Number of argument combinations
num_processes = len(cuda_available_devices) // script_args.num_gpus_per_process
# Instantiate threads to run the arguments in each bucket
# Instantiate processes to run the argument combinations
processes = []
for i in range(num_processes):
cuda_visible_devices = ",".join(
Expand Down

0 comments on commit 55861af

Please sign in to comment.