Skip to content

Commit

Permalink
Enhance memory and performance logging in catalog preparation tests
Browse files (browse the repository at this point in the history)
Signed-off-by: elronbandel <[email protected]>
  • Loading branch information
elronbandel committed Jan 29, 2025
1 parent 0968633 commit 119d07e
Showing 1 changed file with 35 additions and 7 deletions.
42 changes: 35 additions & 7 deletions tests/catalog/test_preparation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import glob
import os
import time
import tracemalloc

import psutil
from huggingface_hub.utils import GatedRepoError
from unitxt.loaders import MissingKaggleCredentialsError
from unitxt.logging_utils import get_logger
Expand All @@ -23,12 +25,12 @@
# Make sure the order in which the tests are run is deterministic.
# Having a different order for local testing and GitHub testing may cause diffs in results.
all_preparation_files.sort()
num_par = 1 # num of parallel executions
num_par = 8 # num of parallel executions
logger.critical(
f"Over all, {len(all_preparation_files)} files will now be tested over {num_par} parallel processes."
)
# modulo must be one of the residues modulo num_par: 0, 1, 2, ..., num_par - 1
modulo = 0
modulo = 2
all_preparation_files = [
file for i, file in enumerate(all_preparation_files) if i % num_par == modulo
]
Expand All @@ -43,15 +45,23 @@ def test_preparations(self):
logger.critical(
f"Testing {len(all_preparation_files)} preparation files: \n{all_preparation_files_as_string}\n"
)
times = {}
stats = {}
for file in all_preparation_files:
logger.info(
"\n_____________________________________________\n"
f" Testing preparation file:\n {file}."
"\n_____________________________________________\n"
)
try:
tracemalloc.start()
process = psutil.Process()
start_memory = process.memory_info().rss / (
1024**3
) # Convert bytes to GB
disk_start = psutil.disk_io_counters()
start_time = time.time()
tracemalloc.start()

with self.subTest(file=file):
try:
import_module_from_file(file)
Expand All @@ -70,21 +80,39 @@ def test_preparations(self):
self.assertTrue(True)

elapsed_time = time.time() - start_time
_, peak = tracemalloc.get_traced_memory()
disk_end = psutil.disk_io_counters()
read_gb = (disk_end.read_bytes - disk_start.read_bytes) / (1024**3)
write_gb = (disk_end.write_bytes - disk_start.write_bytes) / (1024**3)

tracemalloc.stop()
_, peak = tracemalloc.get_traced_memory()
# Convert to GB
peak_memory_python = peak / (1024**3) # Convert bytes to GB
peak_memory_system = (
process.memory_info().rss / (1024**3) - start_memory
) # Convert bytes to GB

minutes = int(elapsed_time // 60)
seconds = int(elapsed_time % 60)
formatted_time = f"{minutes:02}:{seconds:02}"
logger.info(
"\n_____________________________________________\n"
f" Finished testing preparation file:\n {file}."
f" Preparation Time: {formatted_time}"
f" Elapsed Time: {formatted_time}\n"
f" Peak Python Memory Usage: {peak_memory_python:.4f} GB\n"
f" Peak System RAM Usage: {peak_memory_system:.4f} GB\n"
f" Disk Write: {write_gb:.4f} GB, Disk Read: {read_gb:.4f} GB"
"\n_____________________________________________\n"
)

times[file.split("prepare")[-1]] = formatted_time
stats[
file.split("prepare")[-1]
] = f"Time: {formatted_time}, RAM: {peak_memory_system:.4f}, Disk: {write_gb:.4f}"
except Exception as e:
logger.critical(f"Testing preparation file '{file}' failed:")
raise e

logger.critical(f"Preparation times table for {len(times)} files:")
times = dict(sorted(times.items(), key=lambda item: item[1], reverse=True))
logger.critical(f"Preparation times table for {len(stats)} files:")
times = dict(sorted(stats.items(), key=lambda item: item[1], reverse=True))
print_dict(times, log_level="critical")

0 comments on commit 119d07e

Please sign in to comment.