refactor: experiments params to config (DynaDojo#118)
* changed all params.json and params_file to config.json and config_file

* changed all params.json and params_file to config.json and config_file. Made experiments config file and ran experiment. All works

* changed last thing from save_params -> save_config

---------

Co-authored-by: Caryn Tran <[email protected]>
2 people authored and lmclane04 committed Jul 26, 2024
1 parent 2ee87b6 commit 4d71fa4
Showing 3 changed files with 52 additions and 52 deletions.
44 changes: 22 additions & 22 deletions experiments/__main__.py
@@ -4,14 +4,14 @@
--algo: which algo, short name, see params.py algo_dict
--system: which system, short name, see params.py system_dict
--challenge: which challenge, one of ["fc", "fts", "fe"]
- --output_dir: where to save params, default "experiments/outputs"
+ --output_dir: where to save config, default "experiments/outputs"
--all: if True, make all params, default False
Usage:
python -m experiments make --challenge <challenge_key> --system <system_key> --algo <algo_key> --output_dir <output_dir>
python -m experiments make --challenge fc --system lds --algo lr_5
Arguments for run:
- --params_file: which params file to run
+ --config_file: which config file to run
--total_nodes: how many machines to run on (default 1, for running locally)
--node: which node is being run, [1, total_nodes], default None which runs the whole challenge
--output_dir: where to save results, default "experiments/outputs"
@@ -21,12 +21,12 @@
Usage:
python -m experiments \
run \
- --params_file experiments/outputs/fc/lds/fc_lds_lr_l=10/params.json \
+ --config_file experiments/outputs/fc/lds/fc_lds_lr_l=10/config.json \
--node 2 --total_nodes 10 \
--num_cpu_parallel -2 \
--if_missing
- python -m experiments run --num_cpu_parallel -2 --params_file experiments/outputs/fc/lds/fc_lds_lr_5_l=5/params.json
+ python -m experiments run --num_cpu_parallel -2 --config_file experiments/outputs/fc/lds/fc_lds_lr_5_l=5/config.json
Arguments for plot:
--data_dir: where to load results from
@@ -53,7 +53,7 @@
import argparse
import os
from .utils import algo_dict, load_from_json, system_dict, challenge_dicts
- from .main import load_data, run_challenge, make_plots, save_params, prGreen, prPink
+ from .main import load_data, run_challenge, make_plots, save_config, prGreen, prPink
from dynadojo.challenges import FixedError, FixedComplexity, FixedTrainSize


@@ -69,11 +69,11 @@
make_parser.add_argument('--algo', type=str, default='lr', help='Specify which algo to run')
make_parser.add_argument('--system', type=str, default='lds', choices=system_dict.keys(), help='Specify which system to run')
make_parser.add_argument('--challenge', type=str, default="fc", choices=["fc", "fts", "fe"], help='Specify which challenge to run')
- make_parser.add_argument('--output_dir', type=str, default="experiments/outputs", help='where to save params')
+ make_parser.add_argument('--output_dir', type=str, default="experiments/outputs", help='where to save config')
make_parser.add_argument('--all', action='store_true', help='if True, make all params')
make_parser.set_defaults(all=False)

- run_parser.add_argument('--params_file', type=str, help='what params file to run')
+ run_parser.add_argument('--config_file', type=str, help='what config file to run')
run_parser.add_argument('--node', type=int, default=None, help='which node is being run in [1, total_nodes], if None, run on splits')
run_parser.add_argument('--total_nodes', type=int, default=1, help='how many machines to run on')
run_parser.add_argument('--num_cpu_parallel', type=int, default=None, help='number of cpus to use for parallelization')
@@ -99,8 +99,8 @@
for a in chall_dict[s].keys():
if a != "default":
print(f"Making {c.__name__} {s} {a}")
- params_file, total_jobs = save_params(s, a, challenge_cls=c, output_dir=args.output_dir)
- prPink(f"{params_file} with {total_jobs} jobs")
+ config_file, total_jobs = save_config(s, a, challenge_cls=c, output_dir=args.output_dir)
+ prPink(f"{config_file} with {total_jobs} jobs")
else:
assert args.algo.split("_")[0] in algo_dict.keys(), f"algo {args.algo} must be in algo_dict"
if args.challenge == "fc":
@@ -109,22 +109,22 @@
challenge_cls = FixedTrainSize
else:
challenge_cls = FixedError
- params_file, total_jobs = save_params(args.system, args.algo, challenge_cls, output_dir=args.output_dir)
- prPink(f"{params_file} with {total_jobs} jobs")
+ config_file, total_jobs = save_config(args.system, args.algo, challenge_cls, output_dir=args.output_dir)
+ prPink(f"{config_file} with {total_jobs} jobs")
if rest: #maybe parse more args
args = program.parse_args(rest)
if args.command == 'run':
- args.params_file = params_file
+ args.config_file = config_file
else:
exit(0)

if args.command == 'run':
- assert args.params_file is not None, "must specify params file"
+ assert args.config_file is not None, "must specify config file"

if args.if_missing:
- params = load_from_json(args.params_file)
- total_jobs = params["total_jobs"]
- _, data = load_data(os.path.join(args.output_dir, params["folder_path"]))
+ config = load_from_json(args.config_file)
+ total_jobs = config["total_jobs"]
+ _, data = load_data(os.path.join(args.output_dir, config["folder_path"]))
if data is None:
prGreen("No previous jobs found.")
args.jobs = None
@@ -140,7 +140,7 @@
if args.node is not None and args.total_nodes > 1:
assert args.node >= 1 and args.node <= args.total_nodes, f"{args.node=} must be between[1, {args.total_nodes=}]"
run_challenge(
- params_file_path=args.params_file,
+ config_file_path=args.config_file,
output_dir=args.output_dir,
split=(args.node, args.total_nodes),
num_cpu_parallel=args.num_cpu_parallel,
@@ -149,7 +149,7 @@
else: # run the whole challenge
prGreen(f"Running {len(args.jobs.split(',')) if args.jobs else 'all'} jobs.")
run_challenge(
- params_file_path=args.params_file,
+ config_file_path=args.config_file,
output_dir=args.output_dir,
split=None,
num_cpu_parallel=args.num_cpu_parallel,
@@ -165,12 +165,12 @@
)

elif args.command == 'check':
- #must contain params.json
+ #must contain config.json
assert args.data_dir is not None, "must specify data directory"
assert os.path.exists(args.data_dir + "/params.json"), f"params.json not found in {args.data_dir}"
assert os.path.exists(args.data_dir + "/config.json"), f"config.json not found in {args.data_dir}"

- params = load_from_json(args.data_dir + "/params.json")
- total_jobs = params["total_jobs"]
+ config = load_from_json(args.data_dir + "/config.json")
+ total_jobs = config["total_jobs"]
_, data = load_data(args.data_dir)
if data is None:
completed_jobs = []
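
For reference (not part of the diff): a minimal sketch of driving the renamed CLI end to end with the flags documented above. It assumes the experiments package and DynaDojo are installed; the config path is illustrative and is printed by the make step.

# Minimal sketch: "make" writes config.json (formerly params.json), "run" consumes it.
import subprocess

subprocess.run(
    ["python", "-m", "experiments", "make",
     "--challenge", "fc", "--system", "lds", "--algo", "lr_5"],
    check=True,
)
subprocess.run(
    ["python", "-m", "experiments", "run",
     "--num_cpu_parallel", "-2",
     "--config_file", "experiments/outputs/fc/lds/fc_lds_lr_5_l=5/config.json"],
    check=True,
)
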
58 changes: 29 additions & 29 deletions experiments/main.py
@@ -7,43 +7,43 @@
import pandas as pd
import numpy as np
from dynadojo.challenges import FixedComplexity, ScalingChallenge
- from .utils import _get_params, save_to_json, load_from_json
+ from .utils import _get_config, save_to_json, load_from_json


- def save_params(
+ def save_config(
s ="lds",
a = "lr",
challenge_cls:type[ScalingChallenge] = FixedComplexity,
output_dir="experiments/outputs"
):
- experiment_params = _get_params(s, a, challenge_cls=challenge_cls)
- folder_path = experiment_params["folder_path"]
- # don't overwrite existing params
- params_file_path = os.path.join(output_dir, folder_path, "params.json")
- if os.path.exists(params_file_path):
- prGreen(f"Params already exist for {folder_path}...skipping")
+ experiment_config = _get_config(s, a, challenge_cls=challenge_cls)
+ folder_path = experiment_config["folder_path"]
+ # don't overwrite existing config
+ config_file_path = os.path.join(output_dir, folder_path, "config.json")
+ if os.path.exists(config_file_path):
+ prGreen(f"Config already exist for {folder_path}...skipping")
else:
- save_to_json(experiment_params, os.path.join(output_dir, folder_path, "params.json"))
- return params_file_path, experiment_params['total_jobs']
+ save_to_json(experiment_config, os.path.join(output_dir, folder_path, "config.json"))
+ return config_file_path, experiment_config['total_jobs']

def get_max_splits(s="lds", m="lr", challenge_cls:type[ScalingChallenge] = FixedComplexity,):
- params = _get_params(s, m, challenge_cls=challenge_cls)
- return params["total_jobs"]
+ config = _get_config(s, m, challenge_cls=challenge_cls)
+ return config["total_jobs"]

def run_challenge(
- params_file_path,
+ config_file_path,
output_dir="experiments/outputs",
split=(1,1),
num_cpu_parallel=None,
jobs_filter=None
):
"""
- Run an experiment given a params file.
+ Run an experiment given a config file.
Parameters
----------
- params_file_path : str
- path to params file
+ config_file_path : str
+ path to config file
output_dir : str, optional
base path to save results, by default "experiments/outputs"
split : tuple, optional
@@ -54,16 +54,16 @@ def run_challenge(
which jobs to run, by default None (run all jobs)
"""
# Load params
- experiment_params = load_from_json(params_file_path)
- challenge_params = experiment_params["challenge"]
- evaluate_params = experiment_params["evaluate"]
- challenge_cls = experiment_params["challenge_cls"]
+ experiment_config = load_from_json(config_file_path)
+ challenge_params = experiment_config["challenge"]
+ evaluate_params = experiment_config["evaluate"]
+ challenge_cls = experiment_config["challenge_cls"]

# Override num_cpu_parallel
if num_cpu_parallel:
evaluate_params['num_parallel_cpu'] = num_cpu_parallel

- all_jobs = list(range(experiment_params["total_jobs"]))
+ all_jobs = list(range(experiment_config["total_jobs"]))
if jobs_filter:
all_jobs = jobs_filter

@@ -83,16 +83,16 @@
)

# Make output directory
- folder_path = os.path.join(output_dir, experiment_params["folder_path"])
+ folder_path = os.path.join(output_dir, experiment_config["folder_path"])
if not os.path.exists(folder_path):
os.makedirs(folder_path, exist_ok=True)

- # Save params if not already saved (in the case we rerun an experiment from renamed folder)
- if not os.path.exists(f"{folder_path}/params.json"):
- save_to_json(experiment_params, f"{folder_path}/params.json")
+ # Save config if not already saved (in the case we rerun an experiment from renamed folder)
+ if not os.path.exists(f"{folder_path}/config.json"):
+ save_to_json(experiment_config, f"{folder_path}/config.json")

# Get csv file path, specifying split if necessary
- filename = experiment_params["experiment_name"]
+ filename = experiment_config["experiment_name"]
if split is not None:
filename += f"_{split_num}-of-{total_splits}"
filename += ".csv"
@@ -116,10 +116,10 @@ def make_plots(
save=True
):

experiment_params = load_from_json(f"{data_path}/params.json")
challenge_cls = experiment_params["challenge_cls"]
experiment_config = load_from_json(f"{data_path}/config.json")
challenge_cls = experiment_config["challenge_cls"]

- filebase = experiment_params["folder_path"].split('/')[-1]
+ filebase = experiment_config["folder_path"].split('/')[-1]
csv_filename = filebase + ".csv"
figure_filename = filebase + ".pdf"

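
For reference (not part of the diff): the same flow called programmatically through the renamed functions, mirroring what __main__.py does; the system/algo names and split values below are illustrative.

# Minimal sketch: save_config() replaces save_params(); run_challenge() now takes config_file_path.
from experiments.main import save_config, run_challenge
from dynadojo.challenges import FixedComplexity

# Write (or reuse) config.json and get the job count used for splitting across nodes.
config_file, total_jobs = save_config(
    s="lds", a="lr", challenge_cls=FixedComplexity,
    output_dir="experiments/outputs",
)
print(f"{config_file} with {total_jobs} jobs")

# Run node 1 of 2; num_cpu_parallel is forwarded to the evaluate params' num_parallel_cpu.
run_challenge(
    config_file_path=config_file,
    output_dir="experiments/outputs",
    split=(1, 2),
    num_cpu_parallel=-2,
)
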
2 changes: 1 addition & 1 deletion experiments/utils.py
@@ -20,7 +20,7 @@
"fe" : (FixedError, fe_challenge_params_dict),
}

- def _get_params(s, a, challenge_cls: type[ScalingChallenge]=FixedComplexity):
+ def _get_config(s, a, challenge_cls: type[ScalingChallenge]=FixedComplexity):
"""
Get challenge parameters for a given system, algo, and challenge class, overriding defaults with system and algo specific parameters.
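
For reference (not part of the diff): _get_config (formerly _get_params) is an internal helper, but callers such as save_config and get_max_splits read its folder_path and total_jobs keys; a minimal sketch of building the config dict in memory without writing config.json.

# Minimal sketch; prefer save_config() when the file should be persisted.
from experiments.utils import _get_config
from dynadojo.challenges import FixedComplexity

config = _get_config("lds", "lr", challenge_cls=FixedComplexity)
print(config["folder_path"], config["total_jobs"])
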
