-
Notifications
You must be signed in to change notification settings - Fork 10
/
run_quickstart_from_scratch.py
181 lines (155 loc) · 6.82 KB
/
run_quickstart_from_scratch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import pandas as pd
from autogluon.common.savers import save_pd
from autogluon.common.utils.simulation_utils import convert_simulation_artifacts_to_tabular_predictions_dict
from autogluon.tabular import TabularPredictor
from autogluon_benchmark import OpenMLTaskWrapper
from tabrepo import EvaluationRepository
from tabrepo.repository import EvaluationRepositoryZeroshot
from tabrepo.predictions import TabularPredictionsInMemory
from tabrepo.contexts.context import BenchmarkContext, construct_context
from tabrepo.contexts.subcontext import BenchmarkSubcontext
from tabrepo.simulation.ground_truth import GroundTruth
def get_artifacts(task: OpenMLTaskWrapper, fold: int, hyperparameters: dict, dataset: str = None, time_limit=60):
    """Fit the given configs on one fold of a task and collect the results.

    Parameters
    ----------
    task : OpenMLTaskWrapper
        Task that supplies the train/test split, the label, the task id and
        the problem type.
    fold : int
        Fold passed to ``task.get_train_test_split_combined``.
    hyperparameters : dict
        Forwarded unchanged to ``TabularPredictor.fit``.
    dataset : str, optional
        Name used to tag the outputs. Falls back to ``str(task.task_id)``
        when not given.
    time_limit : int, default 60
        Passed to ``fit`` as ``ag_args_fit["ag.max_time_limit"]``
        (presumably a per-model limit in seconds - confirm against AutoGluon).

    Returns
    -------
    tuple
        ``(simulation_artifacts, leaderboard)`` where ``simulation_artifacts``
        is the nested dict ``{dataset: {fold: simulation_artifact}}`` and
        ``leaderboard`` is the test leaderboard tagged with ``dataset``,
        ``tid``, ``fold`` and ``problem_type`` columns.
    """
    dataset = str(task.task_id) if dataset is None else dataset
    print(f"Fitting configs on dataset: {dataset}\t| fold: {fold}")

    train_data, test_data = task.get_train_test_split_combined(fold=fold)

    predictor: TabularPredictor = TabularPredictor(label=task.label)
    predictor = predictor.fit(
        train_data=train_data,
        hyperparameters=hyperparameters,
        ag_args_fit={"ag.max_time_limit": time_limit},
        fit_weighted_ensemble=False,
        calibrate=False,
        verbosity=0,
    )

    leaderboard = predictor.leaderboard(test_data, score_format="error")

    # Tag every leaderboard row with the identifiers of this run.
    run_metadata = {
        "dataset": dataset,
        "tid": task.task_id,
        "fold": fold,
        "problem_type": task.problem_type,
    }
    for column, value in run_metadata.items():
        leaderboard[column] = value

    # Align the leaderboard column names with the ones used downstream.
    leaderboard = leaderboard.rename(
        columns={
            "eval_metric": "metric",
            "metric_error_test": "metric_error",
        }
    )

    simulation_artifact = predictor.simulation_artifact(test_data=test_data)
    return {dataset: {fold: simulation_artifact}}, leaderboard
def convert_leaderboard_to_configs(leaderboard: pd.DataFrame, minimal: bool = True) -> pd.DataFrame:
    """Rename leaderboard columns to the config-results naming scheme.

    Parameters
    ----------
    leaderboard : pd.DataFrame
        Leaderboard frame; it is not modified.
    minimal : bool, default True
        When True, return only the fixed subset of columns listed below
        (in that order). When False, return every column.

    Returns
    -------
    pd.DataFrame
        A new frame in which ``model``, ``fit_time`` and ``pred_time_test``
        are renamed to ``framework``, ``time_train_s`` and ``time_infer_s``.
    """
    column_renames = {
        "model": "framework",
        "fit_time": "time_train_s",
        "pred_time_test": "time_infer_s",
    }
    renamed = leaderboard.rename(columns=column_renames)
    if not minimal:
        return renamed

    minimal_columns = [
        "dataset",
        "fold",
        "framework",
        "metric_error",
        "metric_error_val",
        "metric",
        "problem_type",
        "time_train_s",
        "time_infer_s",
        "tid",
    ]
    return renamed[minimal_columns]
"""
This tutorial showcases how to generate a small context from scratch using AutoGluon.
For the code to generate the full context, refer to https://github.com/autogluon/tabrepo/tree/main/scripts/execute_benchmarks
Required dependencies:
```bash
# Requires autogluon-benchmark
git clone https://github.com/Innixma/autogluon-benchmark.git
pip install -e autogluon-benchmark
```
This example script runs 7 configs on 3 tiny datasets with 2 folds, for a total of 42 trained models.
The full TabRepo runs 1530 configs on 244 datasets with 3 folds, using 8-fold bagging, for a total of 1,119,960 trained bagged models consisting of 8,959,680 fold models.
"""
if __name__ == '__main__':
    # Script flow: fit every config on every (dataset, fold), merge the
    # per-run outputs, write them under `save_loc`, then build a context
    # from those files and simulate a zeroshot portfolio on it.

    # list of datasets to train on
    datasets = [
        "Australian",
        "blood-transfusion",
        "meta",
    ]

    # dataset to task id map to reference the appropriate OpenML task.
    dataset_to_tid_dict = {
        "Australian": 146818,
        "blood-transfusion": 359955,
        "meta": 3623,
    }

    # time limit in seconds each config gets to train per dataset. Early stopped if exceeded.
    time_limit_per_config = 60  # 3600 in paper

    # the configs to train on each dataset
    hyperparameters = {
        "GBM": {},
        "XGB": {},
        "CAT": {},
        "FASTAI": {},
        "NN_TORCH": {},
        "RF": {},
        "XT": {},
    }

    # the folds to train on each dataset
    folds = [0, 1]

    # Fit models on the datasets and get their artifacts.
    # Each entry is the (simulation_artifacts, leaderboard) pair from get_artifacts.
    artifacts = []
    for dataset in datasets:
        task_id = dataset_to_tid_dict[dataset]
        for fold in folds:
            task = OpenMLTaskWrapper.from_task_id(task_id=task_id)
            artifacts.append(
                get_artifacts(
                    task=task,
                    fold=fold,
                    dataset=dataset,
                    hyperparameters=hyperparameters,
                    time_limit=time_limit_per_config,
                )
            )

    # TODO: Move into AutoGluonTaskWrapper
    leaderboards = [leaderboard for _, leaderboard in artifacts]
    leaderboard_full = pd.concat(leaderboards)
    print(leaderboard_full)

    # Merge the per-run {dataset: {fold: artifact}} dicts into a single nested dict.
    simulation_artifacts_full = dict()
    for simulation_artifacts, _ in artifacts:
        for k, artifacts_by_fold in simulation_artifacts.items():
            merged_folds = simulation_artifacts_full.setdefault(k, {})
            for f, artifact in artifacts_by_fold.items():
                # The duplicate check must look at the folds already stored for
                # this dataset; the outer dict is keyed by dataset, not by fold.
                if f in merged_folds:
                    raise AssertionError(f"Two results exist for tid {k}, fold {f}!")
                merged_folds[f] = artifact

    zeroshot_pp, zeroshot_gt = convert_simulation_artifacts_to_tabular_predictions_dict(simulation_artifacts=simulation_artifacts_full)

    save_loc = "./quickstart/"
    save_loc_data_dir = save_loc + "model_predictions/"

    # Predictions and ground truth are written to the same data directory.
    predictions = TabularPredictionsInMemory.from_dict(zeroshot_pp)
    ground_truth = GroundTruth.from_dict(zeroshot_gt)
    predictions.to_data_dir(data_dir=save_loc_data_dir)
    ground_truth.to_data_dir(data_dir=save_loc_data_dir)

    df_configs = convert_leaderboard_to_configs(leaderboard=leaderboard_full)
    save_pd.save(path=f"{save_loc}configs.parquet", df=df_configs)

    # Build a context that points at the files written above.
    context: BenchmarkContext = construct_context(
        name="quickstart",
        datasets=datasets,
        folds=folds,
        local_prefix=save_loc,
        local_prefix_is_relative=False,
        has_baselines=False)
    subcontext = BenchmarkSubcontext(parent=context)

    # Note: Can also skip all the above code if you want to use a readily available context rather than generating from scratch:
    # from tabrepo.contexts import get_subcontext
    # subcontext = get_subcontext(name="D244_F3_C1530_30")

    repo: EvaluationRepository = subcontext.load_from_parent()
    repo: EvaluationRepositoryZeroshot = repo.to_zeroshot()

    results_cv = repo.simulate_zeroshot(num_zeroshot=3, n_splits=2, backend="seq")

    df_results = repo.generate_output_from_portfolio_cv(portfolio_cv=results_cv, name="quickstart")

    # TODO: Fix time_infer_s to only include used models in the ensemble
    # TODO: Add way to fetch model hyperparameters and generate input hyperparameters dict based on `portfolio` column.
    # TODO: Run `portfolio` on datasets to verify that the true performance matches the simulated performance.

    # Lift pandas' display limits so the full results frame is printed.
    with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.width", 1000):
        print(df_results)