
Commit

add docs
dengdifan committed Dec 2, 2024
1 parent 8b748db commit 2487bc3
Showing 1 changed file with 28 additions and 17 deletions.
45 changes: 28 additions & 17 deletions smac/main/config_selector.py
@@ -45,6 +45,14 @@ class ConfigSelector:
the highest budgets are checked first. For example, if min_trials is three, but we find only
two trials in the runhistory for the highest budget, we will use trials of a lower budget
instead.
+    batch_sampling_estimation_strategy: str, defaults to no_estimate
+        Batch sampling setting, applied in the parallel setting. During batch sampling, the ConfigSelector might
+        need to suggest new samples while some configurations are still running. This argument determines whether
+        we make use of that information and fantasize estimations for the new samples. If no_estimate is applied,
+        we do not use the information from the running configurations. If the strategy is kriging_believer, we use
+        the predicted mean of our surrogate model as the estimation for the new samples. If the strategy is
+        CL_min/mean/max, we use the min/mean/max of the existing evaluations as the estimation for the new
+        samples. If the strategy is sample, we use our surrogate model (in this case, only a GP is allowed) to
+        sample new configurations.
"""

def __init__(
@@ -54,7 +62,7 @@ def __init__(
retrain_after: int = 8,
retries: int = 16,
min_trials: int = 1,
-    batch_sampling_estimating_strategy: str = 'CL_mean',
+    batch_sampling_estimation_strategy: str = "no_estimate",
) -> None:
# Those are the configs sampled from the passed initial design
# Selecting configurations from initial design
@@ -85,7 +93,7 @@ def __init__(
self._processed_configs: list[Configuration] = []

# for batch sampling setting
-    self._batch_sampling_estimating_strategy = batch_sampling_estimating_strategy
+    self._batch_sampling_estimation_strategy = batch_sampling_estimation_strategy

def _set_components(
self,
@@ -294,14 +302,17 @@ def _collect_data(self) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
# Therefore, there is no need to check the number of workers in this case

X_running = self._runhistory_encoder.transform_running_configs(budget_subset=[b])
-    Y_estimated = self.estimate_running_config_costs(X_running, Y, self._batch_sampling_estimating_strategy)
-    if Y_estimated is not None:
-        configs_array_running = self._runhistory_encoder.get_running_configurations(
-            budget_subset=self._considered_budgets
-        )
-        X = np.concatenate([X, X_running], axis=0)
-        Y = np.concatenate([Y, Y_estimated], axis=0)
-        configs_array = np.concatenate([configs_array, configs_array_running], axis=0)
+    if self._batch_sampling_estimation_strategy != 'no_estimate':
+        Y_estimated = self.estimate_running_config_costs(
+            X_running, Y, self._batch_sampling_estimation_strategy
+        )
+        if Y_estimated is not None:
+            configs_array_running = self._runhistory_encoder.get_running_configurations(
+                budget_subset=self._considered_budgets
+            )
+            X = np.concatenate([X, X_running], axis=0)
+            Y = np.concatenate([Y, Y_estimated], axis=0)
+            configs_array = np.concatenate([configs_array, configs_array_running], axis=0)

return X, Y, configs_array
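As a toy numpy sketch of what the added branch does (values invented for illustration): the surrogate's training data is extended by the running configurations paired with their fantasized costs.

```python
import numpy as np

# Two finished trials (encoded configs X, single-objective costs Y)
# and one still-running configuration.
X = np.array([[0.1], [0.9]])
Y = np.array([[0.4], [0.8]])
X_running = np.array([[0.5]])

# CL_mean-style estimate: every running config gets the mean observed cost.
Y_estimated = np.repeat(np.nanmean(Y, axis=0, keepdims=True), len(X_running), 0)

X_train = np.concatenate([X, X_running], axis=0)  # shape (3, 1)
Y_train = np.concatenate([Y, Y_estimated], axis=0)  # [[0.4], [0.8], [0.6]]
```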

@@ -323,7 +334,7 @@ def estimate_running_config_costs(
self,
X_running: np.ndarray,
Y_evaluated: np.ndarray,
-    estimate_strategy: str = 'CL_max'):
+    estimation_strategy: str = 'CL_max'):
"""
Estimate the costs of the still pending/running configurations.
Parameters
@@ -333,7 +344,7 @@ def estimate_running_config_costs(
Y_evaluated : np.ndarray
a np.ndarray of shape (n_evaluated_configs, n_obj) that records the costs of all the previously
evaluated configurations
-    estimate_strategy: str
+    estimation_strategy: str
    the strategy used to estimate the target y_running values
Returns
@@ -344,30 +355,30 @@
n_running_points = len(X_running)
if n_running_points == 0:
return None
-    if estimate_strategy == 'CL_max':
+    if estimation_strategy == 'CL_max':
# constant liar max, we take the maximal values of all the evaluated Y and apply them to the running X
Y_estimated = np.nanmax(Y_evaluated, axis=0, keepdims=True)
return np.repeat(Y_estimated, n_running_points, 0)
-    elif estimate_strategy == 'CL_min':
+    elif estimation_strategy == 'CL_min':
# constant liar min, we take the minimal values of all the evaluated Y and apply them to the running X
Y_estimated = np.nanmin(Y_evaluated, axis=0, keepdims=True)
return np.repeat(Y_estimated, n_running_points, 0)
-    elif estimate_strategy == 'CL_mean':
+    elif estimation_strategy == 'CL_mean':
    # constant liar mean, we take the mean values of all the evaluated Y and apply them to the running X
Y_estimated = np.nanmean(Y_evaluated, axis=0, keepdims=True)
return np.repeat(Y_estimated, n_running_points, 0)
-    elif estimate_strategy == 'kriging_believer':
+    elif estimation_strategy == 'kriging_believer':
# in kriging believer, we apply the predicted means of the surrogate model to estimate the running X
return self._model.predict_marginalized(X_running)[0]
-    elif estimate_strategy == 'sample':
+    elif estimation_strategy == 'sample':
# https://papers.nips.cc/paper_files/paper/2012/file/05311655a15b75fab86956663e1819cd-Paper.pdf
# since this requires a multivariate Gaussian distribution, we need to restrict the model to be a
# Gaussian process
assert isinstance(self._model, GaussianProcess), 'Sample-based estimation strategy only allows ' \
    'a GP as the surrogate model!'
return self._model.sample_functions(X_test=X_running, n_funcs=1)
else:
-    raise ValueError(f'Unknown estimating strategy: {estimate_strategy}')
+    raise ValueError(f'Unknown estimation strategy: {estimation_strategy}')
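A quick sanity check of the three constant-liar variants above, with toy cost values that are not part of the commit:

```python
import numpy as np

Y_evaluated = np.array([[0.3], [0.7], [0.5]])  # three finished trials, one objective
n_running_points = 2

# CL_max, CL_min and CL_mean exactly as implemented above:
print(np.repeat(np.nanmax(Y_evaluated, axis=0, keepdims=True), n_running_points, 0))   # [[0.7] [0.7]]
print(np.repeat(np.nanmin(Y_evaluated, axis=0, keepdims=True), n_running_points, 0))   # [[0.3] [0.3]]
print(np.repeat(np.nanmean(Y_evaluated, axis=0, keepdims=True), n_running_points, 0))  # [[0.5] [0.5]]
```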

def _get_x_best(self, X: np.ndarray) -> tuple[np.ndarray, float]:
"""Get value, configuration, and array representation of the *best* configuration.
