Fix PriorAcquisitionFunction (#1185)
* Dummy commit to create Draft MR

* First preliminary fix for the iteration_number.

* Update iteration_number counter

* Update priors and examples

* Fix typo

* Remove doubled prior floor

* Update tests

* Add changelog.md

* Update format
LukasFehring authored Jan 13, 2025
1 parent 7919c87 commit aed7769
Showing 4 changed files with 99 additions and 20 deletions.
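
In short, this commit changes how PriorAcquisitionFunction counts iterations: instead of incrementing an internal counter on every surrogate update (which also ticked during the initial design), the iteration number is now derived from the number of evaluated configurations (num_data) minus the size of the initial design, and the decay exponent uses iteration_number + 1 so it stays finite on the first model-based iteration. The diffs below contain the actual changes; the following is only a rough sketch of the behaviour they implement, with illustrative names rather than the SMAC source:

import numpy as np

def decayed_prior(prior_values: np.ndarray, num_data: int, initial_design_size: int,
                  decay_beta: float, prior_floor: float) -> np.ndarray:
    # The iteration number now counts only configurations evaluated after the initial design.
    iteration_number = num_data - initial_design_size
    # The "+ 1" keeps the exponent finite on the first model-based iteration.
    return np.power(prior_values + prior_floor, decay_beta / (iteration_number + 1))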
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@

## Bugfixes
- Fix kwargs for DifferentialEvolution (#1187)
- Fix PiBo implementation (#1076)

# 2.2.1

77 changes: 72 additions & 5 deletions examples/1_basics/6_priors.py
@@ -20,6 +20,7 @@
ConfigurationSpace,
NormalFloatHyperparameter,
UniformIntegerHyperparameter,
UniformFloatHyperparameter,
)
from sklearn.datasets import load_digits
from sklearn.exceptions import ConvergenceWarning
@@ -38,7 +39,7 @@

class MLP:
@property
def configspace(self) -> ConfigurationSpace:
def prior_configspace(self) -> ConfigurationSpace:
# Build Configuration Space which defines all parameters and their ranges.
# To illustrate different parameter types,
# we use continuous, integer and categorical parameters.
@@ -100,7 +101,67 @@ def configspace(self) -> ConfigurationSpace:
)

# Add all hyperparameters at once:
cs.add([n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init])
cs.add(
[n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init]
)

return cs

@property
def configspace(self) -> ConfigurationSpace:
# Build Configuration Space which defines all parameters and their ranges.
# To illustrate different parameter types,
# we use continuous, integer and categorical parameters.
cs = ConfigurationSpace()

# We do not have an educated belief on the number of layers beforehand
n_layer = UniformIntegerHyperparameter(
"n_layer",
lower=1,
upper=5,
)

# Define network width without a specific prior
n_neurons = UniformIntegerHyperparameter(
"n_neurons",
lower=8,
upper=256,
)

# Define activation functions without specific weights
activation = CategoricalHyperparameter(
"activation",
["logistic", "tanh", "relu"],
default_value="relu",
)

# Define optimizer without specific weights
optimizer = CategoricalHyperparameter(
"optimizer",
["sgd", "adam"],
default_value="adam",
)

# Define batch size without specific distribution
batch_size = UniformIntegerHyperparameter(
"batch_size",
16,
512,
default_value=128,
)

# Define learning rate range without log-normal prior
learning_rate_init = UniformFloatHyperparameter(
"learning_rate_init",
lower=1e-5,
upper=1.0,
default_value=1e-3,
)

# Add all hyperparameters at once:
cs.add(
[n_layer, n_neurons, activation, optimizer, batch_size, learning_rate_init]
)

return cs

@@ -119,8 +180,12 @@ def train(self, config: Configuration, seed: int = 0) -> float:
)

# Returns the 5-fold cross validation accuracy
cv = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True) # to make CV splits consistent
score = cross_val_score(classifier, digits.data, digits.target, cv=cv, error_score="raise")
cv = StratifiedKFold(
n_splits=5, random_state=seed, shuffle=True
) # to make CV splits consistent
score = cross_val_score(
classifier, digits.data, digits.target, cv=cv, error_score="raise"
)

return 1 - np.mean(score)

@@ -140,7 +205,9 @@ def train(self, config: Configuration, seed: int = 0) -> float:

# We define the prior acquisition function, which conducts the optimization using priors over the optimum
acquisition_function = PriorAcquisitionFunction(
acquisition_function=HyperparameterOptimizationFacade.get_acquisition_function(scenario),
acquisition_function=HyperparameterOptimizationFacade.get_acquisition_function(
scenario
),
decay_beta=scenario.n_trials / 10, # Proven solid value
)

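The updated example keeps two configuration spaces side by side: prior_configspace encodes the user's beliefs about good values (e.g. a log-normal prior on the learning rate), while the new configspace uses plain uniform ranges as a prior-free baseline. As a hedged sketch of how the pieces above might be wired into a full run (the Scenario and facade calls follow the SMAC 2.x API; treat the exact arguments as assumptions, not as an excerpt from the file):

mlp = MLP()
scenario = Scenario(mlp.prior_configspace, n_trials=40)

acquisition_function = PriorAcquisitionFunction(
    acquisition_function=HyperparameterOptimizationFacade.get_acquisition_function(scenario),
    decay_beta=scenario.n_trials / 10,  # proven solid value, per the example above
)

smac = HyperparameterOptimizationFacade(
    scenario,
    mlp.train,
    acquisition_function=acquisition_function,
    overwrite=True,  # assumption: start a fresh run instead of continuing an old one
)
incumbent = smac.optimize()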
19 changes: 15 additions & 4 deletions smac/acquisition/function/prior_acquisition_function.py
@@ -72,6 +72,9 @@ def __init__(
acquisition_type = self._acquisition_function

self._rescale = isinstance(acquisition_type, (LCB, TS))

# Variables needed to adapt the weighting of the prior
self._initial_design_size = None
self._iteration_number = 0

@property
@@ -116,7 +119,12 @@ def _update(self, **kwargs: Any) -> None:
Current incumbent value.
"""
assert "eta" in kwargs
self._iteration_number += 1

# Compute initial design size
if self._initial_design_size is None:
self._initial_design_size = kwargs["num_data"]

self._iteration_number = kwargs["num_data"] - self._initial_design_size
self._eta = kwargs["eta"]

assert self.model is not None
@@ -146,8 +154,10 @@ def _compute_prior(self, X: np.ndarray) -> np.ndarray:
for parameter, X_col in zip(self._hyperparameters.values(), X.T):
if self._discretize and isinstance(parameter, FloatHyperparameter):
assert self._discrete_bins_factor is not None
number_of_bins = int(np.ceil(self._discrete_bins_factor * self._decay_beta / self._iteration_number))
prior_values *= self._compute_discretized_pdf(parameter, X_col, number_of_bins) + self._prior_floor
number_of_bins = int(
np.ceil(self._discrete_bins_factor * self._decay_beta / (self._iteration_number + 1))
)
prior_values *= self._compute_discretized_pdf(parameter, X_col, number_of_bins)
else:
prior_values *= parameter._pdf(X_col[:, np.newaxis])

@@ -178,6 +188,7 @@ def _compute_discretized_pdf(
The user prior over the optimum for the parameter at hand.
"""
# Evaluates the actual pdf on all the relevant points
# Replace deprecated method
pdf_values = hyperparameter._pdf(X_col[:, np.newaxis])

# Retrieves the largest value of the pdf in the domain
@@ -221,6 +232,6 @@ def _compute(self, X: np.ndarray) -> np.ndarray:
acq_values = self._acquisition_function._compute(X)

prior_values = self._compute_prior(X) + self._prior_floor
decayed_prior_values = np.power(prior_values, self._decay_beta / self._iteration_number)
decayed_prior_values = np.power(prior_values, self._decay_beta / (self._iteration_number + 1))

return acq_values * decayed_prior_values
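
To make the new bookkeeping concrete, a small worked example (illustrative values only, assuming an initial design of 5 configurations and decay_beta = 10): the first call to _update records the initial design size, and from then on the iteration number is simply the number of additional evaluations.

initial_design_size = None
decay_beta = 10.0
for num_data in [5, 6, 7, 8]:                 # values arriving via kwargs["num_data"]
    if initial_design_size is None:
        initial_design_size = num_data        # 5, captured on the first update
    iteration_number = num_data - initial_design_size    # 0, 1, 2, 3
    exponent = decay_beta / (iteration_number + 1)        # 10.0, 5.0, ~3.33, 2.5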
22 changes: 11 additions & 11 deletions tests/test_acquisition/test_functions.py
@@ -298,16 +298,16 @@ def test_prior_init_ts(prior_model, acq_ts, beta):

def test_prior_update(prior_model, acquisition_function, beta):
paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
paf.update(model=prior_model, eta=2)
paf.update(model=prior_model, eta=2, num_data=10)
assert paf._eta == 2
assert paf._acquisition_function._eta == 2
assert paf._iteration_number == 1
assert paf._iteration_number == 0


def test_prior_compute_prior_Nx1(prior_model, hyperparameter_dict, acquisition_function, beta):
prior_model.update_prior(hyperparameter_dict)
paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
paf.update(model=prior_model, eta=1)
paf.update(model=prior_model, eta=1, num_data=1)

X = np.array([0, 0.5, 1]).reshape(3, 1)
prior_values = paf._compute_prior(X)
@@ -321,7 +321,7 @@ def test_prior_compute_prior_NxD(prior_model, hyperparameter_dict, acquisition_f
def test_prior_compute_prior_NxD(prior_model, hyperparameter_dict, acquisition_function, beta):
prior_model.update_prior(hyperparameter_dict)
paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
paf.update(model=prior_model, eta=1)
paf.update(model=prior_model, eta=1, num_data=1)

X = np.array([[0, 0], [0, 1], [1, 1]])
prior_values = paf._compute_prior(X)
@@ -339,7 +339,7 @@ def test_prior_compute_prior_1xD(prior_model, acquisition_function, beta):

prior_model.update_prior(hyperparameter_dict)
paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
paf.update(model=prior_model, eta=1)
paf.update(model=prior_model, eta=1, num_data=1)

X = np.array([[0.5, 0.5]])
prior_values = paf._compute_prior(X)
@@ -351,7 +351,7 @@ def test_prior_compute_prior_1x1(prior_model, hyperparameter_dict, acquisition_f
def test_prior_compute_prior_1x1(prior_model, hyperparameter_dict, acquisition_function, beta):
prior_model.update_prior(hyperparameter_dict)
paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta)
paf.update(model=prior_model, eta=1)
paf.update(model=prior_model, eta=1, num_data=1)

X = np.array([0.5]).reshape(1, 1)
prior_values = paf._compute_prior(X)
@@ -378,7 +378,7 @@ def hp_dict3(x0_prior, x1_prior, x2_prior):
def test_prior_1xD(hp_dict3, prior_model, acquisition_function, beta, prior_floor):
prior_model.update_prior(hp_dict3)
paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta, prior_floor=prior_floor)
paf.update(model=prior_model, eta=1.0)
paf.update(model=prior_model, eta=1.0, num_data=1)
configurations = [ConfigurationMock([1.0, 1.0, 1.0])]
acq = paf(configurations)
assert acq.shape == (1, 1)
@@ -391,7 +391,7 @@ def test_prior_1xD(hp_dict3, prior_model, acquisition_function, beta, prior_floo
def test_prior_NxD(hp_dict3, prior_model, acquisition_function, beta, prior_floor):
prior_model.update_prior(hp_dict3)
paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta, prior_floor=prior_floor)
paf.update(model=prior_model, eta=1.0)
paf.update(model=prior_model, eta=1.0, num_data=1)

# These are the exact same numbers as in the EI tests below
configurations = [
@@ -449,15 +449,15 @@ def test_prior_NxD_TS(prior_model, hp_dict3, acq_ts, beta, prior_floor):
def test_prior_decay(hp_dict3, prior_model, acquisition_function, beta, prior_floor):
prior_model.update_prior(hp_dict3)
paf = PriorAcquisitionFunction(acquisition_function=acquisition_function, decay_beta=beta, prior_floor=prior_floor)
paf.update(model=prior_model, eta=1.0)
paf.update(model=prior_model, eta=1.0, num_data=0)
configurations = [ConfigurationMock([0.1, 0.1, 0.1])]

for i in range(1, 6):
prior_factor = np.power(0.2 * 1.0 * 1.8 + paf._prior_floor, beta / i)
acq = paf(configurations)
print(acq, 0.90020601136712231 * prior_factor)
assert np.isclose(acq[0][0], 0.90020601136712231 * prior_factor)
paf.update(model=prior_model, eta=1.0) # increase iteration number
paf.update(model=prior_model, eta=1.0, num_data=i)  # increase iteration number


def test_prior_discretize_pdf(prior_model, acquisition_function, beta, prior_floor):
Expand All @@ -467,7 +467,7 @@ def test_prior_discretize_pdf(prior_model, acquisition_function, beta, prior_flo
paf = PriorAcquisitionFunction(
acquisition_function=acquisition_function, decay_beta=beta, prior_floor=prior_floor, discretize=True
)
paf.update(model=prior_model, eta=1)
paf.update(model=prior_model, eta=1, num_data=1)

number_of_bins_1 = 13
number_of_bins_2 = 27521
