diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4ed7856d..da6e6d3c 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -36,6 +36,7 @@ jobs: - name: Install dependencies run: | + pip install build pip install ".[dev]" - name: Make docs diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 18e51f3f..62791d28 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -56,7 +56,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] os: ["ubuntu-latest", "macos-latest", "windows-latest"] steps: @@ -159,9 +159,9 @@ jobs: - name: Create sdist id: sdist run: | - python -m pip install --upgrade pip - python setup.py sdist - echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> $GITHUB_ENV + python -m pip install --upgrade pip build + python -m build --sdist + echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> "$GITHUB_ENV" - name: Install ${{ env.package-name }} run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e34ab6c6..e48f2f18 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -43,11 +43,12 @@ on: - main # Release branches - "[0-9]+.[0-9]+.X" - - create: tags: - v* + release: + types: [created] + env: package-name: ConfigSpace @@ -65,7 +66,8 @@ jobs: strategy: fail-fast: false matrix: - py: [cp37, cp38, cp39, cp310] + # We can drop "i686" once "cp37" is dropped + py: [cp37, cp38, cp39, cp310, cp311] arch: ["x86_64", "i686", "aarch64"] system: ["manylinux", "musllinux"] @@ -74,9 +76,19 @@ jobs: # Not supported by numpy - system: "musllinux" - # Scipy doesn't have a wheel for cp310 i686 + # Scipy lacks some i686 support, which cause the testing of the wheels + # to fail, as scipy is attempted to be built form scratch + - py: cp38 + arch: i686 + + - py: cp39 + 
arch: i686 + - py: cp310 - arch: "i686" + arch: i686 + + - py: cp311 + arch: i686 steps: - name: Checkout ${{ env.package-name }} @@ -87,7 +99,7 @@ jobs: uses: docker/setup-qemu-action@v1 - name: Build wheels with cibuildwheel to wheelhouse/*.whl - uses: pypa/cibuildwheel@v2.3.1 + uses: pypa/cibuildwheel@v2.11.4 env: CIBW_BUILD: ${{ matrix.py }}-${{ matrix.system }}_* CIBW_ARCHS: ${{ matrix.arch }} @@ -105,7 +117,7 @@ jobs: strategy: fail-fast: false matrix: - py: [cp37, cp38, cp39, cp310] + py: [cp37, cp38, cp39, cp310, cp311] arch: ["x86_64", "universal2", "arm64"] exclude: @@ -121,7 +133,7 @@ jobs: uses: actions/checkout@v2 - name: Build wheels with cibuildwheel to wheelhouse/*.whl - uses: pypa/cibuildwheel@v2.3.1 + uses: pypa/cibuildwheel@v2.11.4 env: CIBW_BUILD: ${{ matrix.py }}-* CIBW_ARCHS: ${{ matrix.arch }} @@ -139,21 +151,30 @@ jobs: strategy: fail-fast: false matrix: - py: [cp37, cp38, cp39, cp310] + py: [cp37, cp38, cp39, cp310, cp311] arch: ["AMD64", "x86"] + exclude: + # Scipy lacks win32 support, which cause the testing of the wheels + # to fail, as scipy is attempted to be built form scratch + - py: cp38 + arch: x86 + + - py: cp39 + arch: x86 - # We can't build win32 (x86) with cp310 because numpy doesn't have a win32 wheel - py: cp310 - arch: "x86" + arch: x86 + - py: cp311 + arch: x86 steps: - name: Checkout ${{ env.package-name }} uses: actions/checkout@v2 - name: Build wheels with cibuildwheel to wheelhouse/*.whl - uses: pypa/cibuildwheel@v2.3.1 + uses: pypa/cibuildwheel@v2.11.4 env: CIBW_BUILD: ${{ matrix.py }}-* CIBW_ARCHS: ${{ matrix.arch }} @@ -171,7 +192,7 @@ jobs: strategy: fail-fast: false matrix: - py: ["3.7", "3.8", "3.9", "3.10"] + py: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout ${{ env.package-name }} @@ -184,9 +205,9 @@ jobs: - name: Build source distribution run: | - python -m pip install --upgrade pip - python setup.py sdist - echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> 
$GITHUB_ENV + python -m pip install --upgrade pip build + python -m build --sdist + echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> "$GITHUB_ENV" - name: Twine check ${{ env.package-name }} run: | @@ -223,7 +244,7 @@ jobs: needs: [build_linux_wheels, build_macos_wheels, build_windows_wheels, build_sdist] # Only on a tagged release, push - if: startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' + if: startsWith(github.ref, 'refs/tags/v') && github.event_name != 'pull_request' steps: - name: Checkout ${{ env.package-name }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3d1d9a5e..73c362f5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,8 +7,8 @@ repos: name: mypy ConfigSpace files: ConfigSpace - - repo: https://gitlab.com/pycqa/flake8 - rev: 4.0.1 + - repo: https://github.com/pycqa/flake8 + rev: 5.0.4 hooks: - id: flake8 name: flake8 ConfigSpace diff --git a/ConfigSpace/__init__.py b/ConfigSpace/__init__.py index 6e845e89..bd4ed9f4 100644 --- a/ConfigSpace/__init__.py +++ b/ConfigSpace/__init__.py @@ -29,8 +29,6 @@ from ConfigSpace.__version__ import __version__ from ConfigSpace.__authors__ import __authors__ -import ConfigSpace.api.distributions as distributions -import ConfigSpace.api.types as types from ConfigSpace.api import (Beta, Categorical, Distribution, Float, Integer, Normal, Uniform) from ConfigSpace.conditions import (AndConjunction, EqualsCondition, @@ -54,6 +52,8 @@ UniformFloatHyperparameter, UniformIntegerHyperparameter, UnParametrizedHyperparameter) +import ConfigSpace.api.distributions as distributions +import ConfigSpace.api.types as types __all__ = [ "__authors__", diff --git a/ConfigSpace/__version__.py b/ConfigSpace/__version__.py index ee3313ca..28e42f3f 100644 --- a/ConfigSpace/__version__.py +++ b/ConfigSpace/__version__.py @@ -1,4 +1,4 @@ """Version information.""" # The following line *must* be the last in the module, exactly as 
formatted: -__version__ = "0.6.0" +__version__ = "0.6.1" diff --git a/ConfigSpace/c_util.pyx b/ConfigSpace/c_util.pyx index 6177ddd4..8b6b2169 100644 --- a/ConfigSpace/c_util.pyx +++ b/ConfigSpace/c_util.pyx @@ -1,5 +1,3 @@ -# cython: language_level=3 - from collections import deque import numpy as np diff --git a/ConfigSpace/conditions.pxd b/ConfigSpace/conditions.pxd index 50f79c6d..8c7af891 100644 --- a/ConfigSpace/conditions.pxd +++ b/ConfigSpace/conditions.pxd @@ -1,5 +1,3 @@ -# cython: language_level=3 - import numpy as np cimport numpy as np diff --git a/ConfigSpace/conditions.pyx b/ConfigSpace/conditions.pyx index 08f7add2..6d07d87e 100644 --- a/ConfigSpace/conditions.pyx +++ b/ConfigSpace/conditions.pyx @@ -26,8 +26,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# cython: language_level=3 - import io from functools import reduce from abc import ABCMeta, abstractmethod diff --git a/ConfigSpace/configuration_space.pyx b/ConfigSpace/configuration_space.pyx index fcfc62d0..fbe83fef 100644 --- a/ConfigSpace/configuration_space.pyx +++ b/ConfigSpace/configuration_space.pyx @@ -26,8 +26,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# cython: language_level=3 - import collections.abc from collections import defaultdict, deque, OrderedDict import copy @@ -1528,12 +1526,12 @@ class ConfigurationSpace(collections.abc.Mapping): new_child = new_configspace[child_name] new_parent = new_configspace[parent_name] - if hasattr(condition, 'value'): - condition_arg = getattr(condition, 'value') - substituted_condition = condition_type(child=new_child, parent=new_parent, value=condition_arg) - elif hasattr(condition, 'values'): + if hasattr(condition, 'values'): condition_arg = getattr(condition, 'values') substituted_condition = condition_type(child=new_child, parent=new_parent, values=condition_arg) + elif hasattr(condition, 'value'): + condition_arg = getattr(condition, 'value') + substituted_condition = condition_type(child=new_child, parent=new_parent, value=condition_arg) else: raise AttributeError(f'Did not find the expected attribute in condition {type(condition)}.') @@ -1575,15 +1573,24 @@ class ConfigurationSpace(collections.abc.Mapping): hyperparameter_name = getattr(forbidden.hyperparameter, 'name') new_hyperparameter = new_configspace[hyperparameter_name] - if hasattr(forbidden, 'value'): - forbidden_arg = getattr(forbidden, 'value') - substituted_forbidden = forbidden_type(hyperparameter=new_hyperparameter, value=forbidden_arg) - elif hasattr(forbidden, 'values'): + if hasattr(forbidden, 'values'): forbidden_arg = getattr(forbidden, 'values') substituted_forbidden = forbidden_type(hyperparameter=new_hyperparameter, values=forbidden_arg) + elif hasattr(forbidden, 'value'): + forbidden_arg = getattr(forbidden, 'value') + substituted_forbidden = forbidden_type(hyperparameter=new_hyperparameter, value=forbidden_arg) else: raise AttributeError(f'Did not find the expected attribute in forbidden {type(forbidden)}.') + new_forbiddens.append(substituted_forbidden) + elif isinstance(forbidden, ForbiddenRelation): + forbidden_type = type(forbidden) + left_name = getattr(forbidden.left, 'name') + 
left_hyperparameter = new_configspace[left_name] + right_name = getattr(forbidden.right, 'name') + right_hyperparameter = new_configspace[right_name] + + substituted_forbidden = forbidden_type(left=left_hyperparameter, right=right_hyperparameter) new_forbiddens.append(substituted_forbidden) else: raise TypeError(f'Did not expect the supplied forbidden type {type(forbidden)}.') diff --git a/ConfigSpace/forbidden.pxd b/ConfigSpace/forbidden.pxd index aa7abeb1..b381a19b 100644 --- a/ConfigSpace/forbidden.pxd +++ b/ConfigSpace/forbidden.pxd @@ -1,5 +1,3 @@ -# cython: language_level=3 - import numpy as np cimport numpy as np diff --git a/ConfigSpace/forbidden.pyx b/ConfigSpace/forbidden.pyx index 7e62f646..ef6b0a52 100644 --- a/ConfigSpace/forbidden.pyx +++ b/ConfigSpace/forbidden.pyx @@ -27,8 +27,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# cython: language_level=3 - import copy import numpy as np import io diff --git a/ConfigSpace/functional.py b/ConfigSpace/functional.py new file mode 100644 index 00000000..395f0c33 --- /dev/null +++ b/ConfigSpace/functional.py @@ -0,0 +1,81 @@ +from typing import Iterator + +from more_itertools import roundrobin +import numpy as np + + +def center_range( + center: int, + low: int, + high: int, + step: int = 1, +) -> Iterator[int]: + """Get a range centered around a value. 
+ + >>> list(center_range(5, 0, 10)) + [4, 6, 3, 7, 2, 8, 1, 9, 0, 10] + + Parameters + ---------- + center: int + The center of the range + + low: int + The low end of the range + + high: int + The high end of the range + + step: int = 1 + The step size + + Returns + ------- + Iterator[int] + """ + assert low <= center <= high + above_center = range(center + step, high + 1, step) + below_center = range(center - step, low - 1, -step) + yield from roundrobin(below_center, above_center) + + +def arange_chunked( + start: int, + stop: int, + step: int = 1, + *, + chunk_size: int, +) -> Iterator[np.ndarray]: + """Get np.arange in a chunked fashion. + + >>> list(arange_chunked(0, 10, chunk_size=3)) + [array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8]), array([9])] + + Parameters + ---------- + start: int + The start of the range + + stop: int + The stop of the range + + chunk_size: int + The maximum number of items in each chunk + + step: int = 1 + The step size + + Returns + ------- + Iterator[np.ndarray] + """ + assert step > 0 + assert chunk_size > 0 + assert start < stop + n_items = int(np.ceil((stop - start) / step)) + n_chunks = int(np.ceil(n_items / chunk_size)) + + for chunk in range(0, n_chunks): + chunk_start = start + (chunk * chunk_size * step) # a chunk holds chunk_size items, each step apart + chunk_stop = min(chunk_start + chunk_size * step, stop) + yield np.arange(chunk_start, chunk_stop, step) diff --git a/ConfigSpace/hyperparameters.pxd b/ConfigSpace/hyperparameters.pxd index ca18809f..f7ff4a9f 100644 --- a/ConfigSpace/hyperparameters.pxd +++ b/ConfigSpace/hyperparameters.pxd @@ -1,4 +1,3 @@ -# cython: language_level=3 from typing import Union import numpy as np cimport numpy as np diff --git a/ConfigSpace/hyperparameters.pyx b/ConfigSpace/hyperparameters.pyx index e5b3d5f5..0c6b8c6e 100644 --- a/ConfigSpace/hyperparameters.pyx +++ b/ConfigSpace/hyperparameters.pyx @@ -27,16 +27,33 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import copy import io -# cython: language_level=3 import math import warnings from collections import OrderedDict, Counter +from itertools import count +from more_itertools import roundrobin, duplicates_everseen from typing import List, Any, Dict, Union, Set, Tuple, Optional, Sequence -import numpy as np from scipy.stats import truncnorm, beta as spbeta, norm +import numpy as np + +# It's necessary to call "import_array" if you use any part of the +# numpy PyArray_* API. From Cython 3, accessing attributes like +# ".shape" on a typed Numpy array use this API. Therefore we recommend +# always calling "import_array" whenever you "cimport numpy" cimport numpy as np +np.import_array() + +from ConfigSpace.functional import center_range, arange_chunked +# OPTIM: Some operations generate an arange which could blow up memory if +# done over the entire space of integers (int32/64). +# To combat this, `arange_chunked` is used in scenarios where reduction +# operations over all the elements could be done in partial steps independently. +# For example, a sum over the pdf values could be done in chunks. +# This may add some small overhead for smaller ranges but is unlikely to +# be noticeable. 
+ARANGE_CHUNKSIZE = 10_000_000 cdef class Hyperparameter(object): @@ -1542,89 +1559,135 @@ cdef class UniformIntegerHyperparameter(IntegerHyperparameter): else: return False - def get_num_neighbors(self, value=None) -> int: - return self.upper - self.lower + def get_num_neighbors(self, value = None) -> int: + # If there is a value in the range, then that value is not a neighbor of itself + # so we need to remove one + if value is not None and self.lower <= value <= self.upper: + return self.upper - self.lower - 1 + else: + return self.upper - self.lower def get_neighbors( self, - value: Union[int, float], + value: float, rs: np.random.RandomState, number: int = 4, transform: bool = False, std: float = 0.2, ) -> List[int]: - cdef int n_requested = number - cdef int idx = 0 - cdef int i = 0 - neighbors = [] # type: List[int] - cdef int sampled_neighbors = 0 - _neighbors_as_int = set() # type: Set[int] - cdef long long int_value = self._transform(value) - cdef long long new_int_value = 0 - cdef float new_value = 0.0 - cdef np.ndarray samples - cdef double[:] samples_view - - if self.upper - self.lower <= n_requested: - transformed_value = self._transform(value) - for n in range(self.lower, self.upper + 1): - if n != int_value: - if transform: - neighbors.append(n) - else: - n = self._inverse_transform(n) - neighbors.append(n) + """Get the neighbors of a value + + NOTE + ---- + **This assumes the value is in the unit-hypercube [0, 1]** + + Parameters + ---------- + value: float + The value to get neighbors around. This assume the ``value`` has been + converted to the [0, 1] range which can be done with ``_inverse_transform``. + + rs: RandomState + The random state to use + + number: int = 4 + How many neighbors to get + + transform: bool = False + Whether to transform this value from the unit cube, back to the + hyperparameter's specified range of values. + + std: float = 0.2 + The std. dev. to use in the [0, 1] hypercube space while sampling + for neighbors. 
+ + Returns + ------- + List[int] + Some ``number`` of neighbors centered around ``value``. + """ + assert 0 <= value <= 1, ( + "For get neighbors of UniformIntegerHyperparameter, the value" + " if assumed to be in the unit-hypercube [0, 1]. If this was not" + " the behaviour assumed, please raise a ticket on github." + ) + assert number < 1000000, ( + "Can only generate less than 1 million neighbors." + ) + # Convert python values to cython ones + cdef long long center = self._transform(value) + cdef long long lower = self.lower + cdef long long upper = self.upper + cdef unsigned int n_requested = number + cdef unsigned long long n_neighbors = upper - lower # every integer in [lower, upper] except center + cdef long long stepsize = self.q if self.q is not None else 1 + + neighbors = [] + + cdef long long v # A value that's possible to return + if n_neighbors <= n_requested: # not more neighbors than requested: enumerate them all + + for v in range(lower, center): + neighbors.append(v) + for v in range(center + 1, upper + 1): # upper is inclusive + neighbors.append(v) + + if transform: + return neighbors + else: + return self._inverse_transform(np.asarray(neighbors)).tolist() + + # A truncated normal between 0 and 1, centered on the value with a scale of std. 
+ # This will be sampled from and converted to the corresponding int value + # However, this is too slow - we use the "poor man's truncnorm below" + # cdef np.ndarray float_indices = truncnorm.rvs( + # a=(0 - value) / std, + # b=(1 - value) / std, + # loc=value, + # scale=std, + # size=number, + # random_state=rs + # ) + # We sample five times as many values as needed and weed them out below + # (perform rejection sampling and make sure we don't sample any neighbor twice) + # This increases our chances of not having to fill the neighbors list by calling + # `center_range` + # Five is an arbitrary number and can probably be tuned to reduce overhead + cdef np.ndarray float_indices = rs.normal(value, std, size=number * 5) + cdef np.ndarray mask = (float_indices >= 0) & (float_indices <= 1) + float_indices = float_indices[mask] + + cdef np.ndarray possible_neighbors_as_array = self._transform_vector(float_indices).astype(np.longlong) + cdef long long [:] possible_neighbors = possible_neighbors_as_array + + cdef unsigned int n_neighbors_generated = 0 + cdef unsigned int n_candidates = len(float_indices) + cdef unsigned int candidate_index = 0 + cdef set seen = {center} + while n_neighbors_generated < n_requested and candidate_index < n_candidates: + v = possible_neighbors[candidate_index] + if v not in seen: + seen.add(v) + n_neighbors_generated += 1 + candidate_index += 1 + + if n_neighbors_generated < n_requested: + numbers_around = center_range(center, lower, upper, stepsize) + + while n_neighbors_generated < n_requested: + v = next(numbers_around) + if v not in seen: + seen.add(v) + n_neighbors_generated += 1 + + seen.remove(center) + neighbors = list(seen) + if transform: + return neighbors else: - samples = rs.normal(loc=value, scale=std, size=250) - samples_view = samples - - while sampled_neighbors < n_requested: - - while True: - new_value = samples_view[idx] - idx += 1 - i += 1 - if idx >= 250: - samples = rs.normal(loc=value, scale=std, size=250) - 
samples_view = samples - idx = 0 - if new_value < 0 or new_value > 1: - continue - new_int_value = self._transform(new_value) - if int_value == new_int_value: - continue - elif i >= 200: - # Fallback to uniform sampling if generating samples correctly - # takes too long - values_to_sample = [j for j in range(self.lower, self.upper + 1) - if j != int_value] - samples = rs.choice( - values_to_sample, - size=n_requested, - replace=False, - ) - for sample in samples: - if transform: - neighbors.append(sample) - else: - sample = self._inverse_transform(sample) - neighbors.append(sample) - break - elif new_int_value in _neighbors_as_int: - continue - elif int_value != new_int_value: - break - - _neighbors_as_int.add(new_int_value) - sampled_neighbors += 1 - if transform: - neighbors.append(new_int_value) - else: - new_value = self._inverse_transform(new_int_value) - neighbors.append(new_value) + return self._inverse_transform(np.array(neighbors)).tolist() - return neighbors def _pdf(self, vector: np.ndarray) -> np.ndarray: """ @@ -1891,31 +1954,65 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): rs: np.random.RandomState, number: int = 4, transform: bool = False, - ) -> List[Union[np.ndarray, float, int]]: - neighbors = [] # type: List[Union[np.ndarray, float, int]] - while len(neighbors) < number: - rejected = True - iteration = 0 - while rejected: - iteration += 1 - new_value = rs.normal(value, self.sigma) - int_value = self._transform(value) - new_int_value = self._transform(new_value) - - if self.lower is not None and self.upper is not None: - int_value = min(max(int_value, self.lower), self.upper) - new_int_value = min(max(new_int_value, self.lower), self.upper) - - if int_value != new_int_value: - rejected = False - elif iteration > 100000: - raise ValueError('Probably caught in an infinite loop.') + ) -> List[int]: + stepsize = self.q if self.q is not None else 1 + bounded = self.lower is not None + mu = self.mu + sigma = self.sigma + + neighbors: 
set[int] = set() + center = self._transform(value) + + if bounded: # truncate the normal to [lower, upper] so every sample is a legal value + float_indices = truncnorm.rvs( + a = (self.lower - mu) / sigma, + b = (self.upper - mu) / sigma, + loc=mu, # NOTE(review): a/b are standardized against mu, so loc must match them - confirm mu (not center) is the intended location + scale=sigma, + size=number, + random_state=rs, + ) + else: # no bounds to respect, a plain normal is enough + float_indices = norm.rvs( + loc=mu, + scale=sigma, + size=number, + random_state=rs, + ) + + possible_neighbors = self._transform_vector(float_indices).astype(np.longlong) + + for possible_neighbor in possible_neighbors: + # If we already happen to have this neighbor, pick the closest + # number around it that is not already included + if possible_neighbor in neighbors or possible_neighbor == center: + + if bounded: + numbers_around = center_range(possible_neighbor, self.lower, self.upper, stepsize) + else: + decrement_count = count(possible_neighbor - stepsize, step=-stepsize) + increment_count = count(possible_neighbor + stepsize, step=stepsize) + numbers_around = roundrobin(decrement_count, increment_count) + + valid_numbers_around = ( + n for n in numbers_around + if (n not in neighbors and n != center) + ) + possible_neighbor = next(valid_numbers_around, None) + + if possible_neighbor is None: + raise ValueError( + f"Found no more eligble neighbors for value {center}" + f"\nfound {neighbors}" + ) + + # We now have a valid sample, add it to the list of neighbors + neighbors.add(possible_neighbor) if transform: - neighbors.append(self._transform(new_value)) + return [self._transform(neighbor) for neighbor in neighbors] else: - neighbors.append(new_value) - return neighbors + return list(neighbors) def _compute_normalization(self): if self.lower is None: @@ -1924,9 +2021,8 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): return 1 else: - all_integer_values = np.arange(self.lower, self.upper + 1) - all_probabilities = self.nfhp.pdf(all_integer_values) - return np.sum(all_probabilities) + chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) + return sum(self.nfhp.pdf(chunk).sum() for 
chunk in chunks) def _pdf(self, vector: np.ndarray) -> np.ndarray: """ @@ -1952,9 +2048,9 @@ cdef class NormalIntegerHyperparameter(IntegerHyperparameter): return self.nfhp._pdf(vector) / self.normalization_constant def get_max_density(self): - all_integer_values = np.arange(self.lower, self.upper + 1) - all_probabilities = self.nfhp.pdf(all_integer_values) - return np.max(all_probabilities) / self.normalization_constant + chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) + maximum = max(self.nfhp.pdf(chunk).max() for chunk in chunks) + return maximum / self.normalization_constant def get_size(self) -> float: if self.lower is None: @@ -2128,9 +2224,8 @@ cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): return value def _compute_normalization(self): - all_integer_values = np.arange(self.lower, self.upper + 1) - all_probabilities = self.bfhp.pdf(all_integer_values) - return np.sum(all_probabilities) + chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) + return sum(self.bfhp.pdf(chunk).sum() for chunk in chunks) def _pdf(self, vector: np.ndarray) -> np.ndarray: """ @@ -2156,9 +2251,9 @@ cdef class BetaIntegerHyperparameter(UniformIntegerHyperparameter): return self.bfhp._pdf(vector) / self.normalization_constant def get_max_density(self): - all_integer_values = np.arange(self.lower, self.upper + 1) - all_probabilities = self.bfhp.pdf(all_integer_values) - return np.max(all_probabilities) / self.normalization_constant + chunks = arange_chunked(self.lower, self.upper + 1, chunk_size=ARANGE_CHUNKSIZE) + maximum = max(self.bfhp.pdf(chunk).max() for chunk in chunks) + return maximum / self.normalization_constant cdef class CategoricalHyperparameter(Hyperparameter): @@ -2522,7 +2617,13 @@ cdef class CategoricalHyperparameter(Hyperparameter): Probability density values of the input vector """ probs = np.array(self.probabilities) + nan = np.isnan(vector) + if np.any(nan): + # Temporarily pick any 
valid index to use `vector` as an index for `probs` + vector[nan] = 0 res = np.array(probs[vector.astype(int)]) + if np.any(nan): + res[nan] = 0 if res.ndim == 0: return res.reshape(-1) return res diff --git a/ConfigSpace/util.pyx b/ConfigSpace/util.pyx index 5cae4801..f56d401f 100644 --- a/ConfigSpace/util.pyx +++ b/ConfigSpace/util.pyx @@ -132,16 +132,22 @@ def get_one_exchange_neighbourhood( list(configuration.configuration_space._hyperparameters.keys()) ) hyperparameters_list_length = len(hyperparameters_list) - hyperparameters_used = [hp.name - for hp in configuration.configuration_space.get_hyperparameters() - if hp.get_num_neighbors(configuration.get(hp.name)) == 0 and - configuration.get(hp.name)is not None] + hyperparameters_used = [ + hp.name + for hp in configuration.configuration_space.get_hyperparameters() + if ( + hp.get_num_neighbors(configuration.get(hp.name)) == 0 + and configuration.get(hp.name)is not None + ) + ] number_of_usable_hyperparameters = sum(np.isfinite(configuration.get_array())) n_neighbors_per_hp = { - hp.name: num_neighbors if - isinstance(hp, NumericalHyperparameter) and hp.get_num_neighbors( - configuration.get(hp.name))> num_neighbors - else hp.get_num_neighbors(configuration.get(hp.name)) + hp.name: num_neighbors + if ( + isinstance(hp, NumericalHyperparameter) + and hp.get_num_neighbors(configuration.get(hp.name))> num_neighbors + ) else + hp.get_num_neighbors(configuration.get(hp.name)) for hp in configuration.configuration_space.get_hyperparameters() } diff --git a/Makefile b/Makefile index bde61c2b..fc2d76b8 100644 --- a/Makefile +++ b/Makefile @@ -28,6 +28,19 @@ DIST := "${DIR}/dist"" DOCDIR := "${DIR}/docs" BUILD := "${DIR}/build" INDEX_HTML := "file://${DOCDIR}/build/html/index.html" +NUMPY_INCLUDE := $(shell python -c 'import numpy; print(numpy.get_include())') + +# https://stackoverflow.com/questions/40750596/how-do-i-escape-bracket-in-makefile +CP := ) + +benchmark: + python scripts/benchmark_sampling.py + 
+cython-annotate: + C_INCLUDE_PATH=$(NUMPY_INCLUDE) cython -3 --directive boundscheck=False,wraparound=False --annotate ConfigSpace/*.pyx + +cython-html: cython-annotate + python -c "import webbrowser; from pathlib import Path; [webbrowser.open(f'file://{path}') for path in Path('ConfigSpace').absolute().glob('*.html')]" install-dev: $(PIP) install -e ".[dev]" @@ -45,7 +58,7 @@ clean-docs: clean: clean-build clean-docs build: - python setup.py develop + python -m build # Running build before making docs is needed all be it very slow. # Without doing a full build, the doctests seem to use docstrings from the last compiled build diff --git a/changelog.md b/changelog.md index e328fc1f..9766a5af 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,8 @@ +# Version 0.6.1 + +* MAINT #286: Add support for Python 3.11. +* FIX #282: Fixes a memory leak in the neighborhood generation of integer hyperparameters. + # Version 0.6.0 * ADD #255: An easy interface of `Float`, `Integer`, `Categorical` for creating search spaces. 
diff --git a/scripts/benchmark_sampling.py b/scripts/benchmark_sampling.py index 8869e3f4..cbf97d3c 100644 --- a/scripts/benchmark_sampling.py +++ b/scripts/benchmark_sampling.py @@ -31,7 +31,7 @@ def run_test(configuration_space_path): validation_times = [] # Sample a little bit - for i in range(10): + for i in range(20): cs.seed(i) start_time = time.time() configurations = cs.sample_configuration(size=n_configs) @@ -40,7 +40,7 @@ def run_test(configuration_space_path): for j, c in enumerate(configurations): - if i == 0: + if i > 10: neighborhood = ConfigSpace.util.get_one_exchange_neighbourhood( c, seed=i * j, num_neighbors=4) diff --git a/setup.py b/setup.py index 15773e0b..1d080e66 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,7 @@ from setuptools import Extension, find_packages, setup from setuptools.command.build_ext import build_ext +from Cython.Build import cythonize # must go after setuptools # Helper functions @@ -59,14 +60,14 @@ def finalize_options(self): AUTHOR_EMAIL = "feurerm@informatik.uni-freiburg.de" TEST_SUITE = "pytest" -SETUP_REQS = ["numpy", "cython"] -INSTALL_REQS = ["numpy", "cython", "pyparsing", "scipy", "typing_extensions"] +INSTALL_REQS = ["numpy", "pyparsing", "scipy", "typing_extensions", "more_itertools"] MIN_PYTHON_VERSION = ">=3.7" CLASSIFIERS = [ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Development Status :: 4 - Beta", "Natural Language :: English", "Intended Audience :: Developers", @@ -85,8 +86,36 @@ def finalize_options(self): COMPILER_DIRECTIVES = { "boundscheck": False, "wraparound": False, + "language_level": "3", } + +""" +# Profiling +Set the below flag to True to enable profiling of the code. This will cause some minor performance +overhead so it should only be used for debugging purposes. 
+ +Use [`py-spy`](https://github.com/benfred/py-spy) with [speedscope.app](https://www.speedscope.app/) +```bash +pip install py-spy +py-spy record --rate 800 --format speedscope --subprocesses --native -o profile.svg -- python