diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0374db0e..adce032d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,45 +34,49 @@ jobs: test: needs: - static - runs-on: ubuntu-latest timeout-minutes: 60 strategy: matrix: include: - - script: remote-test - coverage-name: remote - tf-version: tensorflow==2.10 + - script: test + coverage-name: gpu + tf-version: tensorflow[and-cuda] + runs-on: [self-hosted, gpu] - script: test python-version: "3.11" coverage-name: latest - script: test - tf-version: tensorflow==2.4.4 + tf-version: tensorflow~=2.6.0 python-version: "3.8" coverage-name: oldest - script: test - tf-version: tensorflow~=2.6.0 - python-version: "3.8" - coverage-name: tf-2.6 - - script: remote-docs - tf-version: tensorflow==2.10 + tf-version: tensorflow~=2.9.0 + coverage-name: tf-2.9 + - script: test + tf-version: tensorflow~=2.13.0 + coverage-name: tf-2.13 + - script: docs + tf-version: tensorflow[and-cuda]==2.16.1 python-version: "3.9" - - script: remote-examples - tf-version: tensorflow==2.10 + runs-on: [self-hosted, gpu] + - script: examples + tf-version: tensorflow[and-cuda]==2.16.1 + runs-on: [self-hosted, gpu] fail-fast: false env: TF_VERSION: ${{ matrix.tf-version || 'tensorflow' }} - SSH_KEY: ${{ secrets.SSH_KEY }} - SSH_CONFIG: ${{ secrets.SSH_CONFIG }} - GH_TOKEN: ${{ secrets.GH_TOKEN }} + TF_FORCE_GPU_ALLOW_GROWTH: true + GH_TOKEN: ${{ secrets.PUBLIC_GH_TOKEN }} + runs-on: ${{ matrix.runs-on || 'ubuntu-latest' }} steps: - uses: nengo/nengo-bones/actions/setup@main with: python-version: ${{ matrix.python-version || '3.10' }} - uses: nengo/nengo-bones/actions/generate-and-check@main - - name: Write secrets to file + - name: Install docs requirements + if: ${{ contains('docs examples', matrix.script) }} run: | - mkdir -p ~/.ssh - echo '${{ secrets.AZURE_PEM }}' > ~/.ssh/azure.pem + micromamba install -y pandoc matplotlib - uses: nengo/nengo-bones/actions/run-script@main with: name: ${{ matrix.script }} diff --git a/.nengobones.yml b/.nengobones.yml index 2abad0ec..92730e7c 100644 --- a/.nengobones.yml +++ b/.nengobones.yml @@ -17,14 +17,15 @@ manifest_in: {} setup_py: install_req: + - anyio<4 # not compatible with older tensorflow versions - packaging>=20.9 - scipy>=1.0.0 - - tensorflow>=2.4.4 + - tensorflow>=2.6.0 tests_req: - pytest>=6.1.0 - pytest-rng>=1.0.0 docs_req: - - matplotlib>=3.0.2,<3.4.3 + - matplotlib>=3.8.4 - jupyter>=1.0.0 - seaborn>=0.9.0 - sphinx>=1.8 @@ -66,39 +67,15 @@ docs_conf_py: ci_scripts: - template: static - template: docs + pre_commands: + # We run this ahead of time, otherwise the download progress bar causes + # problems in the notebook rendering + - python -c "import tensorflow as tf; tf.keras.datasets.mnist.load_data()" - template: examples - template: test coverage: true pip_install: - $TF_VERSION - - template: remote-script - remote_script: test - output_name: remote-test - host: azure - azure_name: nengo-dl - azure_group: nengo-ci - coverage: true - remote_vars: - TF_FORCE_GPU_ALLOW_GROWTH: "true" - TF_VERSION: $TF_VERSION - remote_setup: - - micromamba install -y "$TF_VERSION" cudnn=8.4 - - template: remote-script - remote_script: docs - output_name: remote-docs - host: azure-docs - azure_name: nengo-dl-docs - azure_group: nengo-ci - remote_setup: - - micromamba install -y "$TF_VERSION" cudnn=8.4 - - template: remote-script - remote_script: examples - output_name: remote-examples - host: azure-examples - azure_name: nengo-dl-examples - azure_group: nengo-ci - remote_setup: - - micromamba 
install -y "$TF_VERSION" cudnn=8.4 - template: deploy wheel: true @@ -109,6 +86,6 @@ pyproject_toml: {} version_py: type: semver major: 0 - minor: 7 - patch: 1 + minor: 8 + patch: 0 release: false diff --git a/CHANGES.rst b/CHANGES.rst index 004f74c9..2425b457 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -19,10 +19,10 @@ Release history - Removed - Fixed -0.7.1 (unreleased) +0.8.0 (unreleased) ================== -*Compatible with TensorFlow 2.4 - 2.13* +*Compatible with TensorFlow 2.6 - 2.16* 0.7.0 (July 20, 2023) ===================== diff --git a/docs/basic-usage.rst b/docs/basic-usage.rst index 78d525be..62c7dcb7 100644 --- a/docs/basic-usage.rst +++ b/docs/basic-usage.rst @@ -11,13 +11,14 @@ a new LMU layer: .. testcode:: + import keras import keras_lmu lmu_layer = keras_lmu.LMU( memory_d=1, order=256, theta=784, - hidden_cell=tf.keras.layers.SimpleRNNCell(units=10), + hidden_cell=keras.layers.SimpleRNNCell(units=10), ) Note that the values used above for ``memory_d``, ``order``, diff --git a/docs/examples/psMNIST-training.png b/docs/examples/psMNIST-training.png index dc720994..946a86dd 100644 Binary files a/docs/examples/psMNIST-training.png and b/docs/examples/psMNIST-training.png differ diff --git a/docs/examples/psMNIST-weights.hdf5 b/docs/examples/psMNIST-weights.hdf5 deleted file mode 100644 index a8b9a566..00000000 Binary files a/docs/examples/psMNIST-weights.hdf5 and /dev/null differ diff --git a/docs/examples/psMNIST.ipynb b/docs/examples/psMNIST.ipynb index 94bc1375..9167c777 100644 --- a/docs/examples/psMNIST.ipynb +++ b/docs/examples/psMNIST.ipynb @@ -42,6 +42,7 @@ "source": [ "%matplotlib inline\n", "\n", + "import keras\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from IPython.display import Image, display\n", @@ -261,7 +262,7 @@ " memory_d=1,\n", " order=256,\n", " theta=n_pixels,\n", - " hidden_cell=tf.keras.layers.SimpleRNNCell(212),\n", + " hidden_cell=keras.layers.SimpleRNNCell(212),\n", " hidden_to_memory=False,\n", " memory_to_memory=False,\n", " input_to_hidden=True,\n", @@ -269,14 +270,14 @@ ")\n", "\n", "# TensorFlow layer definition\n", - "inputs = tf.keras.Input((n_pixels, 1))\n", + "inputs = keras.Input((n_pixels, 1))\n", "lmus = lmu_layer(inputs)\n", - "outputs = tf.keras.layers.Dense(10)(lmus)\n", + "outputs = keras.layers.Dense(10)(lmus)\n", "\n", "# TensorFlow model definition\n", - "model = tf.keras.Model(inputs=inputs, outputs=outputs)\n", + "model = keras.Model(inputs=inputs, outputs=outputs)\n", "model.compile(\n", - " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"],\n", ")\n", @@ -316,10 +317,13 @@ "batch_size = 100\n", "epochs = 10\n", "\n", - "saved_weights_fname = \"./psMNIST-weights.hdf5\"\n", + "saved_model_fname = \"./psMNIST.keras\"\n", "callbacks = [\n", - " tf.keras.callbacks.ModelCheckpoint(\n", - " filepath=saved_weights_fname, monitor=\"val_loss\", verbose=1, save_best_only=True\n", + " keras.callbacks.ModelCheckpoint(\n", + " filepath=saved_model_fname,\n", + " monitor=\"val_accuracy\",\n", + " verbose=1,\n", + " save_best_only=True,\n", " ),\n", "]\n", "\n", @@ -393,8 +397,8 @@ "metadata": {}, "outputs": [], "source": [ - "model.load_weights(saved_weights_fname)\n", - "accuracy = model.evaluate(X_test, Y_test)[1] * 100\n", + "model.load_weights(saved_model_fname)\n", + "accuracy = model.evaluate(X_test, Y_test, verbose=0)[1] * 100\n", "print(f\"Test accuracy: 
{round(accuracy, 2):0.2f}%\")" ] }, diff --git a/docs/examples/psMNIST.keras b/docs/examples/psMNIST.keras new file mode 100644 index 00000000..039142c6 Binary files /dev/null and b/docs/examples/psMNIST.keras differ diff --git a/keras_lmu/layers.py b/keras_lmu/layers.py index 62f51ca2..63783d0a 100644 --- a/keras_lmu/layers.py +++ b/keras_lmu/layers.py @@ -2,30 +2,38 @@ import warnings +import keras import numpy as np import tensorflow as tf from packaging import version # pylint: disable=ungrouped-imports -if version.parse(tf.__version__) < version.parse("2.6.0rc0"): - from tensorflow.python.keras.layers.recurrent import DropoutRNNCellMixin -elif version.parse(tf.__version__) < version.parse("2.9.0rc0"): +tf_version = version.parse(tf.__version__) +if tf_version < version.parse("2.9.0rc0"): from keras.layers.recurrent import DropoutRNNCellMixin -elif version.parse(tf.__version__) < version.parse("2.13.0rc0"): +elif tf_version < version.parse("2.13.0rc0"): from keras.layers.rnn.dropout_rnn_cell_mixin import DropoutRNNCellMixin -else: +elif tf_version < version.parse("2.16.0rc0"): from keras.src.layers.rnn.dropout_rnn_cell_mixin import DropoutRNNCellMixin +else: + from keras.src.layers.rnn.dropout_rnn_cell import ( + DropoutRNNCell as DropoutRNNCellMixin, + ) -if version.parse(tf.__version__) < version.parse("2.8.0rc0"): +if tf_version < version.parse("2.8.0rc0"): from tensorflow.keras.layers import Layer as BaseRandomLayer -elif version.parse(tf.__version__) < version.parse("2.13.0rc0"): +elif tf_version < version.parse("2.13.0rc0"): from keras.engine.base_layer import BaseRandomLayer -else: +elif tf_version < version.parse("2.16.0rc0"): from keras.src.engine.base_layer import BaseRandomLayer +else: + from keras.layers import Layer as BaseRandomLayer @tf.keras.utils.register_keras_serializable("keras-lmu") -class LMUCell(DropoutRNNCellMixin, BaseRandomLayer): +class LMUCell( + DropoutRNNCellMixin, BaseRandomLayer +): # pylint: disable=too-many-ancestors """ Implementation of LMU cell (to be used within Keras RNN wrapper). @@ -54,7 +62,7 @@ class to create a recurrent Keras layer to process the whole sequence. Calling entire sequence will still be processed in order for information to be projected to and from the hidden layer. If ``trainable_theta`` is enabled, then theta will be updated during the course of training. - hidden_cell : ``tf.keras.layers.Layer`` + hidden_cell : ``keras.layers.Layer`` Keras Layer/RNNCell implementing the hidden component. trainable_theta : bool If True, theta is learnt over the course of training. Otherwise, it is kept @@ -79,15 +87,15 @@ class to create a recurrent Keras layer to process the whole sequence. Calling no weights will be used, and the input size must match the memory/hidden size. recurrent_initializer : ``tf.initializers.Initializer`` Initializer for ``memory_to_memory`` weights (if that connection is enabled). - kernel_regularizer : ``tf.keras.regularizers.Regularizer`` + kernel_regularizer : ``keras.regularizers.Regularizer`` Regularizer for weights from input to memory/hidden component. - recurrent_regularizer : ``tf.keras.regularizers.Regularizer`` + recurrent_regularizer : ``keras.regularizers.Regularizer`` Regularizer for ``memory_to_memory`` weights (if that connection is enabled). use_bias : bool If True, the memory component includes a bias term. bias_initializer : ``tf.initializers.Initializer`` Initializer for the memory component bias term. Only used if ``use_bias=True``. 
- bias_regularizer : ``tf.keras.regularizers.Regularizer`` + bias_regularizer : ``keras.regularizers.Regularizer`` Regularizer for the memory component bias term. Only used if ``use_bias=True``. dropout : float Dropout rate on input connections. @@ -124,6 +132,7 @@ def __init__( bias_regularizer=None, dropout=0, recurrent_dropout=0, + seed=None, **kwargs, ): super().__init__(**kwargs) @@ -146,6 +155,9 @@ def __init__( self.bias_regularizer = bias_regularizer self.dropout = dropout self.recurrent_dropout = recurrent_dropout + self.seed = seed + if tf_version >= version.parse("2.16.0"): + self.seed_generator = keras.random.SeedGenerator(seed) self.kernel = None self.recurrent_kernel = None @@ -189,7 +201,7 @@ def theta(self): initial value passed in to the constructor. """ if self.built: - return 1 / tf.keras.backend.get_value(self.theta_inv) + return 1 / self.theta_inv.numpy() return self._init_theta @@ -292,7 +304,7 @@ def build(self, input_shape): name="theta_inv", shape=(), initializer=tf.initializers.constant(1 / self._init_theta), - constraint=tf.keras.constraints.NonNeg(), + constraint=keras.constraints.NonNeg(), ) else: self.theta_inv = tf.constant(1 / self._init_theta, dtype=self.dtype) @@ -317,7 +329,7 @@ def build(self, input_shape): # generate A and B matrices self._gen_AB() - def call(self, inputs, states, training=None): # noqa: C901 + def call(self, inputs, states, training=False): # noqa: C901 """ Apply this cell to inputs. @@ -328,9 +340,6 @@ def call(self, inputs, states, training=None): # noqa: C901 with some additional bookkeeping. """ - if training is None: - training = tf.keras.backend.learning_phase() - states = tf.nest.flatten(states) # state for the LMU memory @@ -344,18 +353,18 @@ def call(self, inputs, states, training=None): # noqa: C901 if self.hidden_to_memory else inputs ) - if self.dropout > 0: - u *= self.get_dropout_mask_for_cell(u, training) + if training and self.dropout > 0: + u *= self.get_dropout_mask(u) if self.kernel is not None: u = tf.matmul(u, self.kernel, name="kernel_matmul") if self.bias is not None: u = u + self.bias if self.memory_to_memory: - if self.recurrent_dropout > 0: + if training and self.recurrent_dropout > 0: # note: we don't apply dropout to the memory input, only # the recurrent kernel - rec_m = m * self.get_recurrent_dropout_mask_for_cell(m, training) + rec_m = m * self.get_recurrent_dropout_mask(m) else: rec_m = m @@ -409,6 +418,36 @@ def call(self, inputs, states, training=None): # noqa: C901 return o, [m] + h + def get_dropout_mask(self, step_input): + """Get dropout mask for cell input.""" + if tf_version < version.parse("2.16.0rc0"): + return super().get_dropout_mask_for_cell(step_input, True, count=1) + return super().get_dropout_mask(step_input) + + def get_recurrent_dropout_mask(self, step_input): + """Get dropout mask for recurrent input.""" + if tf_version < version.parse("2.16.0rc0"): + return super().get_recurrent_dropout_mask_for_cell( + step_input, True, count=1 + ) + + # This is copied from DropoutRNNCell.get_recurrent_dropout_mask, with the + # change noted below in order to fix a bug. 
+ # See https://github.com/keras-team/keras/issues/19395 + if not hasattr(self, "_recurrent_dropout_mask"): + self._recurrent_dropout_mask = None + if self._recurrent_dropout_mask is None and self.recurrent_dropout > 0: + ones = keras.ops.ones_like(step_input) + self._recurrent_dropout_mask = keras.src.backend.random.dropout( + ones, + # --- START DIFF --- + # rate=self.dropout, + rate=self.recurrent_dropout, + # --- END DIFF --- + seed=self.seed_generator, + ) + return self._recurrent_dropout_mask + def reset_dropout_mask(self): """Reset dropout mask for memory and hidden components.""" super().reset_dropout_mask() @@ -430,7 +469,7 @@ def get_config(self): "memory_d": self.memory_d, "order": self.order, "theta": self._init_theta, - "hidden_cell": tf.keras.layers.serialize(self.hidden_cell), + "hidden_cell": keras.layers.serialize(self.hidden_cell), "trainable_theta": self.trainable_theta, "hidden_to_memory": self.hidden_to_memory, "memory_to_memory": self.memory_to_memory, @@ -445,6 +484,7 @@ def get_config(self): "bias_regularizer": self.bias_regularizer, "dropout": self.dropout, "recurrent_dropout": self.recurrent_dropout, + "seed": self.seed, } ) @@ -457,13 +497,13 @@ def from_config(cls, config): config["hidden_cell"] = ( None if config["hidden_cell"] is None - else tf.keras.layers.deserialize(config["hidden_cell"]) + else keras.layers.deserialize(config["hidden_cell"]) ) return super().from_config(config) @tf.keras.utils.register_keras_serializable("keras-lmu") -class LMU(tf.keras.layers.Layer): +class LMU(keras.layers.Layer): # pylint: disable=too-many-ancestors,abstract-method """ A layer of trainable low-dimensional delay systems. @@ -495,7 +535,7 @@ class LMU(tf.keras.layers.Layer): entire sequence will still be processed in order for information to be projected to and from the hidden layer. If ``trainable_theta`` is enabled, then theta will be updated during the course of training. - hidden_cell : ``tf.keras.layers.Layer`` + hidden_cell : ``keras.layers.Layer`` Keras Layer/RNNCell implementing the hidden component. trainable_theta : bool If True, theta is learnt over the course of training. Otherwise, it is kept @@ -520,15 +560,15 @@ class LMU(tf.keras.layers.Layer): no weights will be used, and the input size must match the memory/hidden size. recurrent_initializer : ``tf.initializers.Initializer`` Initializer for ``memory_to_memory`` weights (if that connection is enabled). - kernel_regularizer : ``tf.keras.regularizers.Regularizer`` + kernel_regularizer : ``keras.regularizers.Regularizer`` Regularizer for weights from input to memory/hidden component. - recurrent_regularizer : ``tf.keras.regularizers.Regularizer`` + recurrent_regularizer : ``keras.regularizers.Regularizer`` Regularizer for ``memory_to_memory`` weights (if that connection is enabled). use_bias : bool If True, the memory component includes a bias term. bias_initializer : ``tf.initializers.Initializer`` Initializer for the memory component bias term. Only used if ``use_bias=True``. - bias_regularizer : ``tf.keras.regularizers.Regularizer`` + bias_regularizer : ``keras.regularizers.Regularizer`` Regularizer for the memory component bias term. Only used if ``use_bias=True``. dropout : float Dropout rate on input connections. 
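
For reference, the `seed` argument introduced in the hunks above can be passed straight through when constructing the cell; a minimal usage sketch (illustrative values, using the `layers` module import style from the tests) is:

```python
import keras

from keras_lmu import layers

# Minimal sketch (illustrative values): the new `seed` argument makes the cell's
# dropout masks reproducible; on TF >= 2.16 it seeds a keras.random.SeedGenerator.
cell = layers.LMUCell(
    memory_d=1,
    order=16,
    theta=100,
    hidden_cell=keras.layers.SimpleRNNCell(units=10),
    dropout=0.2,
    recurrent_dropout=0.1,
    seed=3,  # new parameter; defaults to None
)
lmu = keras.layers.RNN(cell, return_sequences=True)
```
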
@@ -647,7 +687,7 @@ def build(self, input_shape): dtype=self.dtype, ) else: - self.layer = tf.keras.layers.RNN( + self.layer = keras.layers.RNN( LMUCell( memory_d=self.memory_d, order=self.order, @@ -675,7 +715,7 @@ def build(self, input_shape): self.layer.build(input_shape) - def call(self, inputs, training=None): + def call(self, inputs, training=False): """ Apply this layer to inputs. @@ -697,7 +737,7 @@ def get_config(self): "memory_d": self.memory_d, "order": self.order, "theta": self._init_theta, - "hidden_cell": tf.keras.layers.serialize(self.hidden_cell), + "hidden_cell": keras.layers.serialize(self.hidden_cell), "trainable_theta": self.trainable_theta, "hidden_to_memory": self.hidden_to_memory, "memory_to_memory": self.memory_to_memory, @@ -725,13 +765,15 @@ def from_config(cls, config): config["hidden_cell"] = ( None if config["hidden_cell"] is None - else tf.keras.layers.deserialize(config["hidden_cell"]) + else keras.layers.deserialize(config["hidden_cell"]) ) return super().from_config(config) @tf.keras.utils.register_keras_serializable("keras-lmu") -class LMUFeedforward(tf.keras.layers.Layer): +class LMUFeedforward( + keras.layers.Layer +): # pylint: disable=too-many-ancestors,abstract-method """ Layer class for the feedforward variant of the LMU. @@ -756,7 +798,7 @@ class LMUFeedforward(tf.keras.layers.Layer): number of steps will be represented at the time of prediction, however the entire sequence will still be processed in order for information to be projected to and from the hidden layer. - hidden_cell : ``tf.keras.layers.Layer`` + hidden_cell : ``keras.layers.Layer`` Keras Layer implementing the hidden component. input_to_hidden : bool If True, connect the input directly to the hidden component (in addition to @@ -770,13 +812,13 @@ class LMUFeedforward(tf.keras.layers.Layer): kernel_initializer : ``tf.initializers.Initializer`` Initializer for weights from input to memory/hidden component. If ``None``, no weights will be used, and the input size must match the memory/hidden size. - kernel_regularizer : ``tf.keras.regularizers.Regularizer`` + kernel_regularizer : ``keras.regularizers.Regularizer`` Regularizer for weights from input to memory/hidden component. use_bias : bool If True, the memory component includes a bias term. bias_initializer : ``tf.initializers.Initializer`` Initializer for the memory component bias term. Only used if ``use_bias=True``. - bias_regularizer : ``tf.keras.regularizers.Regularizer`` + bias_regularizer : ``keras.regularizers.Regularizer`` Regularizer for the memory component bias term. Only used if ``use_bias=True``. dropout : float Dropout rate on input connections. @@ -835,7 +877,7 @@ def __init__( self.truncate_ir = truncate_ir # create a standard LMUCell to generate the impulse response during `build` - self.delay_layer = tf.keras.layers.RNN( + self.delay_layer = keras.layers.RNN( LMUCell( memory_d=1, order=order, @@ -856,8 +898,9 @@ def __init__( self.impulse_response = None self.kernel = None self.bias = None + self.dropout_layer = None - def build(self, input_shape): + def build(self, input_shape): # noqa: C901 """ Builds the layer. 
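
A short sketch of the feedforward variant touched by the surrounding hunks (illustrative shapes and values; as the next hunk shows, `dropout` is now applied through a `Dropout` layer created once in `build` rather than constructed inside `call`):

```python
import keras

from keras_lmu import layers

# Illustrative values: LMUFeedforward takes the same core arguments as LMU/LMUCell
# and needs a known sequence length to precompute its impulse response.
inp = keras.Input((784, 1))
ff = layers.LMUFeedforward(
    memory_d=1,
    order=256,
    theta=784,
    hidden_cell=keras.layers.SimpleRNNCell(units=10),
    dropout=0.1,
)
out = ff(inp)
model = keras.Model(inp, out)
```
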
@@ -947,7 +990,13 @@ def build(self, input_shape): with tf.name_scope(self.hidden_cell.name): self.hidden_cell.build((input_shape[0], hidden_input_d)) - def call(self, inputs, training=None): + if self.dropout: + self.dropout_layer = keras.layers.Dropout( + self.dropout, noise_shape=(input_shape[0], 1) + tuple(input_shape[2:]) + ) + self.dropout_layer.build(input_shape) + + def call(self, inputs, training=False): """ Apply this layer to inputs. @@ -958,13 +1007,8 @@ def call(self, inputs, training=None): with some additional bookkeeping. """ - if training is None: - training = tf.keras.backend.learning_phase() - if self.dropout: - inputs = tf.keras.layers.Dropout( - self.dropout, noise_shape=(inputs.shape[0], 1) + inputs.shape[2:] - )(inputs) + inputs = self.dropout_layer(inputs) # Apply input encoders u = inputs @@ -988,7 +1032,7 @@ def call(self, inputs, training=None): if self.hidden_cell is None: h = h_in if self.return_sequences else h_in[:, -1] elif hasattr(self.hidden_cell, "state_size"): - h = tf.keras.layers.RNN( + h = keras.layers.RNN( self.hidden_cell, return_sequences=self.return_sequences, dtype=self.dtype, @@ -998,7 +1042,7 @@ def call(self, inputs, training=None): # no point applying the hidden cell to the whole sequence h = self.hidden_cell(h_in[:, -1], training=training) else: - h = tf.keras.layers.TimeDistributed(self.hidden_cell)( + h = keras.layers.TimeDistributed(self.hidden_cell)( h_in, training=training ) @@ -1056,7 +1100,7 @@ def get_config(self): "memory_d": self.memory_d, "order": self.order, "theta": self.theta, - "hidden_cell": tf.keras.layers.serialize(self.hidden_cell), + "hidden_cell": keras.layers.serialize(self.hidden_cell), "input_to_hidden": self.input_to_hidden, "discretizer": self.discretizer, "kernel_initializer": self.kernel_initializer, @@ -1080,6 +1124,6 @@ def from_config(cls, config): config["hidden_cell"] = ( None if config["hidden_cell"] is None - else tf.keras.layers.deserialize(config["hidden_cell"]) + else keras.layers.deserialize(config["hidden_cell"]) ) return super().from_config(config) diff --git a/keras_lmu/tests/conftest.py b/keras_lmu/tests/conftest.py index bec12801..10362653 100644 --- a/keras_lmu/tests/conftest.py +++ b/keras_lmu/tests/conftest.py @@ -1,5 +1,6 @@ # pylint: disable=missing-docstring +import keras import tensorflow as tf from packaging import version @@ -7,3 +8,5 @@ def pytest_configure(config): if version.parse(tf.__version__) >= version.parse("2.7.0"): tf.debugging.disable_traceback_filtering() + if version.parse(tf.__version__) >= version.parse("2.16.0"): + keras.config.disable_traceback_filtering() diff --git a/keras_lmu/tests/test_benchmarks.py b/keras_lmu/tests/test_benchmarks.py index 18e574b3..ebd14046 100644 --- a/keras_lmu/tests/test_benchmarks.py +++ b/keras_lmu/tests/test_benchmarks.py @@ -2,6 +2,7 @@ import timeit +import keras import numpy as np import pytest import tensorflow as tf @@ -10,7 +11,7 @@ from keras_lmu.tests import tf_gpu_installed -class SteptimeLogger(tf.keras.callbacks.Callback): +class SteptimeLogger(keras.callbacks.Callback): """Callback that records step times.""" def __init__(self, count_mode="samples", stateful_metrics=None): @@ -38,13 +39,13 @@ def on_predict_batch_end(self, batch, logs=None): @pytest.mark.skipif(not tf_gpu_installed, reason="Very slow on CPU") @pytest.mark.parametrize( "mode, min_time, max_time", - [("rnn", 0.1, 0.2), ("fft", 0.05, 0.15), ("raw", 0.05, 0.15)], + [("rnn", 0.01, 0.1), ("fft", 0.01, 0.1), ("raw", 0.01, 0.1)], ) def test_performance(mode, min_time, 
max_time): - # performance is based on Azure NC6 VM - # CPU: Intel Xeon E5-2690 v3 @ 2.60Ghz - # GPU: Nvidia Tesla K80 - # TensorFlow version: 2.6.0 + # performance is based on + # CPU: AMD Ryzen 9 5950X + # GPU: Nvidia RTX 3060 + # TensorFlow version: 2.10.0 dims = 32 seq_len = 512 @@ -53,7 +54,7 @@ def test_performance(mode, min_time, max_time): kwargs = {"memory_d": dims, "order": 256, "theta": 784, "hidden_cell": None} if mode == "rnn": - lmu_layer = tf.keras.layers.RNN( + lmu_layer = keras.layers.RNN( layers.LMUCell(**kwargs), return_sequences=False, ) @@ -62,18 +63,18 @@ def test_performance(mode, min_time, max_time): return_sequences=False, conv_mode=mode, **kwargs ) - inputs = tf.keras.layers.Input((seq_len, dims), batch_size=batch_size) + inputs = keras.layers.Input((seq_len, dims), batch_size=batch_size) lmu = lmu_layer(inputs) - outputs = tf.keras.layers.Dense(odims)(lmu) + outputs = keras.layers.Dense(odims)(lmu) - model = tf.keras.Model(inputs=inputs, outputs=outputs) + model = keras.Model(inputs=inputs, outputs=outputs) n_train = 20 * batch_size x_train = tf.random.uniform((n_train, seq_len, dims), minval=-1, maxval=1, seed=0) y_train = tf.random.uniform((n_train, odims), minval=-1, maxval=1, seed=1) model.compile( loss="mse", - optimizer=tf.keras.optimizers.RMSprop(), + optimizer=keras.optimizers.RMSprop(), ) steptimes = SteptimeLogger() diff --git a/keras_lmu/tests/test_layers.py b/keras_lmu/tests/test_layers.py index 5a96ce9c..3e770969 100644 --- a/keras_lmu/tests/test_layers.py +++ b/keras_lmu/tests/test_layers.py @@ -2,9 +2,11 @@ import inspect +import keras import numpy as np import pytest import tensorflow as tf +from packaging import version from scipy.signal import cont2discrete from keras_lmu import layers @@ -21,15 +23,15 @@ def test_multivariate_lmu(rng, discretizer): # check that one multivariate LMU is the same as n one-dimensional LMUs (omitting # the hidden part) - inp = tf.keras.Input(shape=(n_steps, input_d)) - multi_lmu = tf.keras.layers.RNN( + inp = keras.Input(shape=(n_steps, input_d)) + multi_lmu = keras.layers.RNN( layers.LMUCell( memory_d=memory_d, order=order, theta=n_steps, discretizer=discretizer, kernel_initializer=tf.initializers.constant(input_enc), - hidden_cell=tf.keras.layers.SimpleRNNCell( + hidden_cell=keras.layers.SimpleRNNCell( units=memory_d * order, activation=None, kernel_initializer=tf.initializers.constant(np.eye(memory_d * order)), @@ -39,14 +41,14 @@ def test_multivariate_lmu(rng, discretizer): return_sequences=True, )(inp) lmus = [ - tf.keras.layers.RNN( + keras.layers.RNN( layers.LMUCell( memory_d=1, order=order, theta=n_steps, discretizer=discretizer, kernel_initializer=tf.initializers.constant(input_enc[:, [i]]), - hidden_cell=tf.keras.layers.SimpleRNNCell( + hidden_cell=keras.layers.SimpleRNNCell( units=order, activation=None, kernel_initializer=tf.initializers.constant(np.eye(order)), @@ -58,7 +60,7 @@ def test_multivariate_lmu(rng, discretizer): for i in range(memory_d) ] - model = tf.keras.Model(inp, [multi_lmu] + lmus) + model = keras.Model(inp, [multi_lmu] + lmus) results = model.predict(rng.uniform(0, 1, size=(1, n_steps, input_d))) @@ -85,11 +87,11 @@ def test_layer_vs_cell(rng, has_input_kernel, feedforward, discretizer): } def hidden_cell(): - return tf.keras.layers.SimpleRNNCell(units=64) + return keras.layers.SimpleRNNCell(units=64) inp = rng.uniform(-1, 1, size=(2, n_steps, input_d)) - lmu_cell = tf.keras.layers.RNN( + lmu_cell = keras.layers.RNN( layers.LMUCell(hidden_cell=hidden_cell(), **kwargs), 
return_sequences=True, ) @@ -101,7 +103,7 @@ def hidden_cell(): layer_out = lmu_layer(inp) assert isinstance( - lmu_layer.layer, layers.LMUFeedforward if feedforward else tf.keras.layers.RNN + lmu_layer.layer, layers.LMUFeedforward if feedforward else keras.layers.RNN ) for w0, w1 in zip( @@ -126,35 +128,35 @@ def test_save_load_weights(rng, tmp_path, discretizer, trainable_theta): x = rng.uniform(-1, 1, size=(2, n_steps, input_d)) - inp = tf.keras.Input((None, input_d)) + inp = keras.Input((None, input_d)) lmu0 = layers.LMU( memory_d, order, n_steps, - tf.keras.layers.SimpleRNNCell(units=64), + keras.layers.SimpleRNNCell(units=64), discretizer=discretizer, trainable_theta=trainable_theta, return_sequences=True, )(inp) - model0 = tf.keras.Model(inp, lmu0) + model0 = keras.Model(inp, lmu0) out0 = model0(x) lmu1 = layers.LMU( memory_d, order, n_steps, - tf.keras.layers.SimpleRNNCell(units=64), + keras.layers.SimpleRNNCell(units=64), discretizer=discretizer, trainable_theta=trainable_theta, return_sequences=True, )(inp) - model1 = tf.keras.Model(inp, lmu1) + model1 = keras.Model(inp, lmu1) out1 = model1(x) assert not np.allclose(out0, out1) - model0.save_weights(str(tmp_path)) - model1.load_weights(str(tmp_path)) + model0.save_weights(tmp_path / "model.weights.h5") + model1.load_weights(tmp_path / "model.weights.h5") out2 = model1(x) assert np.allclose(out0, out2) @@ -167,14 +169,14 @@ def test_save_load_serialization(mode, tmp_path, trainable_theta, discretizer): if mode == "feedforward" and trainable_theta: pytest.skip("LMUFeedforward does not support trainable theta") - inp = tf.keras.Input((10 if mode == "feedforward" else None, 32)) + inp = keras.Input((10 if mode == "feedforward" else None, 32)) if mode == "cell": - out = tf.keras.layers.RNN( + out = keras.layers.RNN( layers.LMUCell( 1, 2, 3, - tf.keras.layers.SimpleRNNCell(4), + keras.layers.SimpleRNNCell(4), trainable_theta=trainable_theta, discretizer=discretizer, ), @@ -185,7 +187,7 @@ def test_save_load_serialization(mode, tmp_path, trainable_theta, discretizer): 1, 2, 3, - tf.keras.layers.SimpleRNNCell(4), + keras.layers.SimpleRNNCell(4), return_sequences=True, memory_to_memory=True, trainable_theta=trainable_theta, @@ -196,16 +198,21 @@ def test_save_load_serialization(mode, tmp_path, trainable_theta, discretizer): 1, 2, 3, - tf.keras.layers.SimpleRNNCell(4), + keras.layers.SimpleRNNCell(4), discretizer=discretizer, return_sequences=True, )(inp) - model = tf.keras.Model(inp, out) + model = keras.Model(inp, out) - model.save(str(tmp_path)) + model_path = ( + tmp_path + if version.parse(tf.__version__) < version.parse("2.16.0") + else tmp_path / "model.keras" + ) + model.save(model_path) - model_load = tf.keras.models.load_model(str(tmp_path)) + model_load = keras.models.load_model(model_path) assert np.allclose( model.predict(np.ones((32, 10, 32))), model_load.predict(np.ones((32, 10, 32))) @@ -217,8 +224,8 @@ def test_save_load_serialization(mode, tmp_path, trainable_theta, discretizer): "hidden_cell", ( lambda: None, - lambda: tf.keras.layers.Dense(4, dtype="float64"), - lambda: tf.keras.layers.SimpleRNNCell(4, dtype="float64"), + lambda: keras.layers.Dense(4, dtype="float64"), + lambda: keras.layers.SimpleRNNCell(4, dtype="float64"), ), ) @pytest.mark.parametrize("discretizer", ("zoh", "euler")) @@ -238,7 +245,7 @@ def test_feedforward( x = rng.uniform(-1, 1, size=(2, seq_len, 32)) - rnn_layer = tf.keras.layers.RNN( + rnn_layer = keras.layers.RNN( layers.LMUCell(**kwargs), return_sequences=return_sequences, dtype="float64", @@ 
-250,7 +257,7 @@ def test_feedforward( ) ff_layer.build((2, None, 32)) # testing with unknown sequence length ff_layer.set_weights(rnn_layer.get_weights()) - ff_out = ff_layer(x, training=None) + ff_out = ff_layer(x) assert ff_out.dtype == rnn_out.dtype == "float64" assert np.allclose( @@ -275,7 +282,7 @@ def test_raw_truncation(truncate_ir, rng): x = rng.uniform(-1, 1, size=(2, seq_len, kwargs["memory_d"])) - rnn_layer = tf.keras.layers.RNN(layers.LMUCell(**kwargs), return_sequences=True) + rnn_layer = keras.layers.RNN(layers.LMUCell(**kwargs), return_sequences=True) rnn_out = rnn_layer(x) ff_layer = layers.LMUFeedforward( @@ -293,7 +300,7 @@ def test_raw_truncation(truncate_ir, rng): def test_validation_errors(): ff_layer = layers.LMUFeedforward(1, 2, 3, None) with pytest.warns(UserWarning, match="unknown impulse length"): - ff_layer(tf.keras.Input((None, 32))) + ff_layer(keras.Input((None, 32))) with pytest.raises(ValueError, match="hidden_to_memory must be False"): layers.LMUCell(1, 2, 3, None, hidden_to_memory=True) @@ -318,7 +325,7 @@ def test_feedforward_auto_swap( 4, 2, 3, - tf.keras.layers.Dense(5), + keras.layers.Dense(5), hidden_to_memory=hidden_to_memory, memory_to_memory=memory_to_memory, trainable_theta=trainable_theta, @@ -330,7 +337,7 @@ def test_feedforward_auto_swap( @pytest.mark.parametrize( "hidden_cell", - (tf.keras.layers.SimpleRNNCell(units=10), tf.keras.layers.Dense(units=10), None), + (keras.layers.SimpleRNNCell(units=10), keras.layers.Dense(units=10), None), ) @pytest.mark.parametrize("feedforward", (True, False)) def test_hidden_types(hidden_cell, feedforward, rng): @@ -340,21 +347,19 @@ def test_hidden_types(hidden_cell, feedforward, rng): "memory_d": 1, "order": 3, "theta": 4, - "kernel_initializer": tf.keras.initializers.constant( + "kernel_initializer": keras.initializers.constant( rng.uniform(-1, 1, size=(32, 1)) ), } - base_lmu = tf.keras.layers.RNN( + base_lmu = keras.layers.RNN( layers.LMUCell(hidden_cell=None, **lmu_params), return_sequences=True, ) base_output = base_lmu(x) - if isinstance(hidden_cell, tf.keras.layers.SimpleRNNCell): - base_output = tf.keras.layers.RNN(hidden_cell, return_sequences=True)( - base_output - ) - elif isinstance(hidden_cell, tf.keras.layers.Dense): + if isinstance(hidden_cell, keras.layers.SimpleRNNCell): + base_output = keras.layers.RNN(hidden_cell, return_sequences=True)(base_output) + elif isinstance(hidden_cell, keras.layers.Dense): base_output = hidden_cell(base_output) lmu = ( @@ -362,7 +367,7 @@ def test_hidden_types(hidden_cell, feedforward, rng): hidden_cell=hidden_cell, return_sequences=True, **lmu_params ) if feedforward - else tf.keras.layers.RNN( + else keras.layers.RNN( layers.LMUCell(hidden_cell=hidden_cell, **lmu_params), return_sequences=True, ) @@ -375,43 +380,20 @@ def test_hidden_types(hidden_cell, feedforward, rng): @pytest.mark.parametrize("feedforward", (True, False)) -@pytest.mark.parametrize("hidden_cell", (None, tf.keras.layers.Dense)) +@pytest.mark.parametrize("hidden_cell", (None, keras.layers.Dense)) def test_connection_params(feedforward, hidden_cell): input_shape = (32, 7 if feedforward else None, 6) - x = tf.keras.Input(batch_shape=input_shape) + x = keras.Input(batch_shape=input_shape) lmu_args = { "memory_d": 1, "order": 3, "theta": 4, "hidden_cell": hidden_cell if hidden_cell is None else hidden_cell(units=5), - "input_to_hidden": False, + "input_to_hidden": hidden_cell is not None, } - if not feedforward: - lmu_args["hidden_to_memory"] = False - lmu_args["memory_to_memory"] = False - - lmu 
= ( - layers.LMUCell(**lmu_args) - if not feedforward - else layers.LMUFeedforward(**lmu_args) - ) - y = lmu(x) if feedforward else tf.keras.layers.RNN(lmu)(x) - assert lmu.kernel.shape == (input_shape[-1], lmu.memory_d) - if not feedforward: - assert lmu.recurrent_kernel is None - if hidden_cell is not None: - assert lmu.hidden_cell.kernel.shape == ( - lmu.memory_d * lmu.order, - lmu.hidden_cell.units, - ) - assert y.shape == ( - input_shape[0], - lmu.memory_d * lmu.order if hidden_cell is None else lmu.hidden_cell.units, - ) - lmu_args["input_to_hidden"] = hidden_cell is not None if not feedforward: lmu_args["hidden_to_memory"] = hidden_cell is not None lmu_args["memory_to_memory"] = True @@ -421,9 +403,7 @@ def test_connection_params(feedforward, hidden_cell): if not feedforward else layers.LMUFeedforward(**lmu_args) ) - if hidden_cell is not None: - lmu.hidden_cell.built = False # so that the kernel will be rebuilt - y = lmu(x) if feedforward else tf.keras.layers.RNN(lmu)(x) + y = lmu(x) if feedforward else keras.layers.RNN(lmu)(x) assert lmu.kernel.shape == ( input_shape[-1] + (0 if feedforward or hidden_cell is None else lmu.hidden_cell.units), @@ -447,7 +427,27 @@ def test_connection_params(feedforward, hidden_cell): @pytest.mark.parametrize( "dropout, recurrent_dropout, hidden_dropout, hidden_recurrent_dropout", - [(0, 0, 0, 0), (0.5, 0, 0, 0), (0, 0.5, 0, 0), (0, 0, 0.5, 0), (0, 0, 0, 0.5)], + [ + (0, 0, 0, 0), + (0.5, 0, 0, 0), + (0, 0.5, 0, 0), + (0, 0, 0.5, 0), + pytest.param( + 0, + 0, + 0, + 0.5, + **( + {} + if version.parse(tf.__version__) < version.parse("2.16.0rc0") + else { + "marks": pytest.mark.xfail( + reason="TF2.16 recurrent dropout is bugged" + ) + } + ), + ), + ], ) @pytest.mark.parametrize("feedforward", (True, False)) def test_dropout( @@ -461,7 +461,7 @@ def test_dropout( memory_d=1, order=3, theta=4, - hidden_cell=tf.keras.layers.SimpleRNNCell( + hidden_cell=keras.layers.SimpleRNNCell( 5, dropout=hidden_dropout, recurrent_dropout=hidden_recurrent_dropout ), dropout=dropout, @@ -497,7 +497,7 @@ def test_fit(feedforward, discretizer, trainable_theta): order=256, theta=784 if discretizer == "zoh" else 2000, trainable_theta=trainable_theta, - hidden_cell=tf.keras.layers.SimpleRNNCell(units=30), + hidden_cell=keras.layers.SimpleRNNCell(units=30), hidden_to_memory=not feedforward, memory_to_memory=not feedforward, input_to_hidden=not feedforward, @@ -505,19 +505,19 @@ def test_fit(feedforward, discretizer, trainable_theta): kernel_initializer="zeros", ) - inputs = tf.keras.layers.Input((None, 10)) + inputs = keras.layers.Input((None, 10)) lmu = lmu_layer(inputs) - outputs = tf.keras.layers.Dense(2)(lmu) + outputs = keras.layers.Dense(2)(lmu) - model = tf.keras.Model(inputs=inputs, outputs=outputs) + model = keras.Model(inputs=inputs, outputs=outputs) x_train = tf.ones((5, 5, 10)) x_test = tf.ones((5, 5, 10)) y_train = tf.ones((5, 1)) y_test = tf.ones((5, 1)) model.compile( - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - optimizer=tf.keras.optimizers.Adam(), + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer="adam", metrics=["accuracy"], ) @@ -526,7 +526,7 @@ def test_fit(feedforward, discretizer, trainable_theta): _, acc = model.evaluate(x_test, y_test, verbose=0) assert isinstance( - lmu_layer.layer, layers.LMUFeedforward if feedforward else tf.keras.layers.RNN + lmu_layer.layer, layers.LMUFeedforward if feedforward else keras.layers.RNN ) assert acc == 1.0 @@ -537,7 +537,7 @@ def 
test_no_input_kernel_dimension_mismatch(feedforward): memory_d=1, order=4, theta=4, - hidden_cell=tf.keras.layers.SimpleRNNCell(units=10), + hidden_cell=keras.layers.SimpleRNNCell(units=10), hidden_to_memory=False, memory_to_memory=not feedforward, input_to_hidden=not feedforward, @@ -610,17 +610,15 @@ def test_theta_update(discretizer, trainable_theta, tmp_path): order=3, theta=theta, trainable_theta=trainable_theta, - hidden_cell=tf.keras.layers.SimpleRNNCell(units=4), + hidden_cell=keras.layers.SimpleRNNCell(units=4), discretizer=discretizer, ) - inputs = tf.keras.layers.Input((None, 20)) - lmu = tf.keras.layers.RNN(lmu_cell)(inputs) - model = tf.keras.Model(inputs=inputs, outputs=lmu) + inputs = keras.layers.Input((None, 20)) + lmu = keras.layers.RNN(lmu_cell)(inputs) + model = keras.Model(inputs=inputs, outputs=lmu) - model.compile( - loss=tf.keras.losses.MeanSquaredError(), optimizer=tf.keras.optimizers.Adam() - ) + model.compile(loss=keras.losses.MeanSquaredError(), optimizer="adam") # make sure theta_inv is set correctly to initial value assert np.allclose(lmu_cell.theta_inv.numpy(), 1 / theta) @@ -632,9 +630,9 @@ def test_theta_update(discretizer, trainable_theta, tmp_path): assert np.allclose(lmu_cell.theta_inv.numpy(), 1 / theta) != trainable_theta # save model and make sure you get same outputs, that is, correct theta was stored - model.save(str(tmp_path)) + model.save(tmp_path / "model.keras") - model_load = tf.keras.models.load_model(str(tmp_path)) + model_load = keras.models.load_model(tmp_path / "model.keras") assert np.allclose( model.predict(np.ones((32, 10, 20))), @@ -687,22 +685,22 @@ def test_regularizer_loss(fft, bias): memory_d=memory_d, order=4, theta=4, - hidden_cell=tf.keras.layers.SimpleRNNCell(units=10), + hidden_cell=keras.layers.SimpleRNNCell(units=10), hidden_to_memory=False, memory_to_memory=not fft, input_to_hidden=not fft, use_bias=bias, bias_initializer="uniform", # non-zero to make regularization loss non-zero - kernel_regularizer=tf.keras.regularizers.L1L2(l1=reg), - recurrent_regularizer=tf.keras.regularizers.L1L2(l1=rec_reg), - bias_regularizer=tf.keras.regularizers.L1L2(l1=bias_reg), + kernel_regularizer=keras.regularizers.L1L2(l1=reg), + recurrent_regularizer=keras.regularizers.L1L2(l1=rec_reg), + bias_regularizer=keras.regularizers.L1L2(l1=bias_reg), ) - inputs = tf.keras.Input((seq_len, input_d)) + inputs = keras.Input((seq_len, input_d)) lmus = lmu_layer(inputs) - outputs = tf.keras.layers.Dense(10)(lmus) + outputs = keras.layers.Dense(10)(lmus) - model = tf.keras.Model(inputs=inputs, outputs=outputs) - cce_loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) + model = keras.Model(inputs=inputs, outputs=outputs) + cce_loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) model.compile(loss=cce_loss_fn, optimizer="adam", metrics=[cce_loss_fn]) n_test = 5 @@ -746,12 +744,12 @@ def test_dtype(feedforward, dtype): 1, 2, 3, - tf.keras.layers.SimpleRNNCell(4, dtype=dtype), + keras.layers.SimpleRNNCell(4, dtype=dtype), trainable_theta=not feedforward, dtype=dtype, ) y = layer(x) assert isinstance( - layer.layer, layers.LMUFeedforward if feedforward else tf.keras.layers.RNN + layer.layer, layers.LMUFeedforward if feedforward else keras.layers.RNN ) assert y.dtype == ("float32" if dtype is None else dtype) diff --git a/keras_lmu/version.py b/keras_lmu/version.py index 78b13985..da17931c 100644 --- a/keras_lmu/version.py +++ b/keras_lmu/version.py @@ -11,7 +11,7 @@ tagged with the version. 
""" -version_info = (0, 7, 1) +version_info = (0, 8, 0) name = "keras-lmu" dev = 0 diff --git a/setup.py b/setup.py index 87268bfc..e345a6c1 100644 --- a/setup.py +++ b/setup.py @@ -28,12 +28,13 @@ def read(*filenames, **kwargs): version = runpy.run_path(str(root / "keras_lmu" / "version.py"))["version"] install_req = [ + "anyio<4", "packaging>=20.9", "scipy>=1.0.0", - "tensorflow>=2.4.4", + "tensorflow>=2.6.0", ] docs_req = [ - "matplotlib>=3.0.2,<3.4.3", + "matplotlib>=3.8.4", "jupyter>=1.0.0", "seaborn>=0.9.0", "sphinx>=1.8",