Parallel replicas with varying tr-vl masks #1788

Merged · 33 commits · Feb 22, 2024

Commits
101056e
Implemented parallel replicas with varying trvl mask layers
goord Jun 22, 2022
90674bd
Added lru caches to speed up validphys multi-replica initalization
goord May 10, 2023
5db86f1
Apply black, isort to modified files
APJansen Jan 10, 2024
c7b1d14
Made isinstance call more elegant
goord Feb 15, 2024
9663a63
Simplify LossInvCovmat for diagonal cases
goord Feb 15, 2024
799c12c
Simplified replica stacking of experimental data in model setup
goord Feb 15, 2024
ba6bb59
Removed axis argument from mask layer and determine output shape at b…
goord Feb 15, 2024
b2edf4b
Return -1 as stop epoch whenever the final epoch object is not create…
goord Feb 15, 2024
06c59ff
Give warning on parallel multi-replica single-point datasets
goord Feb 15, 2024
170c13f
Add default parameters to tr_masks to fix unit tests
goord Feb 16, 2024
36edbdb
Revert change to runcard
APJansen Feb 19, 2024
2bc758b
Generalise import of operations
APJansen Feb 19, 2024
868a9a4
Restore EKO import
APJansen Feb 19, 2024
458469e
Revert "Added lru caches to speed up validphys multi-replica initaliz…
APJansen Feb 19, 2024
916249d
Remove unused index in loop
APJansen Feb 19, 2024
b2ad20e
Refactor list comprehensions
APJansen Feb 19, 2024
b337bf7
Remove unused functools import
APJansen Feb 19, 2024
c2e4935
Fix shape of input and mask in the Mask layer, and adjust test.
APJansen Feb 19, 2024
a8da0db
Simplify einsum call
APJansen Feb 21, 2024
2bbf864
Use enumerate instead of indexing
APJansen Feb 21, 2024
1d91484
Simplify branching on kernel shape (also rename tmp->obs_diff,
APJansen Feb 21, 2024
d994895
be explicit about axis
APJansen Feb 21, 2024
c260f14
Remove check for multiple replicas
APJansen Feb 21, 2024
a4026e1
keys -> items in loop as a value was being used
APJansen Feb 21, 2024
d63115b
Remove deprecated comment
APJansen Feb 21, 2024
49904a3
Clarify branching in loss
APJansen Feb 21, 2024
db24f63
Simplified pos_info and integ_info usage in model_trainer
goord Feb 21, 2024
f6e9bac
Fix bug in losses, incorrectly determining number of replicas
APJansen Feb 21, 2024
66f1068
Made construction of replicas_info more readable
goord Feb 21, 2024
2cd1701
Merge remote-tracking branch 'origin/trvl-mask-layers' into trvl-mask…
goord Feb 21, 2024
bc76939
Simplify replicas_info further
APJansen Feb 22, 2024
44901f0
Automatically regenerated regressions from PR 1788, branch trvl-mask-…
Feb 22, 2024
a7dfc0d
Add comment on flattening
APJansen Feb 22, 2024
Files changed
6 changes: 6 additions & 0 deletions n3fit/src/n3fit/backends/keras_backend/operations.py
@@ -212,6 +212,12 @@ def flatten(x):
return tf.reshape(x, (-1,))


@tf.function
def reshape(x, shape):
"""reshape tensor x"""
return tf.reshape(x, shape)


def boolean_mask(*args, **kwargs):
"""
Applies a boolean mask to a tensor
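For reference, a minimal usage sketch of the new wrapper (not part of the diff; assumes a TensorFlow backend, with illustrative shapes):

import tensorflow as tf

from n3fit.backends import operations as op

x = tf.range(6.0)                # shape (6,)
y = op.reshape(x, shape=(2, 3))  # same result as tf.reshape(x, (2, 3))
assert tuple(y.shape) == (2, 3)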
5 changes: 0 additions & 5 deletions n3fit/src/n3fit/checks.py
@@ -380,11 +380,6 @@ def check_consistent_parallel(parameters, parallel_models, same_trvl_per_replica
"""
if not parallel_models:
return
if not same_trvl_per_replica:
raise CheckError(
"Replicas cannot be run in parallel with different training/validation "
" masks, please set `same_trvl_per_replica` to True in the runcard"
)
if parameters.get("layer_type") != "dense":
raise CheckError("Parallelization has only been tested with layer_type=='dense'")

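With this check removed, parallel models may now use different training/validation masks per replica. Reconstructed from the surrounding context, the check after this change reduces to (docstring elided):

def check_consistent_parallel(parameters, parallel_models, same_trvl_per_replica):
    if not parallel_models:
        return
    if parameters.get("layer_type") != "dense":
        raise CheckError("Parallelization has only been tested with layer_type=='dense'")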
31 changes: 17 additions & 14 deletions n3fit/src/n3fit/layers/losses.py
@@ -8,6 +8,7 @@

"""
import numpy as np

from n3fit.backends import MetaLayer
from n3fit.backends import operations as op

@@ -38,9 +39,6 @@ class LossInvcovmat(MetaLayer):
"""

def __init__(self, invcovmat, y_true, mask=None, covmat=None, **kwargs):
# If we have a diagonal matrix, padd with 0s and hope it's not too heavy on memory
if len(invcovmat.shape) == 1:
invcovmat = np.diag(invcovmat)
self._invcovmat = op.numpy_to_tensor(invcovmat)
self._covmat = covmat
self._y_true = op.numpy_to_tensor(y_true)
@@ -56,9 +54,7 @@ def build(self, input_shape):
"""Transform the inverse covmat and the mask into
weights of the layers"""
init = MetaLayer.init_constant(self._invcovmat)
self.kernel = self.builder_helper(
"invcovmat", (self._ndata, self._ndata), init, trainable=False
)
self.kernel = self.builder_helper("invcovmat", self._invcovmat.shape, init, trainable=False)
mask_shape = (1, 1, self._ndata)
if self._mask is None:
init_mask = MetaLayer.init_constant(np.ones(mask_shape))
@@ -79,17 +75,24 @@ def update_mask(self, new_mask):
self.mask.assign(new_mask)

def call(self, y_pred, **kwargs):
tmp_raw = self._y_true - y_pred
obs_diff_raw = self._y_true - y_pred
# TODO: most of the time this is a y * I multiplication and can be skipped
# benchmark how much time (if any) is lost in this in actual fits for the benefit of faster kfolds
tmp = op.op_multiply([tmp_raw, self.mask])
if tmp.shape[1] == 1:
# einsum is not well suited for CPU, so use tensordot if not multimodel
right_dot = op.tensor_product(self.kernel, tmp[0, 0, :], axes=1)
res = op.tensor_product(tmp[0, :, :], right_dot, axes=1)
obs_diff = op.op_multiply([obs_diff_raw, self.mask])

# The experimental loss doesn't depend on replicas, so it doesn't have a replica axis and
# must be treated separately
experimental_loss = len(self.kernel.shape) == 2
one_replica = obs_diff.shape[1] == 1

if one_replica: # einsum is not well suited for CPU, so use tensordot if single replica
kernel = self.kernel if experimental_loss else self.kernel[0]
right_dot = op.tensor_product(kernel, obs_diff[0, 0, :], axes=1)
loss = op.tensor_product(obs_diff[0, :, :], right_dot, axes=1)
else:
res = op.einsum("bri, ij, brj -> r", tmp, self.kernel, tmp)
return res
einstr = "bri, ij, brj -> r" if experimental_loss else "bri, rij, brj -> r"
loss = op.einsum(einstr, obs_diff, self.kernel, obs_diff)
return loss


class LossLagrange(MetaLayer):
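The two branches in LossInvcovmat.call compute the same contraction. A standalone NumPy sketch (not part of the PR; shapes are illustrative) verifying that the single-replica tensordot path matches the einsum path for the rank-2 experimental kernel:

import numpy as np

ndata = 5
rng = np.random.default_rng(0)
obs_diff = rng.normal(size=(1, 1, ndata))  # (batch, replicas, ndata), one replica
invcov = rng.normal(size=(ndata, ndata))   # rank-2 kernel, i.e. the experimental loss

# einsum path, used when there are multiple replicas
loss_einsum = np.einsum("bri, ij, brj -> r", obs_diff, invcov, obs_diff)

# tensordot path, used when there is a single replica
right_dot = np.tensordot(invcov, obs_diff[0, 0, :], axes=1)
loss_tensordot = np.tensordot(obs_diff[0, :, :], right_dot, axes=1)

assert np.allclose(loss_einsum, loss_tensordot)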
38 changes: 28 additions & 10 deletions n3fit/src/n3fit/layers/mask.py
@@ -1,10 +1,12 @@
from numpy import count_nonzero

from n3fit.backends import MetaLayer
from n3fit.backends import operations as op


class Mask(MetaLayer):
"""
This layers applies a boolean mask to a rank-1 input tensor.
This layer applies a boolean mask to an input tensor.
The mask admits a multiplier for all outputs, which is internally
saved as a weight so it can be updated during training.

@@ -14,34 +16,50 @@ class Mask(MetaLayer):

Parameters
----------
bool_mask: np.array
bool_mask: np.array of shape (n_replicas, n_features)
numpy array with the boolean mask to be applied
c: float
constant multiplier for every output
axis: int
axis in which to apply the mask
"""

def __init__(self, bool_mask=None, c=None, axis=None, **kwargs):
def __init__(self, bool_mask=None, c=None, **kwargs):
if bool_mask is None:
self.mask = None
self.last_dim = -1
else:
self.mask = op.numpy_to_tensor(bool_mask, dtype=bool)
self.last_dim = count_nonzero(bool_mask[0, ...])
self.c = c
self.axis = axis
self.masked_output_shape = None
super().__init__(**kwargs)

def build(self, input_shape):
if self.c is not None:
initializer = MetaLayer.init_constant(value=self.c)
self.kernel = self.builder_helper(
"mask", (1,), initializer, trainable=False
)
self.kernel = self.builder_helper("mask", (1,), initializer, trainable=False)
# Make sure reshape will succeed: set the last dimension to the number of points
# passing the mask, and the second-to-last dimension to the number of replicas
if self.mask is not None:
self.masked_output_shape = [-1 if d is None else d for d in input_shape]
self.masked_output_shape[-1] = self.last_dim
self.masked_output_shape[-2] = self.mask.shape[-2]
super(Mask, self).build(input_shape)

def call(self, ret):
"""
Apply the mask to the input tensor, and multiply by the constant if present.

Parameters
----------
ret: Tensor of shape (batch_size, n_replicas, n_features)

Returns
-------
Tensor of shape (batch_size, n_replicas, n_features), where n_features is reduced to the number of unmasked points when a mask is set
"""
if self.mask is not None:
ret = op.boolean_mask(ret, self.mask, axis=self.axis)
flat_res = op.boolean_mask(ret, self.mask, axis=1)
ret = op.reshape(flat_res, shape=self.masked_output_shape)
if self.c is not None:
ret = ret * self.kernel
return ret
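A NumPy sketch (not part of the PR; shapes are illustrative) of the masking and reshape performed in call: the 2-D boolean mask flattens the replica and feature axes while selecting the same number of points per replica, and the reshape restores the replica axis:

import numpy as np

x = np.arange(8.0).reshape(1, 2, 4)           # (batch, n_replicas, n_features)
mask = np.array([[True, True, False, True],
                 [False, True, True, True]])  # one row per replica, 3 True in each

flat = x[:, mask]            # analogue of op.boolean_mask(ret, mask, axis=1): shape (1, 6)
out = flat.reshape(1, 2, 3)  # masked_output_shape = (batch, n_replicas, last_dim)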