From 4df7e69df1160f0cff736ba9c5b16124a8304908 Mon Sep 17 00:00:00 2001
From: Aron
Date: Wed, 25 Oct 2023 08:13:20 +0200
Subject: [PATCH] Implement MultiDense layer

Add MultiDense layer
Add MultiDense layer improvements
Recreate initializer per replica to make sure seed is properly set
Add tolerances to test
Add multi_dense path in generate_nn
Add MultiDropout
Replace old dense layer everywhere
Remove MultiDropout, not necessary
Update developing weights structure
Remove MultiDropout once more
Fix naming inconsistency wrt parallel-prefactor
---
 n3fit/runcards/examples/developing_weights.h5  | Bin 41348 -> 41348 bytes
 .../backends/keras_backend/base_layers.py      |  80 +++++---
 .../backends/keras_backend/multi_dense.py      | 179 ++++++++++++++++++
 n3fit/src/n3fit/checks.py                      |   9 +-
 n3fit/src/n3fit/model_gen.py                   |  31 ++-
 .../src/n3fit/tests/regressions/weights_1.h5   | Bin 29232 -> 29232 bytes
 .../src/n3fit/tests/regressions/weights_2.h5   | Bin 29232 -> 29232 bytes
 n3fit/src/n3fit/tests/test_modelgen.py         |  13 +-
 n3fit/src/n3fit/tests/test_multidense.py       |  68 +++++++
 9 files changed, 335 insertions(+), 45 deletions(-)
 create mode 100644 n3fit/src/n3fit/backends/keras_backend/multi_dense.py
 create mode 100644 n3fit/src/n3fit/tests/test_multidense.py

diff --git a/n3fit/runcards/examples/developing_weights.h5 b/n3fit/runcards/examples/developing_weights.h5
index 385ea55a758dc81092dad551d29cdb1f94fab8d9..2749393b421910762e0844381d5628511d4026cc 100644
Binary files a/n3fit/runcards/examples/developing_weights.h5 and b/n3fit/runcards/examples/developing_weights.h5 differ
diff --git a/n3fit/src/n3fit/backends/keras_backend/multi_dense.py b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
new file mode 100644
--- /dev/null
+++ b/n3fit/src/n3fit/backends/keras_backend/multi_dense.py
@@ -0,0 +1,179 @@
+from typing import List
+
+import tensorflow as tf
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.layers import Dense
+
+
+class MultiDense(Dense):
+    """
+    Dense layer for multiple replicas at the same time.
+
+    Weights for all replicas are stacked along a leading replica axis, and the
+    kernel initializer is recreated per replica so that every replica's seed
+    is set independently.
+
+    Example
+    -------
+    >>> from tensorflow.keras import Sequential
+    >>> from tensorflow.keras.layers import Dense
+    >>> from tensorflow.keras.initializers import GlorotUniform
+    >>> import tensorflow as tf
+    >>> replicas = 2
+    >>> multi_dense_model = Sequential([
+    >>>     MultiDense(units=8, replica_seeds=[42, 43], replica_input=False, kernel_initializer=GlorotUniform(seed=0)),
+    >>>     MultiDense(units=4, replica_seeds=[52, 53], kernel_initializer=GlorotUniform(seed=0)),
+    >>> ])
+    >>> single_models = [
+    >>>     Sequential([
+    >>>         Dense(units=8, kernel_initializer=GlorotUniform(seed=42 + r)),
+    >>>         Dense(units=4, kernel_initializer=GlorotUniform(seed=52 + r)),
+    >>>     ])
+    >>>     for r in range(replicas)
+    >>> ]
+    >>> gridsize, features = 100, 2
+    >>> multi_dense_model.build(input_shape=(None, gridsize, features))
+    >>> for single_model in single_models:
+    >>>     single_model.build(input_shape=(None, gridsize, features))
+    >>> test_input = tf.random.uniform(shape=(1, gridsize, features))
+    >>> multi_dense_output = multi_dense_model(test_input)
+    >>> single_dense_output = tf.stack([single_model(test_input) for single_model in single_models], axis=1)
+    >>> tf.reduce_all(tf.equal(multi_dense_output, single_dense_output))
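+    >>> # In practice the two outputs agree only up to floating-point
+    >>> # rounding, so a tolerance-based comparison (as used in the tests)
+    >>> # is the safer check:
+    >>> tf.reduce_all(tf.abs(multi_dense_output - single_dense_output) < 1e-6)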
+ """ + + def __init__( + self, + replica_seeds: List[int], + kernel_initializer: Initializer, + replica_input: bool = True, + **kwargs + ): + super().__init__(**kwargs) + self.replicas = len(replica_seeds) + self.replica_seeds = replica_seeds + self.kernel_initializer = MultiInitializer( + single_initializer=kernel_initializer, replica_seeds=replica_seeds + ) + self.bias_initializer = MultiInitializer( + single_initializer=self.bias_initializer, replica_seeds=replica_seeds + ) + self.replica_input = replica_input + + def build(self, input_shape): + input_dim = input_shape[-1] + self.kernel = self.add_weight( + name="kernel", + shape=(self.replicas, input_dim, self.units), + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + ) + if self.use_bias: + self.bias = self.add_weight( + name="bias", + shape=(self.replicas, 1, self.units), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + ) + else: + self.bias = None + self.input_spec.axes = {-1: input_dim} + self.built = True + + def call(self, inputs): + """ + Compute output of shape (batch_size, replicas, gridsize, units). + + For the first layer, (self.replica_input is False), this is equivalent to + applying each replica separately and concatenating along the last axis. + If the input already contains multiple replica outputs, it is equivalent + to applying each replica to its corresponding input. + """ + if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: + inputs = tf.cast(inputs, dtype=self._compute_dtype_object) + + input_axes = 'brnf' if self.replica_input else 'bnf' + einrule = input_axes + ',rfg->brng' + outputs = tf.einsum(einrule, inputs, self.kernel) + + # Reshape the output back to the original ndim of the input. + if not tf.executing_eagerly(): + output_shape = self.compute_output_shape(inputs.shape.as_list()) + outputs.set_shape(output_shape) + + if self.use_bias: + outputs = outputs + self.bias + + if self.activation is not None: + outputs = self.activation(outputs) + + return outputs + + def compute_output_shape(self, input_shape): + # Remove the replica axis from the input shape. + if self.replica_input: + input_shape = input_shape[:1] + input_shape[2:] + + output_shape = super().compute_output_shape(input_shape) + + # Add back the replica axis to the output shape. + output_shape = output_shape[:1] + [self.replicas] + output_shape[1:] + + return output_shape + + def get_config(self): + config = super().get_config() + config.update({"replica_input": self.replica_input, "replica_seeds": self.replica_seeds}) + return config + + +class MultiInitializer(Initializer): + """ + Multi replica initializer that exactly replicates a stack of single replica initializers. + + Weights are stacked on the first axis, and per replica seeds are added to a base seed of the + given single replica initializer. + + Parameters + ---------- + single_initializer: Initializer + Initializer class for the kernel. + replica_seeds: List[int] + List of seeds per replica for the kernel initializer. 
+ """ + + def __init__(self, single_initializer: Initializer, replica_seeds: List[int]): + self.initializer_class = type(single_initializer) + self.initializer_config = single_initializer.get_config() + self.base_seed = single_initializer.seed if hasattr(single_initializer, "seed") else None + self.replica_seeds = replica_seeds + + def __call__(self, shape, dtype=None, **kwargs): + shape = shape[1:] # Remove the replica axis from the shape. + per_replica_weights = [] + for replica_seed in self.replica_seeds: + if self.base_seed is not None: + self.initializer_config["seed"] = self.base_seed + replica_seed + single_initializer = self.initializer_class.from_config(self.initializer_config) + + per_replica_weights.append(single_initializer(shape, dtype, **kwargs)) + + return tf.stack(per_replica_weights, axis=0) diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py index 885785a268..d93f3b1d9a 100644 --- a/n3fit/src/n3fit/checks.py +++ b/n3fit/src/n3fit/checks.py @@ -385,8 +385,8 @@ def check_consistent_parallel(parameters, parallel_models, same_trvl_per_replica "Replicas cannot be run in parallel with different training/validation " " masks, please set `same_trvl_per_replica` to True in the runcard" ) - if parameters.get("layer_type") != "dense": - raise CheckError("Parallelization has only been tested with layer_type=='dense'") + if parameters.get("layer_type") == "dense_per_flavour": + raise CheckError("Parallelization has not been tested with layer_type=='dense_per_flavour'") @make_argcheck @@ -427,10 +427,9 @@ def check_fiatlux_pdfs_id(replicas, fiatlux): f"Cannot generate a photon replica with id larger than the number of replicas of the PDFs set {luxset.name}:\nreplica id={max_id}, replicas of {luxset.name} = {pdfs_ids}" ) + @make_argcheck def check_multireplica_qed(replicas, fiatlux): if fiatlux is not None: if len(replicas) > 1: - raise CheckError( - "At the moment, running a multireplica QED fits is not allowed." 
diff --git a/n3fit/src/n3fit/checks.py b/n3fit/src/n3fit/checks.py
index 885785a268..d93f3b1d9a 100644
--- a/n3fit/src/n3fit/checks.py
+++ b/n3fit/src/n3fit/checks.py
@@ -385,8 +385,8 @@ def check_consistent_parallel(parameters, parallel_models, same_trvl_per_replica
             "Replicas cannot be run in parallel with different training/validation "
             " masks, please set `same_trvl_per_replica` to True in the runcard"
         )
-    if parameters.get("layer_type") != "dense":
-        raise CheckError("Parallelization has only been tested with layer_type=='dense'")
+    if parameters.get("layer_type") == "dense_per_flavour":
+        raise CheckError("Parallelization has not been tested with layer_type=='dense_per_flavour'")
 
 
 @make_argcheck
@@ -427,10 +427,9 @@ def check_fiatlux_pdfs_id(replicas, fiatlux):
             f"Cannot generate a photon replica with id larger than the number of replicas of the PDFs set {luxset.name}:\nreplica id={max_id}, replicas of {luxset.name} = {pdfs_ids}"
         )
 
+
 @make_argcheck
 def check_multireplica_qed(replicas, fiatlux):
     if fiatlux is not None:
         if len(replicas) > 1:
-            raise CheckError(
-                "At the moment, running a multireplica QED fits is not allowed."
-            )
+            raise CheckError("At the moment, running a multireplica QED fit is not allowed.")
diff --git a/n3fit/src/n3fit/model_gen.py b/n3fit/src/n3fit/model_gen.py
index fc180f392f..5a69fe9396 100644
--- a/n3fit/src/n3fit/model_gen.py
+++ b/n3fit/src/n3fit/model_gen.py
@@ -750,17 +750,28 @@ def initializer_generator(seed, i_layer):
     # list_of_pdf_layers[d][r] is the layer at depth d for replica r
     list_of_pdf_layers = []
     for i_layer, (nodes_out, activation) in enumerate(zip(nodes_list, activations)):
-        layers = [
-            base_layer_selector(
+        if layer_type == "dense":
+            layers = base_layer_selector(
                 layer_type,
-                kernel_initializer=initializer_generator(replica_seed, i_layer),
+                replica_seeds=replica_seeds,
+                kernel_initializer=initializer_generator(0, i_layer),
                 units=nodes_out,
                 activation=activation,
-                input_shape=(nodes_in,),
+                replica_input=(i_layer != 0),
                 **custom_args,
             )
-            for replica_seed in replica_seeds
-        ]
+        else:
+            layers = [
+                base_layer_selector(
+                    layer_type,
+                    kernel_initializer=initializer_generator(replica_seed, i_layer),
+                    units=nodes_out,
+                    activation=activation,
+                    input_shape=(nodes_in,),
+                    **custom_args,
+                )
+                for replica_seed in replica_seeds
+            ]
         list_of_pdf_layers.append(layers)
         nodes_in = int(nodes_out)
 
@@ -775,6 +786,14 @@ def initializer_generator(seed, i_layer):
     list_of_pdf_layers[-1] = [lambda x: concat(layer(x)) for layer in list_of_pdf_layers[-1]]
 
     # Apply all layers to the input to create the models
+    if layer_type == "dense":
+        pdfs = x_input
+        for layer in list_of_pdf_layers:
+            pdfs = layer(pdfs)
+        model = MetaModel({'NN_input': x_input}, pdfs, name=NN_LAYER_ALL_REPLICAS)
+
+        return model
+
     pdfs = [layer(x_input) for layer in list_of_pdf_layers[0]]
 
     for layers in list_of_pdf_layers[1:]:
diff --git a/n3fit/src/n3fit/tests/regressions/weights_1.h5 b/n3fit/src/n3fit/tests/regressions/weights_1.h5
index 7f9f9301844822e6cb8ab0e0271f2bc8f538119f..19faf3f6170c060fa392ef424b9c57077c3a6db4 100644
Binary files a/n3fit/src/n3fit/tests/regressions/weights_1.h5 and b/n3fit/src/n3fit/tests/regressions/weights_1.h5 differ
diff --git a/n3fit/src/n3fit/tests/regressions/weights_2.h5 b/n3fit/src/n3fit/tests/regressions/weights_2.h5
index 7f9f9301844822e6cb8ab0e0271f2bc8f538119f..19faf3f6170c060fa392ef424b9c57077c3a6db4 100644
Binary files a/n3fit/src/n3fit/tests/regressions/weights_2.h5 and b/n3fit/src/n3fit/tests/regressions/weights_2.h5 differ
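For reference, a minimal sketch of what the new `layer_type == "dense"` branch in model_gen.py builds, written directly against the MultiDense layer added above. Node counts, seeds and shapes are illustrative only; the import path follows the new file's location, and `MetaModel`/`base_layer_selector` used in the actual diff are n3fit internals not shown here:

    import tensorflow as tf
    from tensorflow.keras.initializers import GlorotUniform

    from n3fit.backends.keras_backend.multi_dense import MultiDense

    replica_seeds = [101, 102, 103]
    nodes = [(25, 'tanh'), (20, 'tanh'), (8, 'linear')]

    # One MultiDense layer per depth, acting on all replicas at once; only
    # the first layer receives input without a replica axis.
    layers = [
        MultiDense(
            units=units,
            activation=activation,
            replica_seeds=replica_seeds,
            replica_input=(i_layer != 0),
            kernel_initializer=GlorotUniform(seed=0),
        )
        for i_layer, (units, activation) in enumerate(nodes)
    ]

    x = tf.random.uniform((1, 50, 2))  # (batch, gridsize, features)
    pdfs = x
    for layer in layers:
        pdfs = layer(pdfs)
    print(pdfs.shape)  # (1, 3, 50, 8): one stacked network evaluates all replicas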