diff --git a/DeepCrazyhouse/src/domain/neural_net/architectures/pytorch/alpha_vile.py b/DeepCrazyhouse/src/domain/neural_net/architectures/pytorch/alpha_vile.py
index ec2e66e8..d2861c29 100644
--- a/DeepCrazyhouse/src/domain/neural_net/architectures/pytorch/alpha_vile.py
+++ b/DeepCrazyhouse/src/domain/neural_net/architectures/pytorch/alpha_vile.py
@@ -46,9 +46,15 @@ def get_alpha_vile_model(args, model_size='normal'):
     kernels = [3] * depth
     end_idx = int(len(kernels) * kernel_5_ratio + 0.5)
-    for idx in range(end_idx):
-        kernels[idx] = 5
-    random.shuffle(kernels)
+
+    if model_size == 'large':
+        # fixed positions of the 5x5 kernels for the large model
+        for idx in (1, 6, 7, 9, 10, 14, 18, 19, 23, 25, 26, 27, 28, 29, 33, 34, 35):
+            kernels[idx] = 5
+    else:
+        for idx in range(end_idx):
+            kernels[idx] = 5
+        random.shuffle(kernels)
 
     use_transformers = [False] * len(kernels)
     if nb_transformers > 0:
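
For the 'large' preset the positions of the 5x5 kernels are now pinned instead of being reshuffled on every model construction, presumably so that repeated builds yield the same architecture. A fixed layout like the one above can be regenerated once from the previous random procedure; a minimal sketch (the depth and ratio values are illustrative, the real ones come from the model-size preset):

    import random

    depth = 36              # illustrative
    kernel_5_ratio = 0.5    # illustrative
    kernels = [3] * depth
    end_idx = int(len(kernels) * kernel_5_ratio + 0.5)
    for idx in range(end_idx):
        kernels[idx] = 5
    random.shuffle(kernels)
    # record the sampled layout, e.g. to hardcode it as in the diff above
    print(sorted(i for i, k in enumerate(kernels) if k == 5))
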
diff --git a/DeepCrazyhouse/src/preprocessing/pgn_converter_util.py b/DeepCrazyhouse/src/preprocessing/pgn_converter_util.py
index e312a2d8..5761b990 100644
--- a/DeepCrazyhouse/src/preprocessing/pgn_converter_util.py
+++ b/DeepCrazyhouse/src/preprocessing/pgn_converter_util.py
@@ -69,14 +69,15 @@ def get_planes_from_pgn(params):
     return metadata, game_idx, results[0], results[1], results[2], results[3], results[4]
 
 
-def get_planes_from_game(game, mate_in_one=False):
+def get_planes_from_move_sequence(board: chess.Board, y_init, all_moves, mate_in_one=False):
     """
     Returns all plane descriptions of a given game and their corresponding target values:
     - the game outcome (-1, 0, 1)
     - the next move which will be played in each position
 
-    :param game: Game handle which is a python-chess object
-    (e.g. mv_hist_len = 8 means that the current position and the 7 previous positions are exported)
+    :param board: Board object which is a python-chess object
+    :param y_init: Evaluation of the initial board position
+    :param all_moves: List of all moves to be applied to the position
    :param mate_in_one: Decide weather only to export the position before the last mate-in-one move
    (this option is for evaluation and DEBUG purposes)
    :return: x - the position description of all moves in the game
@@ -95,24 +96,11 @@
     y_policy = []
     plys_to_end = []  # save the number of plys until the end of the game for each position that was considered
     phase_vector = []  # save all phases that occurred during the game
 
-    board = game.board()  # get the initial board state
-    # update the y value accordingly
-    if board.turn == chess.WHITE:
-        y_init = 1
-    else:
-        y_init = -1
-    if game.headers["Result"] == "0-1":
-        y_init *= -1
-    elif game.headers["Result"] == "1/2-1/2":
-        y_init = 0
-    all_moves = []  # Extract all moves first and save them into a list
-    for move in game.main_line():
-        all_moves.append(move)
     # Iterate through all moves (except the last one) and play them on a board.
     # you don't want to push the last move on the board because you had no movement policy to learn from in this case
     # The moves get pushed at the end of the for-loop and is only used in the next loop.
-    # Therefore we can iterate over 'all' moves
+    # Therefore, we can iterate over 'all' moves
     for plys, move in enumerate(all_moves):
         board_occ = 0  # by default the positions hasn't occurred before
         fen = board.fen()
@@ -166,3 +154,47 @@
     y_policy = np.stack(y_policy, axis=0)
 
     return x, y_value, y_policy, plys_to_end, phase_vector
+
+
+def get_planes_from_game(game, mate_in_one=False):
+    """
+    Returns all plane descriptions of a given game and their corresponding target values:
+    - the game outcome (-1, 0, 1)
+    - the next move which will be played in each position
+
+    :param game: Game handle which is a python-chess object
+    (e.g. mv_hist_len = 8 means that the current position and the 7 previous positions are exported)
+    :param mate_in_one: Decide whether only to export the position before the last mate-in-one move
+    (this option is for evaluation and DEBUG purposes)
+    :return: x - the position description of all moves in the game
+             y_value - the target values of the scene description. Here the game outcome.
+                  returns -1 if the current player lost, +1 if the current player won, 0 for draw
+             y_policy - the policy vector one-hot encoded indicating the next move the current player chose
+                  in this position
+             plys_to_end - array of how many plys to the end of the game for each position.
+                  This can be used to apply discounting
+             phase_vector - array of the game phase of each position
+    """
+
+    board = game.board()  # get the initial board state
+    # update the y value accordingly
+    if board.turn == chess.WHITE:
+        y_init = 1
+    else:
+        y_init = -1
+    if game.headers["Result"] == "0-1":
+        y_init *= -1
+    elif game.headers["Result"] == "1/2-1/2":
+        y_init = 0
+
+    all_moves = []  # Extract all moves first and save them into a list
+    for move in game.main_line():
+        all_moves.append(move)
+
+    try:
+        return get_planes_from_move_sequence(board, y_init, all_moves, mate_in_one)
+    except Exception:
+        print("game.headers:")
+        print(game.headers)
+        print("game", game)
+        raise  # re-raise so the caller does not silently receive None
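
With the PGN-specific bookkeeping split off into get_planes_from_game, the plane conversion can now be driven by any move source (the puzzle converter below relies on this). A minimal sketch of calling it directly; the position and moves are illustrative, and y_init is the value target from the perspective of the side to move at the start:

    import chess
    from DeepCrazyhouse.src.preprocessing.pgn_converter_util import get_planes_from_move_sequence

    board = chess.Board()  # any start position, e.g. constructed from a FEN
    all_moves = [chess.Move.from_uci("e2e4"), chess.Move.from_uci("e7e5")]
    x, y_value, y_policy, plys_to_end, phase_vector = get_planes_from_move_sequence(
        board, y_init=1, all_moves=all_moves, mate_in_one=False)
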
diff --git a/DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py b/DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py
index 4930a681..371a7ed2 100644
--- a/DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py
+++ b/DeepCrazyhouse/src/preprocessing/pgn_to_planes_converter.py
@@ -571,51 +571,8 @@ def export_pgn_batch(self, cur_part, game_idx_start, game_idx_end, pgn_sel, nb_w
         synchronizer=zarr.ThreadSynchronizer(),
         compression=compressor,
     )
-    # export the images
-    zarr_file.create_dataset(
-        name="x",
-        data=x,
-        shape=x.shape,
-        dtype=np.int16,
-        chunks=(128, x.shape[1], x.shape[2], x.shape[3]),
-        synchronizer=zarr.ThreadSynchronizer(),
-        compression=compressor,
-    )
-    # create the label arrays and copy the labels data in them
-    zarr_file.create_dataset(
-        name="y_value", shape=y_value.shape, dtype=np.int16, data=y_value, synchronizer=zarr.ThreadSynchronizer()
-    )
-    zarr_file.create_dataset(
-        name="y_policy",
-        shape=y_policy.shape,
-        dtype=np.int16,
-        data=y_policy,
-        chunks=(128, y_policy.shape[1]),
-        synchronizer=zarr.ThreadSynchronizer(),
-        compression=compressor,
-    )
-    zarr_file.create_dataset(
-        name="plys_to_end",
-        shape=plys_to_end.shape,
-        dtype=np.int16,
-        data=plys_to_end,
-        synchronizer=zarr.ThreadSynchronizer()
-    )
-    zarr_file.create_dataset(
-        name="phase_vector",
-        shape=phase_vector.shape,
-        dtype=np.int16,
-        data=phase_vector,
-        synchronizer=zarr.ThreadSynchronizer()
-    )
-    zarr_file.create_dataset(
-        name="start_indices",
-        shape=start_indices.shape,
-        dtype=np.int32,
-        data=start_indices,
-        synchronizer=zarr.ThreadSynchronizer(),
-        compression=compressor,
-    )
+    export_main_data(zarr_file, compressor, start_indices, x, y_value, y_policy, plys_to_end, phase_vector)
+    zarr_file.create_group("/parameters")
     # export the parameter settings and statistics of the file
     zarr_file.create_dataset(
         name="/parameters/pgn_name",
@@ -692,6 +649,55 @@
     return True
 
 
+def export_main_data(zarr_file, compressor, start_indices, x, y_value, y_policy, plys_to_end, phase_vector):
+    """Exports the main data entries into the zarr-file."""
+    # export the images
+    zarr_file.create_dataset(
+        name="x",
+        data=x,
+        shape=x.shape,
+        dtype=np.int16,
+        chunks=(128, x.shape[1], x.shape[2], x.shape[3]),
+        synchronizer=zarr.ThreadSynchronizer(),
+        compression=compressor,
+    )
+    # create the label arrays and copy the labels data in them
+    zarr_file.create_dataset(
+        name="y_value", shape=y_value.shape, dtype=np.int16, data=y_value, synchronizer=zarr.ThreadSynchronizer()
+    )
+    zarr_file.create_dataset(
+        name="y_policy",
+        shape=y_policy.shape,
+        dtype=np.int16,
+        data=y_policy,
+        chunks=(128, y_policy.shape[1]),
+        synchronizer=zarr.ThreadSynchronizer(),
+        compression=compressor,
+    )
+    zarr_file.create_dataset(
+        name="plys_to_end",
+        shape=plys_to_end.shape,
+        dtype=np.int16,
+        data=plys_to_end,
+        synchronizer=zarr.ThreadSynchronizer()
+    )
+    zarr_file.create_dataset(
+        name="phase_vector",
+        shape=phase_vector.shape,
+        dtype=np.int16,
+        data=phase_vector,
+        synchronizer=zarr.ThreadSynchronizer()
+    )
+    zarr_file.create_dataset(
+        name="start_indices",
+        shape=start_indices.shape,
+        dtype=np.int32,
+        data=start_indices,
+        synchronizer=zarr.ThreadSynchronizer(),
+        compression=compressor,
+    )
+
+
 def export_pgn_to_datasetfile():
     """ Converts the pgn file of the games selected to a dataset file"""
     PGN2PlanesConverter(
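
Extracting export_main_data removes the duplicated create_dataset block and gives the puzzle converter below a single export routine to reuse. For reference, a sketch of inspecting one exported batch (the file name is illustrative):

    import zarr

    store = zarr.ZipStore("puzzles_0.zip", mode="r")
    zarr_file = zarr.open_group(store, mode="r")
    # the six datasets written by export_main_data
    for name in ("x", "y_value", "y_policy", "plys_to_end", "phase_vector", "start_indices"):
        print(name, zarr_file[name].shape, zarr_file[name].dtype)
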
+""" +from pathlib import Path + +import sys + +sys.path.insert(0, '../../../../') +import pandas as pd +import chess +import chess.engine +import logging +from time import time +from multiprocessing import Pool +import zarr +import numpy as np +from numcodecs import Blosc +import argparse +import glob +import datetime +import os + +from DeepCrazyhouse.src.domain.util import get_dic_sorted_by_key +from DeepCrazyhouse.src.preprocessing.pgn_converter_util import get_planes_from_move_sequence +from DeepCrazyhouse.src.preprocessing.pgn_to_planes_converter import export_main_data + + +def get_eval(board: chess.Board, engine: chess.engine): + """ + Evaluates the given board position with the given engine and returns -1, 0, +1 respective to Losing, Drawn or Winning + :param board: Board position + :param engine: Chess engine object + """ + if not board.is_game_over(): + result = engine.analyse(board, chess.engine.Limit(time=0.1)) + print(result) + + if result['score'].is_mate(): + return -1 + elif result['score'].relative.score() > 100: + return 1 + elif result['score'].relative.score() < 100: + return -1 + else: + return 0 + elif board.is_checkmate(): + return -1 + else: + return 0 + + +def sort_concat_data(data_dic: dict): + """Sorts the dictionary object based on the index and returns the concatenated version. + :param data_dic: Data dictionary object + return: np.array + """ + data = get_dic_sorted_by_key(data_dic) + return np.concatenate(data, axis=0) + + +def process_chunk(chunk_id: int, chunksize: int, df_chunk: pd.DataFrame, export_dir: Path, processes: int): + """ + Processes a data frame chunk by exporting all chess puzzle positions in this chunk. + :param chunk_id: Unique id of the data chunk + :param chunksize: Size of each chunk + :param df_chunk: Data frame chunk + :param export_dir: Export directory where the .zip files will be stored + :param processes: Number of processes + return: None + """ + + # engine = chess.engine.SimpleEngine.popen_uci(r"stockfish") + + logging.info("starting conversion to planes...") + pool = Pool(processes=processes) + x_dic = {} + y_value_dic = {} + y_policy_dic = {} + plys_to_end_dic = {} + phase_vector_dic = {} + + params_inp = _prepare_parameter_inputs(chunk_id, chunksize, df_chunk) + + # use pool.starmap here and parallelize the export + for puzzle_idx, (x, y_value, y_policy, plys_to_end, phase_vector) in enumerate(pool.starmap( + get_planes_from_move_sequence, params_inp)): + # metadata_dic[puzzle_idx] = metadata + x_dic[puzzle_idx] = x + y_value_dic[puzzle_idx] = y_value + y_policy_dic[puzzle_idx] = y_policy + plys_to_end_dic[puzzle_idx] = plys_to_end + phase_vector_dic[puzzle_idx] = phase_vector + pool.close() + pool.join() + + _export_data(chunk_id, export_dir, phase_vector_dic, plys_to_end_dic, x_dic, y_policy_dic, y_value_dic) + + # engine.quit() + + +def _prepare_parameter_inputs(chunk_id, chunksize, df_chunk): + params_inp = [] + for puzzle_idx in range(chunk_id * chunksize, chunk_id * chunksize + len(df_chunk)): + board = chess.Board(fen=df_chunk["FEN"][puzzle_idx]) + moves = df_chunk["Moves"][puzzle_idx] + + for move in moves.split(" "): + board.push_uci(move) + + # skip evaluation with Stockfish + # eval = -get_eval(board, engine) + eval = 1 + + board = chess.Board(fen=df_chunk["FEN"][puzzle_idx]) + board_2 = chess.Board(fen=df_chunk["FEN"][puzzle_idx]) + all_moves = [] + moves_uci = moves.split(" ") + for idx, move in enumerate(moves_uci): + board_2.push_uci(move) + if idx == 0: + board.push_uci(move) + else: + 
+
+
+def _export_data(chunk_id, export_dir, phase_vector_dic, plys_to_end_dic, x_dic, y_policy_dic, y_value_dic):
+    # open a dataset file and create arrays
+    zarr_path = export_dir / f"puzzles_{chunk_id}.zip"
+    store = zarr.ZipStore(str(zarr_path), mode="w")
+    zarr_file = zarr.group(store=store, overwrite=True)
+    # metadata = sort_concat_data(metadata_dic)
+    x = sort_concat_data(x_dic)
+    y_value = sort_concat_data(y_value_dic)
+    y_policy = sort_concat_data(y_policy_dic)
+    plys_to_end = sort_concat_data(plys_to_end_dic)
+    phase_vector = sort_concat_data(phase_vector_dic)
+    start_indices = np.zeros(len(x), dtype=np.int32)  # create a list which describes where each game starts
+    # define the compressor object
+    compressor = Blosc(cname="lz4", clevel=5, shuffle=Blosc.SHUFFLE)
+    export_main_data(zarr_file, compressor, start_indices, x, y_value, y_policy, plys_to_end, phase_vector)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='This script allows converting a puzzle csv file in the lichess-format '
+                                                 'into plane representation.')
+
+    parser.add_argument('--puzzle-csv-dir', type=str, default='./', help='Directory where the puzzle csv file is stored.')
+    parser.add_argument('--export-dir', type=str, default='./', help='Directory where the .zip files will be exported to.')
+    parser.add_argument('--processes', type=int, default=4, help='Number of parallel processes.')
+
+    args = parser.parse_args()
+
+    # check if directories exist
+    puzzle_csv_dir = Path(args.puzzle_csv_dir)
+    export_dir = Path(args.export_dir)
+
+    if not puzzle_csv_dir.is_dir():
+        raise Exception("The given puzzle-csv-dir is not a valid directory.")
+    if not export_dir.is_dir():
+        raise Exception("The given export-dir is not a valid directory.")
+
+    puzzle_file_path = glob.glob(args.puzzle_csv_dir + "*.csv")
+    if len(puzzle_file_path) == 0:
+        raise Exception("The given puzzle-csv-dir does not contain a csv file.")
+    puzzle_file_path = puzzle_file_path[0]
+
+    # include current timestamp in dataset export file
+    timestmp = datetime.datetime.fromtimestamp(time()).strftime("%Y-%m-%d-%H-%M-%S")
+    timestmp_dir = export_dir / timestmp
+
+    # create a directory of the current timestamp
+    if not timestmp_dir.is_dir():
+        os.makedirs(timestmp_dir)
+    export_dir = timestmp_dir
+
+    # https://stackoverflow.com/questions/25962114/how-do-i-read-a-large-csv-file-with-pandas#25962187
+    chunksize = 10 ** 4
+    with pd.read_csv(puzzle_file_path, chunksize=chunksize) as reader:
+        for chunk_id, df_chunk in enumerate(reader):
+            print('chunk:', chunk_id)
+            process_chunk(chunk_id, chunksize, df_chunk, export_dir, args.processes)
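
The converter is invoked as e.g. `python3 puzzle_to_planes_converter.py --puzzle-csv-dir ./puzzles/ --export-dir ./export/ --processes 4`. As the docstring notes, the first move of each puzzle only leads to the puzzle position, so it is pushed onto the board while the remaining moves become training samples. A sketch of that handling on the first sample row above:

    import chess

    # first sample row from the module docstring
    fen = "q3k1nr/1pp1nQpp/3p4/1P2p3/4P3/B1PP1b2/B5PP/5K2 b k - 0 17"
    moves = "e8d7 a2e6 d7d8 f7f8".split(" ")

    board = chess.Board(fen=fen)
    board.push_uci(moves[0])  # opponent move that creates the puzzle position
    all_moves = [chess.Move.from_uci(m) for m in moves[1:]]  # policy targets
    # x, y_value, ... = get_planes_from_move_sequence(board, 1, all_moves, False)
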
diff --git a/DeepCrazyhouse/src/training/train_cli.py b/DeepCrazyhouse/src/training/train_cli.py
index bd59fc1c..6e798a1f 100644
--- a/DeepCrazyhouse/src/training/train_cli.py
+++ b/DeepCrazyhouse/src/training/train_cli.py
@@ -16,6 +16,7 @@
 import sys
 import torch
 import logging
+from pathlib import Path
 
 sys.path.insert(0, '../../../')
 
@@ -23,7 +24,7 @@
 from DeepCrazyhouse.configs.train_config import TrainConfig, TrainObjects
 from DeepCrazyhouse.src.training.train_cli_util import create_pytorch_model, get_validation_data, fill_train_objects,\
     print_model_summary, export_best_model_state, fill_train_config, export_configs, create_export_dirs, export_cmd_args
-from DeepCrazyhouse.src.training.trainer_agent_pytorch import TrainerAgentPytorch
+from DeepCrazyhouse.src.training.trainer_agent_pytorch import TrainerAgentPytorch, load_torch_state
 
 
 def parse_args(train_config: TrainConfig):
@@ -80,7 +81,7 @@ def main():
 
     update_train_config_via_args(args, train_config)
 
-    val_data, x_val, _ = get_validation_data(train_config)
+    val_data, x_val = get_validation_data(train_config)
     input_shape = x_val[0].shape
     fill_train_config(train_config, x_val)
 
@@ -91,6 +92,10 @@
     train_objects = TrainObjects()
     fill_train_objects(train_config, train_objects)
 
+    if train_config.tar_file != "":
+        print("load model weights")
+        load_torch_state(model, torch.optim.SGD(model.parameters(), lr=train_config.max_lr),
+                         Path(train_config.tar_file), train_config.device_id)
     create_export_dirs(train_config)
     export_configs(args, train_config)
 
diff --git a/DeepCrazyhouse/src/training/train_cli_util.py b/DeepCrazyhouse/src/training/train_cli_util.py
index 94d812cc..d514f8c6 100644
--- a/DeepCrazyhouse/src/training/train_cli_util.py
+++ b/DeepCrazyhouse/src/training/train_cli_util.py
@@ -38,7 +38,7 @@
     LinearWarmUp, MomentumSchedule
 from DeepCrazyhouse.src.training.train_util import get_metrics
 from DeepCrazyhouse.src.training.trainer_agent_pytorch import save_torch_state, export_to_onnx, get_context,\
-    get_data_loader
+    get_data_loader, load_torch_state
 
 
 class Args:
@@ -232,6 +232,12 @@ def export_best_model_state(k_steps_best: int, k_steps_final: int, model, policy
     shutil.copy(model_tar_path, best_model_tar_path)
 
     # ## Convert to onnx
+    print("load current best model")
+    load_torch_state(model, torch.optim.SGD(model.parameters(), lr=train_config.max_lr),
+                     Path(model_tar_path), train_config.device_id)
+
+    if hasattr(model, "merge_bn"):
+        model.merge_bn()
     convert_model_to_onnx(input_shape, k_steps_best, model, model_name, train_config)
 
     print("Saved weight & onnx files of the best model to %s" % (train_config.export_dir + "best-model"))
@@ -278,7 +284,7 @@ def get_validation_data(train_config: TrainConfig):
     """
     pgn_dataset_arrays_dict = load_pgn_dataset(dataset_type='val', part_id=0, verbose=True, normalize=train_config.normalize)
     val_data = get_data_loader(pgn_dataset_arrays_dict, train_config, shuffle=False)
-    return val_data, x_val, yp_val
+    return val_data, pgn_dataset_arrays_dict["x"]
 
 
 def print_model_summary(input_shape: tuple, model, x_val) -> None:
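
export_best_model_state now reloads the best checkpoint before the ONNX export, so the exported weights match the best validation score rather than the final training state, and merge_bn is only applied when the architecture provides it. The same load_torch_state helper powers the new tar_file option in train_cli.py; a usage sketch, with model and train_config set up as in main() and the checkpoint path illustrative:

    import torch
    from pathlib import Path
    from DeepCrazyhouse.src.training.trainer_agent_pytorch import load_torch_state

    # resume from an exported .tar checkpoint (path illustrative)
    load_torch_state(model, torch.optim.SGD(model.parameters(), lr=train_config.max_lr),
                     Path("weights/model-1.2-0.5.tar"), train_config.device_id)
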
diff --git a/DeepCrazyhouse/src/training/trainer_agent_pytorch.py b/DeepCrazyhouse/src/training/trainer_agent_pytorch.py
index 26dff358..eeb177fc 100644
--- a/DeepCrazyhouse/src/training/trainer_agent_pytorch.py
+++ b/DeepCrazyhouse/src/training/trainer_agent_pytorch.py
@@ -186,8 +186,9 @@ def train(self, cur_it=None):
                 # log the metric values to tensorboard
                 self._log_metrics(train_metric_values, global_step=self.k_steps, prefix="train_")
                 self._log_metrics(val_metric_values, global_step=self.k_steps, prefix="val_")
-                for dataset_name, metric_values in additional_metric_values.items():
-                    self._log_metrics(metric_values, global_step=self.k_steps, prefix=f"{dataset_name}_")
+                if self.additional_loaders is not None:
+                    for dataset_name, metric_values in additional_metric_values.items():
+                        self._log_metrics(metric_values, global_step=self.k_steps, prefix=f"{dataset_name}_")
 
                 if self.tc.log_metrics_to_tensorboard and self.tc.export_grad_histograms:
                     grads = []
@@ -332,21 +333,22 @@ def evaluate(self, train_loader):
 
         # do additional evaluations based on self.additional_loaders
         additional_metric_values = dict()
-        for dataset_name, dataloader in self.additional_loaders.items():
-            print(f"starting {dataset_name} eval")
-            metric_values = evaluate_metrics(
-                self.to.metrics,
-                dataloader,
-                self._model,
-                nb_batches=None,
-                ctx=self._ctx,
-                phase_weights={k: 1.0 for k, v in self.to.phase_weights.items()},  # use no weighting
-                sparse_policy_label=self.tc.sparse_policy_label,
-                apply_select_policy_from_plane=self.tc.select_policy_from_plane and not self.tc.is_policy_from_plane_data,
-                use_wdl=self.tc.use_wdl,
-                use_plys_to_end=self.tc.use_plys_to_end,
-            )
-            additional_metric_values[dataset_name] = metric_values
+        if self.additional_loaders is not None:
+            for dataset_name, dataloader in self.additional_loaders.items():
+                print(f"starting {dataset_name} eval")
+                metric_values = evaluate_metrics(
+                    self.to.metrics,
+                    dataloader,
+                    self._model,
+                    nb_batches=None,
+                    ctx=self._ctx,
+                    phase_weights={k: 1.0 for k, v in self.to.phase_weights.items()},  # use no weighting
+                    sparse_policy_label=self.tc.sparse_policy_label,
+                    apply_select_policy_from_plane=self.tc.select_policy_from_plane and not self.tc.is_policy_from_plane_data,
+                    use_wdl=self.tc.use_wdl,
+                    use_plys_to_end=self.tc.use_plys_to_end,
+                )
+                additional_metric_values[dataset_name] = metric_values
 
         self._model.train()  # return back to training mode
         return train_metric_values, val_metric_values, additional_metric_values
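
The None-guards make the additional evaluation loaders optional for callers that only train on the main data. When they are provided, each entry is evaluated with unweighted phases and logged under its own tensorboard prefix; a sketch of the expected shape, where puzzle_arrays_dict is a hypothetical dataset dictionary in the same format that get_data_loader already accepts:

    # keys become tensorboard prefixes, e.g. "puzzles_" + metric name
    additional_loaders = {"puzzles": get_data_loader(puzzle_arrays_dict, train_config, shuffle=False)}
    # or pass additional_loaders=None to skip the extra evaluations entirely
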