From 7c16e4739989b4b4ec196716e89c01601d5c819b Mon Sep 17 00:00:00 2001
From: Drejc Pesjak <47791324+DrejcPesjak@users.noreply.github.com>
Date: Tue, 20 Feb 2024 03:44:01 +0100
Subject: [PATCH 01/28] Resnet Variants (#9)

* Added ResNet variants

* ResNet50 example

* Simplified example

* fixed resnet config

* [Automated] Updated coverage badge

---------

Co-authored-by: Martin Kozlovsky <martin.kozlovsky@luxonis.com>
Co-authored-by: GitHub Actions <actions@github.com>
---
 configs/resnet_model.yaml                     | 57 +++++++++++++++++++
 luxonis_train/nodes/README.md                 | 13 +++--
 luxonis_train/nodes/__init__.py               |  4 +-
 .../nodes/{resnet18.py => resnet.py}          | 30 +++++++---
 media/coverage_badge.svg                      |  4 +-
 5 files changed, 91 insertions(+), 17 deletions(-)
 create mode 100644 configs/resnet_model.yaml
 rename luxonis_train/nodes/{resnet18.py => resnet.py} (61%)

diff --git a/configs/resnet_model.yaml b/configs/resnet_model.yaml
new file mode 100644
index 00000000..7e93d269
--- /dev/null
+++ b/configs/resnet_model.yaml
@@ -0,0 +1,57 @@
+
+model:
+  name: resnet50_classification
+  nodes:
+    - name: ResNet
+      variant: "50"
+      download_weights: True
+
+    - name: ClassificationHead
+      inputs:
+        - ResNet
+
+  losses:
+    - name: CrossEntropyLoss
+      attached_to: ClassificationHead
+
+  metrics:
+    - name: Accuracy
+      is_main_metric: true
+      attached_to: ClassificationHead
+
+  visualizers:
+    - name: ClassificationVisualizer
+      attached_to: ClassificationHead
+      params:
+        font_scale: 0.5
+        color: [255, 0, 0]
+        thickness: 2
+        include_plot: True
+
+dataset:
+  name: cifar10_test
+
+trainer:
+  batch_size: 4
+  epochs: &epochs 200
+  num_workers: 4
+  validation_interval: 10
+  num_log_images: 8
+
+  preprocessing:
+    train_image_size: [&height 224, &width 224]
+    keep_aspect_ratio: False
+    normalize:
+      active: True
+
+  callbacks:
+    - name: ExportOnTrainEnd
+    - name: TestOnTrainEnd
+
+  optimizer:
+    name: SGD
+    params:
+      lr: 0.02
+
+  scheduler:
+    name: ConstantLR
diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md
index bd44ac5a..637c5026 100644
--- a/luxonis_train/nodes/README.md
+++ b/luxonis_train/nodes/README.md
@@ -5,7 +5,7 @@ arbitrarily as long as the two nodes are compatible with each other.
 
 ## Table Of Contents
 
-- [ResNet18](#resnet18)
+- [ResNet](#resnet)
 - [MicroNet](#micronet)
 - [RepVGG](#repvgg)
 - [EfficientRep](#efficientrep)
@@ -30,15 +30,16 @@ Every node takes these parameters:
 
 Additional parameters for specific nodes are listed below.
 
-## ResNet18
+## ResNet
 
-Adapted from [here](https://pytorch.org/vision/main/models/generated/torchvision.models.resnet18.html).
+Adapted from [here](https://pytorch.org/vision/main/models/resnet.html).
 
 **Params**
 
-| Key              | Type | Default value | Description                            |
-| ---------------- | ---- | ------------- | -------------------------------------- |
-| download_weights | bool | False         | If True download weights from imagenet |
+| Key              | Type                                      | Default value | Description                            |
+| ---------------- | ----------------------------------------- | ------------- | -------------------------------------- |
+| variant          | Literal\["18", "34", "50", "101", "152"\] | "18"          | Variant of the network.                |
+| download_weights | bool                                      | False         | If True download weights from imagenet |
 
 ## MicroNet
 
diff --git a/luxonis_train/nodes/__init__.py b/luxonis_train/nodes/__init__.py
index d7ec70d0..954db2be 100644
--- a/luxonis_train/nodes/__init__.py
+++ b/luxonis_train/nodes/__init__.py
@@ -10,7 +10,7 @@
 from .mobileone import MobileOne
 from .reppan_neck import RepPANNeck
 from .repvgg import RepVGG
-from .resnet18 import ResNet18
+from .resnet import ResNet
 from .rexnetv1 import ReXNetV1_lite
 from .segmentation_head import SegmentationHead
 
@@ -28,6 +28,6 @@
     "ReXNetV1_lite",
     "RepPANNeck",
     "RepVGG",
-    "ResNet18",
+    "ResNet",
     "SegmentationHead",
 ]
diff --git a/luxonis_train/nodes/resnet18.py b/luxonis_train/nodes/resnet.py
similarity index 61%
rename from luxonis_train/nodes/resnet18.py
rename to luxonis_train/nodes/resnet.py
index 9c38681a..14ff8066 100644
--- a/luxonis_train/nodes/resnet18.py
+++ b/luxonis_train/nodes/resnet.py
@@ -1,10 +1,9 @@
-"""ResNet18 backbone.
+"""ResNet backbone.
 
-Source: U{https://pytorch.org/vision/main/models/generated/
-torchvision.models.resnet18.html}
+Source: U{https://pytorch.org/vision/main/models/resnet.html}
 @license: U{PyTorch<https://github.com/pytorch/pytorch/blob/master/LICENSE>}
 """
-
+from typing import Literal
 
 import torchvision
 from torch import Tensor
@@ -12,19 +11,22 @@
 from .base_node import BaseNode
 
 
-class ResNet18(BaseNode[Tensor, list[Tensor]]):
+class ResNet(BaseNode[Tensor, list[Tensor]]):
     attach_index: int = -1
 
     def __init__(
         self,
+        variant: Literal["18", "34", "50", "101", "152"] = "18",
         channels_list: list[int] | None = None,
         download_weights: bool = False,
         **kwargs,
     ):
-        """Implementation of the ResNet18 backbone.
+        """Implementation of the ResNetX backbone.
 
         TODO: add more info
 
+        @type variant: Literal["18", "34", "50", "101", "152"]
+        @param variant: ResNet variant. Defaults to "18".
         @type channels_list: list[int] | None
         @param channels_list: List of channels to return.
             If unset, defaults to [64, 128, 256, 512].
@@ -35,7 +37,12 @@ def __init__(
         """
         super().__init__(**kwargs)
 
-        self.backbone = torchvision.models.resnet18(
+        if variant not in RESNET_VARIANTS:
+            raise ValueError(
+                f"ResNet model variant should be in {list(RESNET_VARIANTS.keys())}"
+            )
+
+        self.backbone = RESNET_VARIANTS[variant](
             weights="DEFAULT" if download_weights else None
         )
         self.channels_list = channels_list or [64, 128, 256, 512]
@@ -57,3 +64,12 @@ def forward(self, x: Tensor) -> list[Tensor]:
         outs.append(x)
 
         return outs
+
+
+RESNET_VARIANTS = {
+    "18": torchvision.models.resnet18,
+    "34": torchvision.models.resnet34,
+    "50": torchvision.models.resnet50,
+    "101": torchvision.models.resnet101,
+    "152": torchvision.models.resnet152,
+}
diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index 12876e69..4033e89e 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">78%</text>
-        <text x="80" y="14">78%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">79%</text>
+        <text x="80" y="14">79%</text>
     </g>
 </svg>

From 8e35f25e21ebc70ae1a5a421a35ffd412f24765d Mon Sep 17 00:00:00 2001
From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com>
Date: Tue, 20 Feb 2024 03:44:24 +0100
Subject: [PATCH 02/28] MLFlow Upload Fix (#10)

* fixed incorrect class property call

* fixed exporter uploading

* UploadCheckpoint now uploads on every checkpoint save instead of only at train end

* fixed temp file names (use NamedTemporaryFile so the uploaded file has a real path)

* updated callback readme

* pre-commit run
---
 luxonis_train/callbacks/README.md             |  9 +++
 luxonis_train/callbacks/__init__.py           |  4 +-
 .../callbacks/export_on_train_end.py          |  4 +-
 luxonis_train/callbacks/upload_checkpoint.py  | 61 +++++++++++++++++++
 .../upload_checkpoint_on_train_end.py         | 41 -------------
 luxonis_train/core/exporter.py                |  6 +-
 6 files changed, 78 insertions(+), 47 deletions(-)
 create mode 100644 luxonis_train/callbacks/upload_checkpoint.py
 delete mode 100644 luxonis_train/callbacks/upload_checkpoint_on_train_end.py

diff --git a/luxonis_train/callbacks/README.md b/luxonis_train/callbacks/README.md
index d8e3da74..be441017 100644
--- a/luxonis_train/callbacks/README.md
+++ b/luxonis_train/callbacks/README.md
@@ -9,6 +9,7 @@ List of all supported callbacks.
 - [LuxonisProgressBar](#luxonisprogressbar)
 - [MetadataLogger](#metadatalogger)
 - [TestOnTrainEnd](#testontrainend)
+- [UploadCheckpoint](#uploadcheckpoint)
 
 ## PytorchLightning Callbacks
 
@@ -51,3 +52,11 @@ Metadata include all defined hyperparameters together with git hashes of `luxoni
 ## TestOnTrainEnd
 
 Callback to perform a test run at the end of the training.
+
+## UploadCheckpoint
+
+Callback that uploads currently best checkpoint (based on validation loss) to specified cloud directory after every validation epoch.
+
+| Key              | Type | Default value | Description                                                                                                                   |
+| ---------------- | ---- | ------------- | ----------------------------------------------------------------------------------------------------------------------------- |
+| upload_directory | str  | /             | Path to cloud directory where checkpoints should be uploaded to. If you want to use current mlflow run set it to `mlflow://`. |
diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py
index 4be94600..cec9e000 100644
--- a/luxonis_train/callbacks/__init__.py
+++ b/luxonis_train/callbacks/__init__.py
@@ -13,7 +13,7 @@
 from .metadata_logger import MetadataLogger
 from .module_freezer import ModuleFreezer
 from .test_on_train_end import TestOnTrainEnd
-from .upload_checkpoint_on_train_end import UploadCheckpointOnTrainEnd
+from .upload_checkpoint import UploadCheckpoint
 
 CALLBACKS.register_module(module=EarlyStopping)
 CALLBACKS.register_module(module=LearningRateMonitor)
@@ -28,5 +28,5 @@
     "MetadataLogger",
     "ModuleFreezer",
     "TestOnTrainEnd",
-    "UploadCheckpointOnTrainEnd",
+    "UploadCheckpoint",
 ]
diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py
index de5fde88..923267c1 100644
--- a/luxonis_train/callbacks/export_on_train_end.py
+++ b/luxonis_train/callbacks/export_on_train_end.py
@@ -51,8 +51,8 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No
         if self.upload_to_mlflow:
             if cfg.tracker.is_mlflow:
                 tracker = cast(LuxonisTrackerPL, trainer.logger)
-                new_upload_directory = f"mlflow://{tracker.project_id}/{tracker.run_id}"
-                cfg.exporter.upload_directory = new_upload_directory
+                new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}"
+                cfg.exporter.upload_url = new_upload_url
             else:
                 logging.getLogger(__name__).warning(
                     "`upload_to_mlflow` is set to True, "
diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py
new file mode 100644
index 00000000..a0fa137a
--- /dev/null
+++ b/luxonis_train/callbacks/upload_checkpoint.py
@@ -0,0 +1,61 @@
+import logging
+import os
+from typing import Any
+
+import lightning.pytorch as pl
+import torch
+from luxonis_ml.utils.filesystem import LuxonisFileSystem
+
+from luxonis_train.utils.registry import CALLBACKS
+
+
+@CALLBACKS.register_module()
+class UploadCheckpoint(pl.Callback):
+    """Callback that uploads best checkpoint based on the validation loss."""
+
+    def __init__(self, upload_directory: str):
+        """Constructs `UploadCheckpoint`.
+
+        @type upload_directory: str
+        @param upload_directory: Path used as upload directory
+        """
+        super().__init__()
+        self.fs = LuxonisFileSystem(
+            upload_directory, allow_active_mlflow_run=True, allow_local=False
+        )
+        self.logger = logging.getLogger(__name__)
+        self.last_logged_epoch = None
+        self.last_best_checkpoint = None
+
+    def on_save_checkpoint(
+        self,
+        trainer: pl.Trainer,
+        pl_module: pl.LightningModule,
+        checkpoint: dict[str, Any],
+    ) -> None:
+        # Log only once per epoch in case there are multiple ModelCheckpoint callbacks
+        if not self.last_logged_epoch == trainer.current_epoch:
+            model_checkpoint_callbacks = [
+                c
+                for c in trainer.callbacks  # type: ignore
+                if isinstance(c, pl.callbacks.ModelCheckpoint)  # type: ignore
+            ]
+            # NOTE: assume that first checkpoint callback is based on val loss
+            curr_best_checkpoint = model_checkpoint_callbacks[0].best_model_path
+
+            if self.last_best_checkpoint != curr_best_checkpoint:
+                self.logger.info(f"Started checkpoint upload to {self.fs.full_path}...")
+                temp_filename = "curr_best_val_loss.ckpt"
+                torch.save(checkpoint, temp_filename)
+                self.fs.put_file(
+                    local_path=temp_filename,
+                    remote_path=temp_filename,
+                    mlflow_instance=trainer.logger.experiment.get(  # type: ignore
+                        "mlflow", None
+                    ),
+                )
+                os.remove(temp_filename)
+                self.logger.info("Checkpoint upload finished")
+                self.last_best_checkpoint = curr_best_checkpoint
+
+            self.last_logged_epoch = trainer.current_epoch
diff --git a/luxonis_train/callbacks/upload_checkpoint_on_train_end.py b/luxonis_train/callbacks/upload_checkpoint_on_train_end.py
deleted file mode 100644
index 86879ec9..00000000
--- a/luxonis_train/callbacks/upload_checkpoint_on_train_end.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import logging
-
-import lightning.pytorch as pl
-from luxonis_ml.utils.filesystem import LuxonisFileSystem
-
-from luxonis_train.utils.registry import CALLBACKS
-
-
-@CALLBACKS.register_module()
-class UploadCheckpointOnTrainEnd(pl.Callback):
-    """Callback that uploads best checkpoint based on the validation loss."""
-
-    def __init__(self, upload_directory: str):
-        """Constructs `UploadCheckpointOnTrainEnd`.
-
-        @type upload_directory: str
-        @param upload_directory: Path used as upload directory
-        """
-        super().__init__()
-        self.fs = LuxonisFileSystem(
-            upload_directory, allow_active_mlflow_run=True, allow_local=False
-        )
-
-    def on_train_end(self, trainer: pl.Trainer, _: pl.LightningModule) -> None:
-        logger = logging.getLogger(__name__)
-        logger.info(f"Started checkpoint upload to {self.fs.full_path()}...")
-        model_checkpoint_callbacks = [
-            c
-            for c in trainer.callbacks  # type: ignore
-            if isinstance(c, pl.callbacks.ModelCheckpoint)  # type: ignore
-        ]
-        # NOTE: assume that first checkpoint callback is based on val loss
-        local_path = model_checkpoint_callbacks[0].best_model_path
-        self.fs.put_file(
-            local_path=local_path,
-            remote_path=local_path.split("/")[-1],
-            mlflow_instance=trainer.logger.experiment.get(  # type: ignore
-                "mlflow", None
-            ),
-        )
-        logger.info("Checkpoint upload finished")
diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py
index ab73ce72..7ed94f45 100644
--- a/luxonis_train/core/exporter.py
+++ b/luxonis_train/core/exporter.py
@@ -200,7 +200,7 @@ def _upload(self, files_to_upload: list[str]):
                 remote_path=self.cfg.exporter.export_model_name + suffix,
             )
 
-        with tempfile.TemporaryFile() as f:
+        with tempfile.NamedTemporaryFile(prefix="config", suffix=".yaml") as f:
             self.cfg.save_data(f.name)
             fs.put_file(local_path=f.name, remote_path="config.yaml")
 
@@ -209,7 +209,9 @@ def _upload(self, files_to_upload: list[str]):
         )
         modelconverter_config = self._get_modelconverter_config(onnx_path)
 
-        with tempfile.TemporaryFile() as f:
+        with tempfile.NamedTemporaryFile(
+            prefix="config_export", suffix=".yaml", mode="w+"
+        ) as f:
             yaml.dump(modelconverter_config, f, default_flow_style=False)
             fs.put_file(local_path=f.name, remote_path="config_export.yaml")
 

From 15bd923479283bdc0eb4a7e390974a495a380123 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Wed, 21 Feb 2024 02:15:43 +0100
Subject: [PATCH 03/28] CLI Source Option (#11)

* option to source custom code in CLI

* removed empty dicts

* [Automated] Updated coverage badge

---------

Co-authored-by: GitHub Actions <actions@github.com>
---
 luxonis_train/__main__.py      |  17 ++-
 luxonis_train/core/exporter.py |   5 +-
 media/coverage_badge.svg       |   4 +-
 pyproject.toml                 |   2 +-
 tools/main.py                  | 226 ---------------------------------
 5 files changed, 15 insertions(+), 239 deletions(-)
 delete mode 100644 tools/main.py

diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index 73843593..24cfd69b 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -214,13 +214,18 @@ def common(
             "--version", callback=version_callback, help="Show version and exit."
         ),
     ] = False,
+    source: Annotated[
+        Optional[Path],
+        typer.Option(
+            help="Path to a python file with custom components. "
+            "Will be sourced before running the command.",
+            metavar="FILE",
+        ),
+    ] = None,
 ):
-    ...
-
-
-def main():
-    app()
+    if source:
+        exec(source.read_text())
 
 
 if __name__ == "__main__":
-    main()
+    app()
diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py
index 7ed94f45..6602a040 100644
--- a/luxonis_train/core/exporter.py
+++ b/luxonis_train/core/exporter.py
@@ -18,15 +18,12 @@
 
 
 class Exporter(Core):
-    """Main API which is used to create the model, setup pytorch lightning environment
-    and perform training based on provided arguments and config."""
-
     def __init__(
         self,
         cfg: str | dict[str, Any] | Config,
         opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None,
     ):
-        """Constructs a new Exporter instance.
+        """Provides an interface for exporting models to .onnx and .blob formats.
 
         @type cfg: str | dict[str, Any] | Config
         @param cfg: Path to config file or config dict used to setup training.
diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index 4033e89e..7a18c7f4 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">79%</text>
-        <text x="80" y="14">79%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">80%</text>
+        <text x="80" y="14">80%</text>
     </g>
 </svg>
diff --git a/pyproject.toml b/pyproject.toml
index 048c005b..2093e25b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,7 +19,7 @@ classifiers = [
 ]
 
 [project.scripts]
-luxonis_train = "tools.main:main"
+luxonis_train = "luxonis_train.__main__:app"
 
 [project.urls]
 repository = "https://github.com/luxonis/luxonis-train"
diff --git a/tools/main.py b/tools/main.py
deleted file mode 100644
index 73843593..00000000
--- a/tools/main.py
+++ /dev/null
@@ -1,226 +0,0 @@
-import os
-from enum import Enum
-from importlib.metadata import version
-from pathlib import Path
-from typing import Annotated, Optional
-
-import cv2
-import torch
-import typer
-
-app = typer.Typer(help="Luxonis Train CLI", add_completion=False)
-
-
-class View(str, Enum):
-    train = "train"
-    val = "val"
-    test = "test"
-
-    def __str__(self):
-        return self.value
-
-
-ConfigType = Annotated[
-    Optional[Path],
-    typer.Option(
-        help="Path to the configuration file.",
-        show_default=False,
-    ),
-]
-
-OptsType = Annotated[
-    Optional[list[str]],
-    typer.Argument(
-        help="A list of optional CLI overrides of the config file.",
-        show_default=False,
-    ),
-]
-
-ViewType = Annotated[View, typer.Option(help="Which dataset view to use.")]
-
-SaveDirType = Annotated[
-    Optional[Path],
-    typer.Option(help="Where to save the inference results."),
-]
-
-
-@app.command()
-def train(config: ConfigType = None, opts: OptsType = None):
-    """Start training."""
-    from luxonis_train.core import Trainer
-
-    Trainer(str(config), opts).train()
-
-
-@app.command()
-def eval(config: ConfigType = None, view: ViewType = View.val, opts: OptsType = None):
-    """Evaluate model."""
-    from luxonis_train.core import Trainer
-
-    Trainer(str(config), opts).test(view=view.name)
-
-
-@app.command()
-def tune(config: ConfigType = None, opts: OptsType = None):
-    """Start hyperparameter tuning."""
-    from luxonis_train.core import Tuner
-
-    Tuner(str(config), opts).tune()
-
-
-@app.command()
-def export(config: ConfigType = None, opts: OptsType = None):
-    """Export model."""
-    from luxonis_train.core import Exporter
-
-    Exporter(str(config), opts).export()
-
-
-@app.command()
-def infer(
-    config: ConfigType = None,
-    view: ViewType = View.val,
-    save_dir: SaveDirType = None,
-    opts: OptsType = None,
-):
-    """Run inference."""
-    from luxonis_train.core import Inferer
-
-    Inferer(str(config), opts, view=view.name, save_dir=save_dir).infer()
-
-
-@app.command()
-def inspect(
-    config: ConfigType = None,
-    view: ViewType = View.val,
-    save_dir: SaveDirType = None,
-    opts: OptsType = None,
-):
-    """Inspect dataset."""
-    from luxonis_ml.data import (
-        LuxonisDataset,
-        TrainAugmentations,
-        ValAugmentations,
-    )
-
-    from luxonis_train.attached_modules.visualizers.utils import (
-        draw_bounding_box_labels,
-        draw_keypoint_labels,
-        draw_segmentation_labels,
-        get_unnormalized_images,
-    )
-    from luxonis_train.utils.config import Config
-    from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn
-    from luxonis_train.utils.types import LabelType
-
-    overrides = {}
-    if opts:
-        if len(opts) % 2 != 0:
-            raise ValueError("Override options should be a list of key-value pairs")
-
-        for i in range(0, len(opts), 2):
-            overrides[opts[i]] = opts[i + 1]
-
-    cfg = Config.get_config(str(config), overrides)
-
-    image_size = cfg.trainer.preprocessing.train_image_size
-
-    dataset = LuxonisDataset(
-        dataset_name=cfg.dataset.name,
-        team_id=cfg.dataset.team_id,
-        dataset_id=cfg.dataset.id,
-        bucket_type=cfg.dataset.bucket_type,
-        bucket_storage=cfg.dataset.bucket_storage,
-    )
-    augmentations = (
-        TrainAugmentations(
-            image_size=image_size,
-            augmentations=[
-                i.model_dump() for i in cfg.trainer.preprocessing.augmentations
-            ],
-            train_rgb=cfg.trainer.preprocessing.train_rgb,
-            keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
-        )
-        if view == "train"
-        else ValAugmentations(
-            image_size=image_size,
-            augmentations=[
-                i.model_dump() for i in cfg.trainer.preprocessing.augmentations
-            ],
-            train_rgb=cfg.trainer.preprocessing.train_rgb,
-            keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
-        )
-    )
-
-    loader_train = LuxonisLoaderTorch(
-        dataset,
-        view=view,
-        augmentations=augmentations,
-    )
-
-    pytorch_loader_train = torch.utils.data.DataLoader(
-        loader_train,
-        batch_size=4,
-        num_workers=1,
-        collate_fn=collate_fn,
-    )
-
-    if save_dir is not None:
-        os.makedirs(save_dir, exist_ok=True)
-
-    counter = 0
-    for data in pytorch_loader_train:
-        imgs, label_dict = data
-        images = get_unnormalized_images(cfg, imgs)
-        for i, img in enumerate(images):
-            for label_type, labels in label_dict.items():
-                if label_type == LabelType.CLASSIFICATION:
-                    continue
-                elif label_type == LabelType.BOUNDINGBOX:
-                    img = draw_bounding_box_labels(
-                        img, labels[labels[:, 0] == i][:, 2:], colors="yellow", width=1
-                    )
-                elif label_type == LabelType.KEYPOINT:
-                    img = draw_keypoint_labels(
-                        img, labels[labels[:, 0] == i][:, 1:], colors="red"
-                    )
-                elif label_type == LabelType.SEGMENTATION:
-                    img = draw_segmentation_labels(
-                        img, labels[i], alpha=0.8, colors="#5050FF"
-                    )
-
-            img_arr = img.permute(1, 2, 0).numpy()
-            img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR)
-            if save_dir is not None:
-                counter += 1
-                cv2.imwrite(os.path.join(save_dir, f"{counter}.png"), img_arr)
-            else:
-                cv2.imshow("img", img_arr)
-                if cv2.waitKey() == ord("q"):
-                    exit()
-
-
-def version_callback(value: bool):
-    if value:
-        typer.echo(f"LuxonisTrain Version: {version(__package__)}")
-        raise typer.Exit()
-
-
-@app.callback()
-def common(
-    _: Annotated[
-        bool,
-        typer.Option(
-            "--version", callback=version_callback, help="Show version and exit."
-        ),
-    ] = False,
-):
-    ...
-
-
-def main():
-    app()
-
-
-if __name__ == "__main__":
-    main()

From 279727897a0f0fdec752fd303d9dd738ef23224d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Wed, 21 Feb 2024 12:54:02 +0100
Subject: [PATCH 04/28] Fix Removed Tensor Metadata (#12)

* option to source custom code in CLI

* removed empty dicts

* fixed issue with removed tensor metadata in match case statements
---
 luxonis_train/attached_modules/visualizers/multi_visualizer.py | 2 +-
 luxonis_train/attached_modules/visualizers/utils.py            | 2 +-
 luxonis_train/core/exporter.py                                 | 2 +-
 luxonis_train/models/luxonis_model.py                          | 2 +-
 luxonis_train/nodes/base_node.py                               | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py
index 2fee8e1f..99b64bf0 100644
--- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py
@@ -47,7 +47,7 @@ def forward(
     ) -> tuple[Tensor, Tensor]:
         for visualizer in self.visualizers:
             match visualizer.run(label_canvas, prediction_canvas, outputs, labels):
-                case Tensor(data=prediction_viz):
+                case Tensor() as prediction_viz:
                     prediction_canvas = prediction_viz
                 case (Tensor(data=label_viz), Tensor(data=prediction_viz)):
                     label_canvas = label_viz
diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py
index 52431204..aa1a90d3 100644
--- a/luxonis_train/attached_modules/visualizers/utils.py
+++ b/luxonis_train/attached_modules/visualizers/utils.py
@@ -405,7 +405,7 @@ def resize_to_match(
         return fst_resized, snd_resized
 
     match visualization:
-        case Tensor(data=viz):
+        case Tensor() as viz:
             return viz
         case (Tensor(data=viz_labels), Tensor(data=viz_predictions)):
             viz_labels, viz_predictions = resize_to_match(viz_labels, viz_predictions)
diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py
index 6602a040..0efd6d56 100644
--- a/luxonis_train/core/exporter.py
+++ b/luxonis_train/core/exporter.py
@@ -128,7 +128,7 @@ def export(self, onnx_path: str | None = None):
             model_onnx = onnx.load(onnx_path)
             onnx_model, check = onnxsim.simplify(model_onnx)
             if not check:
-                raise RuntimeError("Onnx simplify failed.")
+                raise RuntimeError("ONNX simplify failed.")
             onnx.save(onnx_model, onnx_path)
             logger.info(f"ONNX model saved to {onnx_path}")
 
diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py
index 80a57d99..88d4fa28 100644
--- a/luxonis_train/models/luxonis_model.py
+++ b/luxonis_train/models/luxonis_model.py
@@ -360,7 +360,7 @@ def compute_metrics(self) -> dict[str, dict[str, Tensor]]:
                         computed_submetrics = {
                             metric_name: metric_value,
                         } | submetrics
-                    case Tensor(data=metric_value):
+                    case Tensor() as metric_value:
                         computed_submetrics = {metric_name: metric_value}
                     case dict(submetrics):
                         computed_submetrics = submetrics
diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py
index 6ec216fb..7338a802 100644
--- a/luxonis_train/nodes/base_node.py
+++ b/luxonis_train/nodes/base_node.py
@@ -291,7 +291,7 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]:
         """
 
         match output:
-            case Tensor(data=out):
+            case Tensor() as out:
                 outputs = [out]
             case list(tensors) if all(isinstance(t, Tensor) for t in tensors):
                 outputs = tensors

From 2c62a0812e3075331a0724d3a25fe1f35c34dd95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Thu, 22 Feb 2024 08:57:10 +0100
Subject: [PATCH 05/28] Forbid Extra Fields (#13)

* forbid extra fields in config

* fixed configs
---
 configs/coco_model.yaml       |  1 -
 configs/resnet_model.yaml     |  5 ++--
 luxonis_train/utils/config.py | 44 +++++++++++++++++++----------------
 3 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml
index 491152ce..67f3b91d 100755
--- a/configs/coco_model.yaml
+++ b/configs/coco_model.yaml
@@ -117,7 +117,6 @@ trainer:
   validation_interval: 10
   num_log_images: 8
   skip_last_batch: True
-  main_head_index: 0
   log_sub_losses: True
   save_top_k: 3
 
diff --git a/configs/resnet_model.yaml b/configs/resnet_model.yaml
index 7e93d269..e768d259 100644
--- a/configs/resnet_model.yaml
+++ b/configs/resnet_model.yaml
@@ -3,8 +3,9 @@ model:
   name: resnet50_classification
   nodes:
     - name: ResNet
-      variant: "50"
-      download_weights: True
+      params:
+        variant: "50"
+        download_weights: True
 
     - name: ClassificationHead
       inputs:
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index 48661f7d..591376f8 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -5,7 +5,7 @@
 
 from luxonis_ml.data import BucketStorage, BucketType
 from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging
-from pydantic import BaseModel, Field, field_serializer, model_validator
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, model_validator
 
 from luxonis_train.utils.general import is_acyclic
 from luxonis_train.utils.registry import MODELS
@@ -13,7 +13,11 @@
 logger = logging.getLogger(__name__)
 
 
-class AttachedModuleConfig(BaseModel):
+class CustomBaseModel(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+
+class AttachedModuleConfig(CustomBaseModel):
     name: str
     attached_to: str
     alias: str | None = None
@@ -28,12 +32,12 @@ class MetricModuleConfig(AttachedModuleConfig):
     is_main_metric: bool = False
 
 
-class FreezingConfig(BaseModel):
+class FreezingConfig(CustomBaseModel):
     active: bool = False
     unfreeze_after: int | float | None = None
 
 
-class ModelNodeConfig(BaseModel):
+class ModelNodeConfig(CustomBaseModel):
     name: str
     alias: str | None = None
     inputs: list[str] = []
@@ -41,7 +45,7 @@ class ModelNodeConfig(BaseModel):
     freezing: FreezingConfig = FreezingConfig()
 
 
-class PredefinedModelConfig(BaseModel):
+class PredefinedModelConfig(CustomBaseModel):
     name: str
     params: dict[str, Any] = {}
     include_nodes: bool = True
@@ -50,7 +54,7 @@ class PredefinedModelConfig(BaseModel):
     include_visualizers: bool = True
 
 
-class ModelConfig(BaseModel):
+class ModelConfig(CustomBaseModel):
     name: str
     predefined_model: PredefinedModelConfig | None = None
     weights: str | None = None
@@ -114,7 +118,7 @@ def check_unique_names(self):
         return self
 
 
-class TrackerConfig(BaseModel):
+class TrackerConfig(CustomBaseModel):
     project_name: str | None = None
     project_id: str | None = None
     run_name: str | None = None
@@ -126,7 +130,7 @@ class TrackerConfig(BaseModel):
     is_mlflow: bool = False
 
 
-class DatasetConfig(BaseModel):
+class DatasetConfig(CustomBaseModel):
     name: str | None = None
     id: str | None = None
     team_name: str | None = None
@@ -143,7 +147,7 @@ def get_enum_value(self, v: Enum, _) -> str:
         return str(v.value)
 
 
-class NormalizeAugmentationConfig(BaseModel):
+class NormalizeAugmentationConfig(CustomBaseModel):
     active: bool = True
     params: dict[str, Any] = {
         "mean": [0.485, 0.456, 0.406],
@@ -151,12 +155,12 @@ class NormalizeAugmentationConfig(BaseModel):
     }
 
 
-class AugmentationConfig(BaseModel):
+class AugmentationConfig(CustomBaseModel):
     name: str
     params: dict[str, Any] = {}
 
 
-class PreprocessingConfig(BaseModel):
+class PreprocessingConfig(CustomBaseModel):
     train_image_size: Annotated[
         list[int], Field(default=[256, 256], min_length=2, max_length=2)
     ] = [256, 256]
@@ -174,23 +178,23 @@ def check_normalize(self):
         return self
 
 
-class CallbackConfig(BaseModel):
+class CallbackConfig(CustomBaseModel):
     name: str
     active: bool = True
     params: dict[str, Any] = {}
 
 
-class OptimizerConfig(BaseModel):
+class OptimizerConfig(CustomBaseModel):
     name: str = "Adam"
     params: dict[str, Any] = {}
 
 
-class SchedulerConfig(BaseModel):
+class SchedulerConfig(CustomBaseModel):
     name: str = "ConstantLR"
     params: dict[str, Any] = {}
 
 
-class TrainerConfig(BaseModel):
+class TrainerConfig(CustomBaseModel):
     preprocessing: PreprocessingConfig = PreprocessingConfig()
 
     accelerator: Literal["auto", "cpu", "gpu"] = "auto"
@@ -229,17 +233,17 @@ def check_num_workes_platform(self):
         return self
 
 
-class OnnxExportConfig(BaseModel):
+class OnnxExportConfig(CustomBaseModel):
     opset_version: int = 12
     dynamic_axes: dict[str, Any] | None = None
 
 
-class BlobconverterExportConfig(BaseModel):
+class BlobconverterExportConfig(CustomBaseModel):
     active: bool = False
     shaves: int = 6
 
 
-class ExportConfig(BaseModel):
+class ExportConfig(CustomBaseModel):
     export_save_directory: str = "output_export"
     input_shape: list[int] | None = None
     export_model_name: str = "model"
@@ -265,12 +269,12 @@ def pad_values(values: float | list[float] | None):
         return self
 
 
-class StorageConfig(BaseModel):
+class StorageConfig(CustomBaseModel):
     active: bool = True
     storage_type: Literal["local", "remote"] = "local"
 
 
-class TunerConfig(BaseModel):
+class TunerConfig(CustomBaseModel):
     study_name: str = "test-study"
     use_pruner: bool = True
     n_trials: int | None = 15

From 0b51fa0e6f7c124d922738d820fb3c5b3652972c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Sat, 24 Feb 2024 10:09:53 +0100
Subject: [PATCH 06/28] Automatic Inference of attach_index (#14)

* automatic inference of attach index based on type signature

* added inference for input and x names
---
 luxonis_train/nodes/base_node.py              | 19 ++++++++++++---
 luxonis_train/nodes/bisenet_head.py           |  7 +++---
 luxonis_train/nodes/classification_head.py    |  1 -
 luxonis_train/nodes/contextspatial.py         |  8 +++----
 luxonis_train/nodes/efficientrep.py           |  6 ++---
 .../nodes/implicit_keypoint_bbox_head.py      |  4 +---
 luxonis_train/nodes/micronet.py               | 24 ++++++++-----------
 luxonis_train/nodes/mobilenetv2.py            |  6 ++---
 luxonis_train/nodes/mobileone.py              |  5 ++--
 luxonis_train/nodes/resnet.py                 |  6 ++---
 luxonis_train/nodes/rexnetv1.py               | 11 ++++-----
 luxonis_train/nodes/segmentation_head.py      |  1 -
 12 files changed, 45 insertions(+), 53 deletions(-)

diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py
index 7338a802..c3124f82 100644
--- a/luxonis_train/nodes/base_node.py
+++ b/luxonis_train/nodes/base_node.py
@@ -1,3 +1,4 @@
+import inspect
 from abc import ABC, abstractmethod
 from typing import Generic, TypeVar
 
@@ -80,8 +81,6 @@ class BaseNode(
         Provide only in case the `input_shapes` were not provided.
     """
 
-    attach_index: AttachIndexType = "all"
-
     def __init__(
         self,
         *,
@@ -96,7 +95,21 @@ def __init__(
     ):
         super().__init__()
 
-        self.attach_index = attach_index or self.attach_index
+        if attach_index is None:
+            parameters = inspect.signature(self.forward).parameters
+            inputs_forward_type = parameters.get(
+                "inputs", parameters.get("input", parameters.get("x", None))
+            )
+            if (
+                inputs_forward_type is not None
+                and inputs_forward_type.annotation == Tensor
+            ):
+                self.attach_index = -1
+            else:
+                self.attach_index = "all"
+        else:
+            self.attach_index = attach_index
+
         self.in_protocols = in_protocols or [FeaturesProtocol]
         self.task_type = task_type
 
diff --git a/luxonis_train/nodes/bisenet_head.py b/luxonis_train/nodes/bisenet_head.py
index 99845177..a3b11df6 100644
--- a/luxonis_train/nodes/bisenet_head.py
+++ b/luxonis_train/nodes/bisenet_head.py
@@ -15,7 +15,6 @@
 
 
 class BiSeNetHead(BaseNode[Tensor, Tensor]):
-    attach_index: int = -1
     in_height: int
     in_channels: int
 
@@ -45,6 +44,6 @@ def wrap(self, output: Tensor) -> Packet[Tensor]:
         return {"segmentation": [output]}
 
     def forward(self, inputs: Tensor) -> Tensor:
-        inputs = self.conv_3x3(inputs)
-        inputs = self.conv_1x1(inputs)
-        return self.upscale(inputs)
+        x = self.conv_3x3(inputs)
+        x = self.conv_1x1(x)
+        return self.upscale(x)
diff --git a/luxonis_train/nodes/classification_head.py b/luxonis_train/nodes/classification_head.py
index 10f9b3c9..d96e6b72 100644
--- a/luxonis_train/nodes/classification_head.py
+++ b/luxonis_train/nodes/classification_head.py
@@ -7,7 +7,6 @@
 
 class ClassificationHead(BaseNode[Tensor, Tensor]):
     in_channels: int
-    attach_index: int = -1
 
     def __init__(
         self,
diff --git a/luxonis_train/nodes/contextspatial.py b/luxonis_train/nodes/contextspatial.py
index adbb84bc..1ca1460d 100644
--- a/luxonis_train/nodes/contextspatial.py
+++ b/luxonis_train/nodes/contextspatial.py
@@ -18,8 +18,6 @@
 
 
 class ContextSpatial(BaseNode[Tensor, list[Tensor]]):
-    attach_index: int = -1
-
     def __init__(self, context_backbone: str = "MobileNetV2", **kwargs):
         """Context spatial backbone.
         TODO: Add more documentation.
@@ -34,9 +32,9 @@ def __init__(self, context_backbone: str = "MobileNetV2", **kwargs):
         self.spatial_path = SpatialPath(3, 128)
         self.ffm = FeatureFusionBlock(256, 256)
 
-    def forward(self, x: Tensor) -> list[Tensor]:
-        spatial_out = self.spatial_path(x)
-        context16, _ = self.context_path(x)
+    def forward(self, inputs: Tensor) -> list[Tensor]:
+        spatial_out = self.spatial_path(inputs)
+        context16, _ = self.context_path(inputs)
         fm_fuse = self.ffm(spatial_out, context16)
         outs = [fm_fuse]
         return outs
diff --git a/luxonis_train/nodes/efficientrep.py b/luxonis_train/nodes/efficientrep.py
index e6a014af..ccff4189 100644
--- a/luxonis_train/nodes/efficientrep.py
+++ b/luxonis_train/nodes/efficientrep.py
@@ -19,8 +19,6 @@
 
 
 class EfficientRep(BaseNode[Tensor, list[Tensor]]):
-    attach_index: int = -1
-
     def __init__(
         self,
         channels_list: list[int] | None = None,
@@ -104,9 +102,9 @@ def set_export_mode(self, mode: bool = True) -> None:
                 if isinstance(module, RepVGGBlock):
                     module.reparametrize()
 
-    def forward(self, x: Tensor) -> list[Tensor]:
+    def forward(self, inputs: Tensor) -> list[Tensor]:
         outputs = []
-        x = self.repvgg_encoder(x)
+        x = self.repvgg_encoder(inputs)
         for block in self.blocks:
             x = block(x)
             outputs.append(x)
diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
index 0fdca420..aff2b5a6 100644
--- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py
+++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
@@ -1,6 +1,6 @@
 import logging
 import math
-from typing import Literal, cast
+from typing import cast
 
 import torch
 from torch import Tensor, nn
@@ -22,8 +22,6 @@
 
 
 class ImplicitKeypointBBoxHead(BaseNode):
-    attach_index: Literal["all"] = "all"
-
     def __init__(
         self,
         n_keypoints: int | None = None,
diff --git a/luxonis_train/nodes/micronet.py b/luxonis_train/nodes/micronet.py
index 03b43e1f..603eabde 100644
--- a/luxonis_train/nodes/micronet.py
+++ b/luxonis_train/nodes/micronet.py
@@ -15,8 +15,6 @@ class MicroNet(BaseNode[Tensor, list[Tensor]]):
     TODO: DOCS
     """
 
-    attach_index: int = -1
-
     def __init__(self, variant: Literal["M1", "M2", "M3"] = "M1", **kwargs):
         """MicroNet backbone.
 
@@ -236,23 +234,21 @@ def __init__(
                 ChannelShuffle(out_channels // 2) if y3 != 0 else nn.Sequential(),
             )
 
-    def forward(self, x: Tensor):
-        identity = x
-        out = self.layers(x)
+    def forward(self, inputs: Tensor) -> Tensor:
+        out = self.layers(inputs)
         if self.identity:
-            out += identity
+            out += inputs
         return out
 
 
 class ChannelShuffle(nn.Module):
     def __init__(self, groups: int):
-        super(ChannelShuffle, self).__init__()
+        super().__init__()
         self.groups = groups
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         b, c, h, w = x.size()
         channels_per_group = c // self.groups
-        # reshape
         x = x.view(b, self.groups, channels_per_group, h, w)
         x = torch.transpose(x, 1, 2).contiguous()
         out = x.view(b, -1, h, w)
@@ -300,7 +296,7 @@ def __init__(
         indexs = torch.cat([indexs[1], indexs[0]], dim=2)
         self.index = indexs.view(in_channels).long()
 
-    def forward(self, x: Tensor):
+    def forward(self, x: Tensor) -> Tensor:
         B, C, _, _ = x.shape
         x_out = x
 
@@ -350,7 +346,7 @@ def __init__(self, in_channels: int, out_channels: int):
             nn.Linear(in_channels, out_channels), nn.BatchNorm1d(out_channels), HSwish()
         )
 
-    def forward(self, x: Tensor):
+    def forward(self, x: Tensor) -> Tensor:
         return self.linear(x)
 
 
@@ -383,7 +379,7 @@ def __init__(
             ChannelShuffle(out_channels1),
         )
 
-    def forward(self, x: Tensor):
+    def forward(self, x: Tensor) -> Tensor:
         return self.conv(x)
 
 
@@ -394,7 +390,7 @@ def __init__(self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4)
             SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True)
         )
 
-    def forward(self, x: Tensor):
+    def forward(self, x: Tensor) -> Tensor:
         return self.stem(x)
 
 
@@ -430,7 +426,7 @@ def __init__(
             nn.BatchNorm2d(out_channels),
         )
 
-    def forward(self, x: Tensor):
+    def forward(self, x: Tensor) -> Tensor:
         return self.conv(x)
 
 
diff --git a/luxonis_train/nodes/mobilenetv2.py b/luxonis_train/nodes/mobilenetv2.py
index 27fe87ec..732d0b12 100644
--- a/luxonis_train/nodes/mobilenetv2.py
+++ b/luxonis_train/nodes/mobilenetv2.py
@@ -15,8 +15,6 @@ class MobileNetV2(BaseNode[Tensor, list[Tensor]]):
     TODO: add more info
     """
 
-    attach_index: int = -1
-
     def __init__(self, download_weights: bool = False, **kwargs):
         """Constructor of the MobileNetV2 backbone.
 
@@ -37,8 +35,8 @@ def __init__(self, download_weights: bool = False, **kwargs):
 
     def forward(self, x: Tensor) -> list[Tensor]:
         outs = []
-        for i, m in enumerate(self.backbone.features):
-            x = m(x)
+        for i, module in enumerate(self.backbone.features):
+            x = module(x)
             if i in self.out_indices:
                 outs.append(x)
 
diff --git a/luxonis_train/nodes/mobileone.py b/luxonis_train/nodes/mobileone.py
index e92d3225..14e6e02b 100644
--- a/luxonis_train/nodes/mobileone.py
+++ b/luxonis_train/nodes/mobileone.py
@@ -52,7 +52,6 @@ class MobileOne(BaseNode[Tensor, list[Tensor]]):
     TODO: add more details
     """
 
-    attach_index: int = -1
     in_channels: int
 
     VARIANTS_SETTINGS: dict[str, dict] = {
@@ -115,9 +114,9 @@ def __init__(self, variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", **kwar
             num_se_blocks=self.num_blocks_per_stage[3] if self.use_se else 0,
         )
 
-    def forward(self, x: Tensor) -> list[Tensor]:
+    def forward(self, inputs: Tensor) -> list[Tensor]:
         outs = []
-        x = self.stage0(x)
+        x = self.stage0(inputs)
         outs.append(x)
         x = self.stage1(x)
         outs.append(x)
diff --git a/luxonis_train/nodes/resnet.py b/luxonis_train/nodes/resnet.py
index 14ff8066..8228d37a 100644
--- a/luxonis_train/nodes/resnet.py
+++ b/luxonis_train/nodes/resnet.py
@@ -12,8 +12,6 @@
 
 
 class ResNet(BaseNode[Tensor, list[Tensor]]):
-    attach_index: int = -1
-
     def __init__(
         self,
         variant: Literal["18", "34", "50", "101", "152"] = "18",
@@ -47,9 +45,9 @@ def __init__(
         )
         self.channels_list = channels_list or [64, 128, 256, 512]
 
-    def forward(self, x: Tensor) -> list[Tensor]:
+    def forward(self, inputs: Tensor) -> list[Tensor]:
         outs = []
-        x = self.backbone.conv1(x)
+        x = self.backbone.conv1(inputs)
         x = self.backbone.bn1(x)
         x = self.backbone.relu(x)
         x = self.backbone.maxpool(x)
diff --git a/luxonis_train/nodes/rexnetv1.py b/luxonis_train/nodes/rexnetv1.py
index fb4de4b1..de2c08ae 100644
--- a/luxonis_train/nodes/rexnetv1.py
+++ b/luxonis_train/nodes/rexnetv1.py
@@ -17,8 +17,6 @@
 
 
 class ReXNetV1_lite(BaseNode[Tensor, list[Tensor]]):
-    attach_index: int = -1
-
     def __init__(
         self,
         fix_head_stem: bool = False,
@@ -129,8 +127,8 @@ def __init__(
 
     def forward(self, x: Tensor) -> list[Tensor]:
         outs = []
-        for i, m in enumerate(self.features):
-            x = m(x)
+        for i, module in enumerate(self.features):
+            x = module(x)
             if i in self.out_indices:
                 outs.append(x)
         return outs
@@ -186,12 +184,11 @@ def __init__(
 
         self.out = nn.Sequential(*out)
 
-    def forward(self, x):
+    def forward(self, x: Tensor) -> Tensor:
         out = self.out(x)
 
         if self.use_shortcut:
-            # this results in a ScatterND node which isn't supported yet in myriad
-            # out[:, 0:self.in_channels] += x
+            # NOTE: this results in a ScatterND node which isn't supported yet in myriad
             a = out[:, : self.in_channels]
             b = x
             a = a + b
diff --git a/luxonis_train/nodes/segmentation_head.py b/luxonis_train/nodes/segmentation_head.py
index bdfe814d..a3420491 100644
--- a/luxonis_train/nodes/segmentation_head.py
+++ b/luxonis_train/nodes/segmentation_head.py
@@ -16,7 +16,6 @@
 
 
 class SegmentationHead(BaseNode[Tensor, Tensor]):
-    attach_index: int = -1
     in_height: int
     in_channels: int
 

From bd67595c88e2d43f03cf95f91cbfd619a3366067 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Tue, 27 Feb 2024 10:28:35 +0100
Subject: [PATCH 07/28] Backbone Fix (#15)

* fixed link in docs

* fixed repvgg backbone

* fixed efficientnet
---
 luxonis_train/nodes/__init__.py      |  2 +
 luxonis_train/nodes/blocks/blocks.py | 69 +++++++++++-----------------
 luxonis_train/nodes/efficientnet.py  |  2 +
 luxonis_train/nodes/efficientrep.py  |  7 +--
 luxonis_train/nodes/mobileone.py     | 35 +-------------
 luxonis_train/nodes/repvgg.py        | 61 +++++++++++++-----------
 6 files changed, 70 insertions(+), 106 deletions(-)

diff --git a/luxonis_train/nodes/__init__.py b/luxonis_train/nodes/__init__.py
index 954db2be..9a506c1f 100644
--- a/luxonis_train/nodes/__init__.py
+++ b/luxonis_train/nodes/__init__.py
@@ -3,6 +3,7 @@
 from .classification_head import ClassificationHead
 from .contextspatial import ContextSpatial
 from .efficient_bbox_head import EfficientBBoxHead
+from .efficientnet import EfficientNet
 from .efficientrep import EfficientRep
 from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead
 from .micronet import MicroNet
@@ -19,6 +20,7 @@
     "ClassificationHead",
     "ContextSpatial",
     "EfficientBBoxHead",
+    "EfficientNet",
     "EfficientRep",
     "ImplicitKeypointBBoxHead",
     "BaseNode",
diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py
index f4bd0172..4ab2ad2d 100644
--- a/luxonis_train/nodes/blocks/blocks.py
+++ b/luxonis_train/nodes/blocks/blocks.py
@@ -216,10 +216,7 @@ def __init__(
         kernel_size: int = 3,
         stride: int = 1,
         padding: int = 1,
-        dilation: int = 1,
         groups: int = 1,
-        padding_mode: str = "zeros",
-        deploy: bool = False,
         use_se: bool = False,
     ):
         """RepVGGBlock is a basic rep-style block, including training and deploy status
@@ -249,7 +246,6 @@ def __init__(
         """
         super().__init__()
 
-        self.deploy = deploy
         self.groups = groups
         self.in_channels = in_channels
         self.out_channels = out_channels
@@ -262,49 +258,37 @@ def __init__(
         self.nonlinearity = nn.ReLU()
 
         if use_se:
-            #   Note that RepVGG-D2se uses SE before nonlinearity. But RepVGGplus models uses SqueezeExciteBlock after nonlinearity.
+            # NOTE: that RepVGG-D2se uses SE before nonlinearity.
+            # But RepVGGplus models uses SqueezeExciteBlock after nonlinearity.
             self.se = SqueezeExciteBlock(
                 out_channels, intermediate_channels=int(out_channels // 16)
             )
         else:
-            self.se = nn.Identity()  # type: ignore
+            self.se = nn.Identity()
 
-        if deploy:
-            self.rbr_reparam = nn.Conv2d(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=kernel_size,
-                stride=stride,
-                padding=padding,
-                dilation=dilation,
-                groups=groups,
-                bias=True,
-                padding_mode=padding_mode,
-            )
-        else:
-            self.rbr_identity = (
-                nn.BatchNorm2d(num_features=in_channels)
-                if out_channels == in_channels and stride == 1
-                else None
-            )
-            self.rbr_dense = ConvModule(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=kernel_size,
-                stride=stride,
-                padding=padding,
-                groups=groups,
-                activation=nn.Identity(),
-            )
-            self.rbr_1x1 = ConvModule(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                stride=stride,
-                padding=padding_11,
-                groups=groups,
-                activation=nn.Identity(),
-            )
+        self.rbr_identity = (
+            nn.BatchNorm2d(num_features=in_channels)
+            if out_channels == in_channels and stride == 1
+            else None
+        )
+        self.rbr_dense = ConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            activation=nn.Identity(),
+        )
+        self.rbr_1x1 = ConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            stride=stride,
+            padding=padding_11,
+            groups=groups,
+            activation=nn.Identity(),
+        )
 
     def forward(self, x: Tensor):
         if hasattr(self, "rbr_reparam"):
@@ -320,6 +304,7 @@ def forward(self, x: Tensor):
     def reparametrize(self):
         if hasattr(self, "rbr_reparam"):
             return
+
         kernel, bias = self._get_equivalent_kernel_bias()
         self.rbr_reparam = nn.Conv2d(
             in_channels=self.rbr_dense[0].in_channels,
diff --git a/luxonis_train/nodes/efficientnet.py b/luxonis_train/nodes/efficientnet.py
index 0b0aedde..57b52d09 100644
--- a/luxonis_train/nodes/efficientnet.py
+++ b/luxonis_train/nodes/efficientnet.py
@@ -11,6 +11,8 @@
 
 
 class EfficientNet(BaseNode[Tensor, list[Tensor]]):
+    attach_index: int = -1
+
     def __init__(self, download_weights: bool = False, **kwargs):
         """EfficientNet backbone.
 
diff --git a/luxonis_train/nodes/efficientrep.py b/luxonis_train/nodes/efficientrep.py
index ccff4189..4e92222f 100644
--- a/luxonis_train/nodes/efficientrep.py
+++ b/luxonis_train/nodes/efficientrep.py
@@ -17,6 +17,8 @@
 
 from .base_node import BaseNode
 
+logger = logging.getLogger(__name__)
+
 
 class EfficientRep(BaseNode[Tensor, list[Tensor]]):
     def __init__(
@@ -89,14 +91,13 @@ def __init__(
         )
 
     def set_export_mode(self, mode: bool = True) -> None:
-        """Reparametrizes instances of `RepVGGBlock` in the network.
+        """Reparametrizes instances of L{RepVGGBlock} in the network.
 
         @type mode: bool
         @param mode: Whether to set the export mode. Defaults to C{True}.
         """
         super().set_export_mode(mode)
-        logger = logging.getLogger(__name__)
-        if mode:
+        if self.export:
             logger.info("Reparametrizing EfficientRep.")
             for module in self.modules():
                 if isinstance(module, RepVGGBlock):
diff --git a/luxonis_train/nodes/mobileone.py b/luxonis_train/nodes/mobileone.py
index 14e6e02b..b1658eb4 100644
--- a/luxonis_train/nodes/mobileone.py
+++ b/luxonis_train/nodes/mobileone.py
@@ -1,38 +1,7 @@
 """MobileOne backbone.
 
-Soure: U{https://github.com/apple/ml-mobileone} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>} @license: U{Apple
-<https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
-@license: U{Apple <https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
+Source: U{<https://github.com/apple/ml-mobileone>}
+@license: U{Apple<https://github.com/apple/ml-mobileone/blob/main/LICENSE>}
 """
 
 
diff --git a/luxonis_train/nodes/repvgg.py b/luxonis_train/nodes/repvgg.py
index 44579fa5..f488a68c 100644
--- a/luxonis_train/nodes/repvgg.py
+++ b/luxonis_train/nodes/repvgg.py
@@ -1,4 +1,5 @@
-from copy import deepcopy
+import logging
+from typing import Literal
 
 import torch.utils.checkpoint as checkpoint
 from torch import Tensor, nn
@@ -7,6 +8,8 @@
 
 from .base_node import BaseNode
 
+logger = logging.getLogger(__name__)
+
 
 class RepVGG(BaseNode):
     """Implementation of RepVGG backbone.
@@ -18,53 +21,37 @@ class RepVGG(BaseNode):
     """
 
     in_channels: int
+    attach_index: int = -1
 
     VARIANTS_SETTINGS = {
         "A0": {
             "num_blocks": [2, 4, 14, 1],
-            "num_classes": 1000,
             "width_multiplier": [0.75, 0.75, 0.75, 2.5],
         },
         "A1": {
             "num_blocks": [2, 4, 14, 1],
-            "num_classes": 1000,
             "width_multiplier": [1, 1, 1, 2.5],
         },
         "A2": {
             "num_blocks": [2, 4, 14, 1],
-            "num_classes": 1000,
             "width_multiplier": [1.5, 1.5, 1.5, 2.75],
         },
     }
 
-    def __new__(cls, **kwargs):
-        variant = kwargs.pop("variant", "A0")
-
-        if variant not in RepVGG.VARIANTS_SETTINGS.keys():
-            raise ValueError(
-                f"RepVGG model variant should be in {list(RepVGG.VARIANTS_SETTINGS.keys())}"
-            )
-
-        overrides = deepcopy(kwargs)
-        kwargs.clear()
-        kwargs.update(RepVGG.VARIANTS_SETTINGS[variant])
-        kwargs.update(overrides)
-        return cls.__new__(cls)
-
     def __init__(
         self,
-        deploy: bool = False,
+        variant: Literal["A0", "A1", "A2"] = "A0",
+        num_blocks: list[int] | None = None,
+        width_multiplier: list[float] | None = None,
         override_groups_map: dict[int, int] | None = None,
         use_se: bool = False,
         use_checkpoint: bool = False,
-        num_blocks: list[int] | None = None,
-        width_multiplier: list[float] | None = None,
         **kwargs,
     ):
         """Constructor for the RepVGG module.
 
-        @type deploy: bool
-        @param deploy: Whether to use the model in deploy mode.
+        @type variant: Literal["A0", "A1", "A2"]
+        @param variant: RepVGG model variant. Defaults to "A0".
         @type override_groups_map: dict[int, int] | None
         @param override_groups_map: Dictionary mapping layer index to number of groups.
         @type use_se: bool
@@ -77,9 +64,16 @@ def __init__(
         @param width_multiplier: Width multiplier for each stage.
         """
         super().__init__(**kwargs)
-        num_blocks = num_blocks or [2, 4, 14, 1]
-        width_multiplier = width_multiplier or [0.75, 0.75, 0.75, 2.5]
-        self.deploy = deploy
+        if variant not in self.VARIANTS_SETTINGS.keys():
+            raise ValueError(
+                f"RepVGG model variant should be one of "
+                f"{list(self.VARIANTS_SETTINGS.keys())}."
+            )
+
+        num_blocks = num_blocks or self.VARIANTS_SETTINGS[variant]["num_blocks"]
+        width_multiplier = (
+            width_multiplier or self.VARIANTS_SETTINGS[variant]["width_multiplier"]
+        )
         self.override_groups_map = override_groups_map or {}
         assert 0 not in self.override_groups_map
         self.use_se = use_se
@@ -92,7 +86,6 @@ def __init__(
             kernel_size=3,
             stride=2,
             padding=1,
-            deploy=self.deploy,
             use_se=self.use_se,
         )
         self.cur_layer_idx = 1
@@ -135,10 +128,22 @@ def _make_stage(self, planes: int, num_blocks: int, stride: int):
                     stride=stride,
                     padding=1,
                     groups=cur_groups,
-                    deploy=self.deploy,
                     use_se=self.use_se,
                 )
             )
             self.in_planes = planes
             self.cur_layer_idx += 1
         return nn.ModuleList(blocks)
+
+    def set_export_mode(self, mode: bool = True) -> None:
+        """Reparametrizes instances of L{RepVGGBlock} in the network.
+
+        @type mode: bool
+        @param mode: Whether to set the export mode. Defaults to C{True}.
+        """
+        super().set_export_mode(mode)
+        if self.export:
+            logger.info("Reparametrizing RepVGG.")
+            for module in self.modules():
+                if isinstance(module, RepVGGBlock):
+                    module.reparametrize()

From f42192cfd679aa6ed4e6200908b089a963c5c7d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Wed, 28 Feb 2024 17:07:11 +0100
Subject: [PATCH 08/28] Uploading logs to MLFlow (#16)

* upload logs to mlflow

* added mlflow instance

* multithread log upload

* fixed upload logs

* fixed log file path

* removed exceptions

* logging exceptions

* fixed typo

* reverted exception

* moved line

* replaced warning with error log

* Update trainer.py
---
 .../callbacks/export_on_train_end.py          | 16 +++++++----
 luxonis_train/core/core.py                    |  4 ++-
 luxonis_train/core/trainer.py                 | 28 +++++++++++++++++--
 luxonis_train/models/luxonis_model.py         |  2 ++
 4 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py
index 923267c1..5d7bf6da 100644
--- a/luxonis_train/callbacks/export_on_train_end.py
+++ b/luxonis_train/callbacks/export_on_train_end.py
@@ -8,6 +8,8 @@
 from luxonis_train.utils.registry import CALLBACKS
 from luxonis_train.utils.tracker import LuxonisTrackerPL
 
+logger = logging.getLogger(__name__)
+
 
 @CALLBACKS.register_module()
 class ExportOnTrainEnd(pl.Callback):
@@ -41,11 +43,13 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No
         # NOTE: assume that first checkpoint callback is based on val loss
         best_model_path = model_checkpoint_callbacks[0].best_model_path
         if not best_model_path:
-            raise RuntimeError(
-                "No best model path found. "
-                "Please make sure that ModelCheckpoint callback is present "
-                "and at least one validation epoch has been performed."
+            logger.error(
+                "No model checkpoint found. "
+                "Make sure that `ModelCheckpoint` callback is present "
+                "and at least one validation epoch has been performed. "
+                "Skipping model export."
             )
+            return
         cfg: Config = pl_module.cfg
         cfg.model.weights = best_model_path
         if self.upload_to_mlflow:
@@ -54,9 +58,9 @@ def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> No
                 new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}"
                 cfg.exporter.upload_url = new_upload_url
             else:
-                logging.getLogger(__name__).warning(
+                logger.error(
                     "`upload_to_mlflow` is set to True, "
-                    "but there is  no MLFlow active run, skipping."
+                    "but there is no MLFlow active run, skipping."
                 )
         exporter = Exporter(cfg=cfg)
         onnx_path = str(Path(best_model_path).parent.with_suffix(".onnx"))
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py
index 75bd1d2a..86b63600 100644
--- a/luxonis_train/core/core.py
+++ b/luxonis_train/core/core.py
@@ -79,12 +79,14 @@ def __init__(
         self.run_save_dir = os.path.join(
             self.cfg.tracker.save_directory, self.tracker.run_name
         )
+        self.log_file = osp.join(self.run_save_dir, "luxonis_train.log")
+
         # NOTE: to add the file handler (we only get the save dir now,
         # but we want to use the logger before)
         reset_logging()
         setup_logging(
             use_rich=self.cfg.use_rich_text,
-            file=osp.join(self.run_save_dir, "luxonis_train.log"),
+            file=self.log_file,
         )
 
         # NOTE: overriding logger in pl so it uses our logger to log device info
diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py
index cb2c5a2c..2b3d6a78 100644
--- a/luxonis_train/core/trainer.py
+++ b/luxonis_train/core/trainer.py
@@ -3,6 +3,7 @@
 from typing import Any, Literal
 
 from lightning.pytorch.utilities import rank_zero_only  # type: ignore
+from luxonis_ml.utils import LuxonisFileSystem
 
 from luxonis_train.models import LuxonisModel
 from luxonis_train.utils.config import Config
@@ -39,6 +40,28 @@ def __init__(
             input_shape=self.loader_train.input_shape,
         )
 
+    def _upload_logs(self) -> None:
+        if self.cfg.tracker.is_mlflow:
+            logger.info("Uploading logs to MLFlow.")
+            fs = LuxonisFileSystem(
+                "mlflow://",
+                allow_active_mlflow_run=True,
+                allow_local=False,
+            )
+            fs.put_file(
+                local_path=self.log_file,
+                remote_path="luxonis_train.log",
+                mlflow_instance=self.tracker.experiment.get("mlflow", None),
+            )
+
+    def _trainer_fit(self, *args, **kwargs):
+        try:
+            self.pl_trainer.fit(*args, **kwargs)
+        except Exception:
+            logger.exception("Encountered exception during training.")
+        finally:
+            self._upload_logs()
+
     def train(self, new_thread: bool = False) -> None:
         """Runs training.
 
@@ -48,13 +71,14 @@ def train(self, new_thread: bool = False) -> None:
         if not new_thread:
             logger.info(f"Checkpoints will be saved in: {self.get_save_dir()}")
             logger.info("Starting training...")
-            self.pl_trainer.fit(
+            self._trainer_fit(
                 self.lightning_module,
                 self.pytorch_loader_train,
                 self.pytorch_loader_val,
             )
             logger.info("Training finished")
             logger.info(f"Checkpoints saved in: {self.get_save_dir()}")
+
         else:
             # Every time exception happens in the Thread, this hook will activate
             def thread_exception_hook(args):
@@ -63,7 +87,7 @@ def thread_exception_hook(args):
             threading.excepthook = thread_exception_hook
 
             self.thread = threading.Thread(
-                target=self.pl_trainer.fit,
+                target=self._trainer_fit,
                 args=(
                     self.lightning_module,
                     self.pytorch_loader_train,
diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py
index 88d4fa28..7cd396f9 100644
--- a/luxonis_train/models/luxonis_model.py
+++ b/luxonis_train/models/luxonis_model.py
@@ -681,7 +681,9 @@ def load_checkpoint(self, path: str | None) -> None:
         """
         if path is None:
             return
+
         checkpoint = torch.load(path, map_location=self.device)
+
         if "state_dict" not in checkpoint:
             raise ValueError("Checkpoint does not contain state_dict.")
         state_dict = {}

From e1ab39b7bd49e16971e49f181e9ceefd8129b3dd Mon Sep 17 00:00:00 2001
From: jkbmrz <74824974+jkbmrz@users.noreply.github.com>
Date: Wed, 20 Mar 2024 09:06:32 +0100
Subject: [PATCH 09/28] Generate NN archive from training configs (#17)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add archiver CLI

* add archiver callback

* add max_det parameter to EfficientBBoxHead

* add enum to categorize tasks for the implemented heads

* add archiver tests

* adjust Archiver to new nn archive format

* pre-commit formatting

* add LDF creation and adjust to new nn archive format

* update requirements.txt

* add opencv-python to requirements.txt

* add support for ImplicitKeypointBBoxHead

* remove support for ObjectDetectionSSD

* Update requirements.txt

* Added mlflow and removed opencv

* [Automated] Updated coverage badge

* add support for SegmentationHead and BiSeNetHead

* base archiver tests on model from luxonis-train instead of torchvision

* adjust head parameters to changes in NN Archive

* adjust keypoint detection head parameters to changes in NN Archive

* bugfix - make sure self.max_det is used in nms

* add max_det parameter to ImplicitKeypointBBoxHead

* adjust task categorization for ImplicitKeypointBBoxHead

* fixing Windows PermissionError occurring on file deletion

* fixing Windows PermissionError occurring on file deletion due to unreleased logging handlers

* add method to remove file handlers keeping the log file open

* add a logging statement at the end of archiving

* add optuna_integration to requirements.txt

* add hard-coded solution to determining is_softmax parameter

* added help

---------

Co-authored-by: Martin Kozlovský <martin.kozlovsky@luxonis.com>
Co-authored-by: GitHub Actions <actions@github.com>
---
 luxonis_train/__main__.py                     |  14 +
 luxonis_train/callbacks/__init__.py           |   2 +
 .../callbacks/archive_on_train_end.py         |  72 ++++
 luxonis_train/core/__init__.py                |   3 +-
 luxonis_train/core/archiver.py                | 371 ++++++++++++++++++
 luxonis_train/core/core.py                    |   4 +
 luxonis_train/nodes/efficient_bbox_head.py    |   6 +
 .../nodes/enums/head_categorization.py        |  21 +
 .../nodes/implicit_keypoint_bbox_head.py      |   5 +
 luxonis_train/utils/config.py                 |   7 +
 media/coverage_badge.svg                      |   4 +-
 requirements.txt                              |   5 +-
 tests/unittests/test_core/__init__.py         |   0
 tests/unittests/test_core/test_archiver.py    | 158 ++++++++
 14 files changed, 668 insertions(+), 4 deletions(-)
 create mode 100644 luxonis_train/callbacks/archive_on_train_end.py
 create mode 100644 luxonis_train/core/archiver.py
 create mode 100644 luxonis_train/nodes/enums/head_categorization.py
 create mode 100644 tests/unittests/test_core/__init__.py
 create mode 100644 tests/unittests/test_core/test_archiver.py

diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index 24cfd69b..b1fd3971 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -200,6 +200,20 @@ def inspect(
                     exit()
 
 
+@app.command()
+def archive(
+    executable: Annotated[
+        Optional[Path], typer.Option(help="Path to the model file.", show_default=False)
+    ],
+    config: ConfigType = None,
+    opts: OptsType = None,
+):
+    """Generate NN archive."""
+    from luxonis_train.core import Archiver
+
+    Archiver(str(config), opts).archive(executable)
+
+
 def version_callback(value: bool):
     if value:
         typer.echo(f"LuxonisTrain Version: {version(__package__)}")
diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py
index cec9e000..ae1fe86e 100644
--- a/luxonis_train/callbacks/__init__.py
+++ b/luxonis_train/callbacks/__init__.py
@@ -8,6 +8,7 @@
 
 from luxonis_train.utils.registry import CALLBACKS
 
+from .archive_on_train_end import ArchiveOnTrainEnd
 from .export_on_train_end import ExportOnTrainEnd
 from .luxonis_progress_bar import LuxonisProgressBar
 from .metadata_logger import MetadataLogger
@@ -23,6 +24,7 @@
 
 
 __all__ = [
+    "ArchiveOnTrainEnd",
     "ExportOnTrainEnd",
     "LuxonisProgressBar",
     "MetadataLogger",
diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py
new file mode 100644
index 00000000..4f5b6bc2
--- /dev/null
+++ b/luxonis_train/callbacks/archive_on_train_end.py
@@ -0,0 +1,72 @@
+import logging
+import os
+from pathlib import Path
+from typing import cast
+
+import lightning.pytorch as pl
+
+from luxonis_train.utils.config import Config
+from luxonis_train.utils.registry import CALLBACKS
+from luxonis_train.utils.tracker import LuxonisTrackerPL
+
+
+@CALLBACKS.register_module()
+class ArchiveOnTrainEnd(pl.Callback):
+    def __init__(self, upload_to_mlflow: bool = False):
+        """Callback that performs archiving of onnx or exported model at the end of
+        training/export. TODO: description.
+
+        @type upload_to_mlflow: bool
+        @param upload_to_mlflow: If set to True, overrides the upload url in Archiver
+            with currently active MLFlow run (if present).
+        """
+        super().__init__()
+        self.upload_to_mlflow = upload_to_mlflow
+
+    def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None:
+        """Archives the model on train end.
+
+        @type trainer: L{pl.Trainer}
+        @param trainer: Pytorch Lightning trainer.
+        @type pl_module: L{pl.LightningModule}
+        @param pl_module: Pytorch Lightning module.
+        @raises RuntimeError: If no best model path is found.
+        """
+        from luxonis_train.core.archiver import Archiver
+
+        model_checkpoint_callbacks = [
+            c
+            for c in trainer.callbacks  # type: ignore
+            if isinstance(c, pl.callbacks.ModelCheckpoint)  # type: ignore
+        ]
+
+        # NOTE: assume that first checkpoint callback is based on val loss
+        best_model_path = model_checkpoint_callbacks[0].best_model_path
+        if not best_model_path:
+            raise RuntimeError(
+                "No best model path found. "
+                "Please make sure that ModelCheckpoint callback is present "
+                "and at least one validation epoch has been performed."
+            )
+        cfg: Config = pl_module.cfg
+        cfg.model.weights = best_model_path
+        if self.upload_to_mlflow:
+            if cfg.tracker.is_mlflow:
+                tracker = cast(LuxonisTrackerPL, trainer.logger)
+                new_upload_url = f"mlflow://{tracker.project_id}/{tracker.run_id}"
+                cfg.archiver.upload_url = new_upload_url
+            else:
+                logging.getLogger(__name__).warning(
+                    "`upload_to_mlflow` is set to True, "
+                    "but there is  no MLFlow active run, skipping."
+                )
+
+        onnx_path = str(Path(best_model_path).parent.with_suffix(".onnx"))
+        if not os.path.exists(onnx_path):
+            raise FileNotFoundError(
+                "Model executable not found. Make sure to run exporter callback before archiver callback"
+            )
+
+        archiver = Archiver(cfg=cfg)
+
+        archiver.archive(onnx_path)
diff --git a/luxonis_train/core/__init__.py b/luxonis_train/core/__init__.py
index 6264473b..d3e89663 100644
--- a/luxonis_train/core/__init__.py
+++ b/luxonis_train/core/__init__.py
@@ -1,6 +1,7 @@
+from .archiver import Archiver
 from .exporter import Exporter
 from .inferer import Inferer
 from .trainer import Trainer
 from .tuner import Tuner
 
-__all__ = ["Exporter", "Trainer", "Tuner", "Inferer"]
+__all__ = ["Exporter", "Trainer", "Tuner", "Inferer", "Archiver"]
diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py
new file mode 100644
index 00000000..58fc231f
--- /dev/null
+++ b/luxonis_train/core/archiver.py
@@ -0,0 +1,371 @@
+import os
+from logging import getLogger
+from pathlib import Path
+from typing import Any
+
+import onnx
+from luxonis_ml.nn_archive.archive_generator import ArchiveGenerator
+from luxonis_ml.nn_archive.config import CONFIG_VERSION
+from luxonis_ml.nn_archive.config_building_blocks import ObjectDetectionSubtypeYOLO
+from luxonis_ml.utils import LuxonisFileSystem
+
+from luxonis_train.models import LuxonisModel
+from luxonis_train.nodes.enums.head_categorization import (
+    ImplementedHeads,
+    ImplementedHeadsIsSoxtmaxed,
+)
+from luxonis_train.utils.config import Config
+
+from .core import Core
+
+logger = getLogger(__name__)
+
+
+class Archiver(Core):
+    """Main API which is used to construct the NN archive out of a training config and
+    model executables."""
+
+    def __init__(
+        self,
+        cfg: str | dict[str, Any] | Config,
+        opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None,
+    ):
+        """Constructs a new Archiver instance.
+
+        @type cfg: str | dict[str, Any] | Config
+        @param cfg: Path to config file or config dict used to setup training.
+        @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None
+        @param opts: Argument dict provided through command line,
+            used for config overriding.
+        """
+
+        super().__init__(cfg, opts)
+
+        self.lightning_module = LuxonisModel(
+            cfg=self.cfg,
+            dataset_metadata=self.dataset_metadata,
+            save_dir=self.run_save_dir,
+            input_shape=self.loader_train.input_shape,
+        )
+
+        self.model_name = self.cfg.model.name
+
+        self.archive_name = self.cfg.archiver.archive_name
+        archive_save_directory = Path(self.cfg.archiver.archive_save_directory)
+        if not archive_save_directory.exists():
+            logger.info(f"Creating archive directory {archive_save_directory}")
+            archive_save_directory.mkdir(parents=True, exist_ok=True)
+        self.archive_save_directory = str(archive_save_directory)
+
+        self.inputs = []
+        self.outputs = []
+        self.heads = []
+
+    def archive(self, executable_path: str):
+        """Runs archiving.
+
+        @type executable_path: str
+        @param executable_path: Path to model executable file (e.g. ONNX model).
+        """
+
+        executable_fname = os.path.split(executable_path)[1]
+        _, executable_suffix = os.path.splitext(executable_fname)
+        self.archive_name += f"_{executable_suffix[1:]}"
+
+        preprocessing = {  # TODO: keep preprocessing same for each input?
+            "mean": self.cfg.trainer.preprocessing.normalize.params["mean"],
+            "scale": self.cfg.trainer.preprocessing.normalize.params["std"],
+            "reverse_channels": self.cfg.trainer.preprocessing.train_rgb,
+            "interleaved_to_planar": False,  # TODO: make it modifiable?
+        }
+
+        inputs_dict = self._get_inputs(executable_path)
+        for input_name in inputs_dict:
+            self._add_input(
+                name=input_name,
+                dtype=inputs_dict[input_name]["dtype"],
+                shape=inputs_dict[input_name]["shape"],
+                preprocessing=preprocessing,
+            )
+
+        outputs_dict = self._get_outputs(executable_path)
+        for output_name in outputs_dict:
+            self._add_output(name=output_name, dtype=outputs_dict[output_name]["dtype"])
+
+        heads_dict = self._get_heads(executable_path)
+        for head_name in heads_dict:
+            self._add_head(heads_dict[head_name])
+
+        model = {
+            "metadata": {
+                "name": self.model_name,
+                "path": executable_fname,
+            },
+            "inputs": self.inputs,
+            "outputs": self.outputs,
+            "heads": self.heads,
+        }
+
+        cfg_dict = {
+            "config_version": CONFIG_VERSION.__args__[0],
+            "model": model,
+        }
+
+        self.archive_path = ArchiveGenerator(
+            archive_name=self.archive_name,
+            save_path=self.archive_save_directory,
+            cfg_dict=cfg_dict,
+            executables_paths=[executable_path],  # TODO: what if more executables?
+        ).make_archive()
+
+        logger.info(f"archive saved to {self.archive_path}")
+
+        if self.cfg.archiver.upload_url is not None:
+            self._upload()
+
+        return self.archive_path
+
+    def _get_inputs(self, executable_path: str):
+        """Get inputs of a model executable.
+
+        @type executable_path: str
+        @param executable_path: Path to model executable file.
+        """
+
+        _, executable_suffix = os.path.splitext(executable_path)
+        if executable_suffix == ".onnx":
+            return self._get_onnx_inputs(executable_path)
+        else:
+            raise NotImplementedError(
+                f"Missing input reading function for {executable_suffix} models."
+            )
+
+    def _get_onnx_inputs(self, executable_path: str):
+        """Get inputs of an ONNX model executable.
+
+        @type executable_path: str
+        @param executable_path: Path to model executable file.
+        """
+
+        inputs_dict = {}
+        model = onnx.load(executable_path)
+        for input in model.graph.input:
+            tensor_type = input.type.tensor_type
+            dtype_idx = tensor_type.elem_type
+            dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx))
+            shape = []
+            for d in tensor_type.shape.dim:
+                if d.HasField("dim_value"):
+                    shape.append(d.dim_value)
+                else:
+                    raise ValueError("Unsupported input dimension identifier type")
+            inputs_dict[input.name] = {"dtype": dtype, "shape": shape}
+        return inputs_dict
+
+    def _add_input(
+        self,
+        name: str,
+        dtype: str,
+        shape: list,
+        preprocessing: dict,
+        input_type: str = "image",
+    ) -> None:
+        """Add input to self.inputs.
+
+        @type name: str
+        @param name: Name of the input layer.
+        @type dtype: str
+        @param dtype: Data type of the input data (e.g., 'float32').
+        @type shape: list
+        @param shape: Shape of the input data as a list of integers (e.g. [H,W], [H,W,C], [BS,H,W,C], ...).
+        @type preprocessing: dict
+        @param preprocessing: Preprocessing steps applied to the input data.
+        @type input_type: str
+        @param input_type: Type of input data (e.g., 'image').
+        """
+
+        self.inputs.append(
+            {
+                "name": name,
+                "dtype": dtype,
+                "input_type": input_type,
+                "shape": shape,
+                "preprocessing": preprocessing,
+            }
+        )
+
+    def _get_outputs(self, executable_path):
+        """Get outputs of a model executable.
+
+        @type executable_path: str
+        @param executable_path: Path to model executable file.
+        """
+
+        _, executable_suffix = os.path.splitext(executable_path)
+        if executable_suffix == ".onnx":
+            return self._get_onnx_outputs(executable_path)
+        else:
+            raise NotImplementedError(
+                f"Missing input reading function for {executable_suffix} models."
+            )
+
+    def _get_onnx_outputs(self, executable_path):
+        """Get outputs of an ONNX model executable.
+
+        @type executable_path: str
+        @param executable_path: Path to model executable file.
+        """
+
+        outputs_dict = {}
+        model = onnx.load(executable_path)
+        for output in model.graph.output:
+            tensor_type = output.type.tensor_type
+            dtype_idx = tensor_type.elem_type
+            dtype = str(onnx.helper.tensor_dtype_to_np_dtype(dtype_idx))
+            outputs_dict[output.name] = {"dtype": dtype}
+        return outputs_dict
+
+    def _add_output(self, name: str, dtype: str) -> None:
+        """Add output to self.outputs.
+
+        @type name: str
+        @param name: Name of the output layer.
+        @type dtype: str
+        @param dtype: Data type of the output data (e.g., 'float32').
+        """
+
+        self.outputs.append({"name": name, "dtype": dtype})
+
+    def _get_classes(self, head_family):
+        if head_family.startswith("Classification"):
+            return self.dataset_metadata._classes["class"]
+        elif head_family.startswith("Object"):
+            return self.dataset_metadata._classes["boxes"]
+        elif head_family.startswith("Segmentation"):
+            return self.dataset_metadata._classes["segmentation"]
+        elif head_family.startswith("Keypoint"):
+            return self.dataset_metadata._classes["keypoints"]
+        else:
+            raise ValueError(
+                f"No classes found for the specified head family ({head_family})"
+            )
+
+    def _get_head_specific_parameters(
+        self, head_name, head_alias, executable_path
+    ) -> dict:
+        """Get parameters specific to head.
+
+        @type head_name: str
+        @param head_name: Name of the head (e.g. 'EfficientBBoxHead').
+        @type head_alias: str
+        @param head_alias: Alias of the head (e.g. 'detection_head').
+        @type executable_path: str
+        @param executable_path: Path to model executable file.
+        """
+
+        parameters = {}
+        if head_name == "ClassificationHead":
+            parameters["is_softmax"] = getattr(
+                ImplementedHeadsIsSoxtmaxed, head_name
+            ).value
+        elif head_name == "EfficientBBoxHead":
+            parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv6.value
+            head_node = self.lightning_module._modules["nodes"][head_alias]
+            parameters["iou_threshold"] = head_node.iou_thres
+            parameters["conf_threshold"] = head_node.conf_thres
+            parameters["max_det"] = head_node.max_det
+        elif head_name in ["SegmentationHead", "BiSeNetHead"]:
+            parameters["is_softmax"] = getattr(
+                ImplementedHeadsIsSoxtmaxed, head_name
+            ).value
+        elif head_name == "ImplicitKeypointBBoxHead":
+            parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value
+            head_node = self.lightning_module._modules["nodes"][head_alias]
+            parameters["iou_threshold"] = head_node.iou_thres
+            parameters["conf_threshold"] = head_node.conf_thres
+            parameters["max_det"] = head_node.max_det
+            parameters["n_keypoints"] = head_node.n_keypoints
+            parameters["anchors"] = head_node.anchors.tolist()
+
+        else:
+            raise ValueError("Unknown head name")
+        return parameters
+
+    def _get_head_outputs(self, head_name) -> dict:
+        """Get model outputs in a head-specific format.
+
+        @type head_name: str
+        @param head_name: Name of the head (e.g. 'EfficientBBoxHead').
+        """
+
+        head_outputs = {}
+        if head_name == "ClassificationHead":
+            head_outputs["predictions"] = self.outputs[0]["name"]
+        elif head_name == "EfficientBBoxHead":
+            head_outputs["yolo_outputs"] = [output["name"] for output in self.outputs]
+        elif head_name in ["SegmentationHead", "BiSeNetHead"]:
+            head_outputs["predictions"] = self.outputs[0]["name"]
+        elif head_name == "ImplicitKeypointBBoxHead":
+            head_outputs["predictions"] = self.outputs[0]["name"]
+        else:
+            raise ValueError("Unknown head name")
+        return head_outputs
+
+    def _get_heads(self, executable_path):
+        """Get model heads.
+
+        @type executable_path: str
+        @param executable_path: Path to model executable file.
+        """
+        heads_dict = {}
+
+        for node in self.cfg.model.nodes:
+            node_name = node.name
+            node_alias = node.alias
+            # node_inputs = node.inputs
+            if node_alias in self.lightning_module.outputs:
+                if node_name in ImplementedHeads.__members__:
+                    head_family = getattr(ImplementedHeads, node_name).value
+                    classes = self._get_classes(head_family)
+                    head_outputs = self._get_head_outputs(node_name)
+                    head_dict = {
+                        "family": head_family,
+                        "outputs": head_outputs,
+                        "classes": classes,
+                        "n_classes": len(classes),
+                    }
+                    head_dict.update(
+                        self._get_head_specific_parameters(
+                            node_name, node_alias, executable_path
+                        )
+                    )
+                    heads_dict[node_name] = head_dict
+        return heads_dict
+
+    def _add_head(self, head_metadata: dict) -> str:
+        """Add head to self.heads.
+
+        @type head_metadata: dict
+        @param head_metadata: Parameters required by head to run postprocessing.
+        """
+
+        self.heads.append(head_metadata)
+
+    def _upload(self):
+        """Uploads the archive file to specified s3 bucket.
+
+        @raises ValueError: If upload url was not specified in config file.
+        """
+
+        if self.cfg.archiver.upload_url is None:
+            raise ValueError("Upload url must be specified in config file.")
+
+        fs = LuxonisFileSystem(self.cfg.archiver.upload_url, allow_local=False)
+        logger.info(f"Started Archive upload to {fs.full_path}...")
+
+        fs.put_file(
+            local_path=self.archive_path,
+            remote_path=self.archive_name,
+        )
+
+        logger.info("Files upload finished")
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py
index 86b63600..761bc26f 100644
--- a/luxonis_train/core/core.py
+++ b/luxonis_train/core/core.py
@@ -234,3 +234,7 @@ def get_best_metric_checkpoint_path(self) -> str:
         @return: Path to best checkpoint with respect to best validation metric
         """
         return self.pl_trainer.checkpoint_callbacks[1].best_model_path  # type: ignore
+
+    def reset_logging(self) -> None:
+        """Close file handlers to release the log file."""
+        reset_logging()
diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/efficient_bbox_head.py
index 9f500cd4..a4f3bc93 100644
--- a/luxonis_train/nodes/efficient_bbox_head.py
+++ b/luxonis_train/nodes/efficient_bbox_head.py
@@ -30,6 +30,7 @@ def __init__(
         n_heads: Literal[2, 3, 4] = 3,
         conf_thres: float = 0.25,
         iou_thres: float = 0.45,
+        max_det: int = 300,
         **kwargs,
     ):
         """Head for object detection.
@@ -45,6 +46,9 @@ def __init__(
 
         @type iou_thres: float
         @param iou_thres: Threshold for IoU. Defaults to C{0.45}.
+
+        @type max_det: int
+        @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}.
         """
         super().__init__(task_type=LabelType.BOUNDINGBOX, **kwargs)
 
@@ -52,6 +56,7 @@ def __init__(
 
         self.conf_thres = conf_thres
         self.iou_thres = iou_thres
+        self.max_det = max_det
 
         self.stride = self._fit_stride_to_num_heads()
         self.grid_cell_offset = 0.5
@@ -163,5 +168,6 @@ def _process_to_bbox(
             conf_thres=self.conf_thres,
             iou_thres=self.iou_thres,
             bbox_format="xyxy",
+            max_det=self.max_det,
             predicts_objectness=False,
         )
diff --git a/luxonis_train/nodes/enums/head_categorization.py b/luxonis_train/nodes/enums/head_categorization.py
new file mode 100644
index 00000000..56f98ff3
--- /dev/null
+++ b/luxonis_train/nodes/enums/head_categorization.py
@@ -0,0 +1,21 @@
+from enum import Enum
+
+
+class ImplementedHeads(Enum):
+    """Task categorization for the implemented heads."""
+
+    ClassificationHead = "Classification"
+    EfficientBBoxHead = "ObjectDetectionYOLO"
+    ImplicitKeypointBBoxHead = "KeypointDetectionYOLO"
+    SegmentationHead = "Segmentation"
+    BiSeNetHead = "Segmentation"
+
+
+class ImplementedHeadsIsSoxtmaxed(Enum):
+    """Softmaxed output categorization for the implemented heads."""
+
+    ClassificationHead = False
+    EfficientBBoxHead = None
+    ImplicitKeypointBBoxHead = None
+    SegmentationHead = False
+    BiSeNetHead = False
diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
index aff2b5a6..7f0c3d61 100644
--- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py
+++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
@@ -30,6 +30,7 @@ def __init__(
         init_coco_biases: bool = True,
         conf_thres: float = 0.25,
         iou_thres: float = 0.45,
+        max_det: int = 300,
         **kwargs,
     ):
         """Head for object and keypoint detection.
@@ -53,6 +54,8 @@ def __init__(
         @param conf_thres: Threshold for confidence. Defaults to C{0.25}.
         @type iou_thres: float
         @param iou_thres: Threshold for IoU. Defaults to C{0.45}.
+        @type max_det: int
+        @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}.
         """
         super().__init__(task_type=LabelType.KEYPOINT, **kwargs)
 
@@ -63,6 +66,7 @@ def __init__(
 
         self.conf_thres = conf_thres
         self.iou_thres = iou_thres
+        self.max_det = max_det
 
         n_keypoints = n_keypoints or self.dataset_metadata._n_keypoints
 
@@ -164,6 +168,7 @@ def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]:
             conf_thres=self.conf_thres,
             iou_thres=self.iou_thres,
             bbox_format="cxcywh",
+            max_det=self.max_det,
         )
 
         return {
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index 591376f8..a2d4f332 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -269,6 +269,12 @@ def pad_values(values: float | list[float] | None):
         return self
 
 
+class ArchiveConfig(BaseModel):
+    archive_name: str = "nn_archive"
+    archive_save_directory: str = "output_archive"
+    upload_url: str | None = None
+
+
 class StorageConfig(CustomBaseModel):
     active: bool = True
     storage_type: Literal["local", "remote"] = "local"
@@ -292,6 +298,7 @@ class Config(LuxonisConfig):
     tracker: TrackerConfig = TrackerConfig()
     trainer: TrainerConfig = TrainerConfig()
     exporter: ExportConfig = ExportConfig()
+    archiver: ArchiveConfig = ArchiveConfig()
     tuner: TunerConfig | None = None
     ENVIRON: Environ = Field(Environ(), exclude=True)
 
diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index 7a18c7f4..4033e89e 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">80%</text>
-        <text x="80" y="14">80%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">79%</text>
+        <text x="80" y="14">79%</text>
     </g>
 </svg>
diff --git a/requirements.txt b/requirements.txt
index eecf828e..3a884284 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,12 @@
 blobconverter>=1.4.2
 lightning>=2.0.0
-luxonis-ml[all]>=0.0.1
+#luxonis-ml[all]>=0.0.1
+luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev
 onnx>=1.12.0
 onnxruntime>=1.13.1
 onnxsim>=0.4.10
 optuna>=3.2.0
+optuna_integration>=3.6.0
 psycopg2-binary>=2.9.1
 pycocotools>=2.0.7
 rich>=13.0.0
@@ -12,3 +14,4 @@ s3fs>=2023.0.0
 tensorboard>=2.10.1
 torchvision>=0.16.0
 typer>=0.9.0
+mlflow>=2.10.0
diff --git a/tests/unittests/test_core/__init__.py b/tests/unittests/test_core/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py
new file mode 100644
index 00000000..bdbaa5b9
--- /dev/null
+++ b/tests/unittests/test_core/test_archiver.py
@@ -0,0 +1,158 @@
+import io
+import json
+import os
+import random
+import shutil
+import tarfile
+
+import cv2
+import lightning.pytorch as pl
+import numpy as np
+import onnx
+import yaml
+from luxonis_ml.data import LuxonisDataset
+
+import luxonis_train
+from luxonis_train.core import Archiver
+from luxonis_train.core.exporter import Exporter
+from luxonis_train.core.trainer import Trainer
+from luxonis_train.utils.config import Config
+
+
+class TestArchiver:
+    @classmethod
+    def setup_class(cls):
+        """Create and load all files required for testing."""
+
+        luxonis_train_parent_dir = os.path.dirname(
+            os.path.dirname(luxonis_train.__file__)
+        )
+        cls.tmp_path = os.path.join(
+            luxonis_train_parent_dir, "tests", "unittests", "test_core", "tmp"
+        )
+        os.mkdir(cls.tmp_path)
+
+        # make LDF
+        os.mkdir(os.path.join(cls.tmp_path, "images"))
+        cls.ldf_name = "dummyLDF"
+        labels = ["label1", "label2", "label3"]
+
+        def classification_dataset_generator():
+            for i in range(10):
+                img = np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8)
+                img_file_path = os.path.join(cls.tmp_path, "images", f"img{i}.png")
+                cv2.imwrite(img_file_path, img)
+                yield {
+                    "file": img_file_path,
+                    "type": "classification",
+                    "value": True,
+                    "class": random.choice(labels),
+                }
+
+        if LuxonisDataset.exists(cls.ldf_name):
+            print("Deleting existing dataset")
+            LuxonisDataset(cls.ldf_name).delete_dataset()
+        dataset = LuxonisDataset(cls.ldf_name)
+        dataset.add(classification_dataset_generator)
+        dataset.set_classes(list(labels))
+        dataset.make_splits()
+
+        # make config
+        config_dict = {
+            "model": {
+                "name": "test_model",
+                "predefined_model": {"name": "ClassificationModel"},
+            },
+            "dataset": {"name": cls.ldf_name},
+            "tracker": {"save_directory": cls.tmp_path},
+        }
+        cls.config_path = os.path.join(cls.tmp_path, "config.yaml")
+        with open(cls.config_path, "w") as yaml_file:
+            yaml_str = yaml.dump(config_dict)
+            yaml_file.write(yaml_str)
+        cfg = Config.get_config(config_dict)
+
+        # train model
+        cfg.trainer.epochs = 1
+        cfg.trainer.validation_interval = 1
+        cfg.trainer.batch_size = 4
+        trainer = Trainer(cfg=cfg)
+        trainer.train()
+        callbacks = [
+            c
+            for c in trainer.pl_trainer.callbacks
+            if isinstance(c, pl.callbacks.ModelCheckpoint)
+        ]
+        model_checkpoint_path = callbacks[0].best_model_path
+        model_ckpt = os.path.join(trainer.run_save_dir, model_checkpoint_path)
+        trainer.reset_logging()
+
+        # export model to ONNX
+        cfg.model.weights = model_ckpt
+        exporter = Exporter(cfg=cfg)
+        cls.onnx_model_path = os.path.join(cls.tmp_path, "model.onnx")
+        exporter.export(onnx_path=cls.onnx_model_path)
+        exporter.reset_logging()
+
+        # make archive
+        cfg.archiver.archive_save_directory = cls.tmp_path
+        archiver = Archiver(cls.config_path)
+        cls.archive_path = archiver.archive(cls.onnx_model_path)
+        archiver.reset_logging()
+
+        # load archive files into memory
+        with tarfile.open(cls.archive_path, mode="r") as tar:
+            cls.archive_fnames = tar.getnames()
+            for fname in cls.archive_fnames:
+                f = tar.extractfile(fname)
+                if fname.endswith(".json"):
+                    cls.json_dict = json.load(f)
+                elif fname.endswith(".onnx"):
+                    model_bytes = f.read()
+                    model_io = io.BytesIO(model_bytes)
+                    cls.onnx_model = onnx.load(model_io)
+
+    @classmethod
+    def teardown_class(cls):
+        """Remove all created files."""
+        LuxonisDataset(cls.ldf_name).delete_dataset()
+        shutil.rmtree(cls.tmp_path)
+
+    def test_archive_creation(self):
+        """Test if nn_archive was created."""
+        assert os.path.exists(self.archive_path)
+
+    def test_archive_suffix(self):
+        """Test if nn_archive is compressed using xz option (should be the default
+        option)."""
+        assert self.archive_path.endswith("tar.xz")
+
+    def test_archive_contents(self):
+        """Test if nn_archive consists of config.json and model.onnx."""
+        assert (
+            len(self.archive_fnames) == 2
+            and any([fname == "config.json" for fname in self.archive_fnames])
+            and any([fname == "model.onnx" for fname in self.archive_fnames])
+        )
+
+    def test_onnx(self):
+        """Test if archived ONNX model is valid."""
+        assert onnx.checker.check_model(self.onnx_model, full_check=True) is None
+
+    def test_config_inputs(self):
+        """Test if archived config inputs are valid."""
+        config_input_names = []
+        for input in self.json_dict["model"]["inputs"]:
+            config_input_names.append(input["name"])
+        assert set([input.name for input in self.onnx_model.graph.input]) == set(
+            config_input_names
+        )
+
+    def test_config_outputs(self):
+        """Test if archived config outputs are valid."""
+        config_output_names = []
+        for input in self.json_dict["model"]["outputs"]:
+            config_output_names.append(input["name"])
+        assert set([output.name for output in self.onnx_model.graph.output]) == set(
+            config_output_names
+        )

From b3b4e32969d4c7f3c2f337048b5a50f0d33bf900 Mon Sep 17 00:00:00 2001
From: jkbmrz <74824974+jkbmrz@users.noreply.github.com>
Date: Mon, 25 Mar 2024 10:11:42 +0100
Subject: [PATCH 10/28] Extend NN Archive Generation Test Coverage (#18)

* extend NN Archive generation test coverage to cover all implemented heads

* [Automated] Updated coverage badge

---------

Co-authored-by: GitHub Actions <actions@github.com>
---
 media/coverage_badge.svg                   |   4 +-
 requirements.txt                           |   1 +
 tests/unittests/test_core/test_archiver.py | 407 +++++++++++++++------
 3 files changed, 307 insertions(+), 105 deletions(-)

diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index 4033e89e..7a18c7f4 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">79%</text>
-        <text x="80" y="14">79%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">80%</text>
+        <text x="80" y="14">80%</text>
     </g>
 </svg>
diff --git a/requirements.txt b/requirements.txt
index 3a884284..5e436e44 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,6 +7,7 @@ onnxruntime>=1.13.1
 onnxsim>=0.4.10
 optuna>=3.2.0
 optuna_integration>=3.6.0
+parameterized>=0.9.0
 psycopg2-binary>=2.9.1
 pycocotools>=2.0.7
 rich>=13.0.0
diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py
index bdbaa5b9..a044be52 100644
--- a/tests/unittests/test_core/test_archiver.py
+++ b/tests/unittests/test_core/test_archiver.py
@@ -9,21 +9,26 @@
 import lightning.pytorch as pl
 import numpy as np
 import onnx
-import yaml
 from luxonis_ml.data import LuxonisDataset
+from luxonis_ml.nn_archive.config_building_blocks.base_models import head_outputs
+from parameterized import parameterized
 
 import luxonis_train
 from luxonis_train.core import Archiver
 from luxonis_train.core.exporter import Exporter
 from luxonis_train.core.trainer import Trainer
+from luxonis_train.nodes.enums.head_categorization import ImplementedHeads
 from luxonis_train.utils.config import Config
 
+HEAD_NAMES = [head_name for head_name in ImplementedHeads.__members__]
+
 
 class TestArchiver:
     @classmethod
     def setup_class(cls):
-        """Create and load all files required for testing."""
+        """Creates all files required for testing."""
 
+        # make tmp dir
         luxonis_train_parent_dir = os.path.dirname(
             os.path.dirname(luxonis_train.__file__)
         )
@@ -32,127 +37,323 @@ def setup_class(cls):
         )
         os.mkdir(cls.tmp_path)
 
-        # make LDF
-        os.mkdir(os.path.join(cls.tmp_path, "images"))
-        cls.ldf_name = "dummyLDF"
-        labels = ["label1", "label2", "label3"]
+        # make LDFs
+        unilabelLDF = "dummyLDF_unilabel"
+        cls._make_dummy_ldf(
+            ldf_name=unilabelLDF,
+            save_path=cls.tmp_path,
+            bbx_anno=True,
+            kpt_anno=True,
+        )
+        multilabelLDF = "dummyLDF_multilabel"
+        cls._make_dummy_ldf(
+            ldf_name=multilabelLDF,
+            save_path=cls.tmp_path,
+            cls_anno=True,
+            bbx_anno=True,
+            sgm_anno=True,
+            multilabel=True,
+        )
+        cls.ldf_names = [unilabelLDF, multilabelLDF]
+
+        for head_name in HEAD_NAMES:
+            if head_name == "ImplicitKeypointBBoxHead":
+                ldf_name = unilabelLDF  # multiclass keypoint detection not yet supported in luxonis-train
+            else:
+                ldf_name = multilabelLDF
+
+            # make config
+            cfg_dict = cls._make_dummy_cfg_dict(
+                head_name=head_name,
+                save_path=cls.tmp_path,
+                ldf_name=ldf_name,
+            )
+            cfg = Config.get_config(cfg_dict)
+
+            # train model
+            cfg.trainer.epochs = 1
+            cfg.trainer.validation_interval = 1
+            cfg.trainer.batch_size = 1
+            trainer = Trainer(cfg=cfg)
+            trainer.train()
+            callbacks = [
+                c
+                for c in trainer.pl_trainer.callbacks
+                if isinstance(c, pl.callbacks.ModelCheckpoint)
+            ]
+            model_checkpoint_path = callbacks[0].best_model_path
+            model_ckpt = os.path.join(trainer.run_save_dir, model_checkpoint_path)
+            trainer.reset_logging()
+
+            # export model to ONNX
+            cfg.model.weights = model_ckpt
+            exporter = Exporter(cfg=cfg)
+            cls.onnx_model_path = os.path.join(cls.tmp_path, "model.onnx")
+            exporter.export(onnx_path=cls.onnx_model_path)
+            exporter.reset_logging()
+
+            # make archive
+            cfg.archiver.archive_save_directory = cls.tmp_path
+            cfg.archiver.archive_name = f"nnarchive_{head_name}"
+            archiver = Archiver(cfg=cfg)
+            cls.archive_path = archiver.archive(cls.onnx_model_path)
+            archiver.reset_logging()
+
+            # clear the loaded config instance
+            Config.clear_instance()
 
-        def classification_dataset_generator():
-            for i in range(10):
-                img = np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8)
-                img_file_path = os.path.join(cls.tmp_path, "images", f"img{i}.png")
+    def _make_dummy_ldf(
+        ldf_name: str,
+        save_path: str,
+        number: int = 3,
+        dim: tuple = (10, 10, 3),
+        cls_anno: bool = False,
+        bbx_anno: bool = False,
+        sgm_anno: bool = False,
+        kpt_anno: bool = False,
+        multilabel: bool = False,
+        split_ratios: list = None,
+    ):
+        """Creates random-pixel images with fictional annotations and parses them to
+        L{LuxonisDataset} format.
+
+        @type ldf_name: str
+        @param ldf_name: Name of the created L{LuxonisDataset} format dataset.
+        @type save_path: str
+        @param save_path: Path to where the created images are saved.
+        @type number: int
+        @param number: Number of images to create.
+        @type dim: Tuple[int, int, int]
+        @param dim: Dimensions of the created images in HWC order.
+        @type cls_anno: bool
+        @param cls_anno: True if created dataset should contain classification annotations.
+        @type bbx_anno: bool
+        @param bbx_anno: True if created dataset should contain bounding box annotations.
+        @type sgm_anno: bool
+        @param sgm_anno: True if created dataset should contain segmentation annotations.
+        @type kpt_anno: bool
+        @param kpt_anno: True if created dataset should contain keypoint annotations.
+        @type multilabel: bool
+        @param multilabel: True if created dataset should contain multilabel annotations.
+        @type split_ratios: List[float, float, float]
+        @param split_ratios: List of ratios defining the train, val, and test splits.
+        """
+
+        if split_ratios is None:
+            split_ratios = [0.333, 0.333, 0.333]
+
+        os.makedirs(os.path.join(save_path, "images"), exist_ok=True)
+
+        if multilabel:
+            labels = ["label_x", "label_y", "label_z"]
+        else:
+            labels = ["label_x"]
+
+        def dataset_generator():
+            for i in range(number):
+                label = random.choice(labels)
+                img = np.random.randint(0, 256, dim, dtype=np.uint8)
+                img_file_path = os.path.join(save_path, "images", f"img{i}.png")
                 cv2.imwrite(img_file_path, img)
-                yield {
-                    "file": img_file_path,
-                    "type": "classification",
-                    "value": True,
-                    "class": random.choice(labels),
-                }
 
-        if LuxonisDataset.exists(cls.ldf_name):
+                if cls_anno:
+                    yield {
+                        "file": img_file_path,
+                        "type": "classification",
+                        "value": True,
+                        "class": label,
+                    }
+
+                if bbx_anno:
+                    box = (0.25, 0.25, 0.5, 0.5)
+                    yield {
+                        "file": img_file_path,
+                        "type": "box",
+                        "value": box,
+                        "class": label,
+                    }
+
+                if kpt_anno:
+                    keypoints = [
+                        (0.25, 0.25, 2),
+                        (0.75, 0.25, 2),
+                        (0.75, 0.75, 2),
+                        (0.25, 0.75, 2),
+                    ]
+                    yield {
+                        "file": img_file_path,
+                        "type": "keypoints",
+                        "value": keypoints,
+                        "class": label,
+                    }
+
+                if sgm_anno:
+                    polyline = [
+                        (0.25, 0.75),
+                        (0.75, 0.25),
+                        (0.75, 0.75),
+                        (0.25, 0.75),
+                        (0.25, 0.25),
+                    ]
+                    yield {
+                        "file": img_file_path,
+                        "type": "polyline",
+                        "value": polyline,
+                        "class": label,
+                    }
+
+        if LuxonisDataset.exists(ldf_name):
             print("Deleting existing dataset")
-            LuxonisDataset(cls.ldf_name).delete_dataset()
-        dataset = LuxonisDataset(cls.ldf_name)
-        dataset.add(classification_dataset_generator)
+            LuxonisDataset(ldf_name).delete_dataset()
+        dataset = LuxonisDataset(ldf_name)
         dataset.set_classes(list(labels))
-        dataset.make_splits()
-
-        # make config
-        config_dict = {
-            "model": {
-                "name": "test_model",
-                "predefined_model": {"name": "ClassificationModel"},
-            },
-            "dataset": {"name": cls.ldf_name},
-            "tracker": {"save_directory": cls.tmp_path},
-        }
-        cls.config_path = os.path.join(cls.tmp_path, "config.yaml")
-        with open(cls.config_path, "w") as yaml_file:
-            yaml_str = yaml.dump(config_dict)
-            yaml_file.write(yaml_str)
-        cfg = Config.get_config(config_dict)
-
-        # train model
-        cfg.trainer.epochs = 1
-        cfg.trainer.validation_interval = 1
-        cfg.trainer.batch_size = 4
-        trainer = Trainer(cfg=cfg)
-        trainer.train()
-        callbacks = [
-            c
-            for c in trainer.pl_trainer.callbacks
-            if isinstance(c, pl.callbacks.ModelCheckpoint)
-        ]
-        model_checkpoint_path = callbacks[0].best_model_path
-        model_ckpt = os.path.join(trainer.run_save_dir, model_checkpoint_path)
-        trainer.reset_logging()
-
-        # export model to ONNX
-        cfg.model.weights = model_ckpt
-        exporter = Exporter(cfg=cfg)
-        cls.onnx_model_path = os.path.join(cls.tmp_path, "model.onnx")
-        exporter.export(onnx_path=cls.onnx_model_path)
-        exporter.reset_logging()
-
-        # make archive
-        cfg.archiver.archive_save_directory = cls.tmp_path
-        archiver = Archiver(cls.config_path)
-        cls.archive_path = archiver.archive(cls.onnx_model_path)
-        archiver.reset_logging()
-
-        # load archive files into memory
-        with tarfile.open(cls.archive_path, mode="r") as tar:
-            cls.archive_fnames = tar.getnames()
-            for fname in cls.archive_fnames:
-                f = tar.extractfile(fname)
-                if fname.endswith(".json"):
-                    cls.json_dict = json.load(f)
-                elif fname.endswith(".onnx"):
-                    model_bytes = f.read()
-                    model_io = io.BytesIO(model_bytes)
-                    cls.onnx_model = onnx.load(model_io)
+        if kpt_anno:
+            keypoint_labels = [
+                "kp1",
+                "kp2",
+                "kp3",
+                "kp4",
+            ]
+            keypoint_edges = [
+                [0, 1],
+                [1, 2],
+                [2, 3],
+                [3, 0],
+            ]
+            dataset.set_skeletons(
+                {
+                    label: {"labels": keypoint_labels, "edges": keypoint_edges}
+                    for label in labels
+                }
+            )
+        dataset.add(dataset_generator)
+        dataset.make_splits(ratios=split_ratios)
 
-    @classmethod
-    def teardown_class(cls):
-        """Remove all created files."""
-        LuxonisDataset(cls.ldf_name).delete_dataset()
-        shutil.rmtree(cls.tmp_path)
+    def _make_dummy_cfg_dict(head_name: str, ldf_name: str, save_path: str) -> dict:
+        """Creates a configuration dict based on the type of the provided model head.
+
+        @type head_name: str
+        @param head_name: Name of the specified head.
+        @type ldf_name: str
+        @param ldf_name: Name of the L{LuxonisDataset} format dataset on which the
+            training will be performed.
+        @type save_path: str
+        @param save_path: Path to LuxonisTrackerPL save directory.
+        @rtype: dict
+        @return: Created config dict.
+        """
+
+        cfg_dict = {"model": {"name": f"model_w_{head_name}"}}
+        cfg_dict["dataset"] = {"name": ldf_name}
+        cfg_dict["tracker"] = {"save_directory": save_path}
 
-    def test_archive_creation(self):
-        """Test if nn_archive was created."""
-        assert os.path.exists(self.archive_path)
+        if head_name == "ClassificationHead":
+            cfg_dict["model"]["predefined_model"] = {"name": "ClassificationModel"}
+        elif head_name == "EfficientBBoxHead":
+            cfg_dict["model"]["predefined_model"] = {"name": "DetectionModel"}
+        elif head_name == "ImplicitKeypointBBoxHead":
+            cfg_dict["model"]["predefined_model"] = {"name": "KeypointDetectionModel"}
+        elif head_name == "SegmentationHead":
+            cfg_dict["model"]["predefined_model"] = {"name": "SegmentationModel"}
+        elif head_name == "BiSeNetHead":
+            cfg_dict["model"]["nodes"] = [
+                {"name": "MicroNet", "alias": "segmentation_backbone"},
+                {
+                    "name": "BiSeNetHead",
+                    "alias": "segmentation_head",
+                    "inputs": ["segmentation_backbone"],
+                },
+            ]
+            cfg_dict["model"]["losses"] = [
+                {"name": "BCEWithLogitsLoss", "attached_to": "segmentation_head"}
+            ]
+        else:
+            raise NotImplementedError(f"No implementation for {head_name}")
 
-    def test_archive_suffix(self):
-        """Test if nn_archive is compressed using xz option (should be the default
+        return cfg_dict
+
+    @parameterized.expand(HEAD_NAMES)
+    def test_archive_creation(self, head_name):
+        """Tests if NN archive was created using xz compression (should be the default
         option)."""
-        assert self.archive_path.endswith("tar.xz")
+        archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz")
+        assert os.path.exists(archive_path)  # expected name already pins the .tar.xz suffix
 
-    def test_archive_contents(self):
-        """Test if nn_archive consists of config.json and model.onnx."""
+    @parameterized.expand(HEAD_NAMES)
+    def test_archive_contents(self, head_name):
+        """Tests if NN archive consists of config.json and model.onnx."""
+        archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz")
+        with tarfile.open(archive_path, mode="r") as tar:
+            archive_fnames = tar.getnames()
         assert (
-            len(self.archive_fnames) == 2
-            and any([fname == "config.json" for fname in self.archive_fnames])
-            and any([fname == "model.onnx" for fname in self.archive_fnames])
+            len(archive_fnames) == 2
+            and any([fname == "config.json" for fname in archive_fnames])
+            and any([fname == "model.onnx" for fname in archive_fnames])
         )
 
-    def test_onnx(self):
-        """Test if archived ONNX model is valid."""
-        assert onnx.checker.check_model(self.onnx_model, full_check=True) is None
+    @parameterized.expand(HEAD_NAMES)
+    def test_onnx(self, head_name):
+        """Tests if archive ONNX model is valid."""
+        archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz")
+        with tarfile.open(archive_path, mode="r") as tar:
+            f = tar.extractfile("model.onnx")
+            model_bytes = f.read()
+            model_io = io.BytesIO(model_bytes)
+            onnx_model = onnx.load(model_io)
+        assert onnx.checker.check_model(onnx_model, full_check=True) is None
+
+    @parameterized.expand(HEAD_NAMES)
+    def test_config_io(self, head_name):
+        """Tests if archived config inputs and outputs are valid."""
+        archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz")
+        with tarfile.open(archive_path, mode="r") as tar:
+            f = tar.extractfile("config.json")
+            json_dict = json.load(f)
+            f = tar.extractfile("model.onnx")
+            model_bytes = f.read()
+            model_io = io.BytesIO(model_bytes)
+            onnx_model = onnx.load(model_io)
 
-    def test_config_inputs(self):
-        """Test if archived config inputs are valid."""
         config_input_names = []
-        for input in self.json_dict["model"]["inputs"]:
+        for input in json_dict["model"]["inputs"]:
             config_input_names.append(input["name"])
-        assert set([input.name for input in self.onnx_model.graph.input]) == set(
+        valid_inputs = set([input.name for input in onnx_model.graph.input]) == set(
             config_input_names
         )
 
-    def test_config_outputs(self):
-        """Test if archived config outputs are valid."""
         config_output_names = []
-        for input in self.json_dict["model"]["outputs"]:
+        for input in json_dict["model"]["outputs"]:
             config_output_names.append(input["name"])
-        assert set([output.name for output in self.onnx_model.graph.output]) == set(
+        valid_outputs = set([output.name for output in onnx_model.graph.output]) == set(
             config_output_names
         )
+
+        assert valid_inputs and valid_outputs
+
+    @parameterized.expand(HEAD_NAMES)
+    def test_head_outputs(self, head_name):
+        """Tests if archived config head outputs are valid."""
+        archive_path = os.path.join(self.tmp_path, f"nnarchive_{head_name}_onnx.tar.xz")
+        with tarfile.open(archive_path, mode="r") as tar:
+            f = tar.extractfile("config.json")
+            json_dict = json.load(f)
+        head_output = json_dict["model"]["heads"][0]["outputs"]
+        if head_name == "ClassificationHead":
+            assert head_outputs.OutputsClassification.parse_obj(head_output)
+        elif head_name == "EfficientBBoxHead":
+            assert head_outputs.OutputsYOLO.parse_obj(head_output)
+        elif head_name == "ImplicitKeypointBBoxHead":
+            assert head_outputs.OutputsKeypointDetectionYOLO.parse_obj(head_output)
+        elif head_name == "SegmentationHead":
+            assert head_outputs.OutputsSegmentation.parse_obj(head_output)
+        elif head_name == "BiSeNetHead":
+            assert head_outputs.OutputsSegmentation.parse_obj(head_output)
+        else:
+            raise NotImplementedError(f"Missing tests for {head_name} head")
+
+    @classmethod
+    def teardown_class(cls):
+        """Removes all files created during setup."""
+        for ldf_name in cls.ldf_names:
+            LuxonisDataset(ldf_name).delete_dataset()
+        shutil.rmtree(cls.tmp_path)

From 351e0c58ff281987a2c9642e09c3ca2d3851dfa8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Thu, 11 Apr 2024 16:11:58 +0200
Subject: [PATCH 11/28] Upload All Checkpoints (#19)

* uploading all checkpoints

* fix names

* removed comment
---
 luxonis_train/callbacks/upload_checkpoint.py | 47 +++++++++++---------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py
index a0fa137a..efd7fe02 100644
--- a/luxonis_train/callbacks/upload_checkpoint.py
+++ b/luxonis_train/callbacks/upload_checkpoint.py
@@ -1,5 +1,6 @@
 import logging
 import os
+from pathlib import Path
 from typing import Any
 
 import lightning.pytorch as pl
@@ -25,37 +26,41 @@ def __init__(self, upload_directory: str):
         )
         self.logger = logging.getLogger(__name__)
         self.last_logged_epoch = None
-        self.last_best_checkpoint = None
+        self.last_best_checkpoints = set()
 
     def on_save_checkpoint(
         self,
         trainer: pl.Trainer,
-        pl_module: pl.LightningModule,
+        _: pl.LightningModule,
         checkpoint: dict[str, Any],
     ) -> None:
         # Log only once per epoch in case there are multiple ModelCheckpoint callbacks
         if not self.last_logged_epoch == trainer.current_epoch:
-            model_checkpoint_callbacks = [
-                c
+            checkpoint_paths = [
+                c.best_model_path
                 for c in trainer.callbacks  # type: ignore
                 if isinstance(c, pl.callbacks.ModelCheckpoint)  # type: ignore
+                and c.best_model_path
             ]
-            # NOTE: assume that first checkpoint callback is based on val loss
-            curr_best_checkpoint = model_checkpoint_callbacks[0].best_model_path
-
-            if self.last_best_checkpoint != curr_best_checkpoint:
-                self.logger.info(f"Started checkpoint upload to {self.fs.full_path}...")
-                temp_filename = "curr_best_val_loss.ckpt"
-                torch.save(checkpoint, temp_filename)
-                self.fs.put_file(
-                    local_path=temp_filename,
-                    remote_path=temp_filename,
-                    mlflow_instance=trainer.logger.experiment.get(  # type: ignore
-                        "mlflow", None
-                    ),
-                )
-                os.remove(temp_filename)
-                self.logger.info("Checkpoint upload finished")
-                self.last_best_checkpoint = curr_best_checkpoint
+            for curr_best_checkpoint in checkpoint_paths:
+                if curr_best_checkpoint not in self.last_best_checkpoints:
+                    self.logger.info(
+                        f"Started checkpoint upload to {self.fs.full_path}..."
+                    )
+                    temp_filename = (
+                        Path(curr_best_checkpoint).parent.with_suffix(".ckpt").name
+                    )
+                    torch.save(checkpoint, temp_filename)
+
+                    self.fs.put_file(
+                        local_path=temp_filename,
+                        remote_path=temp_filename,
+                        mlflow_instance=trainer.logger.experiment.get(  # type: ignore
+                            "mlflow", None
+                        ),
+                    )
+                    os.remove(temp_filename)
+                    self.logger.info("Checkpoint upload finished")
+                    self.last_best_checkpoints.add(curr_best_checkpoint)
 
             self.last_logged_epoch = trainer.current_epoch

From 9c4cadb932254e7ad559350a2eb55ebc72f20266 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Thu, 11 Apr 2024 16:20:48 +0200
Subject: [PATCH 12/28] LuxonisML v0.1.0 (#20)

---
 requirements.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 5e436e44..03081b48 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,6 @@
 blobconverter>=1.4.2
 lightning>=2.0.0
-#luxonis-ml[all]>=0.0.1
-luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev
+luxonis-ml[all]>=0.1.0
 onnx>=1.12.0
 onnxruntime>=1.13.1
 onnxsim>=0.4.10

From f425fdb39ae11ead1ff09385ce802729ab96e4dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Mon, 15 Apr 2024 20:22:14 +0200
Subject: [PATCH 13/28] SIGTERM Handling (#21)

* handling SIGTERM signal

* resume argument takes path
---
 luxonis_train/__main__.py                     | 10 ++++--
 .../callbacks/luxonis_progress_bar.py         |  2 +-
 luxonis_train/core/trainer.py                 | 36 ++++++++++++++++++-
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index b1fd3971..94276b60 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -45,11 +45,17 @@ def __str__(self):
 
 
 @app.command()
-def train(config: ConfigType = None, opts: OptsType = None):
+def train(
+    config: ConfigType = None,
+    resume: Annotated[
+        Optional[str], typer.Option(help="Resume training from this checkpoint.")
+    ] = None,
+    opts: OptsType = None,
+):
     """Start training."""
     from luxonis_train.core import Trainer
 
-    Trainer(str(config), opts).train()
+    Trainer(str(config), opts, resume=resume).train()
 
 
 @app.command()
diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py
index fcc130cd..16d173e7 100644
--- a/luxonis_train/callbacks/luxonis_progress_bar.py
+++ b/luxonis_train/callbacks/luxonis_progress_bar.py
@@ -28,7 +28,7 @@ def get_metrics(
     ) -> dict[str, int | str | float | dict[str, float]]:
         # NOTE: there might be a cleaner way of doing this
         items = super().get_metrics(trainer, pl_module)
-        if trainer.training:
+        if trainer.training and pl_module.training_step_outputs:
             items["Loss"] = pl_module.training_step_outputs[-1]["loss"].item()
         return items
 
diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py
index 2b3d6a78..8326ce48 100644
--- a/luxonis_train/core/trainer.py
+++ b/luxonis_train/core/trainer.py
@@ -1,3 +1,5 @@
+import os.path as osp
+import signal
 import threading
 from logging import getLogger
 from typing import Any, Literal
@@ -21,6 +23,7 @@ def __init__(
         self,
         cfg: str | dict[str, Any] | Config,
         opts: list[str] | tuple[str, ...] | dict[str, Any] | None = None,
+        resume: str | None = None,
     ):
         """Constructs a new Trainer instance.
 
@@ -30,9 +33,17 @@ def __init__(
         @type opts: list[str] | tuple[str, ...] | dict[str, Any] | None
         @param opts: Argument dict provided through command line,
             used for config overriding.
+
+        @type resume: str | None
+        @param resume: Training will resume from this checkpoint.
         """
         super().__init__(cfg, opts)
 
+        if resume is not None:
+            self.resume = str(LuxonisFileSystem.download(resume, self.run_save_dir))
+        else:
+            self.resume = None
+
         self.lightning_module = LuxonisModel(
             cfg=self.cfg,
             dataset_metadata=self.dataset_metadata,
@@ -40,6 +51,29 @@ def __init__(
             input_shape=self.loader_train.input_shape,
         )
 
+        def graceful_exit(signum, frame):
+            logger.info("SIGTERM received, stopping training...")
+            ckpt_path = osp.join(self.run_save_dir, "resume.ckpt")
+            self.pl_trainer.save_checkpoint(ckpt_path)
+            self._upload_logs()
+
+            if self.cfg.tracker.is_mlflow:
+                logger.info("Uploading checkpoint to MLFlow.")
+                fs = LuxonisFileSystem(
+                    "mlflow://",
+                    allow_active_mlflow_run=True,
+                    allow_local=False,
+                )
+                fs.put_file(
+                    local_path=ckpt_path,
+                    remote_path="resume.ckpt",
+                    mlflow_instance=self.tracker.experiment.get("mlflow", None),
+                )
+
+            exit(0)
+
+        signal.signal(signal.SIGTERM, graceful_exit)
+
     def _upload_logs(self) -> None:
         if self.cfg.tracker.is_mlflow:
             logger.info("Uploading logs to MLFlow.")
@@ -56,7 +90,7 @@ def _upload_logs(self) -> None:
 
     def _trainer_fit(self, *args, **kwargs):
         try:
-            self.pl_trainer.fit(*args, **kwargs)
+            self.pl_trainer.fit(*args, ckpt_path=self.resume, **kwargs)
         except Exception:
             logger.exception("Encountered exception during training.")
         finally:

From ca570637eefae0912dae338cf4b25871b3bba52f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Wed, 24 Apr 2024 02:06:57 +0200
Subject: [PATCH 14/28] Task Label Groups Support (#22)

* handling SIGTERM signal

* resume argument takes path

* basic task group labels support

* updated requirements

* fixed tests

* fixed loader test

* Update luxonis_train/models/luxonis_model.py

Co-authored-by: conorsim <60359299+conorsim@users.noreply.github.com>

---------

Co-authored-by: conorsim <60359299+conorsim@users.noreply.github.com>
---
 luxonis_train/models/luxonis_model.py         | 12 ++-
 luxonis_train/utils/boxutils.py               |  4 +-
 luxonis_train/utils/config.py                 |  1 +
 luxonis_train/utils/loaders/base_loader.py    | 81 ++++++++++---------
 .../utils/loaders/luxonis_loader_torch.py     | 10 ++-
 luxonis_train/utils/types.py                  |  1 +
 requirements.txt                              |  3 +-
 tests/integration/conftest.py                 |  4 +-
 tests/unittests/test_core/test_archiver.py    |  2 +-
 .../test_loaders/test_base_loader.py          |  6 +-
 10 files changed, 71 insertions(+), 53 deletions(-)

diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py
index 7cd396f9..58aeccd1 100644
--- a/luxonis_train/models/luxonis_model.py
+++ b/luxonis_train/models/luxonis_model.py
@@ -35,7 +35,7 @@
 )
 from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry
 from luxonis_train.utils.tracker import LuxonisTrackerPL
-from luxonis_train.utils.types import Kwargs, Labels, Packet
+from luxonis_train.utils.types import Kwargs, Labels, Packet, TaskLabels
 
 from .luxonis_output import LuxonisOutput
 
@@ -139,10 +139,13 @@ def __init__(
         frozen_nodes: list[tuple[str, int]] = []
         nodes: dict[str, tuple[type[BaseNode], Kwargs]] = {}
 
+        self.node_tasks: dict[str, str] = {}
+
         for node_cfg in self.cfg.model.nodes:
             node_name = node_cfg.name
             Node = BaseNode.REGISTRY.get(node_name)
             node_name = node_cfg.alias or node_name
+            self.node_tasks[node_name] = node_cfg.task_group
             if node_cfg.freezing.active:
                 epochs = self.cfg.trainer.epochs
                 if node_cfg.freezing.unfreeze_after is None:
@@ -244,7 +247,7 @@ def _initiate_nodes(
     def forward(
         self,
         inputs: Tensor,
-        labels: Labels | None = None,
+        task_labels: TaskLabels | None = None,
         images: Tensor | None = None,
         *,
         compute_loss: bool = True,
@@ -259,8 +262,8 @@ def forward(
 
         @type inputs: L{Tensor}
         @param inputs: Input tensor.
-        @type labels: L{Labels} | None
-        @param labels: Labels dictionary. Defaults to C{None}.
+        @type task_labels: L{TaskLabels} | None
+        @param task_labels: Labels dictionary. Defaults to C{None}.
         @type images: L{Tensor} | None
         @param images: Canvas tensor for visualizers. Defaults to C{None}.
         @type compute_loss: bool
@@ -296,6 +299,7 @@ def forward(
             node_inputs = [computed[pred] for pred in input_names]
             outputs = node.run(node_inputs)
             computed[node_name] = outputs
+            labels = task_labels[self.node_tasks[node_name]] if task_labels else None
 
             if compute_loss and node_name in self.losses and labels is not None:
                 for loss_name, loss in self.losses[node_name].items():
diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py
index 0d708f79..a59f4cd0 100644
--- a/luxonis_train/utils/boxutils.py
+++ b/luxonis_train/utils/boxutils.py
@@ -404,6 +404,7 @@ def anchors_from_dataset(
     n_anchors: int = 9,
     n_generations: int = 1000,
     ratio_threshold: float = 4.0,
+    task_group: str = "default",
 ) -> tuple[Tensor, float]:
     """Generates anchors based on bounding box annotations present in provided data
     loader. It uses K-Means for initial proposals which are then refined with genetic
@@ -425,7 +426,8 @@ def anchors_from_dataset(
 
     widths = []
     inputs = None
-    for inp, labels in loader:
+    for inp, task_labels in loader:
+        labels = next(iter(task_labels.values()))  # TODO: handle multiple tasks
         boxes = labels[LabelType.BOUNDINGBOX]
         curr_wh = boxes[:, 4:]
         widths.append(curr_wh)
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index a2d4f332..45dde192 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -43,6 +43,7 @@ class ModelNodeConfig(CustomBaseModel):
     inputs: list[str] = []
     params: dict[str, Any] = {}
     freezing: FreezingConfig = FreezingConfig()
+    task_group: str = "default"
 
 
 class PredefinedModelConfig(CustomBaseModel):
diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py
index 93f3fd0c..be12b439 100644
--- a/luxonis_train/utils/loaders/base_loader.py
+++ b/luxonis_train/utils/loaders/base_loader.py
@@ -8,7 +8,7 @@
 from luxonis_train.utils.registry import LOADERS
 from luxonis_train.utils.types import Labels, LabelType
 
-LuxonisLoaderTorchOutput = tuple[Tensor, Labels]
+LuxonisLoaderTorchOutput = tuple[Tensor, dict[str, Labels]]
 """LuxonisLoaderTorchOutput is a tuple of images and corresponding labels."""
 
 
@@ -46,7 +46,7 @@ def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput:
 
 def collate_fn(
     batch: list[LuxonisLoaderTorchOutput],
-) -> tuple[Tensor, dict[LabelType, Tensor]]:
+) -> tuple[Tensor, dict[str, dict[LabelType, Tensor]]]:
     """Default collate function used for training.
 
     @type batch: list[LuxonisLoaderTorchOutput]
@@ -55,41 +55,46 @@ def collate_fn(
     @rtype: tuple[Tensor, dict[LabelType, Tensor]]
     @return: Tuple of images and annotations in the format expected by the model.
     """
-    zipped = zip(*batch)
-    imgs, anno_dicts = zipped
+    imgs, group_dicts = zip(*batch)
+    out_group_dicts = {task: {} for task in group_dicts[0].keys()}
     imgs = torch.stack(imgs, 0)
 
-    present_annotations = anno_dicts[0].keys()
-    out_annotations: dict[LabelType, Tensor] = {
-        anno: torch.empty(0) for anno in present_annotations
-    }
-
-    if LabelType.CLASSIFICATION in present_annotations:
-        class_annos = [anno[LabelType.CLASSIFICATION] for anno in anno_dicts]
-        out_annotations[LabelType.CLASSIFICATION] = torch.stack(class_annos, 0)
-
-    if LabelType.SEGMENTATION in present_annotations:
-        seg_annos = [anno[LabelType.SEGMENTATION] for anno in anno_dicts]
-        out_annotations[LabelType.SEGMENTATION] = torch.stack(seg_annos, 0)
-
-    if LabelType.BOUNDINGBOX in present_annotations:
-        bbox_annos = [anno[LabelType.BOUNDINGBOX] for anno in anno_dicts]
-        label_box: list[Tensor] = []
-        for i, box in enumerate(bbox_annos):
-            l_box = torch.zeros((box.shape[0], 6))
-            l_box[:, 0] = i  # add target image index for build_targets()
-            l_box[:, 1:] = box
-            label_box.append(l_box)
-        out_annotations[LabelType.BOUNDINGBOX] = torch.cat(label_box, 0)
-
-    if LabelType.KEYPOINT in present_annotations:
-        keypoint_annos = [anno[LabelType.KEYPOINT] for anno in anno_dicts]
-        label_keypoints: list[Tensor] = []
-        for i, points in enumerate(keypoint_annos):
-            l_kps = torch.zeros((points.shape[0], points.shape[1] + 1))
-            l_kps[:, 0] = i  # add target image index for build_targets()
-            l_kps[:, 1:] = points
-            label_keypoints.append(l_kps)
-        out_annotations[LabelType.KEYPOINT] = torch.cat(label_keypoints, 0)
-
-    return imgs, out_annotations
+    for task in list(group_dicts[0].keys()):
+        anno_dicts = [group[task] for group in group_dicts]
+
+        present_annotations = anno_dicts[0].keys()
+        out_annotations: dict[LabelType, Tensor] = {
+            anno: torch.empty(0) for anno in present_annotations
+        }
+
+        if LabelType.CLASSIFICATION in present_annotations:
+            class_annos = [anno[LabelType.CLASSIFICATION] for anno in anno_dicts]
+            out_annotations[LabelType.CLASSIFICATION] = torch.stack(class_annos, 0)
+
+        if LabelType.SEGMENTATION in present_annotations:
+            seg_annos = [anno[LabelType.SEGMENTATION] for anno in anno_dicts]
+            out_annotations[LabelType.SEGMENTATION] = torch.stack(seg_annos, 0)
+
+        if LabelType.BOUNDINGBOX in present_annotations:
+            bbox_annos = [anno[LabelType.BOUNDINGBOX] for anno in anno_dicts]
+            label_box: list[Tensor] = []
+            for i, box in enumerate(bbox_annos):
+                l_box = torch.zeros((box.shape[0], 6))
+                l_box[:, 0] = i  # add target image index for build_targets()
+                l_box[:, 1:] = box
+                label_box.append(l_box)
+            out_annotations[LabelType.BOUNDINGBOX] = torch.cat(label_box, 0)
+
+        if LabelType.KEYPOINT in present_annotations:
+            keypoint_annos = [anno[LabelType.KEYPOINT] for anno in anno_dicts]
+            label_keypoints: list[Tensor] = []
+            for i, points in enumerate(keypoint_annos):
+                l_kps = torch.zeros((points.shape[0], points.shape[1] + 1))
+                l_kps[:, 0] = i  # add target image index for build_targets()
+                l_kps[:, 1:] = points
+                label_keypoints.append(l_kps)
+            out_annotations[LabelType.KEYPOINT] = torch.cat(label_keypoints, 0)
+
+        out_group_dicts[task] = out_annotations
+
+    return imgs, out_group_dicts
diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py
index a0e1f324..dfd4091a 100644
--- a/luxonis_train/utils/loaders/luxonis_loader_torch.py
+++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py
@@ -29,11 +29,13 @@ def input_shape(self) -> Size:
         return Size([1, *img.shape])
 
     def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput:
-        img, annotations = self.base_loader[idx]
+        img, group_annotations = self.base_loader[idx]
 
         img = np.transpose(img, (2, 0, 1))  # HWC to CHW
         tensor_img = Tensor(img)
-        for key in annotations:
-            annotations[key] = Tensor(annotations[key])  # type: ignore
+        for task in group_annotations:
+            annotations = group_annotations[task]
+            for key in annotations:
+                annotations[key] = Tensor(annotations[key])  # type: ignore
 
-        return tensor_img, annotations
+        return tensor_img, group_annotations
diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py
index dbbf471e..3fb724c3 100644
--- a/luxonis_train/utils/types.py
+++ b/luxonis_train/utils/types.py
@@ -7,6 +7,7 @@
 Kwargs = dict[str, Any]
 OutputTypes = Literal["boxes", "class", "keypoints", "segmentation", "features"]
 Labels = dict[LabelType, Tensor]
+TaskLabels = dict[str, Labels]
 
 AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int]
 """AttachIndexType is used to specify to which output of the prevoius node does the
diff --git a/requirements.txt b/requirements.txt
index 03081b48..7f7e996a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,7 @@
 blobconverter>=1.4.2
 lightning>=2.0.0
-luxonis-ml[all]>=0.1.0
+#luxonis-ml[all]>=0.1.0
+luxonis-ml[all]@git+https://github.com/luxonis/luxonis-ml.git@dev
 onnx>=1.12.0
 onnxruntime>=1.13.1
 onnxsim>=0.4.10
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 35c893d4..815a4bd5 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -120,7 +120,7 @@ def COCO_people_subset_generator():
             }
         }
     )
-    dataset.add(COCO_people_subset_generator)  # type: ignore
+    dataset.add(COCO_people_subset_generator())
     dataset.make_splits()
 
 
@@ -161,5 +161,5 @@ def CIFAR10_subset_generator():
 
     dataset.set_classes(classes)
 
-    dataset.add(CIFAR10_subset_generator)  # type: ignore
+    dataset.add(CIFAR10_subset_generator())
     dataset.make_splits()
diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py
index a044be52..fe10a46e 100644
--- a/tests/unittests/test_core/test_archiver.py
+++ b/tests/unittests/test_core/test_archiver.py
@@ -226,7 +226,7 @@ def dataset_generator():
                     for label in labels
                 }
             )
-        dataset.add(dataset_generator)
+        dataset.add(dataset_generator())
         dataset.make_splits(ratios=split_ratios)
 
     def _make_dummy_cfg_dict(head_name: str, ldf_name: str, save_path: str) -> dict:
diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py
index e48f81ad..b5c8b299 100644
--- a/tests/unittests/test_utils/test_loaders/test_base_loader.py
+++ b/tests/unittests/test_utils/test_loaders/test_base_loader.py
@@ -12,11 +12,11 @@ def test_collate_fn():
     batch = [
         (
             torch.rand(3, 224, 224, dtype=torch.float32),
-            {LabelType.CLASSIFICATION: torch.tensor([1, 0])},
+            {"default": {LabelType.CLASSIFICATION: torch.tensor([1, 0])}},
         ),
         (
             torch.rand(3, 224, 224, dtype=torch.float32),
-            {LabelType.CLASSIFICATION: torch.tensor([0, 1])},
+            {"default": {LabelType.CLASSIFICATION: torch.tensor([0, 1])}},
         ),
     ]
 
@@ -28,6 +28,8 @@ def test_collate_fn():
     assert imgs.dtype == torch.float32
 
     # Check annotations
+    assert "default" in annotations
+    annotations = annotations["default"]
     assert LabelType.CLASSIFICATION in annotations
     assert annotations[LabelType.CLASSIFICATION].shape == (2, 2)
     assert annotations[LabelType.CLASSIFICATION].dtype == torch.int64

From d1d71f059d6ee3f7bdbad22a3978b05b6fa79518 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Wed, 8 May 2024 02:06:42 +0200
Subject: [PATCH 15/28] Tensor Core Float16 Precision (#24)

* option to set torch matmul precision for tensor cores

* updated readme
---
 configs/README.md             | 35 ++++++++++++++++++-----------------
 luxonis_train/core/trainer.py |  4 ++++
 luxonis_train/utils/config.py |  1 +
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/configs/README.md b/configs/README.md
index 27e2fb6e..c1f4889b 100644
--- a/configs/README.md
+++ b/configs/README.md
@@ -142,23 +142,24 @@ To store and load the data we use LuxonisDataset and LuxonisLoader. For specific
 
 Here you can change everything related to actual training of the model.
 
-| Key                     | Type                                    | Default value | Description                                                                                                                                      |
-| ----------------------- | --------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
-| batch_size              | int                                     | 32            | batch size used for training                                                                                                                     |
-| accumulate_grad_batches | int                                     | 1             | number of batches for gradient accumulation                                                                                                      |
-| use_weighted_sampler    | bool                                    | False         | bool if use WeightedRandomSampler for training, only works with classification tasks                                                             |
-| epochs                  | int                                     | 100           | number of training epochs                                                                                                                        |
-| num_workers             | int                                     | 2             | number of workers for data loading                                                                                                               |
-| train_metrics_interval  | int                                     | -1            | frequency of computing metrics on train data, -1 if don't perform                                                                                |
-| validation_interval     | int                                     | 1             | frequency of computing metrics on validation data                                                                                                |
-| num_log_images          | int                                     | 4             | maximum number of images to visualize and log                                                                                                    |
-| skip_last_batch         | bool                                    | True          | whether to skip last batch while training                                                                                                        |
-| accelerator             | Literal\["auto", "cpu", "gpu"\]         | "auto"        | What accelerator to use for training.                                                                                                            |
-| devices                 | int \| list\[int\] \| str               | "auto"        | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator |
-| strategy                | Literal\["auto", "ddp"\]                | "auto"        | What strategy to use for training.                                                                                                               |
-| num_sanity_val_steps    | int                                     | 2             | Number of sanity validation steps performed before training.                                                                                     |
-| profiler                | Literal\["simple", "advanced"\] \| None | None          | PL profiler for GPU/CPU/RAM utilization analysis                                                                                                 |
-| verbose                 | bool                                    | True          | Print all intermediate results to console.                                                                                                       |
+| Key                     | Type                                           | Default value | Description                                                                                                                                      |
+| ----------------------- | ---------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
+| batch_size              | int                                            | 32            | batch size used for training                                                                                                                     |
+| accumulate_grad_batches | int                                            | 1             | number of batches for gradient accumulation                                                                                                      |
+| use_weighted_sampler    | bool                                           | False         | bool if use WeightedRandomSampler for training, only works with classification tasks                                                             |
+| epochs                  | int                                            | 100           | number of training epochs                                                                                                                        |
+| num_workers             | int                                            | 2             | number of workers for data loading                                                                                                               |
+| train_metrics_interval  | int                                            | -1            | frequency of computing metrics on train data, -1 if don't perform                                                                                |
+| validation_interval     | int                                            | 1             | frequency of computing metrics on validation data                                                                                                |
+| num_log_images          | int                                            | 4             | maximum number of images to visualize and log                                                                                                    |
+| skip_last_batch         | bool                                           | True          | whether to skip last batch while training                                                                                                        |
+| accelerator             | Literal\["auto", "cpu", "gpu"\]                | "auto"        | What accelerator to use for training.                                                                                                            |
+| devices                 | int \| list\[int\] \| str                      | "auto"        | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator |
+| matmul_precision        | Literal\["medium", "high", "highest"\] \| None | None          | Sets the internal precision of float32 matrix multiplications.                                                                                   |
+| strategy                | Literal\["auto", "ddp"\]                       | "auto"        | What strategy to use for training.                                                                                                               |
+| num_sanity_val_steps    | int                                            | 2             | Number of sanity validation steps performed before training.                                                                                     |
+| profiler                | Literal\["simple", "advanced"\] \| None        | None          | PL profiler for GPU/CPU/RAM utilization analysis                                                                                                 |
+| verbose                 | bool                                           | True          | Print all intermediate results to console.                                                                                                       |
 
 ### Preprocessing
 
diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py
index 8326ce48..fc634544 100644
--- a/luxonis_train/core/trainer.py
+++ b/luxonis_train/core/trainer.py
@@ -4,6 +4,7 @@
 from logging import getLogger
 from typing import Any, Literal
 
+import torch
 from lightning.pytorch.utilities import rank_zero_only  # type: ignore
 from luxonis_ml.utils import LuxonisFileSystem
 
@@ -39,6 +40,9 @@ def __init__(
         """
         super().__init__(cfg, opts)
 
+        if self.cfg.trainer.matmul_precision is not None:
+            torch.set_float32_matmul_precision(self.cfg.trainer.matmul_precision)
+
         if resume is not None:
             self.resume = str(LuxonisFileSystem.download(resume, self.run_save_dir))
         else:
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index 45dde192..e94c591e 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -203,6 +203,7 @@ class TrainerConfig(CustomBaseModel):
     strategy: Literal["auto", "ddp"] = "auto"
     num_sanity_val_steps: int = 2
     profiler: Literal["simple", "advanced"] | None = None
+    matmul_precision: Literal["medium", "high", "highest"] | None = None
     verbose: bool = True
 
     batch_size: int = 32

From 08300436944448f22644577c0a96ef77ba5a51fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Tue, 14 May 2024 18:55:31 +0200
Subject: [PATCH 16/28] Metrics - Fixed Missing Reset (#25)

* fixed reset not being called

* added metric resets

* removed inheritance

* proper oks reset

* removed unnecessary resets

* added annotations
---
 luxonis_train/attached_modules/metrics/common.py          | 8 ++++++--
 .../attached_modules/metrics/mean_average_precision.py    | 5 ++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py
index 27d1069a..6d16a4b4 100644
--- a/luxonis_train/attached_modules/metrics/common.py
+++ b/luxonis_train/attached_modules/metrics/common.py
@@ -1,6 +1,7 @@
 import logging
 
 import torchmetrics
+from torch import Tensor
 
 from .base_metric import BaseMetric
 
@@ -47,14 +48,17 @@ def __init__(self, **kwargs):
 
         self.metric = self.Metric(**kwargs)
 
-    def update(self, preds, target, *args, **kwargs):
+    def update(self, preds, target, *args, **kwargs) -> None:
         if self.task in ["multiclass"]:
             target = target.argmax(dim=1)
         self.metric.update(preds, target, *args, **kwargs)
 
-    def compute(self):
+    def compute(self) -> Tensor:
         return self.metric.compute()
 
+    def reset(self) -> None:
+        self.metric.reset()
+
 
 class Accuracy(TorchMetricWrapper):
     Metric = torchmetrics.Accuracy
diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py
index 34adbcd9..0a58d061 100644
--- a/luxonis_train/attached_modules/metrics/mean_average_precision.py
+++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py
@@ -12,7 +12,7 @@
 from .base_metric import BaseMetric
 
 
-class MeanAveragePrecision(BaseMetric, detection.MeanAveragePrecision):
+class MeanAveragePrecision(BaseMetric):
     """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object
     detection predictions.
 
@@ -62,6 +62,9 @@ def prepare(
 
         return output_list, label_list
 
+    def reset(self) -> None:
+        self.metric.reset()
+
     def compute(self) -> tuple[Tensor, dict[str, Tensor]]:
         metric_dict = self.metric.compute()
 

From 5a31f72976875ca9471a97827ff70410ef10b4e7 Mon Sep 17 00:00:00 2001
From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com>
Date: Wed, 15 May 2024 20:55:50 +0200
Subject: [PATCH 17/28] Deterministic Training Support (#23)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Added seed to config for reproducibility

* fixed seg drawing when using torch deterministic backend

* added deterministic order of creating nodes

* removed seed from example config

* added reproducibility to inspect

* formatting

---------

Co-authored-by: DrejcPesjak <dp8949@student.uni-lj.si>
Co-authored-by: Martin Kozlovský <martin.kozlovsky@luxonis.com>
---
 configs/README.md                                         | 1 +
 luxonis_train/__main__.py                                 | 3 +++
 .../visualizers/segmentation_visualizer.py                | 6 ++----
 luxonis_train/core/core.py                                | 6 ++++++
 luxonis_train/core/tuner.py                               | 7 +++++++
 luxonis_train/utils/config.py                             | 1 +
 luxonis_train/utils/general.py                            | 8 +++++---
 7 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/configs/README.md b/configs/README.md
index c1f4889b..01d1ebd3 100644
--- a/configs/README.md
+++ b/configs/README.md
@@ -144,6 +144,7 @@ Here you can change everything related to actual training of the model.
 
 | Key                     | Type                                           | Default value | Description                                                                                                                                      |
 | ----------------------- | ---------------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
+| seed                    | int                                            | None          | seed for reproducibility                                                                                                                         |
 | batch_size              | int                                            | 32            | batch size used for training                                                                                                                     |
 | accumulate_grad_batches | int                                            | 1             | number of batches for gradient accumulation                                                                                                      |
 | use_weighted_sampler    | bool                                           | False         | bool if use WeightedRandomSampler for training, only works with classification tasks                                                             |
diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index 94276b60..e3b9c7d5 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -103,6 +103,7 @@ def inspect(
     opts: OptsType = None,
 ):
     """Inspect dataset."""
+    from lightning.pytorch import seed_everything
     from luxonis_ml.data import (
         LuxonisDataset,
         TrainAugmentations,
@@ -128,6 +129,8 @@ def inspect(
             overrides[opts[i]] = opts[i + 1]
 
     cfg = Config.get_config(str(config), overrides)
+    if cfg.trainer.seed is not None:
+        seed_everything(cfg.trainer.seed, workers=True)
 
     image_size = cfg.trainer.preprocessing.train_image_size
 
diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
index 6d8f3c79..2b2dc7a3 100644
--- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
@@ -9,7 +9,6 @@
 from .utils import (
     Color,
     draw_segmentation_labels,
-    draw_segmentation_masks,
     get_color,
     seg_output_to_bool,
 )
@@ -63,10 +62,9 @@ def draw_predictions(
         for i in range(len(canvas)):
             prediction = predictions[i]
             mask = seg_output_to_bool(prediction)
-            mask = mask.to(canvas.device)
-            viz[i] = draw_segmentation_masks(
+            viz[i] = draw_segmentation_labels(
                 canvas[i].clone(), mask, colors=colors, **kwargs
-            )
+            ).to(canvas.device)
         return viz
 
     @staticmethod
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py
index 761bc26f..555e464a 100644
--- a/luxonis_train/core/core.py
+++ b/luxonis_train/core/core.py
@@ -92,6 +92,11 @@ def __init__(
         # NOTE: overriding logger in pl so it uses our logger to log device info
         rank_zero_module.log = logger
 
+        deterministic = False
+        if self.cfg.trainer.seed is not None:
+            pl.seed_everything(self.cfg.trainer.seed, workers=True)
+            deterministic = True
+
         self.train_augmentations = TrainAugmentations(
             image_size=self.cfg.trainer.preprocessing.train_image_size,
             augmentations=[
@@ -122,6 +127,7 @@ def __init__(
             # NOTE: this is likely PL bug,
             # should be configurable inside configure_callbacks(),
             callbacks=LuxonisProgressBar() if self.cfg.use_rich_text else None,
+            deterministic=deterministic,
         )
         self.dataset = LuxonisDataset(
             dataset_name=self.cfg.dataset.name,
diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py
index c9f8e151..d8e5fa51 100644
--- a/luxonis_train/core/tuner.py
+++ b/luxonis_train/core/tuner.py
@@ -101,6 +101,12 @@ def _objective(self, trial: optuna.trial.Trial) -> float:
             [LuxonisProgressBar()] if self.cfg.use_rich_text else []
         )
         callbacks.append(pruner_callback)
+
+        deterministic = False
+        if self.cfg.trainer.seed:
+            pl.seed_everything(cfg.trainer.seed, workers=True)
+            deterministic = True
+
         pl_trainer = pl.Trainer(
             accelerator=cfg.trainer.accelerator,
             devices=cfg.trainer.devices,
@@ -112,6 +118,7 @@ def _objective(self, trial: optuna.trial.Trial) -> float:
             num_sanity_val_steps=cfg.trainer.num_sanity_val_steps,
             profiler=cfg.trainer.profiler,
             callbacks=callbacks,
+            deterministic=deterministic,
         )
 
         pl_trainer.fit(
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index e94c591e..685c296f 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -206,6 +206,7 @@ class TrainerConfig(CustomBaseModel):
     matmul_precision: Literal["medium", "high", "highest"] | None = None
     verbose: bool = True
 
+    seed: int | None = None
     batch_size: int = 32
     accumulate_grad_batches: int = 1
     use_weighted_sampler: bool = False
diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py
index 9ea5884d..ebe75ebd 100644
--- a/luxonis_train/utils/general.py
+++ b/luxonis_train/utils/general.py
@@ -265,7 +265,7 @@ def validate_packet(data: Packet[Tensor], protocol: type[BaseModel]) -> Packet[T
 # TEST:
 def traverse_graph(
     graph: dict[str, list[str]], nodes: dict[str, T]
-) -> Generator[tuple[str, T, list[str], set[str]], None, None]:
+) -> Generator[tuple[str, T, list[str], list[str]], None, None]:
     """Traverses the graph in topological order.
 
     @type graph: dict[str, list[str]]
@@ -273,12 +273,14 @@ def traverse_graph(
         names, values are inputs to the node (list of node names).
     @type nodes: dict[str, T]
     @param nodes: Dictionary mapping node names to node objects.
-    @rtype: Generator[tuple[str, T, list[str], set[str]], None, None]
+    @rtype: Generator[tuple[str, T, list[str], list[str]], None, None]
     @return: Generator of tuples containing node name, node object, node dependencies
         and unprocessed nodes.
     @raises RuntimeError: If the graph is malformed.
     """
-    unprocessed_nodes = set(nodes.keys())
+    unprocessed_nodes = sorted(
+        set(nodes.keys())
+    )  # sort the set to allow reproducibility
     processed: set[str] = set()
 
     while unprocessed_nodes:

From 99b18575784ea9a86125884cfb4203d60cff9b86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Tue, 21 May 2024 05:30:34 +0200
Subject: [PATCH 18/28] Custom Loaders Support (#27)

* support for custom loaders and datasets

* updated configs

* custom loaders in inspect command

* updated inspect for multi-task labels

* removed custom loader from test config

* deleted comment

* deleted comment

* removed custom dataset

* removed comment

* skipping archiver test until fixed in luxonis-ml

* [Automated] Updated coverage badge

---------

Co-authored-by: GitHub Actions <actions@github.com>
---
 configs/classification_model.yaml             |   5 +-
 configs/coco_model.yaml                       |   6 +-
 configs/detection_model.yaml                  |   5 +-
 configs/example_export.yaml                   |   5 +-
 configs/example_tuning.yaml                   |   5 +-
 configs/keypoint_bbox_model.yaml              |   5 +-
 configs/resnet_model.yaml                     |   5 +-
 configs/segmentation_model.yaml               |   5 +-
 luxonis_train/__init__.py                     |   1 +
 luxonis_train/__main__.py                     | 113 ++++++++----------
 luxonis_train/callbacks/test_on_train_end.py  |  39 +-----
 luxonis_train/core/__init__.py                |   3 +-
 luxonis_train/core/archiver.py                |   2 +-
 luxonis_train/core/core.py                    |  84 ++++++-------
 luxonis_train/core/exporter.py                |   2 +-
 luxonis_train/core/inferer.py                 |   6 +-
 luxonis_train/core/trainer.py                 |  21 ++--
 luxonis_train/core/tuner.py                   |   7 +-
 luxonis_train/models/luxonis_model.py         |   4 +-
 luxonis_train/utils/config.py                 |  21 +---
 luxonis_train/utils/general.py                |  27 +++--
 luxonis_train/utils/loaders/__init__.py       |  13 +-
 luxonis_train/utils/loaders/base_loader.py    |  32 ++++-
 .../utils/loaders/luxonis_loader_torch.py     |  38 ++++--
 luxonis_train/utils/registry.py               |   3 +
 media/coverage_badge.svg                      |   4 +-
 tests/unittests/test_core/test_archiver.py    |   5 +-
 27 files changed, 239 insertions(+), 227 deletions(-)

diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml
index 62c1014e..5d2eb1f2 100755
--- a/configs/classification_model.yaml
+++ b/configs/classification_model.yaml
@@ -15,8 +15,9 @@ model:
         thickness: 2
         include_plot: True
 
-dataset:
-  name: cifar10_test
+loader:
+  params:
+    dataset_name: cifar10_test
 
 trainer:
   preprocessing:
diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml
index 67f3b91d..c8ffff69 100755
--- a/configs/coco_model.yaml
+++ b/configs/coco_model.yaml
@@ -95,12 +95,14 @@ tracker:
   wandb_entity: luxonis
   is_mlflow: False
 
-dataset:
-  name: coco_test
+loader:
   train_view: train
   val_view: val
   test_view: test
 
+  params:
+    dataset_name: coco_test
+
 trainer:
   accelerator: auto
   devices: auto
diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml
index 8d7f9c25..899e317d 100755
--- a/configs/detection_model.yaml
+++ b/configs/detection_model.yaml
@@ -10,8 +10,9 @@ model:
     params:
       use_neck: True
 
-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test
 
 trainer:
   preprocessing:
diff --git a/configs/example_export.yaml b/configs/example_export.yaml
index a999a2bd..7aadc30c 100755
--- a/configs/example_export.yaml
+++ b/configs/example_export.yaml
@@ -12,8 +12,9 @@ model:
       backbone: MicroNet
       task: binary
 
-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test
 
 trainer:
   preprocessing:
diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml
index 980036ae..41c4d8a8 100755
--- a/configs/example_tuning.yaml
+++ b/configs/example_tuning.yaml
@@ -11,8 +11,9 @@ model:
       backbone: MicroNet
       task: binary
 
-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test
 
 trainer:
   preprocessing:
diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml
index dc4fe3d7..8cdd3149 100755
--- a/configs/keypoint_bbox_model.yaml
+++ b/configs/keypoint_bbox_model.yaml
@@ -8,8 +8,9 @@ model:
   predefined_model:
     name: KeypointDetectionModel
 
-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test
 
 trainer:
   preprocessing:
diff --git a/configs/resnet_model.yaml b/configs/resnet_model.yaml
index e768d259..e8353870 100644
--- a/configs/resnet_model.yaml
+++ b/configs/resnet_model.yaml
@@ -29,8 +29,9 @@ model:
         thickness: 2
         include_plot: True
 
-dataset:
-  name: cifar10_test
+loader:
+  params:
+    dataset_name: cifar10_test
 
 trainer:
   batch_size: 4
diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml
index c26fb0cc..b7becbfa 100755
--- a/configs/segmentation_model.yaml
+++ b/configs/segmentation_model.yaml
@@ -11,8 +11,9 @@ model:
       backbone: MicroNet
       task: binary
 
-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test
 
 trainer:
   preprocessing:
diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py
index 59ec7367..066e1110 100644
--- a/luxonis_train/__init__.py
+++ b/luxonis_train/__init__.py
@@ -1,4 +1,5 @@
 from .attached_modules import *
+from .core import *
 from .models import *
 from .utils import *
 
diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index e3b9c7d5..f749439f 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -5,8 +5,10 @@
 from typing import Annotated, Optional
 
 import cv2
-import torch
 import typer
+from torch.utils.data import DataLoader
+
+from luxonis_train.utils.registry import LOADERS
 
 app = typer.Typer(help="Luxonis Train CLI", add_completion=False)
 
@@ -105,7 +107,6 @@ def inspect(
     """Inspect dataset."""
     from lightning.pytorch import seed_everything
     from luxonis_ml.data import (
-        LuxonisDataset,
         TrainAugmentations,
         ValAugmentations,
     )
@@ -117,7 +118,7 @@ def inspect(
         get_unnormalized_images,
     )
     from luxonis_train.utils.config import Config
-    from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn
+    from luxonis_train.utils.loaders import collate_fn
     from luxonis_train.utils.types import LabelType
 
     overrides = {}
@@ -134,43 +135,21 @@ def inspect(
 
     image_size = cfg.trainer.preprocessing.train_image_size
 
-    dataset = LuxonisDataset(
-        dataset_name=cfg.dataset.name,
-        team_id=cfg.dataset.team_id,
-        dataset_id=cfg.dataset.id,
-        bucket_type=cfg.dataset.bucket_type,
-        bucket_storage=cfg.dataset.bucket_storage,
-    )
-    augmentations = (
-        TrainAugmentations(
-            image_size=image_size,
-            augmentations=[
-                i.model_dump() for i in cfg.trainer.preprocessing.augmentations
-            ],
-            train_rgb=cfg.trainer.preprocessing.train_rgb,
-            keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
-        )
-        if view == "train"
-        else ValAugmentations(
-            image_size=image_size,
-            augmentations=[
-                i.model_dump() for i in cfg.trainer.preprocessing.augmentations
-            ],
-            train_rgb=cfg.trainer.preprocessing.train_rgb,
-            keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
-        )
+    augmentations = (TrainAugmentations if view == "train" else ValAugmentations)(
+        image_size=image_size,
+        augmentations=[i.model_dump() for i in cfg.trainer.preprocessing.augmentations],
+        train_rgb=cfg.trainer.preprocessing.train_rgb,
+        keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
     )
 
-    loader_train = LuxonisLoaderTorch(
-        dataset,
-        view=view,
-        augmentations=augmentations,
+    loader = LOADERS.get(cfg.loader.name)(
+        view=view, augmentations=augmentations, **cfg.loader.params
     )
 
-    pytorch_loader_train = torch.utils.data.DataLoader(
-        loader_train,
-        batch_size=4,
-        num_workers=1,
+    pytorch_loader = DataLoader(
+        loader,
+        batch_size=1,
+        num_workers=0,
         collate_fn=collate_fn,
     )
 
@@ -178,35 +157,41 @@ def inspect(
         os.makedirs(save_dir, exist_ok=True)
 
     counter = 0
-    for data in pytorch_loader_train:
-        imgs, label_dict = data
-        images = get_unnormalized_images(cfg, imgs)
-        for i, img in enumerate(images):
-            for label_type, labels in label_dict.items():
-                if label_type == LabelType.CLASSIFICATION:
-                    continue
-                elif label_type == LabelType.BOUNDINGBOX:
-                    img = draw_bounding_box_labels(
-                        img, labels[labels[:, 0] == i][:, 2:], colors="yellow", width=1
-                    )
-                elif label_type == LabelType.KEYPOINT:
-                    img = draw_keypoint_labels(
-                        img, labels[labels[:, 0] == i][:, 1:], colors="red"
+    for data in pytorch_loader:
+        imgs, task_dict = data
+        for task, label_dict in task_dict.items():
+            images = get_unnormalized_images(cfg, imgs)
+            for i, img in enumerate(images):
+                for label_type, labels in label_dict.items():
+                    if label_type == LabelType.CLASSIFICATION:
+                        continue
+                    elif label_type == LabelType.BOUNDINGBOX:
+                        img = draw_bounding_box_labels(
+                            img,
+                            labels[labels[:, 0] == i][:, 2:],
+                            colors="yellow",
+                            width=1,
+                        )
+                    elif label_type == LabelType.KEYPOINT:
+                        img = draw_keypoint_labels(
+                            img, labels[labels[:, 0] == i][:, 1:], colors="red"
+                        )
+                    elif label_type == LabelType.SEGMENTATION:
+                        img = draw_segmentation_labels(
+                            img, labels[i], alpha=0.8, colors="#5050FF"
+                        )
+
+                img_arr = img.permute(1, 2, 0).numpy()
+                img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR)
+                if save_dir is not None:
+                    counter += 1
+                    cv2.imwrite(
+                        os.path.join(save_dir, f"{counter}_{task}.png"), img_arr
                     )
-                elif label_type == LabelType.SEGMENTATION:
-                    img = draw_segmentation_labels(
-                        img, labels[i], alpha=0.8, colors="#5050FF"
-                    )
-
-            img_arr = img.permute(1, 2, 0).numpy()
-            img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR)
-            if save_dir is not None:
-                counter += 1
-                cv2.imwrite(os.path.join(save_dir, f"{counter}.png"), img_arr)
-            else:
-                cv2.imshow("img", img_arr)
-                if cv2.waitKey() == ord("q"):
-                    exit()
+                else:
+                    cv2.imshow(task, img_arr)
+        if save_dir is None and cv2.waitKey() == ord("q"):
+            exit()
 
 
 @app.command()
diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py
index 8cf23e3c..3f8da1db 100644
--- a/luxonis_train/callbacks/test_on_train_end.py
+++ b/luxonis_train/callbacks/test_on_train_end.py
@@ -1,9 +1,6 @@
 import lightning.pytorch as pl
-from luxonis_ml.data import LuxonisDataset, ValAugmentations
-from torch.utils.data import DataLoader
 
-from luxonis_train.utils.config import Config
-from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn
+import luxonis_train
 from luxonis_train.utils.registry import CALLBACKS
 
 
@@ -11,33 +8,7 @@
 class TestOnTrainEnd(pl.Callback):
     """Callback to perform a test run at the end of the training."""
 
-    def on_train_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None:
-        cfg: Config = pl_module.cfg
-
-        dataset = LuxonisDataset(
-            dataset_name=cfg.dataset.name,
-            team_id=cfg.dataset.team_id,
-            dataset_id=cfg.dataset.id,
-            bucket_type=cfg.dataset.bucket_type,
-            bucket_storage=cfg.dataset.bucket_storage,
-        )
-
-        loader_test = LuxonisLoaderTorch(
-            dataset,
-            view=cfg.dataset.test_view,
-            augmentations=ValAugmentations(
-                image_size=cfg.trainer.preprocessing.train_image_size,
-                augmentations=[
-                    i.model_dump() for i in cfg.trainer.preprocessing.augmentations
-                ],
-                train_rgb=cfg.trainer.preprocessing.train_rgb,
-                keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
-            ),
-        )
-        pytorch_loader_test = DataLoader(
-            loader_test,
-            batch_size=cfg.trainer.batch_size,
-            num_workers=cfg.trainer.num_workers,
-            collate_fn=collate_fn,
-        )
-        trainer.test(pl_module, pytorch_loader_test)
+    def on_train_end(
+        self, trainer: pl.Trainer, pl_module: "luxonis_train.models.LuxonisModel"
+    ) -> None:
+        trainer.test(pl_module, pl_module._core.pytorch_loaders["test"])
diff --git a/luxonis_train/core/__init__.py b/luxonis_train/core/__init__.py
index d3e89663..7e60f321 100644
--- a/luxonis_train/core/__init__.py
+++ b/luxonis_train/core/__init__.py
@@ -1,7 +1,8 @@
 from .archiver import Archiver
+from .core import Core
 from .exporter import Exporter
 from .inferer import Inferer
 from .trainer import Trainer
 from .tuner import Tuner
 
-__all__ = ["Exporter", "Trainer", "Tuner", "Inferer", "Archiver"]
+__all__ = ["Exporter", "Trainer", "Tuner", "Inferer", "Archiver", "Core"]
diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py
index 58fc231f..a0706846 100644
--- a/luxonis_train/core/archiver.py
+++ b/luxonis_train/core/archiver.py
@@ -45,7 +45,7 @@ def __init__(
             cfg=self.cfg,
             dataset_metadata=self.dataset_metadata,
             save_dir=self.run_save_dir,
-            input_shape=self.loader_train.input_shape,
+            input_shape=self.loaders["train"].input_shape,
         )
 
         self.model_name = self.cfg.model.name
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py
index 555e464a..60beb624 100644
--- a/luxonis_train/core/core.py
+++ b/luxonis_train/core/core.py
@@ -7,14 +7,16 @@
 import lightning_utilities.core.rank_zero as rank_zero_module
 import rich.traceback
 import torch
+import torch.utils.data as torch_data
 from lightning.pytorch.utilities import rank_zero_only  # type: ignore
-from luxonis_ml.data import LuxonisDataset, TrainAugmentations, ValAugmentations
+from luxonis_ml.data import TrainAugmentations, ValAugmentations
 from luxonis_ml.utils import reset_logging, setup_logging
 
 from luxonis_train.callbacks import LuxonisProgressBar
 from luxonis_train.utils.config import Config
 from luxonis_train.utils.general import DatasetMetadata
-from luxonis_train.utils.loaders import LuxonisLoaderTorch, collate_fn
+from luxonis_train.utils.loaders import collate_fn
+from luxonis_train.utils.registry import LOADERS
 from luxonis_train.utils.tracker import LuxonisTrackerPL
 
 logger = getLogger(__name__)
@@ -129,42 +131,19 @@ def __init__(
             callbacks=LuxonisProgressBar() if self.cfg.use_rich_text else None,
             deterministic=deterministic,
         )
-        self.dataset = LuxonisDataset(
-            dataset_name=self.cfg.dataset.name,
-            team_id=self.cfg.dataset.team_id,
-            dataset_id=self.cfg.dataset.id,
-            bucket_type=self.cfg.dataset.bucket_type,
-            bucket_storage=self.cfg.dataset.bucket_storage,
-        )
-
-        self.loader_train = LuxonisLoaderTorch(
-            self.dataset,
-            view=self.cfg.dataset.train_view,
-            augmentations=self.train_augmentations,
-        )
-        self.loader_val = LuxonisLoaderTorch(
-            self.dataset,
-            view=self.cfg.dataset.val_view,
-            augmentations=self.val_augmentations,
-        )
-        self.loader_test = LuxonisLoaderTorch(
-            self.dataset,
-            view=self.cfg.dataset.test_view,
-            augmentations=self.val_augmentations,
-        )
 
-        self.pytorch_loader_val = torch.utils.data.DataLoader(
-            self.loader_val,
-            batch_size=self.cfg.trainer.batch_size,
-            num_workers=self.cfg.trainer.num_workers,
-            collate_fn=collate_fn,
-        )
-        self.pytorch_loader_test = torch.utils.data.DataLoader(
-            self.loader_test,
-            batch_size=self.cfg.trainer.batch_size,
-            num_workers=self.cfg.trainer.num_workers,
-            collate_fn=collate_fn,
-        )
+        self.loaders = {
+            view: LOADERS.get(self.cfg.loader.name)(
+                augmentations=self.train_augmentations
+                if view == "train"
+                else self.val_augmentations,
+                view=self.cfg.loader.train_view
+                if view == "train"
+                else self.cfg.loader.val_view,
+                **self.cfg.loader.params,
+            )
+            for view in ["train", "val", "test"]
+        }
         sampler = None
         if self.cfg.trainer.use_weighted_sampler:
             classes_count = self.dataset.get_classes()[1]
@@ -175,21 +154,26 @@ def __init__(
             else:
                 weights = [1 / i for i in classes_count.values()]
                 num_samples = sum(classes_count.values())
-                sampler = torch.utils.data.WeightedRandomSampler(weights, num_samples)
-
-        self.pytorch_loader_train = torch.utils.data.DataLoader(
-            self.loader_train,
-            shuffle=True,
-            batch_size=self.cfg.trainer.batch_size,
-            num_workers=self.cfg.trainer.num_workers,
-            collate_fn=collate_fn,
-            drop_last=self.cfg.trainer.skip_last_batch,
-            sampler=sampler,
-        )
+                sampler = torch_data.WeightedRandomSampler(weights, num_samples)
+
+        self.pytorch_loaders = {
+            view: torch_data.DataLoader(
+                self.loaders[view],
+                batch_size=self.cfg.trainer.batch_size,
+                num_workers=self.cfg.trainer.num_workers,
+                collate_fn=collate_fn,
+                shuffle=view == "train",
+                drop_last=self.cfg.trainer.skip_last_batch
+                if view == "train"
+                else False,
+                sampler=sampler if view == "train" else None,
+            )
+            for view in ["train", "val", "test"]
+        }
         self.error_message = None
 
-        self.dataset_metadata = DatasetMetadata.from_dataset(self.dataset)
-        self.dataset_metadata.set_loader(self.pytorch_loader_train)
+        self.dataset_metadata = DatasetMetadata.from_loader(self.loaders["train"])
+        self.dataset_metadata.set_loader(self.pytorch_loaders["train"])
 
         self.cfg.save_data(os.path.join(self.run_save_dir, "config.yaml"))
 
diff --git a/luxonis_train/core/exporter.py b/luxonis_train/core/exporter.py
index 0efd6d56..5318931f 100644
--- a/luxonis_train/core/exporter.py
+++ b/luxonis_train/core/exporter.py
@@ -42,7 +42,7 @@ def __init__(
             )
         self.local_path = self.cfg.model.weights
         if input_shape is None:
-            self.input_shape = self.loader_val.input_shape
+            self.input_shape = self.loaders["val"].input_shape
         else:
             self.input_shape = Size(input_shape)
 
diff --git a/luxonis_train/core/inferer.py b/luxonis_train/core/inferer.py
index b4d13b77..710c4bb2 100644
--- a/luxonis_train/core/inferer.py
+++ b/luxonis_train/core/inferer.py
@@ -22,11 +22,11 @@ def __init__(
         opts += ["trainer.batch_size", "1"]
         super().__init__(cfg, opts)
         if view == "train":
-            self.loader = self.pytorch_loader_train
+            self.loader = self.pytorch_loaders["train"]
         elif view == "test":
-            self.loader = self.pytorch_loader_test
+            self.loader = self.pytorch_loaders["test"]
         else:
-            self.loader = self.pytorch_loader_val
+            self.loader = self.pytorch_loaders["val"]
         self.save_dir = save_dir
         if self.save_dir is not None:
             self.save_dir.mkdir(exist_ok=True, parents=True)
diff --git a/luxonis_train/core/trainer.py b/luxonis_train/core/trainer.py
index fc634544..ef20dc9e 100644
--- a/luxonis_train/core/trainer.py
+++ b/luxonis_train/core/trainer.py
@@ -52,11 +52,12 @@ def __init__(
             cfg=self.cfg,
             dataset_metadata=self.dataset_metadata,
             save_dir=self.run_save_dir,
-            input_shape=self.loader_train.input_shape,
+            input_shape=self.loaders["train"].input_shape,
         )
+        self.lightning_module._core = self
 
-        def graceful_exit(signum, frame):
-            logger.info("SIGTERM received, stopping training...")
+        def graceful_exit(signum: int, _):
+            logger.info(f"{signal.Signals(signum).name} received, stopping training...")
             ckpt_path = osp.join(self.run_save_dir, "resume.ckpt")
             self.pl_trainer.save_checkpoint(ckpt_path)
             self._upload_logs()
@@ -111,8 +112,8 @@ def train(self, new_thread: bool = False) -> None:
             logger.info("Starting training...")
             self._trainer_fit(
                 self.lightning_module,
-                self.pytorch_loader_train,
-                self.pytorch_loader_val,
+                self.pytorch_loaders["train"],
+                self.pytorch_loaders["val"],
             )
             logger.info("Training finished")
             logger.info(f"Checkpoints saved in: {self.get_save_dir()}")
@@ -128,8 +129,8 @@ def thread_exception_hook(args):
                 target=self._trainer_fit,
                 args=(
                     self.lightning_module,
-                    self.pytorch_loader_train,
-                    self.pytorch_loader_val,
+                    self.pytorch_loaders["train"],
+                    self.pytorch_loaders["val"],
                 ),
                 daemon=True,
             )
@@ -145,11 +146,11 @@ def test(
         """
 
         if view == "test":
-            loader = self.pytorch_loader_test
+            loader = self.pytorch_loaders["test"]
         elif view == "val":
-            loader = self.pytorch_loader_val
+            loader = self.pytorch_loaders["val"]
         elif view == "train":
-            loader = self.pytorch_loader_train
+            loader = self.pytorch_loaders["train"]
 
         if not new_thread:
             self.pl_trainer.test(self.lightning_module, loader)
diff --git a/luxonis_train/core/tuner.py b/luxonis_train/core/tuner.py
index d8e5fa51..4635789c 100644
--- a/luxonis_train/core/tuner.py
+++ b/luxonis_train/core/tuner.py
@@ -92,8 +92,9 @@ def _objective(self, trial: optuna.trial.Trial) -> float:
             cfg=cfg,
             dataset_metadata=self.dataset_metadata,
             save_dir=run_save_dir,
-            input_shape=self.loader_train.input_shape,
+            input_shape=self.loaders["train"].input_shape,
         )
+        lightning_module._core = self
         pruner_callback = PyTorchLightningPruningCallback(
             trial, monitor="val_loss/loss"
         )
@@ -123,8 +124,8 @@ def _objective(self, trial: optuna.trial.Trial) -> float:
 
         pl_trainer.fit(
             lightning_module,  # type: ignore
-            self.pytorch_loader_train,
-            self.pytorch_loader_val,
+            self.pytorch_loaders["train"],
+            self.pytorch_loaders["val"],
         )
         pruner_callback.check_pruned()
 
diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py
index 58aeccd1..e1dec644 100644
--- a/luxonis_train/models/luxonis_model.py
+++ b/luxonis_train/models/luxonis_model.py
@@ -12,6 +12,7 @@
 from lightning.pytorch.utilities import rank_zero_only  # type: ignore
 from torch import Size, Tensor, nn
 
+import luxonis_train
 from luxonis_train.attached_modules import (
     BaseAttachedModule,
     BaseLoss,
@@ -90,6 +91,7 @@ class LuxonisModel(pl.LightningModule):
     """
 
     _trainer: pl.Trainer
+    _core: "luxonis_train.core.Core"
     logger: LuxonisTrackerPL
 
     def __init__(
@@ -496,7 +498,7 @@ def process_losses(
         training_step_output["loss"] = final_loss.detach().cpu()
         return final_loss, training_step_output
 
-    def training_step(self, train_batch: tuple[Tensor, Labels]) -> Tensor:
+    def training_step(self, train_batch: tuple[Tensor, TaskLabels]) -> Tensor:
         """Performs one step of training with provided batch."""
         outputs = self.forward(*train_batch)
         assert outputs.losses, "Losses are empty, check if you have defined any loss"
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index 685c296f..40638103 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -1,11 +1,9 @@
 import logging
 import sys
-from enum import Enum
 from typing import Annotated, Any, Literal
 
-from luxonis_ml.data import BucketStorage, BucketType
 from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging
-from pydantic import BaseModel, ConfigDict, Field, field_serializer, model_validator
+from pydantic import BaseModel, ConfigDict, Field, model_validator
 
 from luxonis_train.utils.general import is_acyclic
 from luxonis_train.utils.registry import MODELS
@@ -131,21 +129,12 @@ class TrackerConfig(CustomBaseModel):
     is_mlflow: bool = False
 
 
-class DatasetConfig(CustomBaseModel):
-    name: str | None = None
-    id: str | None = None
-    team_name: str | None = None
-    team_id: str | None = None
-    bucket_type: BucketType = BucketType.INTERNAL
-    bucket_storage: BucketStorage = BucketStorage.LOCAL
-    json_mode: bool = False
+class LoaderConfig(CustomBaseModel):
+    name: str = "LuxonisLoaderTorch"
     train_view: str = "train"
     val_view: str = "val"
     test_view: str = "test"
-
-    @field_serializer("bucket_storage", "bucket_type")
-    def get_enum_value(self, v: Enum, _) -> str:
-        return str(v.value)
+    params: dict[str, Any] = {}
 
 
 class NormalizeAugmentationConfig(CustomBaseModel):
@@ -297,7 +286,7 @@ class TunerConfig(CustomBaseModel):
 class Config(LuxonisConfig):
     use_rich_text: bool = True
     model: ModelConfig
-    dataset: DatasetConfig = DatasetConfig()
+    loader: LoaderConfig = LoaderConfig()
     tracker: TrackerConfig = TrackerConfig()
     trainer: TrainerConfig = TrainerConfig()
     exporter: ExportConfig = ExportConfig()
diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py
index ebe75ebd..bf3d0e8f 100644
--- a/luxonis_train/utils/general.py
+++ b/luxonis_train/utils/general.py
@@ -2,12 +2,12 @@
 import math
 from typing import Generator, TypeVar
 
-from luxonis_ml.data import LuxonisDataset
 from pydantic import BaseModel
 from torch import Size, Tensor
 from torch.utils.data import DataLoader
 
 from luxonis_train.utils.boxutils import anchors_from_dataset
+from luxonis_train.utils.loaders import BaseLoaderTorch
 from luxonis_train.utils.types import LabelType, Packet
 
 
@@ -154,7 +154,7 @@ def set_loader(self, loader: DataLoader) -> None:
         self.loader = loader
 
     @classmethod
-    def from_dataset(cls, dataset: LuxonisDataset) -> "DatasetMetadata":
+    def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata":
         """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}.
 
         @type dataset: LuxonisDataset
@@ -162,22 +162,23 @@ def from_dataset(cls, dataset: LuxonisDataset) -> "DatasetMetadata":
         @rtype: DatasetMetadata
         @return: Instance of L{DatasetMetadata} created from the provided dataset.
         """
-        _, classes = dataset.get_classes()
-        skeletons = dataset.get_skeletons()
+        classes = loader.get_classes()
+        skeletons = loader.get_skeletons()
 
         keypoint_names = None
         connectivity = None
 
-        if len(skeletons) == 1:
-            name = list(skeletons.keys())[0]
-            keypoint_names = skeletons[name]["labels"]
-            connectivity = skeletons[name]["edges"]
+        if skeletons is not None:
+            if len(skeletons) == 1:
+                name = list(skeletons.keys())[0]
+                keypoint_names = skeletons[name]["labels"]
+                connectivity = skeletons[name]["edges"]
 
-        elif len(skeletons) > 1:
-            raise NotImplementedError(
-                "The dataset defines multiclass keypoint detection. "
-                "This is not yet supported."
-            )
+            elif len(skeletons) > 1:
+                raise NotImplementedError(
+                    "The dataset defines multiclass keypoint detection. "
+                    "This is not yet supported."
+                )
 
         return cls(
             classes=classes,
diff --git a/luxonis_train/utils/loaders/__init__.py b/luxonis_train/utils/loaders/__init__.py
index fe5cc4e8..d25e3856 100644
--- a/luxonis_train/utils/loaders/__init__.py
+++ b/luxonis_train/utils/loaders/__init__.py
@@ -1,4 +1,13 @@
-from .base_loader import collate_fn
+from .base_loader import (
+    BaseLoaderTorch,
+    LuxonisLoaderTorchOutput,
+    collate_fn,
+)
 from .luxonis_loader_torch import LuxonisLoaderTorch
 
-__all__ = ["LuxonisLoaderTorch", "collate_fn"]
+__all__ = [
+    "LuxonisLoaderTorch",
+    "collate_fn",
+    "BaseLoaderTorch",
+    "LuxonisLoaderTorchOutput",
+]
diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py
index be12b439..f96f65e1 100644
--- a/luxonis_train/utils/loaders/base_loader.py
+++ b/luxonis_train/utils/loaders/base_loader.py
@@ -1,6 +1,7 @@
-from abc import ABC, abstractmethod, abstractproperty
+from abc import ABC, abstractmethod
 
 import torch
+from luxonis_ml.data import Augmentations
 from luxonis_ml.utils.registry import AutoRegisterMeta
 from torch import Size, Tensor
 from torch.utils.data import Dataset
@@ -22,7 +23,16 @@ class BaseLoaderTorch(
     """Base abstract loader class that enforces LuxonisLoaderTorchOutput output label
     structure."""
 
-    @abstractproperty
+    def __init__(
+        self,
+        view: str,
+        augmentations: Augmentations | None = None,
+    ):
+        self.view = view
+        self.augmentations = augmentations
+
+    @property
+    @abstractmethod
     def input_shape(self) -> Size:
         """Input shape in [N,C,H,W] format."""
         ...
@@ -43,6 +53,24 @@ def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput:
         """
         ...
 
+    @abstractmethod
+    def get_classes(self) -> dict[LabelType, list[str]]:
+        """Gets classes according to computer vision task.
+
+        @rtype: dict[LabelType, list[str]]
+        @return: A dictionary mapping tasks to their classes.
+        """
+        pass
+
+    def get_skeletons(self) -> dict[str, dict] | None:
+        """Returns the dictionary defining the semantic skeleton for each class using
+        keypoints.
+
+        @rtype: dict[str, dict] | None
+        @return: A dictionary mapping classes to their skeleton definitions, or C{None}.
+        """
+        return None
+
 
 def collate_fn(
     batch: list[LuxonisLoaderTorchOutput],
diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py
index dfd4091a..6a375436 100644
--- a/luxonis_train/utils/loaders/luxonis_loader_torch.py
+++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py
@@ -1,5 +1,11 @@
 import numpy as np
-from luxonis_ml.data import Augmentations, LuxonisDataset, LuxonisLoader
+from luxonis_ml.data import (
+    BucketStorage,
+    BucketType,
+    LabelType,
+    LuxonisDataset,
+    LuxonisLoader,
+)
 from torch import Size, Tensor
 
 from .base_loader import BaseLoaderTorch, LuxonisLoaderTorchOutput
@@ -8,16 +14,27 @@
 class LuxonisLoaderTorch(BaseLoaderTorch):
     def __init__(
         self,
-        dataset: LuxonisDataset,
-        view: str = "train",
+        dataset_name: str | None = None,
+        team_id: str | None = None,
+        dataset_id: str | None = None,
+        bucket_type: BucketType = BucketType.INTERNAL,
+        bucket_storage: BucketStorage = BucketStorage.LOCAL,
         stream: bool = False,
-        augmentations: Augmentations | None = None,
+        **kwargs,
     ):
+        super().__init__(**kwargs)
+        self.dataset = LuxonisDataset(
+            dataset_name=dataset_name,
+            team_id=team_id,
+            dataset_id=dataset_id,
+            bucket_type=bucket_type,
+            bucket_storage=bucket_storage,
+        )
         self.base_loader = LuxonisLoader(
-            dataset=dataset,
-            view=view,
+            dataset=self.dataset,
+            view=self.view,
             stream=stream,
-            augmentations=augmentations,
+            augmentations=self.augmentations,
         )
 
     def __len__(self) -> int:
@@ -39,3 +56,10 @@ def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput:
                 annotations[key] = Tensor(annotations[key])  # type: ignore
 
         return tensor_img, group_annotations
+
+    def get_classes(self) -> dict[LabelType, list[str]]:
+        _, classes = self.dataset.get_classes()
+        return {LabelType(task): classes[task] for task in classes}
+
+    def get_skeletons(self) -> dict[str, dict] | None:
+        return self.dataset.get_skeletons()
diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py
index 7f76df7c..6da8893a 100644
--- a/luxonis_train/utils/registry.py
+++ b/luxonis_train/utils/registry.py
@@ -3,6 +3,9 @@
 
 from luxonis_ml.utils.registry import Registry
 
+LOADERS = Registry(name="loaders")
+"""Registry for all loaders."""
+
 CALLBACKS = Registry(name="callbacks")
 """Registry for all callbacks."""
 
diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index 7a18c7f4..b750dd9c 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">80%</text>
-        <text x="80" y="14">80%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">77%</text>
+        <text x="80" y="14">77%</text>
     </g>
 </svg>
diff --git a/tests/unittests/test_core/test_archiver.py b/tests/unittests/test_core/test_archiver.py
index fe10a46e..52449e6a 100644
--- a/tests/unittests/test_core/test_archiver.py
+++ b/tests/unittests/test_core/test_archiver.py
@@ -4,11 +4,13 @@
 import random
 import shutil
 import tarfile
+import unittest
 
 import cv2
 import lightning.pytorch as pl
 import numpy as np
 import onnx
+import pytest
 from luxonis_ml.data import LuxonisDataset
 from luxonis_ml.nn_archive.config_building_blocks.base_models import head_outputs
 from parameterized import parameterized
@@ -23,7 +25,8 @@
 HEAD_NAMES = [head_name for head_name in ImplementedHeads.__members__]
 
 
-class TestArchiver:
+@pytest.mark.skip()
+class TestArchiver(unittest.TestCase):
     @classmethod
     def setup_class(cls):
         """Creates all files required for testing."""

From b6b46889002da00fd7f832c4a1c1b3d957175a1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Tue, 21 May 2024 15:46:31 +0200
Subject: [PATCH 19/28] enums handling (#31)

---
 luxonis_train/utils/loaders/luxonis_loader_torch.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py
index 6a375436..b2eeb168 100644
--- a/luxonis_train/utils/loaders/luxonis_loader_torch.py
+++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py
@@ -1,3 +1,5 @@
+from typing import Literal
+
 import numpy as np
 from luxonis_ml.data import (
     BucketStorage,
@@ -17,8 +19,8 @@ def __init__(
         dataset_name: str | None = None,
         team_id: str | None = None,
         dataset_id: str | None = None,
-        bucket_type: BucketType = BucketType.INTERNAL,
-        bucket_storage: BucketStorage = BucketStorage.LOCAL,
+        bucket_type: Literal["internal", "external"] = "internal",
+        bucket_storage: Literal["local", "s3", "gcs", "azure"] = "local",
         stream: bool = False,
         **kwargs,
     ):
@@ -27,8 +29,8 @@ def __init__(
             dataset_name=dataset_name,
             team_id=team_id,
             dataset_id=dataset_id,
-            bucket_type=bucket_type,
-            bucket_storage=bucket_storage,
+            bucket_type=BucketType(bucket_type),
+            bucket_storage=BucketStorage(bucket_storage),
         )
         self.base_loader = LuxonisLoader(
             dataset=self.dataset,

From 72afb721ac093b269947dd5168a92016820beeca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Fri, 24 May 2024 18:28:46 +0200
Subject: [PATCH 20/28] GPUStatsMonitor (#29)

Co-authored-by: GitHub Actions <actions@github.com>
---
 configs/coco_model.yaml                      |   1 -
 luxonis_train/__main__.py                    |   6 +-
 luxonis_train/callbacks/README.md            |   9 +-
 luxonis_train/callbacks/__init__.py          |   2 +
 luxonis_train/callbacks/gpu_stats_monitor.py | 293 +++++++++++++++++++
 luxonis_train/core/core.py                   |   2 +-
 luxonis_train/models/luxonis_model.py        |  21 +-
 media/coverage_badge.svg                     |   4 +-
 requirements.txt                             |   1 +
 9 files changed, 326 insertions(+), 13 deletions(-)
 create mode 100644 luxonis_train/callbacks/gpu_stats_monitor.py

diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml
index c8ffff69..cad138a5 100755
--- a/configs/coco_model.yaml
+++ b/configs/coco_model.yaml
@@ -155,7 +155,6 @@ trainer:
         monitor: val/loss
         mode: min
         verbose: true
-    - name: DeviceStatsMonitor
     - name: ExportOnTrainEnd
     - name: TestOnTrainEnd
 
diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index f749439f..7b8e0251 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -10,7 +10,11 @@
 
 from luxonis_train.utils.registry import LOADERS
 
-app = typer.Typer(help="Luxonis Train CLI", add_completion=False)
+app = typer.Typer(
+    help="Luxonis Train CLI",
+    add_completion=False,
+    pretty_exceptions_show_locals=False,
+)
 
 
 class View(str, Enum):
diff --git a/luxonis_train/callbacks/README.md b/luxonis_train/callbacks/README.md
index be441017..6c4d635b 100644
--- a/luxonis_train/callbacks/README.md
+++ b/luxonis_train/callbacks/README.md
@@ -15,11 +15,12 @@ List of all supported callbacks.
 
 List of supported callbacks from `lightning.pytorch`.
 
+- [GPUStatsMonitor](https://pytorch-lightning.readthedocs.io/en/1.5.10/api/pytorch_lightning.callbacks.gpu_stats_monitor.html)
 - [DeviceStatsMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.DeviceStatsMonitor.html#lightning.pytorch.callbacks.DeviceStatsMonitor)
-- [ EarlyStopping ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html#lightning.pytorch.callbacks.EarlyStopping)
-- [ LearningRateMonitor ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor)
-- [ ModelCheckpoint ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint)
-- [ RichModelSummary ](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary)
+- [EarlyStopping](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.EarlyStopping.html#lightning.pytorch.callbacks.EarlyStopping)
+- [LearningRateMonitor](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html#lightning.pytorch.callbacks.LearningRateMonitor)
+- [ModelCheckpoint](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html#lightning.pytorch.callbacks.ModelCheckpoint)
+- [RichModelSummary](https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html#lightning.pytorch.callbacks.RichModelSummary)
   - Added automatically if `use_rich_text` is set to `True` in [config](../../configs/README.md#topleveloptions).
 
 ## ExportOnTrainEnd
diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py
index ae1fe86e..84d2d1cf 100644
--- a/luxonis_train/callbacks/__init__.py
+++ b/luxonis_train/callbacks/__init__.py
@@ -10,6 +10,7 @@
 
 from .archive_on_train_end import ArchiveOnTrainEnd
 from .export_on_train_end import ExportOnTrainEnd
+from .gpu_stats_monitor import GPUStatsMonitor
 from .luxonis_progress_bar import LuxonisProgressBar
 from .metadata_logger import MetadataLogger
 from .module_freezer import ModuleFreezer
@@ -31,4 +32,5 @@
     "ModuleFreezer",
     "TestOnTrainEnd",
     "UploadCheckpoint",
+    "GPUStatsMonitor",
 ]
diff --git a/luxonis_train/callbacks/gpu_stats_monitor.py b/luxonis_train/callbacks/gpu_stats_monitor.py
new file mode 100644
index 00000000..9479d4d2
--- /dev/null
+++ b/luxonis_train/callbacks/gpu_stats_monitor.py
@@ -0,0 +1,293 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+GPU Stats Monitor
+=================
+
+Monitors and logs GPU stats during training.
+
+"""
+
+import os
+import shutil
+import subprocess
+import time
+from typing import Any, Dict, List, Optional, Tuple
+
+import pytorch_lightning as pl
+import torch
+from lightning.pytorch.accelerators import CUDAAccelerator  # type: ignore
+from pytorch_lightning.utilities import rank_zero_only
+from pytorch_lightning.utilities.exceptions import (
+    MisconfigurationException,  # type: ignore
+)
+from pytorch_lightning.utilities.parsing import AttributeDict
+from pytorch_lightning.utilities.types import STEP_OUTPUT
+
+from luxonis_train.utils.registry import CALLBACKS
+
+
+@CALLBACKS.register_module()
+class GPUStatsMonitor(pl.Callback):
+    """Automatically monitors and logs GPU stats during training stage.
+    C{GPUStatsMonitor} is a callback and in order to use it you need to assign a logger
+    in the C{Trainer}.
+
+    Args:
+        memory_utilization: Set to C{True} to monitor used, free and percentage of memory
+            utilization at the start and end of each step. Default: C{True}.
+        gpu_utilization: Set to C{True} to monitor percentage of GPU utilization
+            at the start and end of each step. Default: C{True}.
+        intra_step_time: Set to C{True} to monitor the time of each step. Default: C{False}.
+        inter_step_time: Set to C{True} to monitor the time between the end of one step
+            and the start of the next step. Default: C{False}.
+        fan_speed: Set to C{True} to monitor percentage of fan speed. Default: C{False}.
+        temperature: Set to C{True} to monitor the memory and gpu temperature in degrees Celsius.
+            Default: C{False}.
+
+    Raises:
+        MisconfigurationException:
+            If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger.
+
+    Example::
+
+        >>> from pytorch_lightning import Trainer
+        >>> from pytorch_lightning.callbacks import GPUStatsMonitor
+        >>> gpu_stats = GPUStatsMonitor() # doctest: +SKIP
+        >>> trainer = Trainer(callbacks=[gpu_stats]) # doctest: +SKIP
+
+    GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows:
+
+    - **fan.speed** – The fan speed value is the percent of maximum speed that the device's fan is currently
+      intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed.
+      If the fan is physically blocked and unable to spin, this output will not match the actual fan speed.
+      Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.
+    - **memory.used** – Total memory allocated by active contexts.
+    - **memory.free** – Total free memory.
+    - **utilization.gpu** – Percent of time over the past sample period during which one or more kernels was
+      executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product.
+    - **utilization.memory** – Percent of time over the past sample period during which global (device) memory was
+      being read or written. The sample period may be between 1 second and 1/6 second depending on the product.
+    - **temperature.gpu** – Core GPU temperature, in degrees C.
+    - **temperature.memory** – HBM memory temperature, in degrees C.
+    """
+
+    def __init__(
+        self,
+        memory_utilization: bool = True,
+        gpu_utilization: bool = True,
+        intra_step_time: bool = False,
+        inter_step_time: bool = False,
+        fan_speed: bool = False,
+        temperature: bool = False,
+    ):
+        super().__init__()
+
+        if shutil.which("nvidia-smi") is None:
+            raise MisconfigurationException(
+                "Cannot use GPUStatsMonitor callback because NVIDIA driver is not installed."
+            )
+
+        self._log_stats = AttributeDict(
+            {
+                "memory_utilization": memory_utilization,
+                "gpu_utilization": gpu_utilization,
+                "intra_step_time": intra_step_time,
+                "inter_step_time": inter_step_time,
+                "fan_speed": fan_speed,
+                "temperature": temperature,
+            }
+        )
+
+        # The logical device IDs for selected devices
+        self._device_ids: List[int] = []  # will be assigned later in setup()
+
+        # The unmasked real GPU IDs
+        self._gpu_ids: List[str] = []  # will be assigned later in setup()
+
+    @staticmethod
+    def is_available() -> bool:
+        if shutil.which("nvidia-smi") is None:
+            return False
+        return CUDAAccelerator.is_available()
+
+    def setup(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        stage: Optional[str] = None,
+    ) -> None:
+        """Validate logger and CUDA availability, then resolve the monitored GPU IDs."""
+        if not trainer.logger:
+            raise MisconfigurationException(
+                "Cannot use GPUStatsMonitor callback with Trainer that has no logger."
+            )
+
+        if not CUDAAccelerator.is_available():
+            raise MisconfigurationException(
+                "You are using GPUStatsMonitor but the CUDA Accelerator is not available."
+            )
+
+        # The logical device IDs for selected devices, deduplicated and sorted
+        self._device_ids = sorted(set(trainer.device_ids))
+
+        # The unmasked real GPU IDs
+        self._gpu_ids = self._get_gpu_ids(self._device_ids)
+
+    def on_train_epoch_start(
+        self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"
+    ) -> None:
+        self._snap_intra_step_time: Optional[float] = None
+        self._snap_inter_step_time: Optional[float] = None
+
+    @rank_zero_only
+    def on_train_batch_start(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        batch: Any,
+        batch_idx: int,
+    ) -> None:
+        if self._log_stats.intra_step_time:
+            self._snap_intra_step_time = time.time()
+
+        if not trainer._logger_connector.should_update_logs:
+            return
+
+        gpu_stat_keys = self._get_gpu_stat_keys()
+        gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys])
+        logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys)
+
+        if self._log_stats.inter_step_time and self._snap_inter_step_time:
+            # First log at beginning of second step
+            logs["batch_time/inter_step (ms)"] = (
+                time.time() - self._snap_inter_step_time
+            ) * 1000
+
+        assert trainer.logger is not None
+        trainer.logger.log_metrics(logs, step=trainer.global_step)
+
+    @rank_zero_only
+    def on_train_batch_end(
+        self,
+        trainer: "pl.Trainer",
+        pl_module: "pl.LightningModule",
+        outputs: STEP_OUTPUT,
+        batch: Any,
+        batch_idx: int,
+    ) -> None:
+        if self._log_stats.inter_step_time:
+            self._snap_inter_step_time = time.time()
+
+        if not trainer._logger_connector.should_update_logs:
+            return
+
+        gpu_stat_keys = self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys()
+        gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys])
+        logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys)
+
+        if self._log_stats.intra_step_time and self._snap_intra_step_time:
+            logs["batch_time/intra_step (ms)"] = (
+                time.time() - self._snap_intra_step_time
+            ) * 1000
+
+        assert trainer.logger is not None
+        trainer.logger.log_metrics(logs, step=trainer.global_step)
+
+    @staticmethod
+    def _get_gpu_ids(device_ids: List[int]) -> List[str]:
+        """Get the unmasked real GPU IDs."""
+        # All devices if `CUDA_VISIBLE_DEVICES` unset
+        default = ",".join(str(i) for i in range(torch.cuda.device_count()))
+        cuda_visible_devices: List[str] = os.getenv(
+            "CUDA_VISIBLE_DEVICES", default=default
+        ).split(",")
+        return [cuda_visible_devices[device_id].strip() for device_id in device_ids]
+
+    def _get_gpu_stats(self, queries: List[str]) -> List[List[float]]:
+        """Run nvidia-smi and return one row of parsed floats per output line."""
+        if not queries:
+            return []
+
+        gpu_query = ",".join(queries)
+        output_format = "csv,nounits,noheader"
+        gpu_ids = ",".join(self._gpu_ids)
+        result = subprocess.run(
+            [
+                # it's ok to suppress the warning here since we ensure nvidia-smi exists during init
+                shutil.which("nvidia-smi"),  # type: ignore
+                f"--query-gpu={gpu_query}",
+                f"--format={output_format}",
+                f"--id={gpu_ids}",
+            ],
+            encoding="utf-8",
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,  # for backward compatibility with python version 3.6
+            check=True,
+        )
+
+        def _to_float(x: str) -> float:
+            try:
+                return float(x)
+            except ValueError:
+                return 0.0
+
+        # Text-mode output uses "\n" on every platform, so do not split on os.linesep.
+        return [
+            [_to_float(x) for x in s.split(", ")]
+            for s in result.stdout.strip().splitlines()
+        ]
+
+    @staticmethod
+    def _parse_gpu_stats(
+        device_ids: List[int], stats: List[List[float]], keys: List[Tuple[str, str]]
+    ) -> Dict[str, float]:
+        """Parse the gpu stats into a loggable dict."""
+        logs = {}
+        for i, device_id in enumerate(device_ids):
+            for j, (x, unit) in enumerate(keys):
+                if unit == "%":
+                    unit = "percent"
+                logs[f"GPU_{device_id}/{x} - {unit}"] = stats[i][j]
+        return logs
+
+    def _get_gpu_stat_keys(self) -> List[Tuple[str, str]]:
+        """Get the GPU stats keys."""
+        stat_keys = []
+
+        if self._log_stats.gpu_utilization:
+            stat_keys.append(("utilization.gpu", "%"))
+
+        if self._log_stats.memory_utilization:
+            stat_keys.extend(
+                [
+                    ("memory.used", "MB"),
+                    ("memory.free", "MB"),
+                    ("utilization.memory", "%"),
+                ]
+            )
+
+        return stat_keys
+
+    def _get_gpu_device_stat_keys(self) -> List[Tuple[str, str]]:
+        """Get the device stats keys."""
+        stat_keys = []
+
+        if self._log_stats.fan_speed:
+            stat_keys.append(("fan.speed", "%"))
+
+        if self._log_stats.temperature:
+            stat_keys.extend([("temperature.gpu", "°C"), ("temperature.memory", "°C")])
+
+        return stat_keys
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py
index 60beb624..6b02242f 100644
--- a/luxonis_train/core/core.py
+++ b/luxonis_train/core/core.py
@@ -68,7 +68,7 @@ def __init__(
         opts = opts or []
 
         if self.cfg.use_rich_text:
-            rich.traceback.install(suppress=[pl, torch])
+            rich.traceback.install(suppress=[pl, torch], show_locals=False)
 
         self.rank = rank_zero_only.rank
 
diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py
index e1dec644..d3ed26a2 100644
--- a/luxonis_train/models/luxonis_model.py
+++ b/luxonis_train/models/luxonis_model.py
@@ -24,6 +24,8 @@
     get_unnormalized_images,
 )
 from luxonis_train.callbacks import (
+    DeviceStatsMonitor,
+    GPUStatsMonitor,
     LuxonisProgressBar,
     ModuleFreezer,
 )
@@ -620,9 +622,9 @@ def configure_callbacks(self) -> list[pl.Callback]:
         self.best_val_metric_checkpoints_path = f"{self.save_dir}/best_val_metric"
         model_name = self.cfg.model.name
 
-        callbacks: list[pl.Callback] = []
+        user_callbacks = [c.name for c in self.cfg.trainer.callbacks]
 
-        callbacks.append(
+        callbacks: list[pl.Callback] = [
             ModelCheckpoint(
                 monitor="val/loss",
                 dirpath=self.min_val_loss_checkpoints_path,
@@ -630,8 +632,19 @@ def configure_callbacks(self) -> list[pl.Callback]:
                 auto_insert_metric_name=False,
                 save_top_k=self.cfg.trainer.save_top_k,
                 mode="min",
-            )
-        )
+            ),
+        ]
+        if "DeviceStatsMonitor" not in user_callbacks:
+            callbacks.append(DeviceStatsMonitor(cpu_stats=True))
+
+        if "GPUStatsMonitor" not in user_callbacks:
+            if GPUStatsMonitor.is_available():
+                callbacks.append(GPUStatsMonitor())
+            else:
+                logger.warning(
+                    "GPUStatsMonitor is not available for this machine."
+                    "Verify that `nvidia-smi` is installed."
+                )
 
         if self.main_metric is not None:
             main_metric = self.main_metric.replace("/", "_")
diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index b750dd9c..90299371 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">77%</text>
-        <text x="80" y="14">77%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">76%</text>
+        <text x="80" y="14">76%</text>
     </g>
 </svg>
diff --git a/requirements.txt b/requirements.txt
index 7f7e996a..6dc87275 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,3 +16,4 @@ tensorboard>=2.10.1
 torchvision>=0.16.0
 typer>=0.9.0
 mlflow>=2.10.0
+psutil>=5.0.0

From 5893c3ef48c908d8e0d1446cdb7fd219559d56c6 Mon Sep 17 00:00:00 2001
From: Jernej Sabadin <116955183+JSabadin@users.noreply.github.com>
Date: Fri, 24 May 2024 18:29:24 +0200
Subject: [PATCH 21/28] More Efficient Keypoint Export (#28)

---
 luxonis_train/nodes/implicit_keypoint_bbox_head.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
index 7f0c3d61..76a66eb6 100644
--- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py
+++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
@@ -197,10 +197,9 @@ def _build_predictions(
         kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(x_keypoints)
         kpt_x = (kpt_x + grid_x) * stride
         kpt_y = (kpt_y + grid_y) * stride
-        out_kpt = torch.stack([kpt_x, kpt_y, kpt_vis.sigmoid()], dim=-1).reshape(
-            *kpt_x.shape[:-1], -1
-        )
-
+        kpt_vis_sig = kpt_vis.sigmoid()
+        out_kpt = torch.cat((kpt_x, kpt_y, kpt_vis_sig), dim=-1)
+        out_kpt = out_kpt.reshape(*kpt_x.shape[:-1], -1)
         out = torch.cat((out_bbox, out_kpt), dim=-1)
 
         return out.reshape(batch_size, -1, self.n_out)

From 4110f78fe50a3ba5cadc0954f0b651712d3b3bf2 Mon Sep 17 00:00:00 2001
From: KlemenSkrlj <47853619+klemen1999@users.noreply.github.com>
Date: Fri, 24 May 2024 18:34:47 +0200
Subject: [PATCH 22/28] Added active param to augmentations (#32)

---
 luxonis_train/__main__.py     |  4 +++-
 luxonis_train/core/core.py    | 28 +++++++++++++++++-----------
 luxonis_train/utils/config.py |  8 ++++++++
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index 7b8e0251..c76f28c1 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -141,7 +141,9 @@ def inspect(
 
     augmentations = (TrainAugmentations if view == "train" else ValAugmentations)(
         image_size=image_size,
-        augmentations=[i.model_dump() for i in cfg.trainer.preprocessing.augmentations],
+        augmentations=[
+            i.model_dump() for i in cfg.trainer.preprocessing.get_active_augmentations()
+        ],
         train_rgb=cfg.trainer.preprocessing.train_rgb,
         keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
     )
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py
index 6b02242f..d23787fc 100644
--- a/luxonis_train/core/core.py
+++ b/luxonis_train/core/core.py
@@ -102,7 +102,8 @@ def __init__(
         self.train_augmentations = TrainAugmentations(
             image_size=self.cfg.trainer.preprocessing.train_image_size,
             augmentations=[
-                i.model_dump() for i in self.cfg.trainer.preprocessing.augmentations
+                i.model_dump()
+                for i in self.cfg.trainer.preprocessing.get_active_augmentations()
             ],
             train_rgb=self.cfg.trainer.preprocessing.train_rgb,
             keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio,
@@ -110,7 +111,8 @@ def __init__(
         self.val_augmentations = ValAugmentations(
             image_size=self.cfg.trainer.preprocessing.train_image_size,
             augmentations=[
-                i.model_dump() for i in self.cfg.trainer.preprocessing.augmentations
+                i.model_dump()
+                for i in self.cfg.trainer.preprocessing.get_active_augmentations()
             ],
             train_rgb=self.cfg.trainer.preprocessing.train_rgb,
             keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio,
@@ -134,12 +136,16 @@ def __init__(
 
         self.loaders = {
             view: LOADERS.get(self.cfg.loader.name)(
-                augmentations=self.train_augmentations
-                if view == "train"
-                else self.val_augmentations,
-                view=self.cfg.loader.train_view
-                if view == "train"
-                else self.cfg.loader.val_view,
+                augmentations=(
+                    self.train_augmentations
+                    if view == "train"
+                    else self.val_augmentations
+                ),
+                view=(
+                    self.cfg.loader.train_view
+                    if view == "train"
+                    else self.cfg.loader.val_view
+                ),
                 **self.cfg.loader.params,
             )
             for view in ["train", "val", "test"]
@@ -163,9 +169,9 @@ def __init__(
                 num_workers=self.cfg.trainer.num_workers,
                 collate_fn=collate_fn,
                 shuffle=view == "train",
-                drop_last=self.cfg.trainer.skip_last_batch
-                if view == "train"
-                else False,
+                drop_last=(
+                    self.cfg.trainer.skip_last_batch if view == "train" else False
+                ),
                 sampler=sampler if view == "train" else None,
             )
             for view in ["train", "val", "test"]
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index 40638103..dc2f737d 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -147,6 +147,7 @@ class NormalizeAugmentationConfig(CustomBaseModel):
 
 class AugmentationConfig(CustomBaseModel):
     name: str
+    active: bool = True
     params: dict[str, Any] = {}
 
 
@@ -167,6 +168,13 @@ def check_normalize(self):
             )
         return self
 
+    def get_active_augmentations(self) -> list[AugmentationConfig]:
+        """Returns list of augmentations that are active
+        @rtype: list[AugmentationConfig]
+        @return: Filtered list of active augmentation configs
+        """
+        return [aug for aug in self.augmentations if aug.active]
+
 
 class CallbackConfig(CustomBaseModel):
     name: str

From 36a92a665b71c336ddd5648bd4d350ee3376ea7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Thu, 30 May 2024 18:54:06 +0200
Subject: [PATCH 23/28] Fix Archiver Pre-Processing (#34)

---
 .github/workflows/tests.yaml                 |  2 ++
 luxonis_train/callbacks/test_on_train_end.py | 14 ++++++++++++++
 luxonis_train/core/archiver.py               |  7 +++++--
 luxonis_train/utils/config.py                |  3 ++-
 tests/integration/test_sanity.py             |  4 ++++
 5 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index b5c0e44f..0b4f51da 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -50,6 +50,8 @@ jobs:
       run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml
 
     - name: Run tests [Windows, macOS]
+      env:
+        PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0
       if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10'
       run: pytest tests --junit-xml pytest.xml
 
diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py
index 3f8da1db..bf7db341 100644
--- a/luxonis_train/callbacks/test_on_train_end.py
+++ b/luxonis_train/callbacks/test_on_train_end.py
@@ -1,4 +1,5 @@
 import lightning.pytorch as pl
+from lightning.pytorch.callbacks import ModelCheckpoint
 
 import luxonis_train
 from luxonis_train.utils.registry import CALLBACKS
@@ -11,4 +12,17 @@ class TestOnTrainEnd(pl.Callback):
     def on_train_end(
         self, trainer: pl.Trainer, pl_module: "luxonis_train.models.LuxonisModel"
     ) -> None:
+        # `trainer.test` would delete the paths so we need to save them
+        best_paths = {
+            hash(callback.monitor): callback.best_model_path
+            for callback in trainer.callbacks  # type: ignore
+            if isinstance(callback, ModelCheckpoint)
+        }
+
         trainer.test(pl_module, pl_module._core.pytorch_loaders["test"])
+
+        # Restore the paths
+        for callback in trainer.callbacks:  # type: ignore
+            if isinstance(callback, ModelCheckpoint):
+                if hash(callback.monitor) in best_paths:
+                    callback.best_model_path = best_paths[hash(callback.monitor)]
diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py
index a0706846..1473df1c 100644
--- a/luxonis_train/core/archiver.py
+++ b/luxonis_train/core/archiver.py
@@ -72,9 +72,12 @@ def archive(self, executable_path: str):
         _, executable_suffix = os.path.splitext(executable_fname)
         self.archive_name += f"_{executable_suffix[1:]}"
 
+        def _mult(lst: list[float | int]) -> list[float]:
+            return [round(x * 255.0, 5) for x in lst]
+
         preprocessing = {  # TODO: keep preprocessing same for each input?
-            "mean": self.cfg.trainer.preprocessing.normalize.params["mean"],
-            "scale": self.cfg.trainer.preprocessing.normalize.params["std"],
+            "mean": _mult(self.cfg.trainer.preprocessing.normalize.params["mean"]),
+            "scale": _mult(self.cfg.trainer.preprocessing.normalize.params["std"]),
             "reverse_channels": self.cfg.trainer.preprocessing.train_rgb,
             "interleaved_to_planar": False,  # TODO: make it modifiable?
         }
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index dc2f737d..875819e2 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -169,7 +169,8 @@ def check_normalize(self):
         return self
 
     def get_active_augmentations(self) -> list[AugmentationConfig]:
-        """Returns list of augmentations that are active
+        """Returns list of augmentations that are active.
+
         @rtype: list[AugmentationConfig]
         @return: Filtered list of active augmentation configs
         """
diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py
index 8b6f872b..efb3ded7 100644
--- a/tests/integration/test_sanity.py
+++ b/tests/integration/test_sanity.py
@@ -22,6 +22,8 @@ def test_sanity(config_file):
         "1",
         "trainer.callbacks",
         "[]",
+        "trainer.batch_size",
+        "1",
     ]
     result = subprocess.run(
         ["luxonis_train", "train", "--config", f"configs/{config_file}", *opts],
@@ -80,6 +82,8 @@ def test_tuner():
             "[]",
             "tuner.n_trials",
             "4",
+            "trainer.batch_size",
+            "1",
         ],
     )
     assert result.returncode == 0

From 1d9998b1416b08e7b0a1d6423606a8467441393c Mon Sep 17 00:00:00 2001
From: Jernej Sabadin <116955183+JSabadin@users.noreply.github.com>
Date: Fri, 31 May 2024 12:57:06 +0200
Subject: [PATCH 24/28] EfficientRep Variants (#33)

---
 luxonis_train/nodes/efficientrep.py | 34 ++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/luxonis_train/nodes/efficientrep.py b/luxonis_train/nodes/efficientrep.py
index 4e92222f..24e43397 100644
--- a/luxonis_train/nodes/efficientrep.py
+++ b/luxonis_train/nodes/efficientrep.py
@@ -5,6 +5,7 @@
 """
 
 import logging
+from typing import Literal
 
 from torch import Tensor, nn
 
@@ -23,6 +24,7 @@
 class EfficientRep(BaseNode[Tensor, list[Tensor]]):
     def __init__(
         self,
+        variant: Literal["s", "n", "m", "l"] = "n",
         channels_list: list[int] | None = None,
         num_repeats: list[int] | None = None,
         depth_mul: float = 0.33,
@@ -31,21 +33,33 @@ def __init__(
     ):
         """EfficientRep backbone.
 
+        @type variant: Literal["s", "n", "m", "l"]
+        @param variant: EfficientRep variant. Defaults to "n".
         @type channels_list: list[int] | None
-        @param channels_list: List of number of channels for each block. Defaults to
-            C{[64, 128, 256, 512, 1024]}.
+        @param channels_list: List of number of channels for each block. If unspecified,
+            defaults to [64, 128, 256, 512, 1024].
         @type num_repeats: list[int] | None
-        @param num_repeats: List of number of repeats of RepVGGBlock. Defaults to C{[1,
-            6, 12, 18, 6]}.
+        @param num_repeats: List of number of repeats of RepVGGBlock. If unspecified,
+            defaults to [1, 6, 12, 18, 6].
         @type depth_mul: float
-        @param depth_mul: Depth multiplier. Defaults to 0.33.
+        @param depth_mul: Depth multiplier. Depending on the variant, defaults to 0.33.
         @type width_mul: float
-        @param width_mul: Width multiplier. Defaults to 0.25.
+        @param width_mul: Width multiplier. Depending on the variant, defaults to 0.25.
         @type kwargs: Any
         @param kwargs: Additional arguments to pass to L{BaseNode}.
         """
         super().__init__(**kwargs)
 
+        if variant not in EFFICIENTREP_VARIANTS:
+            raise ValueError(
+                f"EfficientRep model variant should be in {list(EFFICIENTREP_VARIANTS.keys())}"
+            )
+
+        (
+            depth_mul,
+            width_mul,
+        ) = EFFICIENTREP_VARIANTS[variant]
+
         channels_list = channels_list or [64, 128, 256, 512, 1024]
         num_repeats = num_repeats or [1, 6, 12, 18, 6]
         channels_list = [make_divisible(i * width_mul, 8) for i in channels_list]
@@ -110,3 +124,11 @@ def forward(self, inputs: Tensor) -> list[Tensor]:
             x = block(x)
             outputs.append(x)
         return outputs
+
+
+EFFICIENTREP_VARIANTS = {
+    "n": (0.33, 0.25),
+    "s": (0.33, 0.50),
+    "m": (0.60, 0.75),
+    "l": (1.0, 1.0),
+}

From c2e98b713331ed48ad4f8855d93c6dea80b6ccd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Thu, 6 Jun 2024 21:35:02 +0200
Subject: [PATCH 25/28] Support for LuxonisML - Annotation Refactor (#37)

Co-authored-by: GitHub Actions <actions@github.com>
---
 configs/resnet_multitask_model.yaml           | 110 +++++++++++++++
 luxonis_train/__main__.py                     |  10 +-
 .../attached_modules/base_attached_module.py  |  68 +++++++---
 .../losses/adaptive_detection_loss.py         |   2 +-
 .../losses/implicit_keypoint_bbox_loss.py     |   6 +-
 .../attached_modules/losses/keypoint_loss.py  |   4 +-
 .../attached_modules/metrics/common.py        |   8 +-
 .../metrics/mean_average_precision.py         |   4 +-
 .../mean_average_precision_keypoints.py       |   8 +-
 .../metrics/object_keypoint_similarity.py     |   6 +-
 .../visualizers/keypoint_visualizer.py        |   9 +-
 .../visualizers/segmentation_visualizer.py    |   2 +-
 luxonis_train/core/archiver.py                |   2 +-
 luxonis_train/core/core.py                    |  15 ++-
 luxonis_train/models/luxonis_model.py         |  12 +-
 luxonis_train/nodes/base_node.py              |  21 ++-
 luxonis_train/nodes/bisenet_head.py           |   2 +-
 luxonis_train/nodes/classification_head.py    |   6 +-
 luxonis_train/nodes/efficient_bbox_head.py    |   6 +-
 .../nodes/implicit_keypoint_bbox_head.py      |   4 +-
 luxonis_train/nodes/segmentation_head.py      |   2 +-
 luxonis_train/utils/boxutils.py               |  14 +-
 luxonis_train/utils/config.py                 |   2 +-
 luxonis_train/utils/general.py                |  43 +++---
 luxonis_train/utils/loaders/base_loader.py    |  54 +++-----
 .../utils/loaders/luxonis_loader_torch.py     |  15 +--
 luxonis_train/utils/types.py                  |  26 ++--
 media/coverage_badge.svg                      |   4 +-
 tests/integration/conftest.py                 | 127 ++++--------------
 .../test_loaders/test_base_loader.py          |  14 +-
 30 files changed, 324 insertions(+), 282 deletions(-)
 create mode 100644 configs/resnet_multitask_model.yaml

diff --git a/configs/resnet_multitask_model.yaml b/configs/resnet_multitask_model.yaml
new file mode 100644
index 00000000..844c83d4
--- /dev/null
+++ b/configs/resnet_multitask_model.yaml
@@ -0,0 +1,110 @@
+
+model:
+  name: resnet50_classification
+  nodes:
+    - name: ResNet
+      params:
+        variant: "50"
+        download_weights: True
+
+    - name: ClassificationHead
+      alias: ClassificationHead_1
+      task: classification_1
+      inputs:
+        - ResNet
+
+    - name: ClassificationHead
+      alias: ClassificationHead_2
+      task: classification_2
+      inputs:
+        - ResNet
+
+    - name: ClassificationHead
+      alias: ClassificationHead_3
+      task: classification_3
+      inputs:
+        - ResNet
+
+  losses:
+    - name: CrossEntropyLoss
+      alias: CrossEntropyLoss_1
+      attached_to: ClassificationHead_1
+
+    - name: CrossEntropyLoss
+      alias: CrossEntropyLoss_2
+      attached_to: ClassificationHead_2
+
+    - name: CrossEntropyLoss
+      alias: CrossEntropyLoss_3
+      attached_to: ClassificationHead_3
+
+  metrics:
+    - name: Accuracy
+      is_main_metric: true
+      alias: Accuracy_1
+      attached_to: ClassificationHead_1
+
+    - name: Accuracy
+      alias: Accuracy_2
+      attached_to: ClassificationHead_2
+
+    - name: Accuracy
+      alias: Accuracy_3
+      attached_to: ClassificationHead_3
+
+  visualizers:
+    - name: ClassificationVisualizer
+      alias: ClassificationVisualizer_1
+      attached_to: ClassificationHead_1
+      params:
+        font_scale: 0.5
+        color: [255, 0, 0]
+        thickness: 2
+        include_plot: True
+
+    - name: ClassificationVisualizer
+      alias: ClassificationVisualizer_2
+      attached_to: ClassificationHead_2
+      params:
+        font_scale: 0.5
+        color: [255, 0, 0]
+        thickness: 2
+        include_plot: True
+
+    - name: ClassificationVisualizer
+      alias: ClassificationVisualizer_3
+      attached_to: ClassificationHead_3
+      params:
+        font_scale: 0.5
+        color: [255, 0, 0]
+        thickness: 2
+        include_plot: True
+
+loader:
+  params:
+    dataset_name: cifar10_task_test
+
+trainer:
+  batch_size: 4
+  epochs: &epochs 200
+  num_workers: 4
+  validation_interval: 10
+  num_log_images: 8
+
+  preprocessing:
+    train_image_size: [&height 224, &width 224]
+    keep_aspect_ratio: False
+    normalize:
+      active: True
+
+  callbacks:
+    - name: ExportOnTrainEnd
+    - name: TestOnTrainEnd
+
+  optimizer:
+    name: SGD
+    params:
+      lr: 0.02
+
+  scheduler:
+    name: ConstantLR
diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py
index c76f28c1..759bc87c 100644
--- a/luxonis_train/__main__.py
+++ b/luxonis_train/__main__.py
@@ -110,10 +110,7 @@ def inspect(
 ):
     """Inspect dataset."""
     from lightning.pytorch import seed_everything
-    from luxonis_ml.data import (
-        TrainAugmentations,
-        ValAugmentations,
-    )
+    from luxonis_ml.data import Augmentations
 
     from luxonis_train.attached_modules.visualizers.utils import (
         draw_bounding_box_labels,
@@ -139,13 +136,14 @@ def inspect(
 
     image_size = cfg.trainer.preprocessing.train_image_size
 
-    augmentations = (TrainAugmentations if view == "train" else ValAugmentations)(
+    augmentations = Augmentations(
         image_size=image_size,
         augmentations=[
             i.model_dump() for i in cfg.trainer.preprocessing.get_active_augmentations()
         ],
         train_rgb=cfg.trainer.preprocessing.train_rgb,
         keep_aspect_ratio=cfg.trainer.preprocessing.keep_aspect_ratio,
+        only_normalize=view != "train",
     )
 
     loader = LOADERS.get(cfg.loader.name)(
@@ -178,7 +176,7 @@ def inspect(
                             colors="yellow",
                             width=1,
                         )
-                    elif label_type == LabelType.KEYPOINT:
+                    elif label_type == LabelType.KEYPOINTS:
                         img = draw_keypoint_labels(
                             img, labels[labels[:, 0] == i][:, 1:], colors="red"
                         )
diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py
index a015e09f..1e446fbb 100644
--- a/luxonis_train/attached_modules/base_attached_module.py
+++ b/luxonis_train/attached_modules/base_attached_module.py
@@ -74,6 +74,44 @@ def node(self) -> BaseNode:
             )
         return self._node
 
+    def get_label(self, labels: Labels) -> tuple[Tensor, LabelType]:
+        if len(self.required_labels) != 1:
+            if self.task in labels:
+                return labels[self.task]
+            raise NotImplementedError(
+                f"{self.__class__.__name__} requires multiple labels, "
+                "the default `prepare` implementation does not support this."
+            )
+        for label, label_type in labels.values():
+            if label_type == self.required_labels[0]:
+                return label, label_type
+        raise IncompatibleException.from_missing_task(
+            self.required_labels[0].value, list(labels.keys()), self.__class__.__name__
+        )
+
+    def get_input_tensors(self, inputs: Packet[Tensor]) -> list[Tensor]:
+        if self.protocol is not None:
+            return inputs[self.protocol.get_task()]
+        if self.node._task_type is not None:
+            return inputs[self.node._task_type.value]
+        return inputs[self.node.task]
+
+    @property
+    def task(self) -> str:
+        """Task of the node that this module is attached to.
+
+        @rtype: str
+        """
+        task = self.node._task
+        if task is None:
+            if self.required_labels and len(self.required_labels) == 1:
+                return self.required_labels[0].value
+            raise RuntimeError(
+                "Attempt to access `task` reference, but the node does not have a task. ",
+                f"You have to specify the task in the configuration for node {self.node.__class__.__name__}.",
+            )
+        return task
+
     def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]:
         """Prepares node outputs for the forward pass of the module.
 
@@ -102,20 +140,13 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]:
                 "This module requires multiple labels, the default `prepare` "
                 "implementation does not support this."
             )
-        if not self.required_labels:
-            if "boxes" in inputs and LabelType.BOUNDINGBOX in labels:
-                return inputs["boxes"], labels[LabelType.BOUNDINGBOX]  # type: ignore
-            if "classes" in inputs and LabelType.CLASSIFICATION in labels:
-                return inputs["classes"][0], labels[LabelType.CLASSIFICATION]  # type: ignore
-            if "keypoints" in inputs and LabelType.KEYPOINT in labels:
-                return inputs["keypoints"], labels[LabelType.KEYPOINT]  # type: ignore
-            if "segmentation" in inputs and LabelType.SEGMENTATION in labels:
-                return inputs["segmentation"][0], labels[LabelType.SEGMENTATION]  # type: ignore
-            raise IncompatibleException(
-                f"No matching labels and outputs found for {self.__class__.__name__}"
-            )
-        label_type = self.required_labels[0]
-        return inputs[label_type.value], labels[label_type]  # type: ignore
+        x = self.get_input_tensors(inputs)
+        label, label_type = self.get_label(labels)
+        if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]:
+            if isinstance(x, list) and len(x) == 1:
+                x = x[0]
+
+        return x, label  # type: ignore
 
     def validate(self, inputs: Packet[Tensor], labels: Labels) -> None:
         """Validates that the inputs and labels are compatible with the module.
@@ -126,11 +157,10 @@ def validate(self, inputs: Packet[Tensor], labels: Labels) -> None:
         @param labels: Labels from the dataset. @raises L{IncompatibleException}: If the
             inputs are not compatible with the module.
         """
-        for label in self.required_labels:
-            if label not in labels:
-                raise IncompatibleException.from_missing_label(
-                    label, list(labels.keys()), self.__class__.__name__
-                )
+        if self.node.task is not None and self.node.task not in labels:
+            raise IncompatibleException.from_missing_task(
+                self.node.task, list(labels.keys()), self.__class__.__name__
+            )
 
         if self.protocol is not None:
             try:
diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py
index af1a7e6a..521b6d8e 100644
--- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py
+++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py
@@ -104,7 +104,7 @@ def prepare(
         batch_size = pred_scores.shape[0]
         device = pred_scores.device
 
-        target = labels[LabelType.BOUNDINGBOX].to(device)
+        target = labels[self.task][0].to(device)
         gt_bboxes_scale = torch.tensor(
             [
                 self.original_img_size[1],
diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py
index 7169d2a4..555d0d30 100644
--- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py
+++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py
@@ -89,7 +89,7 @@ def __init__(
         """
 
         super().__init__(
-            required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINT],
+            required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINTS],
             **kwargs,
         )
 
@@ -165,8 +165,8 @@ def prepare(
         """
         predictions = outputs["features"]
 
-        kpts = labels[LabelType.KEYPOINT]
-        boxes = labels[LabelType.BOUNDINGBOX]
+        kpts = labels["keypoints"][0]
+        boxes = labels["boundingbox"][0]
 
         nkpts = (kpts.shape[1] - 2) // 3
         targets = torch.zeros((len(boxes), nkpts * 2 + self.box_offset + 1))
diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py
index 4728b045..b1ddd8ba 100644
--- a/luxonis_train/attached_modules/losses/keypoint_loss.py
+++ b/luxonis_train/attached_modules/losses/keypoint_loss.py
@@ -29,7 +29,7 @@ def __init__(
         **kwargs,
     ):
         super().__init__(
-            protocol=Protocol, required_labels=[LabelType.KEYPOINT], **kwargs
+            protocol=Protocol, required_labels=[LabelType.KEYPOINTS], **kwargs
         )
         self.b_cross_entropy = BCEWithLogitsLoss(
             pos_weight=torch.tensor([bce_power]), **kwargs
@@ -38,7 +38,7 @@ def __init__(
         self.visibility_weight = visibility_weight
 
     def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]:
-        return torch.cat(inputs["keypoints"], dim=0), labels[LabelType.KEYPOINT]
+        return torch.cat(inputs["keypoints"], dim=0), labels[LabelType.KEYPOINTS]
 
     def forward(
         self, prediction: Tensor, target: Tensor
diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py
index 6d16a4b4..8d181840 100644
--- a/luxonis_train/attached_modules/metrics/common.py
+++ b/luxonis_train/attached_modules/metrics/common.py
@@ -27,9 +27,9 @@ def __init__(self, **kwargs):
                 f"assuming {task}."
             )
             kwargs["task"] = task
-        self.task = task
+        self._task = task
 
-        if self.task == "multiclass":
+        if self._task == "multiclass":
             if "num_classes" not in kwargs:
                 if self.node is None:
                     raise ValueError(
@@ -37,7 +37,7 @@ def __init__(self, **kwargs):
                         "multiclass torchmetrics."
                     )
                 kwargs["num_classes"] = self.node.n_classes
-        elif self.task == "multilabel":
+        elif self._task == "multilabel":
             if "num_labels" not in kwargs:
                 if self.node is None:
                     raise ValueError(
@@ -49,7 +49,7 @@ def __init__(self, **kwargs):
         self.metric = self.Metric(**kwargs)
 
     def update(self, preds, target, *args, **kwargs) -> None:
-        if self.task in ["multiclass"]:
+        if self._task in ["multiclass"]:
             target = target.argmax(dim=1)
         self.metric.update(preds, target, *args, **kwargs)
 
diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py
index 0a58d061..680b0e5a 100644
--- a/luxonis_train/attached_modules/metrics/mean_average_precision.py
+++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py
@@ -38,8 +38,8 @@ def update(
     def prepare(
         self, outputs: Packet[Tensor], labels: Labels
     ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]:
-        label = labels[LabelType.BOUNDINGBOX]
-        output_nms = outputs["boxes"]
+        label = labels[self.task][0]
+        output_nms = self.get_input_tensors(outputs)
 
         image_size = self.node.original_in_shape[2:]
 
diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py
index 3740f58e..42b1395d 100644
--- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py
+++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py
@@ -68,7 +68,7 @@ def __init__(
         """
         super().__init__(
             protocol=Protocol,
-            required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINT],
+            required_labels=[LabelType.BOUNDINGBOX, LabelType.KEYPOINTS],
             **kwargs,
         )
 
@@ -97,8 +97,8 @@ def __init__(
         self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None)
 
     def prepare(self, outputs: Packet[Tensor], labels: Labels):
-        kpts = labels[LabelType.KEYPOINT]
-        boxes = labels[LabelType.BOUNDINGBOX]
+        kpts = labels["keypoints"][0]
+        boxes = labels["boundingbox"][0]
         nkpts = (kpts.shape[1] - 2) // 3
         label = torch.zeros((len(boxes), nkpts * 3 + 6))
         label[:, :2] = boxes[:, :2]
@@ -112,7 +112,7 @@ def prepare(self, outputs: Packet[Tensor], labels: Labels):
         image_size = self.node.original_in_shape[2:]
 
         output_kpts: list[Tensor] = outputs["keypoints"]
-        output_bboxes: list[Tensor] = outputs["boxes"]
+        output_bboxes: list[Tensor] = outputs["boundingbox"]
         for i in range(len(output_kpts)):
             output_list_kpt_map.append(
                 {
diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py
index c5e4a19b..959108c4 100644
--- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py
+++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py
@@ -46,7 +46,7 @@ def __init__(
         **kwargs,
     ) -> None:
         super().__init__(
-            required_labels=[LabelType.KEYPOINT], protocol=KeypointProtocol, **kwargs
+            required_labels=[LabelType.KEYPOINTS], protocol=KeypointProtocol, **kwargs
         )
 
         if n_keypoints is None and self.node is None:
@@ -67,8 +67,8 @@ def __init__(
     def prepare(
         self, outputs: Packet[Tensor], labels: Labels
     ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]:
-        kpts_labels = labels[LabelType.KEYPOINT]
-        bbox_labels = labels[LabelType.BOUNDINGBOX]
+        kpts_labels = labels["keypoints"][0]
+        bbox_labels = labels["boundingbox"][0]
         num_keypoints = (kpts_labels.shape[1] - 2) // 3
         label = torch.zeros((len(bbox_labels), num_keypoints * 3 + 6))
         label[:, :2] = bbox_labels[:, :2]
diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py
index beebaf3f..6594912f 100644
--- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py
@@ -4,9 +4,7 @@
 from torch import Tensor
 
 from luxonis_train.utils.types import (
-    Labels,
     LabelType,
-    Packet,
 )
 
 from .base_visualizer import BaseVisualizer
@@ -42,17 +40,12 @@ def __init__(
         @param nonvisible_color: Color of nonvisible keypoints. If C{None}, nonvisible
             keypoints are not drawn. Defaults to C{None}.
         """
-        super().__init__(required_labels=[LabelType.KEYPOINT], **kwargs)
+        super().__init__(required_labels=[LabelType.KEYPOINTS], **kwargs)
         self.visibility_threshold = visibility_threshold
         self.connectivity = connectivity
         self.visible_color = visible_color
         self.nonvisible_color = nonvisible_color
 
-    def prepare(
-        self, output: Packet[Tensor], label: Labels
-    ) -> tuple[list[Tensor], Tensor]:
-        return output["keypoints"], label[LabelType.KEYPOINT]
-
     @staticmethod
     def draw_predictions(
         canvas: Tensor,
diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
index 2b2dc7a3..f5348873 100644
--- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
@@ -45,7 +45,7 @@ def __init__(
         self.alpha = alpha
 
     def prepare(self, output: Packet[Tensor], label: Labels) -> tuple[Tensor, Tensor]:
-        return output["segmentation"][0], label[LabelType.SEGMENTATION]
+        return output[self.node.task][0], label[self.task][0]
 
     @staticmethod
     def draw_predictions(
diff --git a/luxonis_train/core/archiver.py b/luxonis_train/core/archiver.py
index 1473df1c..a42d2ec7 100644
--- a/luxonis_train/core/archiver.py
+++ b/luxonis_train/core/archiver.py
@@ -243,7 +243,7 @@ def _get_classes(self, head_family):
         if head_family.startswith("Classification"):
             return self.dataset_metadata._classes["class"]
         elif head_family.startswith("Object"):
-            return self.dataset_metadata._classes["boxes"]
+            return self.dataset_metadata._classes["boundingbox"]
         elif head_family.startswith("Segmentation"):
             return self.dataset_metadata._classes["segmentation"]
         elif head_family.startswith("Keypoint"):
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py
index d23787fc..1ac3fce0 100644
--- a/luxonis_train/core/core.py
+++ b/luxonis_train/core/core.py
@@ -9,7 +9,7 @@
 import torch
 import torch.utils.data as torch_data
 from lightning.pytorch.utilities import rank_zero_only  # type: ignore
-from luxonis_ml.data import TrainAugmentations, ValAugmentations
+from luxonis_ml.data import Augmentations
 from luxonis_ml.utils import reset_logging, setup_logging
 
 from luxonis_train.callbacks import LuxonisProgressBar
@@ -99,7 +99,7 @@ def __init__(
             pl.seed_everything(self.cfg.trainer.seed, workers=True)
             deterministic = True
 
-        self.train_augmentations = TrainAugmentations(
+        self.train_augmentations = Augmentations(
             image_size=self.cfg.trainer.preprocessing.train_image_size,
             augmentations=[
                 i.model_dump()
@@ -108,7 +108,7 @@ def __init__(
             train_rgb=self.cfg.trainer.preprocessing.train_rgb,
             keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio,
         )
-        self.val_augmentations = ValAugmentations(
+        self.val_augmentations = Augmentations(
             image_size=self.cfg.trainer.preprocessing.train_image_size,
             augmentations=[
                 i.model_dump()
@@ -116,6 +116,7 @@ def __init__(
             ],
             train_rgb=self.cfg.trainer.preprocessing.train_rgb,
             keep_aspect_ratio=self.cfg.trainer.preprocessing.keep_aspect_ratio,
+            only_normalize=True,
         )
 
         self.pl_trainer = pl.Trainer(
@@ -152,7 +153,7 @@ def __init__(
         }
         sampler = None
         if self.cfg.trainer.use_weighted_sampler:
-            classes_count = self.dataset.get_classes()[1]
+            classes_count = self.loaders["train"].get_classes()[1]
             if len(classes_count) == 0:
                 logger.warning(
                     "WeightedRandomSampler only available for classification tasks. Using default sampler instead."
@@ -183,15 +184,15 @@ def __init__(
 
         self.cfg.save_data(os.path.join(self.run_save_dir, "config.yaml"))
 
-    def set_train_augmentations(self, aug: TrainAugmentations) -> None:
+    def set_train_augmentations(self, aug: Augmentations) -> None:
         """Sets augmentations used for training dataset."""
         self.train_augmentations = aug
 
-    def set_val_augmentations(self, aug: ValAugmentations) -> None:
+    def set_val_augmentations(self, aug: Augmentations) -> None:
         """Sets augmentations used for validation dataset."""
         self.val_augmentations = aug
 
-    def set_test_augmentations(self, aug: ValAugmentations) -> None:
+    def set_test_augmentations(self, aug: Augmentations) -> None:
         """Sets augmentations used for test dataset."""
         self.test_augmentations = aug
 
diff --git a/luxonis_train/models/luxonis_model.py b/luxonis_train/models/luxonis_model.py
index d3ed26a2..e2568ec0 100644
--- a/luxonis_train/models/luxonis_model.py
+++ b/luxonis_train/models/luxonis_model.py
@@ -38,7 +38,7 @@
 )
 from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry
 from luxonis_train.utils.tracker import LuxonisTrackerPL
-from luxonis_train.utils.types import Kwargs, Labels, Packet, TaskLabels
+from luxonis_train.utils.types import Kwargs, Labels, Packet
 
 from .luxonis_output import LuxonisOutput
 
@@ -143,13 +143,10 @@ def __init__(
         frozen_nodes: list[tuple[str, int]] = []
         nodes: dict[str, tuple[type[BaseNode], Kwargs]] = {}
 
-        self.node_tasks: dict[str, str] = {}
-
         for node_cfg in self.cfg.model.nodes:
             node_name = node_cfg.name
             Node = BaseNode.REGISTRY.get(node_name)
             node_name = node_cfg.alias or node_name
-            self.node_tasks[node_name] = node_cfg.task_group
             if node_cfg.freezing.active:
                 epochs = self.cfg.trainer.epochs
                 if node_cfg.freezing.unfreeze_after is None:
@@ -159,7 +156,7 @@ def __init__(
                 else:
                     unfreeze_after = int(node_cfg.freezing.unfreeze_after * epochs)
                 frozen_nodes.append((node_name, unfreeze_after))
-            nodes[node_name] = (Node, node_cfg.params)
+            nodes[node_name] = (Node, {**node_cfg.params, "task": node_cfg.task})
             if not node_cfg.inputs:
                 self.input_shapes[node_name] = [Size(input_shape)]
             self.graph[node_name] = node_cfg.inputs
@@ -251,7 +248,7 @@ def _initiate_nodes(
     def forward(
         self,
         inputs: Tensor,
-        task_labels: TaskLabels | None = None,
+        labels: Labels | None = None,
         images: Tensor | None = None,
         *,
         compute_loss: bool = True,
@@ -303,7 +300,6 @@ def forward(
             node_inputs = [computed[pred] for pred in input_names]
             outputs = node.run(node_inputs)
             computed[node_name] = outputs
-            labels = task_labels[self.node_tasks[node_name]] if task_labels else None
 
             if compute_loss and node_name in self.losses and labels is not None:
                 for loss_name, loss in self.losses[node_name].items():
@@ -500,7 +496,7 @@ def process_losses(
         training_step_output["loss"] = final_loss.detach().cpu()
         return final_loss, training_step_output
 
-    def training_step(self, train_batch: tuple[Tensor, TaskLabels]) -> Tensor:
+    def training_step(self, train_batch: tuple[Tensor, Labels]) -> Tensor:
         """Performs one step of training with provided batch."""
         outputs = self.forward(*train_batch)
         assert outputs.losses, "Losses are empty, check if you have defined any loss"
diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py
index c3124f82..327c8d8f 100644
--- a/luxonis_train/nodes/base_node.py
+++ b/luxonis_train/nodes/base_node.py
@@ -91,7 +91,8 @@ def __init__(
         in_protocols: list[type[BaseModel]] | None = None,
         n_classes: int | None = None,
         in_sizes: Size | list[Size] | None = None,
-        task_type: LabelType | None = None,
+        task: str | None = None,
+        _task_type: LabelType | None = None,
     ):
         super().__init__()
 
@@ -111,7 +112,10 @@ def __init__(
             self.attach_index = attach_index
 
         self.in_protocols = in_protocols or [FeaturesProtocol]
-        self.task_type = task_type
+        self._task_type = _task_type
+        if task is None and self._task_type is not None:
+            task = self._task_type.value
+        self._task = task
 
         self._input_shapes = input_shapes
         self._original_in_shape = original_in_shape
@@ -130,15 +134,22 @@ def _non_set_error(self, name: str) -> ValueError:
             "but it was not set during initialization. "
         )
 
+    @property
+    def task(self) -> str:
+        """Getter for the task."""
+        if self._task is None:
+            raise self._non_set_error("task")
+        return self._task
+
     @property
     def n_classes(self) -> int:
         """Getter for the number of classes."""
-        return self.dataset_metadata.n_classes(self.task_type)
+        return self.dataset_metadata.n_classes(self.task)
 
     @property
     def class_names(self) -> list[str]:
         """Getter for the class names."""
-        return self.dataset_metadata.class_names(self.task_type)
+        return self.dataset_metadata.class_names(self.task)
 
     @property
     def input_shapes(self) -> list[Packet[Size]]:
@@ -312,7 +323,7 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]:
                 raise IncompatibleException(
                     "Default `wrap` expects a single tensor or a list of tensors."
                 )
-        return {"features": outputs}
+        return {self._task or "features": outputs}
 
     def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]:
         """Combines the forward pass with the wrapping and unwrapping of the inputs.
diff --git a/luxonis_train/nodes/bisenet_head.py b/luxonis_train/nodes/bisenet_head.py
index a3b11df6..9185d823 100644
--- a/luxonis_train/nodes/bisenet_head.py
+++ b/luxonis_train/nodes/bisenet_head.py
@@ -30,7 +30,7 @@ def __init__(
         @param intermediate_channels: How many intermediate channels to use.
             Defaults to C{64}.
         """
-        super().__init__(task_type=LabelType.SEGMENTATION, **kwargs)
+        super().__init__(_task_type=LabelType.SEGMENTATION, **kwargs)
 
         original_height = self.original_in_shape[2]
         upscale_factor = 2 ** infer_upscale_factor(self.in_height, original_height)
diff --git a/luxonis_train/nodes/classification_head.py b/luxonis_train/nodes/classification_head.py
index d96e6b72..7e55a590 100644
--- a/luxonis_train/nodes/classification_head.py
+++ b/luxonis_train/nodes/classification_head.py
@@ -19,7 +19,9 @@ def __init__(
         @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults
             to C{0.2}.
         """
-        super().__init__(task_type=LabelType.CLASSIFICATION, **kwargs)
+        super().__init__(
+            _task_type=kwargs.pop("_task_type", LabelType.CLASSIFICATION), **kwargs
+        )
 
         self.head = nn.Sequential(
             nn.AdaptiveAvgPool2d(1),
@@ -32,4 +34,4 @@ def forward(self, inputs: Tensor) -> Tensor:
         return self.head(inputs)
 
     def wrap(self, output: Tensor) -> Packet[Tensor]:
-        return {"classes": [output]}
+        return {"classification": [output]}
diff --git a/luxonis_train/nodes/efficient_bbox_head.py b/luxonis_train/nodes/efficient_bbox_head.py
index a4f3bc93..97ee1bfc 100644
--- a/luxonis_train/nodes/efficient_bbox_head.py
+++ b/luxonis_train/nodes/efficient_bbox_head.py
@@ -50,7 +50,7 @@ def __init__(
         @type max_det: int
         @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}.
         """
-        super().__init__(task_type=LabelType.BOUNDINGBOX, **kwargs)
+        super().__init__(_task_type=LabelType.BOUNDINGBOX, **kwargs)
 
         self.n_heads = n_heads
 
@@ -97,7 +97,7 @@ def wrap(
                 conf, _ = out_cls.max(1, keepdim=True)
                 out = torch.cat([out_reg, conf, out_cls], dim=1)
                 outputs.append(out)
-            return {"boxes": outputs}
+            return {"boundingbox": outputs}
 
         cls_tensor = torch.cat(
             [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2
@@ -116,7 +116,7 @@ def wrap(
         else:
             boxes = self._process_to_bbox((features, cls_tensor, reg_tensor))
             return {
-                "boxes": boxes,
+                "boundingbox": boxes,
                 "features": features,
                 "class_scores": [cls_tensor],
                 "distributions": [reg_tensor],
diff --git a/luxonis_train/nodes/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
index 76a66eb6..431dcf46 100644
--- a/luxonis_train/nodes/implicit_keypoint_bbox_head.py
+++ b/luxonis_train/nodes/implicit_keypoint_bbox_head.py
@@ -57,7 +57,7 @@ def __init__(
         @type max_det: int
         @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}.
         """
-        super().__init__(task_type=LabelType.KEYPOINT, **kwargs)
+        super().__init__(_task_type=LabelType.KEYPOINTS, **kwargs)
 
         if anchors is None:
             logger.info("No anchors provided, generating them automatically.")
@@ -172,7 +172,7 @@ def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]:
         )
 
         return {
-            "boxes": [detection[:, :6] for detection in nms],
+            "boundingbox": [detection[:, :6] for detection in nms],
             "keypoints": [
                 detection[:, 6:].reshape(-1, self.n_keypoints, 3) for detection in nms
             ],
diff --git a/luxonis_train/nodes/segmentation_head.py b/luxonis_train/nodes/segmentation_head.py
index a3420491..5955953d 100644
--- a/luxonis_train/nodes/segmentation_head.py
+++ b/luxonis_train/nodes/segmentation_head.py
@@ -27,7 +27,7 @@ def __init__(self, **kwargs):
         @type kwargs: Any
         @param kwargs: Additional arguments to pass to L{BaseNode}.
         """
-        super().__init__(task_type=LabelType.SEGMENTATION, **kwargs)
+        super().__init__(_task_type=LabelType.SEGMENTATION, **kwargs)
 
         original_height = self.original_in_shape[2]
         num_up = infer_upscale_factor(self.in_height, original_height, strict=False)
diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boxutils.py
index a59f4cd0..15fca04f 100644
--- a/luxonis_train/utils/boxutils.py
+++ b/luxonis_train/utils/boxutils.py
@@ -6,6 +6,7 @@
 import torch
 from scipy.cluster.vq import kmeans
 from torch import Tensor
+from torch.utils.data import DataLoader
 from torchvision.ops import (
     batched_nms,
     box_convert,
@@ -400,11 +401,10 @@ def non_max_suppression(
 
 
 def anchors_from_dataset(
-    loader: torch.utils.data.DataLoader,
+    loader: DataLoader,
     n_anchors: int = 9,
     n_generations: int = 1000,
     ratio_threshold: float = 4.0,
-    task_group: str = "default",
 ) -> tuple[Tensor, float]:
     """Generates anchors based on bounding box annotations present in provided data
     loader. It uses K-Means for initial proposals which are then refined with genetic
@@ -426,11 +426,11 @@ def anchors_from_dataset(
 
     widths = []
     inputs = None
-    for inp, task_labels in loader:
-        labels = next(iter(task_labels.values()))  # TODO: handle multiple tasks
-        boxes = labels[LabelType.BOUNDINGBOX]
-        curr_wh = boxes[:, 4:]
-        widths.append(curr_wh)
+    for inp, labels in loader:
+        for tensor, label_type in labels.values():
+            if label_type == LabelType.BOUNDINGBOX:
+                curr_wh = tensor[:, 4:]
+                widths.append(curr_wh)
         inputs = inp
     assert inputs is not None, "No inputs found in data loader"
     _, _, h, w = inputs.shape  # assuming all images are same size
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index 875819e2..31fd55ee 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -41,7 +41,7 @@ class ModelNodeConfig(CustomBaseModel):
     inputs: list[str] = []
     params: dict[str, Any] = {}
     freezing: FreezingConfig = FreezingConfig()
-    task_group: str = "default"
+    task: str | None = None
 
 
 class PredefinedModelConfig(CustomBaseModel):
diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py
index bf3d0e8f..21c35df0 100644
--- a/luxonis_train/utils/general.py
+++ b/luxonis_train/utils/general.py
@@ -71,11 +71,11 @@ def classes(self) -> dict[LabelType, list[str]]:
             )
         return self._classes
 
-    def n_classes(self, label_type: LabelType | None) -> int:
-        """Gets the number of classes for the specified label type.
+    def n_classes(self, task: str | None) -> int:
+        """Gets the number of classes for the specified task.
 
-        @type label_type: L{LabelType} | None
-        @param label_type: Label type to get the number of classes for.
+        @type task: str | None
+        @param task: Task to get the number of classes for.
         @rtype: int
         @return: Number of classes for the specified label type.
         @raises ValueError: If the dataset loader was not provided during
@@ -83,12 +83,10 @@ def n_classes(self, label_type: LabelType | None) -> int:
         @raises ValueError: If the dataset contains different number of classes for
             different label types.
         """
-        if label_type is not None:
-            if label_type not in self.classes:
-                raise ValueError(
-                    f"Task type {label_type.name} is not present in the dataset."
-                )
-            return len(self.classes[label_type])
+        if task is not None:
+            if task not in self.classes:
+                raise ValueError(f"Task '{task}' is not present in the dataset.")
+            return len(self.classes[task])
         n_classes = len(list(self.classes.values())[0])
         for classes in self.classes.values():
             if len(classes) != n_classes:
@@ -97,11 +95,11 @@ def n_classes(self, label_type: LabelType | None) -> int:
                 )
         return n_classes
 
-    def class_names(self, label_type: LabelType | None) -> list[str]:
-        """Gets the class names for the specified label type.
+    def class_names(self, task: str | None) -> list[str]:
+        """Gets the class names for the specified task.
 
-        @type label_type: L{LabelType} | None
-        @param label_type: Label type to get the class names for.
+        @type task: str | None
+        @param task: Task to get the class names for.
         @rtype: list[str]
         @return: List of class names for the specified label type.
         @raises ValueError: If the dataset loader was not provided during
@@ -109,12 +107,10 @@ def class_names(self, label_type: LabelType | None) -> list[str]:
         @raises ValueError: If the dataset contains different class names for different
             label types.
         """
-        if label_type is not None:
-            if label_type not in self.classes:
-                raise ValueError(
-                    f"Task type {label_type.name} is not present in the dataset."
-                )
-            return self.classes[label_type]
+        if task is not None:
+            if task not in self.classes:
+                raise ValueError(f"Task '{task}' is not present in the dataset.")
+            return self.classes[task]
         class_names = list(self.classes.values())[0]
         for classes in self.classes.values():
             if classes != class_names:
@@ -170,9 +166,10 @@ def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata":
 
         if skeletons is not None:
             if len(skeletons) == 1:
-                name = list(skeletons.keys())[0]
-                keypoint_names = skeletons[name]["labels"]
-                connectivity = skeletons[name]["edges"]
+                task_name = next(iter(skeletons))
+                class_name = next(iter(skeletons[task_name]))
+                keypoint_names = skeletons[task_name][class_name]["labels"]
+                connectivity = skeletons[task_name][class_name]["edges"]
 
             elif len(skeletons) > 1:
                 raise NotImplementedError(
diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/utils/loaders/base_loader.py
index f96f65e1..c3f5e141 100644
--- a/luxonis_train/utils/loaders/base_loader.py
+++ b/luxonis_train/utils/loaders/base_loader.py
@@ -9,7 +9,7 @@
 from luxonis_train.utils.registry import LOADERS
 from luxonis_train.utils.types import Labels, LabelType
 
-LuxonisLoaderTorchOutput = tuple[Tensor, dict[str, Labels]]
+LuxonisLoaderTorchOutput = tuple[Tensor, Labels]
 """LuxonisLoaderTorchOutput is a tuple of images and corresponding labels."""
 
 
@@ -74,7 +74,7 @@ def get_skeletons(self) -> dict[str, dict] | None:
 
 def collate_fn(
     batch: list[LuxonisLoaderTorchOutput],
-) -> tuple[Tensor, dict[str, dict[LabelType, Tensor]]]:
+) -> tuple[Tensor, Labels]:
     """Default collate function used for training.
 
     @type batch: list[LuxonisLoaderTorchOutput]
@@ -83,46 +83,26 @@ def collate_fn(
     @rtype: tuple[Tensor, dict[LabelType, Tensor]]
     @return: Tuple of images and annotations in the format expected by the model.
     """
-    imgs, group_dicts = zip(*batch)
-    out_group_dicts = {task: {} for task in group_dicts[0].keys()}
-    imgs = torch.stack(imgs, 0)
+    imgs: tuple[Tensor, ...]
+    labels: tuple[Labels, ...]
+    imgs, labels = zip(*batch)
 
-    for task in list(group_dicts[0].keys()):
-        anno_dicts = [group[task] for group in group_dicts]
+    out_labels = {}
 
-        present_annotations = anno_dicts[0].keys()
-        out_annotations: dict[LabelType, Tensor] = {
-            anno: torch.empty(0) for anno in present_annotations
-        }
+    for task in labels[0].keys():
+        label_type = labels[0][task][1]
+        annos = [label[task][0] for label in labels]
+        if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]:
+            out_labels[task] = torch.stack(annos, 0), label_type
 
-        if LabelType.CLASSIFICATION in present_annotations:
-            class_annos = [anno[LabelType.CLASSIFICATION] for anno in anno_dicts]
-            out_annotations[LabelType.CLASSIFICATION] = torch.stack(class_annos, 0)
-
-        if LabelType.SEGMENTATION in present_annotations:
-            seg_annos = [anno[LabelType.SEGMENTATION] for anno in anno_dicts]
-            out_annotations[LabelType.SEGMENTATION] = torch.stack(seg_annos, 0)
-
-        if LabelType.BOUNDINGBOX in present_annotations:
-            bbox_annos = [anno[LabelType.BOUNDINGBOX] for anno in anno_dicts]
+        elif label_type in [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX]:
             label_box: list[Tensor] = []
-            for i, box in enumerate(bbox_annos):
-                l_box = torch.zeros((box.shape[0], 6))
+            for i, box in enumerate(annos):
+                l_box = torch.zeros((box.shape[0], box.shape[1] + 1))
                 l_box[:, 0] = i  # add target image index for build_targets()
                 l_box[:, 1:] = box
                 label_box.append(l_box)
-            out_annotations[LabelType.BOUNDINGBOX] = torch.cat(label_box, 0)
-
-        if LabelType.KEYPOINT in present_annotations:
-            keypoint_annos = [anno[LabelType.KEYPOINT] for anno in anno_dicts]
-            label_keypoints: list[Tensor] = []
-            for i, points in enumerate(keypoint_annos):
-                l_kps = torch.zeros((points.shape[0], points.shape[1] + 1))
-                l_kps[:, 0] = i  # add target image index for build_targets()
-                l_kps[:, 1:] = points
-                label_keypoints.append(l_kps)
-            out_annotations[LabelType.KEYPOINT] = torch.cat(label_keypoints, 0)
-
-        out_group_dicts[task] = out_annotations
+            out_labels[task] = torch.cat(label_box, 0), label_type
 
-    return imgs, out_group_dicts
+    # Stack the per-sample images along a new leading batch dimension.
+    return torch.stack(imgs, 0), out_labels
diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/utils/loaders/luxonis_loader_torch.py
index b2eeb168..a6b9bf82 100644
--- a/luxonis_train/utils/loaders/luxonis_loader_torch.py
+++ b/luxonis_train/utils/loaders/luxonis_loader_torch.py
@@ -4,7 +4,6 @@
 from luxonis_ml.data import (
     BucketStorage,
     BucketType,
-    LabelType,
     LuxonisDataset,
     LuxonisLoader,
 )
@@ -48,20 +47,18 @@ def input_shape(self) -> Size:
         return Size([1, *img.shape])
 
     def __getitem__(self, idx: int) -> LuxonisLoaderTorchOutput:
-        img, group_annotations = self.base_loader[idx]
+        img, labels = self.base_loader[idx]
 
         img = np.transpose(img, (2, 0, 1))  # HWC to CHW
         tensor_img = Tensor(img)
-        for task in group_annotations:
-            annotations = group_annotations[task]
-            for key in annotations:
-                annotations[key] = Tensor(annotations[key])  # type: ignore
+        for task, (array, label_type) in labels.items():
+            labels[task] = (Tensor(array), label_type)  # type: ignore
 
-        return tensor_img, group_annotations
+        return tensor_img, labels
 
-    def get_classes(self) -> dict[LabelType, list[str]]:
+    def get_classes(self) -> dict[str, list[str]]:
         _, classes = self.dataset.get_classes()
-        return {LabelType(task): classes[task] for task in classes}
+        return {task: classes[task] for task in classes}
 
     def get_skeletons(self) -> dict[str, dict] | None:
         return self.dataset.get_skeletons()
diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py
index 3fb724c3..5bebc7e4 100644
--- a/luxonis_train/utils/types.py
+++ b/luxonis_train/utils/types.py
@@ -1,13 +1,12 @@
 from typing import Annotated, Any, Literal, TypeVar
 
-from luxonis_ml.enums import LabelType
+from luxonis_ml.data import LabelType
 from pydantic import BaseModel, Field, ValidationError
 from torch import Size, Tensor
 
 Kwargs = dict[str, Any]
-OutputTypes = Literal["boxes", "class", "keypoints", "segmentation", "features"]
-Labels = dict[LabelType, Tensor]
-TaskLabels = dict[str, Labels]
+OutputTypes = Literal["boundingbox", "class", "keypoints", "segmentation", "features"]
+Labels = dict[str, tuple[Tensor, LabelType]]
 
 AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int]
 """AttachIndexType is used to specify to which output of the prevoius node does the
@@ -36,12 +35,10 @@ def from_validation_error(cls, val_error: ValidationError, class_name: str):
         )
 
     @classmethod
-    def from_missing_label(
-        cls, label: LabelType, present_labels: list[LabelType], class_name: str
-    ):
+    def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str):
         return cls(
-            f"{class_name} requires {label} label, but it was not found in "
-            f"the label dictionary. Available labels: {present_labels}."
+            f"{class_name} requires {task} label, but it was not found in "
+            f"the label dictionary. Available labels: {present_tasks}."
         )
 
 
@@ -49,6 +46,15 @@ class BaseProtocol(BaseModel):
     class Config:
         arbitrary_types_allowed = True
 
+    @classmethod
+    def get_task(cls) -> str:
+        if len(cls.__annotations__) == 1:
+            return list(cls.__annotations__)[0]
+        raise ValueError(
+            "Protocol must have exactly one field for automatic task inference. "
+            "Implement custom `prepare` method in your attached module."
+        )
+
 
 class SegmentationProtocol(BaseProtocol):
     segmentation: Annotated[list[Tensor], Field(min_length=1)]
@@ -59,7 +65,7 @@ class KeypointProtocol(BaseProtocol):
 
 
 class BBoxProtocol(BaseProtocol):
-    boxes: Annotated[list[Tensor], Field(min_length=1)]
+    boundingbox: Annotated[list[Tensor], Field(min_length=1)]
 
 
 class FeaturesProtocol(BaseProtocol):
diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index 90299371..b750dd9c 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">76%</text>
-        <text x="80" y="14">76%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">77%</text>
+        <text x="80" y="14">77%</text>
     </g>
 </svg>
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 815a4bd5..73909431 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,15 +1,11 @@
-import glob
-import json
 import os
-import zipfile
 from pathlib import Path
 
-import cv2
 import gdown
-import numpy as np
 import pytest
 import torchvision
 from luxonis_ml.data import LuxonisDataset
+from luxonis_ml.data.parsers import LuxonisParser
 from luxonis_ml.utils import environ
 
 Path(environ.LUXONISML_BASE_PATH).mkdir(exist_ok=True)
@@ -24,7 +20,7 @@ def create_dataset(name: str) -> LuxonisDataset:
 
 @pytest.fixture(scope="session", autouse=True)
 def create_coco_dataset():
-    dataset = create_dataset("coco_test")
+    dataset_name = "coco_test"
     url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT"
     output_folder = "../data/"
     output_zip = os.path.join(output_folder, "COCO_people_subset.zip")
@@ -37,96 +33,12 @@ def create_coco_dataset():
     ):
         gdown.download(url, output_zip, quiet=False)
 
-        with zipfile.ZipFile(output_zip, "r") as zip_ref:
-            zip_ref.extractall(output_folder)
-
-    def COCO_people_subset_generator():
-        img_dir = os.path.join(output_folder, "person_val2017_subset")
-        annot_file = os.path.join(output_folder, "person_keypoints_val2017.json")
-        im_paths = glob.glob(img_dir + "/*.jpg")
-        nums = np.array([int(Path(path).stem) for path in im_paths])
-        idxs = np.argsort(nums)
-        im_paths = list(np.array(im_paths)[idxs])
-        with open(annot_file) as file:
-            data = json.load(file)
-        imgs = data["images"]
-        anns = data["annotations"]
-
-        for path in im_paths:
-            gran = Path(path).name
-            img = [img for img in imgs if img["file_name"] == gran][0]
-            img_id = img["id"]
-            img_anns = [ann for ann in anns if ann["image_id"] == img_id]
-
-            im = cv2.imread(path)
-            height, width, _ = im.shape
-
-            if len(img_anns):
-                yield {
-                    "file": path,
-                    "class": "person",
-                    "type": "classification",
-                    "value": True,
-                }
-
-            for ann in img_anns:
-                seg = ann["segmentation"]
-                if isinstance(seg, list):
-                    poly = []
-                    for s in seg:
-                        poly_arr = np.array(s).reshape(-1, 2)
-                        poly += [
-                            (poly_arr[i, 0] / width, poly_arr[i, 1] / height)
-                            for i in range(len(poly_arr))
-                        ]
-                    yield {
-                        "file": path,
-                        "class": "person",
-                        "type": "polyline",
-                        "value": poly,
-                    }
-
-                x, y, w, h = ann["bbox"]
-                yield {
-                    "file": path,
-                    "class": "person",
-                    "type": "box",
-                    "value": (x / width, y / height, w / width, h / height),
-                }
-
-                kps = np.array(ann["keypoints"]).reshape(-1, 3)
-                keypoint = []
-                for kp in kps:
-                    keypoint.append(
-                        (float(kp[0] / width), float(kp[1] / height), int(kp[2]))
-                    )
-                yield {
-                    "file": path,
-                    "class": "person",
-                    "type": "keypoints",
-                    "value": keypoint,
-                }
-
-    dataset.set_classes(["person"])
-
-    annot_file = os.path.join(output_folder, "person_keypoints_val2017.json")
-    with open(annot_file) as file:
-        data = json.load(file)
-    dataset.set_skeletons(
-        {
-            "person": {
-                "labels": data["categories"][0]["keypoints"],
-                "edges": (np.array(data["categories"][0]["skeleton"]) - 1).tolist(),
-            }
-        }
-    )
-    dataset.add(COCO_people_subset_generator())
-    dataset.make_splits()
+    parser = LuxonisParser(output_zip, dataset_name=dataset_name, delete_existing=True)
+    parser.parse(random_split=True)
 
 
-@pytest.fixture(scope="session", autouse=True)
-def create_cifar10_dataset():
-    dataset = create_dataset("cifar10_test")
+def _create_cifar10(dataset_name: str, task_names: list[str]) -> None:
+    dataset = create_dataset(dataset_name)
     output_folder = "../data/"
     if not os.path.exists(output_folder):
         os.makedirs(output_folder)
@@ -152,14 +64,25 @@ def CIFAR10_subset_generator():
                 break
             path = os.path.join(output_folder, f"cifar_{i}.png")
             image.save(path)
-            yield {
-                "file": path,
-                "class": classes[label],
-                "type": "classification",
-                "value": True,
-            }
-
-    dataset.set_classes(classes)
+            for task_name in task_names:
+                yield {
+                    "file": path,
+                    "annotation": {
+                        "type": "classification",
+                        "task": task_name,
+                        "class": classes[label],
+                    },
+                }
 
     dataset.add(CIFAR10_subset_generator())
     dataset.make_splits()
+
+
+@pytest.fixture(scope="session", autouse=True)
+def create_cifar10_dataset():  # CIFAR-10 subset with one "classification" task
+    _create_cifar10("cifar10_test", ["classification"])
+
+
+@pytest.fixture(scope="session", autouse=True)
+def create_cifar10_task_dataset():  # same subset, three named classification tasks
+    _create_cifar10("cifar10_task_test", [f"classification_{i}" for i in [1, 2, 3]])
diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_utils/test_loaders/test_base_loader.py
index b5c8b299..a54be4b6 100644
--- a/tests/unittests/test_utils/test_loaders/test_base_loader.py
+++ b/tests/unittests/test_utils/test_loaders/test_base_loader.py
@@ -12,27 +12,25 @@ def test_collate_fn():
     batch = [
         (
             torch.rand(3, 224, 224, dtype=torch.float32),
-            {"default": {LabelType.CLASSIFICATION: torch.tensor([1, 0])}},
+            {"classification": (torch.tensor([1, 0]), LabelType.CLASSIFICATION)},
         ),
         (
             torch.rand(3, 224, 224, dtype=torch.float32),
-            {"default": {LabelType.CLASSIFICATION: torch.tensor([0, 1])}},
+            {"classification": (torch.tensor([0, 1]), LabelType.CLASSIFICATION)},
         ),
     ]
 
     # Call collate_fn
-    imgs, annotations = collate_fn(batch)
+    imgs, annotations = collate_fn(batch)  # type: ignore
 
     # Check images tensor
     assert imgs.shape == (2, 3, 224, 224)
     assert imgs.dtype == torch.float32
 
     # Check annotations
-    assert "default" in annotations
-    annotations = annotations["default"]
-    assert LabelType.CLASSIFICATION in annotations
-    assert annotations[LabelType.CLASSIFICATION].shape == (2, 2)
-    assert annotations[LabelType.CLASSIFICATION].dtype == torch.int64
+    assert "classification" in annotations
+    assert annotations["classification"][0].shape == (2, 2)
+    assert annotations["classification"][0].dtype == torch.int64
 
     # TODO: test also segmentation, boundingbox and keypoint
 

From abe7d3dc8fa18a106bc96687ef07c746feceea9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?= <martin.kozlovsky@luxonis.com>
Date: Fri, 7 Jun 2024 20:02:03 +0200
Subject: [PATCH 26/28] Changed Imports in Config (#38)

Co-authored-by: GitHub Actions <actions@github.com>
---
 luxonis_train/utils/config.py | 7 ++++---
 media/coverage_badge.svg      | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py
index 31fd55ee..74a8e6a5 100644
--- a/luxonis_train/utils/config.py
+++ b/luxonis_train/utils/config.py
@@ -5,9 +5,6 @@
 from luxonis_ml.utils import Environ, LuxonisConfig, LuxonisFileSystem, setup_logging
 from pydantic import BaseModel, ConfigDict, Field, model_validator
 
-from luxonis_train.utils.general import is_acyclic
-from luxonis_train.utils.registry import MODELS
-
 logger = logging.getLogger(__name__)
 
 
@@ -65,6 +62,8 @@ class ModelConfig(CustomBaseModel):
 
     @model_validator(mode="after")
     def check_predefined_model(self):
+        from luxonis_train.utils.registry import MODELS
+
         if self.predefined_model:
             logger.info(f"Using predefined model: `{self.predefined_model.name}`")
             model = MODELS.get(self.predefined_model.name)(
@@ -85,6 +84,8 @@ def check_predefined_model(self):
 
     @model_validator(mode="after")
     def check_graph(self):
+        from luxonis_train.utils.general import is_acyclic
+
         graph = {node.alias or node.name: node.inputs for node in self.nodes}
         if not is_acyclic(graph):
             raise ValueError("Model graph is not acyclic.")
diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index b750dd9c..90299371 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">77%</text>
-        <text x="80" y="14">77%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">76%</text>
+        <text x="80" y="14">76%</text>
     </g>
 </svg>

From da0106e39441c351417c3b10e3f1c81f495126e9 Mon Sep 17 00:00:00 2001
From: nn <m.nacer950@gmail.com>
Date: Mon, 10 Jun 2024 07:28:48 +0100
Subject: [PATCH 27/28] adding OCR Decoding support

---
 configs/ocr_decoding.yaml                     | 132 ++++++++++++++
 .../attached_modules/losses/__init__.py       |   2 +
 .../losses/ocr_decoding_loss.py               |  49 +++++
 .../attached_modules/metrics/__init__.py      |   3 +
 .../attached_modules/metrics/ocr_accuracy.py  |  54 ++++++
 luxonis_train/nodes/__init__.py               |   3 +
 luxonis_train/nodes/ocr_decoder.py            | 171 ++++++++++++++++++
 7 files changed, 414 insertions(+)
 create mode 100755 configs/ocr_decoding.yaml
 create mode 100644 luxonis_train/attached_modules/losses/ocr_decoding_loss.py
 create mode 100644 luxonis_train/attached_modules/metrics/ocr_accuracy.py
 create mode 100644 luxonis_train/nodes/ocr_decoder.py

diff --git a/configs/ocr_decoding.yaml b/configs/ocr_decoding.yaml
new file mode 100755
index 00000000..55dfb54c
--- /dev/null
+++ b/configs/ocr_decoding.yaml
@@ -0,0 +1,132 @@
+# An example configuration for OCR Decoding network.
+
+
+model:
+  name: ocr_decoding_test
+  nodes:
+    - name: OCRDecoderBackbone
+      params:
+        task: "text"
+        num_characters: 37
+        in_channels: 3
+        dropout_rate: 0.1
+
+    - name: OCRDecoderHead
+      inputs:
+        - OCRDecoderBackbone
+      params:
+        task: "text"
+        num_characters: 37
+
+
+
+  losses:
+    - name: FocalCTC
+      attached_to: OCRDecoderHead
+      params:
+        blank: 0
+
+  metrics:
+    - name: OCRAccuracy
+      is_main_metric: true
+      attached_to: OCRDecoderHead
+
+#  visualizers:
+#    - name: MultiVisualizer
+#      attached_to: ImplicitKeypointBBoxHead
+#      params:
+#        visualizers:
+#          - name: KeypointVisualizer
+#            params:
+#              nonvisible_color: blue
+#          - name: BBoxVisualizer
+#            params:
+#              colors:
+#                person: "#FF5055"
+#    - name: SegmentationVisualizer
+#      attached_to: SegmentationHead
+#      params:
+#        colors: "#FF5055"
+#    - name: BBoxVisualizer
+#      attached_to: EfficientBBoxHead
+
+tracker:
+  project_name: ocr_example
+  save_directory: ocr_output
+  is_tensorboard: True
+  is_wandb: False
+  wandb_entity: luxonis
+  is_mlflow: False
+
+loader:
+  train_view: train
+  val_view: val
+  test_view: test
+
+  params:
+    dataset_name: dataset_dev_0
+
+trainer:
+  accelerator: auto
+  devices: auto
+  strategy: auto
+
+  num_sanity_val_steps: 1
+  profiler: null
+  verbose: True
+  batch_size: 2
+  accumulate_grad_batches: 1
+  epochs: &epochs 200
+  num_workers: 2
+  train_metrics_interval: -1
+  validation_interval: 1
+  num_log_images: 1
+  skip_last_batch: False
+  log_sub_losses: True
+  save_top_k: 3
+
+  preprocessing:
+    train_image_size: [&height 160, &width 320]
+    keep_aspect_ratio: False
+    train_rgb: True
+    normalize:
+      active: True
+    augmentations:
+      - name: OCRAugmentation
+        params:
+          image_size: [160, 320]
+          is_rgb: True
+          is_train: True
+
+  callbacks:
+    - name: LearningRateMonitor
+      params:
+        logging_interval: step
+    - name: MetadataLogger
+      params:
+        hyperparams: ["trainer.epochs", trainer.batch_size]
+    - name: TestOnTrainEnd
+
+  optimizer:
+    name: SGD
+    params:
+      lr: 0.0001
+      momentum: 0.937
+      nesterov: True
+      weight_decay: 0.0005
+
+  scheduler:
+    name: CosineAnnealingLR
+    params:
+      T_max: *epochs
+      eta_min: 0
+
+exporter:
+  onnx:
+    opset_version: 11
+
+tuner:
+  params:
+    trainer.optimizer.name_categorical: ["Adam", "SGD"]
+    trainer.optimizer.params.lr_float: [0.0001, 0.001]
+    trainer.batch_size_int: [4, 16, 4]
diff --git a/luxonis_train/attached_modules/losses/__init__.py b/luxonis_train/attached_modules/losses/__init__.py
index 737373d2..ecaf6cfd 100644
--- a/luxonis_train/attached_modules/losses/__init__.py
+++ b/luxonis_train/attached_modules/losses/__init__.py
@@ -7,6 +7,7 @@
 from .sigmoid_focal_loss import SigmoidFocalLoss
 from .smooth_bce_with_logits import SmoothBCEWithLogitsLoss
 from .softmax_focal_loss import SoftmaxFocalLoss
+from .ocr_decoding_loss import FocalCTC
 
 __all__ = [
     "AdaptiveDetectionLoss",
@@ -18,4 +19,5 @@
     "SigmoidFocalLoss",
     "SmoothBCEWithLogitsLoss",
     "SoftmaxFocalLoss",
+    "FocalCTC"
 ]
diff --git a/luxonis_train/attached_modules/losses/ocr_decoding_loss.py b/luxonis_train/attached_modules/losses/ocr_decoding_loss.py
new file mode 100644
index 00000000..6c58ec8b
--- /dev/null
+++ b/luxonis_train/attached_modules/losses/ocr_decoding_loss.py
@@ -0,0 +1,99 @@
+import torch
+from torch import Tensor, nn
+
+from .base_loss import BaseLoss
+
+
+class FocalCTC(BaseLoss[Tensor, Tensor]):
+    """CTC loss with a focal re-weighting of the per-sample losses.
+
+    Each per-sample CTC loss C{l} is scaled by
+    C{alpha * (1 - exp(-l)) ** gamma} before the batch mean is taken.
+    """
+
+    def __init__(
+        self, blank: int = 0, alpha: float = 0.99, gamma: float = 1.0, **kwargs
+    ):
+        """
+        @type blank: int
+        @param blank: Index of the CTC blank class.
+        @type alpha: float
+        @param alpha: Constant factor applied to the focal term.
+        @type gamma: float
+        @param gamma: Exponent of the focal term.
+        @param kwargs: Forwarded to L{BaseLoss}.
+        """
+        super().__init__(**kwargs)
+        self.alpha = alpha
+        self.gamma = gamma
+        self.loss = nn.CTCLoss(zero_infinity=True, blank=blank, reduction="none")
+
+    def forward(self, logits: Tensor, labels) -> Tensor:
+        """Compute the focal CTC loss.
+
+        @type logits: Tensor
+        @param logits: Unnormalized scores laid out as (batch, class, step),
+            the layout returned by C{OCRDecoderHead}.
+        @param labels: Triple C{(targets, target_lengths, max_len)}; the last
+            element is not used.
+        @rtype: Tensor
+        @return: Scalar loss averaged over the batch.
+        """
+        targets, target_lengths, _ = labels
+
+        # nn.CTCLoss expects log-probabilities laid out as (step, batch, class).
+        log_probs = logits.permute(2, 0, 1).log_softmax(dim=2)
+        n_steps, batch_size = log_probs.shape[:2]
+        input_lengths = torch.full(
+            size=(batch_size,), fill_value=n_steps, dtype=torch.long
+        )
+
+        ctc_loss = self.loss(log_probs, targets, input_lengths, target_lengths)
+        p = torch.exp(-ctc_loss)
+        focal_ctc_loss = self.alpha * ((1 - p) ** self.gamma) * ctc_loss
+        return focal_ctc_loss.mean()
+
+
+class SmoothCTCLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor]):
+    """CTC loss blended with a KL-divergence term against a uniform distribution."""
+
+    def __init__(
+        self, num_classes: int, blank: int = 0, weight: float = 0.01, **kwargs
+    ):
+        """
+        @type num_classes: int
+        @param num_classes: Number of classes; the uniform KL target is
+            C{1 / num_classes}.
+        @type blank: int
+        @param blank: Index of the CTC blank class.
+        @type weight: float
+        @param weight: Share of the KL term in the final loss.
+        @param kwargs: Forwarded to L{BaseLoss}.
+        """
+        super().__init__(**kwargs)
+        self.weight = weight
+        self.num_classes = num_classes
+
+        self.ctc = nn.CTCLoss(reduction="mean", blank=blank, zero_infinity=True)
+        self.kldiv = nn.KLDivLoss(reduction="batchmean")
+
+    def forward(
+        self,
+        log_probs: Tensor,
+        targets: Tensor,
+        input_lengths: Tensor,
+        target_lengths: Tensor,
+    ) -> Tensor:
+        """Return C{(1 - weight) * ctc + weight * kl}.
+
+        @param log_probs: Log-probabilities laid out as (step, batch, class),
+            as required by C{nn.CTCLoss}.
+        """
+        ctc_loss = self.ctc(log_probs, targets, input_lengths, target_lengths)
+
+        kl_inp = log_probs.transpose(0, 1)
+        kl_tar = torch.full_like(kl_inp, 1.0 / self.num_classes)
+        kldiv_loss = self.kldiv(kl_inp, kl_tar)
+
+        loss = (1.0 - self.weight) * ctc_loss + self.weight * kldiv_loss
+        return loss.mean()
diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py
index 9e73e4ac..0bd9c138 100644
--- a/luxonis_train/attached_modules/metrics/__init__.py
+++ b/luxonis_train/attached_modules/metrics/__init__.py
@@ -3,6 +3,8 @@
 from .mean_average_precision import MeanAveragePrecision
 from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints
 from .object_keypoint_similarity import ObjectKeypointSimilarity
+from .ocr_accuracy import OCRAccuracy
+
 
 __all__ = [
     "Accuracy",
@@ -14,4 +16,5 @@
     "ObjectKeypointSimilarity",
     "Precision",
     "Recall",
+    "OCRAccuracy"
 ]
diff --git a/luxonis_train/attached_modules/metrics/ocr_accuracy.py b/luxonis_train/attached_modules/metrics/ocr_accuracy.py
new file mode 100644
index 00000000..709e8822
--- /dev/null
+++ b/luxonis_train/attached_modules/metrics/ocr_accuracy.py
@@ -0,0 +1,71 @@
+import logging
+
+import torch
+from .base_metric import BaseMetric
+
+logger = logging.getLogger(__name__)
+
+
+class OCRAccuracy(BaseMetric):
+    """Sequence accuracy of greedily decoded CTC predictions.
+
+    Reports C{acc_k} for C{k} in 0..2: the fraction of samples whose decoded
+    sequence differs from the target in exactly C{k} positions.
+    """
+
+    def __init__(self, blank_cls: int = 0, **kwargs):
+        """
+        @type blank_cls: int
+        @param blank_cls: Index of the CTC blank class, removed while decoding.
+        @param kwargs: C{node}, C{protocol} and C{required_labels} are
+            forwarded to L{BaseMetric}; other keys are ignored.
+        """
+        super().__init__(
+            node=kwargs.pop("node", None),
+            protocol=kwargs.pop("protocol", None),
+            required_labels=kwargs.pop("required_labels", None),
+        )
+        self.blank_cls = blank_cls
+        self._init_metric()
+
+    def _init_metric(self):
+        """Reset the running counters."""
+        # NOTE(review): plain attributes, so presumably not synchronized across
+        # processes - confirm whether distributed evaluation is needed.
+        self.running_metric = {"acc_0": 0, "acc_1": 0, "acc_2": 0}
+        self.n = 0
+
+    def update(self, preds, target, *args, **kwargs):
+        """Accumulate error counts for one batch.
+
+        @param preds: Class scores laid out as (batch, class, step).
+        @param target: Triple whose first element holds the target class
+            indices; assumed to be a (batch, length) tensor padded with 0 -
+            TODO confirm against the loader.
+        """
+        batch_size, _, n_steps = preds.shape
+        target, _, _ = target
+        # Softmax is monotonic, so argmax over the raw scores is sufficient.
+        pred_classes = preds.argmax(dim=1)  # batch, step
+
+        width = max(n_steps, target.shape[1])
+        decoded = pred_classes.new_zeros((batch_size, width))
+        for idx, steps in enumerate(pred_classes):
+            # Greedy CTC decoding per sample: merge repeats, then drop blanks.
+            collapsed = torch.unique_consecutive(steps)
+            collapsed = collapsed[collapsed != self.blank_cls]
+            decoded[idx, : len(collapsed)] = collapsed
+        if target.shape[1] < width:
+            target = torch.nn.functional.pad(target, (0, width - target.shape[1]))
+
+        errors = (decoded != target).sum(dim=1)
+        for acc_at in range(3):
+            self.running_metric[f"acc_{acc_at}"] += (errors == acc_at).sum().item()
+        self.n += batch_size
+
+    def compute(self):
+        """Return C{(acc_0, all accuracies)} and reset the counters."""
+        total = max(self.n, 1)  # avoid ZeroDivisionError when nothing was seen
+        result = {name: count / total for name, count in self.running_metric.items()}
+        self._init_metric()
+        return result["acc_0"], result
diff --git a/luxonis_train/nodes/__init__.py b/luxonis_train/nodes/__init__.py
index 9a506c1f..50cc2afa 100644
--- a/luxonis_train/nodes/__init__.py
+++ b/luxonis_train/nodes/__init__.py
@@ -14,6 +14,7 @@
 from .resnet import ResNet
 from .rexnetv1 import ReXNetV1_lite
 from .segmentation_head import SegmentationHead
+from .ocr_decoder import OCRDecoderHead, OCRDecoderBackbone
 
 __all__ = [
     "BiSeNetHead",
@@ -32,4 +33,6 @@
     "RepVGG",
     "ResNet",
     "SegmentationHead",
+    "OCRDecoderHead",
+    "OCRDecoderBackbone"
 ]
diff --git a/luxonis_train/nodes/ocr_decoder.py b/luxonis_train/nodes/ocr_decoder.py
new file mode 100644
index 00000000..b4da6b8b
--- /dev/null
+++ b/luxonis_train/nodes/ocr_decoder.py
@@ -0,0 +1,223 @@
+"""OCR decoder backbone and head, adapted from LPRNet.
+
+Source: U{https://github.com/hailo-ai/LPRNet_Pytorch/blob/master/model/LPRNet.py}
+@license: U{Apache License 2.0<https://github.com/hailo-ai/LPRNet_Pytorch?tab=Apache-2.0-1-ov-file#readme>}
+"""
+from typing import Literal
+
+import torch
+import torch.nn as nn
+import torchvision
+from torch import Tensor
+
+from .base_node import BaseNode
+from luxonis_train.utils.types import LabelType
+
+
+class ResBlock(nn.Module):
+    """Residual block: conv-BN-ReLU-conv-BN plus a shortcut, followed by ReLU."""
+
+    def __init__(self, ch_in, ch_out, stride=1, ks=3, downsample=None, padding=1):
+        """
+        @param ch_in: Number of input channels.
+        @param ch_out: Number of output channels.
+        @param stride: Stride of the first convolution.
+        @param ks: Kernel size of both convolutions.
+        @param downsample: Optional module applied to the input before it is
+            added to the convolutional branch, so that both shapes match.
+        @param padding: Padding of both convolutions.
+        """
+        super().__init__()
+        self.downsample = downsample
+        self.block = nn.Sequential(
+            nn.Conv2d(ch_in, ch_out, kernel_size=ks, stride=stride, padding=padding),
+            nn.BatchNorm2d(ch_out),
+            nn.ReLU(),
+            nn.Conv2d(ch_out, ch_out, kernel_size=ks, stride=1, padding=padding),
+            nn.BatchNorm2d(ch_out),
+        )
+        self.act = nn.ReLU()
+
+    def forward(self, x):
+        out = self.block(x)
+        shortcut = x if self.downsample is None else self.downsample(x)
+        out += shortcut
+        return self.act(out)
+
+
+class DownSample(nn.Module):
+    """Single convolution, used here as the shortcut projection of L{ResBlock}."""
+
+    def __init__(self, ch_in, ch_out, kernel_size=3, stride=1, padding=0):
+        super().__init__()
+        self.block = nn.Sequential(
+            nn.Conv2d(
+                ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding
+            )
+        )
+
+    def forward(self, x):
+        return self.block(x)
+
+
+class OCRDecoderBackbone(BaseNode):
+    """Convolutional backbone returning four feature maps, each downsampled 16x.
+
+    The first three maps have 256 channels, the last one C{num_characters}.
+    """
+
+    def __init__(
+        self,
+        num_characters: int = 37,
+        in_channels: int = 3,
+        dropout_rate: float = 0.5,
+        **kwargs,
+    ):
+        """
+        @type num_characters: int
+        @param num_characters: Number of output channels of the last stage.
+        @type in_channels: int
+        @param in_channels: Number of channels of the input image.
+        @type dropout_rate: float
+        @param dropout_rate: Dropout probability of the last stage; dropout
+            layers are only created when it is positive.
+        @param kwargs: Forwarded to L{BaseNode}.
+        """
+        super().__init__(**kwargs, _task_type=LabelType.TEXT)
+        self.num_characters = num_characters
+        self.dropout_rate = dropout_rate
+
+        # Overall stride 2.
+        self.stage1 = nn.Sequential(
+            nn.Conv2d(in_channels, 64, kernel_size=7, stride=1, padding=3),
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            ResBlock(ch_in=64, ch_out=64, padding=1),
+            ResBlock(
+                ch_in=64,
+                ch_out=128,
+                padding=1,
+                downsample=DownSample(64, 128, kernel_size=1, stride=1),
+            ),
+            ResBlock(
+                ch_in=128,
+                ch_out=128,
+                stride=2,
+                padding=1,
+                downsample=DownSample(128, 128, kernel_size=1, stride=2),
+            ),
+            ResBlock(
+                ch_in=128,
+                ch_out=256,
+                padding=1,
+                downsample=DownSample(128, 256, kernel_size=1, stride=1),
+            ),
+        )
+
+        # Brings the stage1 output from stride 2 to stride 16.
+        self.downsample1 = nn.Sequential(
+            nn.Conv2d(256, 256, kernel_size=1, stride=2),
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            nn.Conv2d(256, 256, kernel_size=1, stride=2),
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            nn.Conv2d(256, 256, kernel_size=1, stride=2),
+            nn.BatchNorm2d(256),
+        )
+
+        # Overall stride 4.
+        self.stage2 = nn.Sequential(
+            ResBlock(
+                ch_in=256,
+                ch_out=256,
+                stride=2,
+                padding=1,
+                downsample=DownSample(256, 256, kernel_size=1, stride=2),
+            ),
+            ResBlock(ch_in=256, ch_out=256, padding=1),
+        )
+
+        # Brings the stage2 output from stride 4 to stride 16.
+        self.downsample2 = nn.Sequential(
+            nn.Conv2d(256, 256, kernel_size=1, stride=2),
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            nn.Conv2d(256, 256, kernel_size=1, stride=2),
+            nn.BatchNorm2d(256),
+        )
+
+        # Overall stride 16.
+        self.stage3 = nn.Sequential(
+            ResBlock(
+                ch_in=256,
+                ch_out=256,
+                stride=2,
+                padding=1,
+                downsample=DownSample(256, 256, kernel_size=1, stride=2),
+            ),
+            ResBlock(
+                ch_in=256,
+                ch_out=256,
+                stride=2,
+                padding=1,
+                downsample=DownSample(256, 256, kernel_size=1, stride=2),
+            ),
+        )
+
+        def dropout() -> list[nn.Module]:
+            # Empty for a non-positive rate, so no dropout layer is registered.
+            return [nn.Dropout(dropout_rate)] if dropout_rate > 0 else []
+
+        # Stride-1 convolutions with "same" padding: spatial size is unchanged.
+        self.stage4 = nn.Sequential(
+            *dropout(),
+            nn.Conv2d(256, 256, kernel_size=(1, 5), stride=1, padding=(0, 2)),
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            *dropout(),
+            nn.Conv2d(
+                256, num_characters, kernel_size=(5, 1), stride=1, padding=(2, 0)
+            ),
+            nn.BatchNorm2d(num_characters),
+            nn.ReLU(),
+        )
+
+    def forward(self, inputs: Tensor) -> list[Tensor]:
+        stage1 = self.stage1(inputs)
+        stage2 = self.stage2(stage1)
+        stage3 = self.stage3(stage2)
+        stage4 = self.stage4(stage3)
+
+        skip1 = self.downsample1(stage1)
+        skip2 = self.downsample2(stage2)
+        return [skip1, skip2, stage3, stage4]
+
+
+class OCRDecoderHead(BaseNode):
+    """Fuses the backbone feature maps into per-step character scores."""
+
+    def __init__(self, num_characters: int = 37, **kwargs):
+        """
+        @type num_characters: int
+        @param num_characters: Number of character classes to predict.
+        @param kwargs: Forwarded to L{BaseNode}.
+        """
+        super().__init__(**kwargs, _task_type=LabelType.TEXT)
+
+        self.num_characters = num_characters
+        self.container = nn.Sequential(
+            nn.Conv2d(
+                # 768 = three 256-channel maps; the fourth has num_characters.
+                in_channels=768 + self.num_characters,
+                out_channels=self.num_characters,
+                kernel_size=(1, 1),
+                stride=(1, 1),
+            )
+        )
+
+    def forward(self, inputs: list[Tensor]) -> Tensor:
+        """Concatenate the maps along channels, project, and average over height."""
+        features = torch.cat(inputs, dim=1)
+        logits = self.container(features)
+        return torch.mean(logits, dim=2)  # B, Classes, Sequence

From b716f50a5e3a1b1fb952c1f47a9a42910fc52f8e Mon Sep 17 00:00:00 2001
From: GitHub Actions <actions@github.com>
Date: Mon, 10 Jun 2024 14:46:45 +0000
Subject: [PATCH 28/28] [Automated] Updated coverage badge

---
 media/coverage_badge.svg | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg
index 90299371..6c15cace 100644
--- a/media/coverage_badge.svg
+++ b/media/coverage_badge.svg
@@ -15,7 +15,7 @@
     <g fill="#fff" text-anchor="middle" font-family="DejaVu Sans,Verdana,Geneva,sans-serif" font-size="11">
         <text x="31.5" y="15" fill="#010101" fill-opacity=".3">coverage</text>
         <text x="31.5" y="14">coverage</text>
-        <text x="80" y="15" fill="#010101" fill-opacity=".3">76%</text>
-        <text x="80" y="14">76%</text>
+        <text x="80" y="15" fill="#010101" fill-opacity=".3">75%</text>
+        <text x="80" y="14">75%</text>
     </g>
 </svg>