diff --git a/recipes/objection_detection/cfg/data/coco.yaml b/recipes/objection_detection/cfg/data/coco.yaml
new file mode 100644
index 0000000..5578693
--- /dev/null
+++ b/recipes/objection_detection/cfg/data/coco.yaml
@@ -0,0 +1,151 @@
+########
+# Data configuration file for COCO training.
+# Based on the ultralytics data conf.
+#
+# Adapted by:
+# - Matteo Beltrami, 2023
+# - Francesco Paissan, 2023
+########
+task: detect  # (str) YOLO task, i.e. detect, segment, classify, pose
+mode: train  # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
+
+# Train settings -------------------------------------------------------------------------------------------------------
+imgsz: 640  # (int | list) input image size as int for train and val modes, or list[w,h] for predict and export modes
+rect: False  # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
+cache: False  # (bool) True/ram, disk or False. Use cache for data loading
+single_cls: False  # (bool) train multi-class data as single-class
+fraction: 1.0  # (float) dataset fraction to train on (default is 1.0, all images in train set)
+
+# Segmentation
+overlap_mask: True  # (bool) masks should overlap during training (segment train only)
+mask_ratio: 4  # (int) mask downsample ratio (segment train only)
+
+# Prediction settings --------------------------------------------------------------------------------------------------
+classes:  # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
+
+# Hyperparameters ------------------------------------------------------------------------------------------------------
+box: 7.5  # (float) box loss gain
+cls: 0.5  # (float) cls loss gain (scale with pixels)
+dfl: 1.5  # (float) dfl loss gain
+
+hsv_h: 0.015  # (float) image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # (float) image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # (float) image HSV-Value augmentation (fraction)
+degrees: 0.0  # (float) image rotation (+/- deg)
+translate: 0.1  # (float) image translation (+/- fraction)
+scale: 0.5  # (float) image scale (+/- gain)
+shear: 0.0  # (float) image shear (+/- deg)
+perspective: 0.0  # (float) image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # (float) image flip up-down (probability)
+fliplr: 0.5  # (float) image flip left-right (probability)
+mosaic: 1.0  # (float) image mosaic (probability)
+mixup: 0.0  # (float) image mixup (probability)
+copy_paste: 0.0  # (float) segment copy-paste (probability)
+
+
+# Dataset location
+path: /mnt/data/coco  # dataset root dir
+train: train2017.txt  # train images (relative to 'path') 118287 images
+val: val2017.txt  # val images (relative to 'path') 5000 images
+test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: airplane
+  5: bus
+  6: train
+  7: truck
+  8: boat
+  9: traffic light
+  10: fire hydrant
+  11: stop sign
+  12: parking meter
+  13: bench
+  14: bird
+  15: cat
+  16: dog
+  17: horse
+  18: sheep
+  19: cow
+  20: elephant
+  21: bear
+  22: zebra
+  23: giraffe
+  24: backpack
+  25: umbrella
+  26: handbag
+  27: tie
+  28: suitcase
+  29: frisbee
+  30: skis
+  31: snowboard
+  32: sports ball
+  33: kite
+  34: baseball bat
+  35: baseball glove
+  36: skateboard
+  37: surfboard
+  38: tennis racket
+  39: bottle
+  40: wine glass
+  41: cup
+  42: fork
+  43: knife
+  44: spoon
+  45: bowl
+  46: banana
+  47: apple
+  48: sandwich
+  49: orange
+  50: broccoli
+  51: carrot
+  52: hot dog
+  53: pizza
+  54: donut
+  55: cake
+  56: chair
+  57: couch
+  58: potted plant
+  59: bed
+  60: dining table
+  61: toilet
+  62: tv
+  63: laptop
+  64: mouse
+  65: remote
+  66: keyboard
+  67: cell phone
+  68: microwave
+  69: oven
+  70: toaster
+  71: sink
+  72: refrigerator
+  73: book
+  74: clock
+  75: vase
+  76: scissors
+  77: teddy bear
+  78: hair drier
+  79: toothbrush
+
+
+# Download script/URL (optional)
+download: |
+  from ultralytics.utils.downloads import download
+  from pathlib import Path
+
+  # Download labels
+  segments = True  # segment or box labels
+  dir = Path(data_cfg['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
+  download(urls, dir=dir.parent)
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)
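
The download key above holds plain Python, not a URL: prepare_data.py (introduced later in this diff) executes it with exec() when the directory in path does not exist, with data_cfg already in scope. A minimal sketch of that mechanism, assuming the YAML is parsed with PyYAML (the recipe itself goes through micromind.utils.yolo.load_config):

    import os
    import yaml  # PyYAML

    with open("cfg/data/coco.yaml") as f:
        data_cfg = yaml.safe_load(f)

    # Same guard used in prepare_data.create_loaders: only run the snippet when
    # the dataset root is missing; the snippet itself reads data_cfg['path'].
    if "download" in data_cfg and not os.path.exists(data_cfg["path"]):
        exec(data_cfg["download"])
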
diff --git a/recipes/objection_detection/cfg/data/coco8.yaml b/recipes/objection_detection/cfg/data/coco8.yaml
new file mode 100644
index 0000000..6492788
--- /dev/null
+++ b/recipes/objection_detection/cfg/data/coco8.yaml
@@ -0,0 +1,144 @@
+########
+# Data configuration file for COCO8 training.
+# Based on the ultralytics data conf.
+#
+# Adapted by:
+# - Matteo Beltrami, 2023
+# - Francesco Paissan, 2023
+########
+task: detect  # (str) YOLO task, i.e. detect, segment, classify, pose
+mode: train  # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
+
+# Train settings -------------------------------------------------------------------------------------------------------
+imgsz: 640  # (int | list) input image size as int for train and val modes, or list[w,h] for predict and export modes
+rect: False  # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
+cache: False  # (bool) True/ram, disk or False. Use cache for data loading
+single_cls: False  # (bool) train multi-class data as single-class
+fraction: 1.0  # (float) dataset fraction to train on (default is 1.0, all images in train set)
+
+# Segmentation
+overlap_mask: True  # (bool) masks should overlap during training (segment train only)
+mask_ratio: 4  # (int) mask downsample ratio (segment train only)
+
+# Prediction settings --------------------------------------------------------------------------------------------------
+classes:  # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
+
+# Hyperparameters ------------------------------------------------------------------------------------------------------
+box: 7.5  # (float) box loss gain
+cls: 0.5  # (float) cls loss gain (scale with pixels)
+dfl: 1.5  # (float) dfl loss gain
+
+hsv_h: 0.015  # (float) image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # (float) image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # (float) image HSV-Value augmentation (fraction)
+degrees: 0.0  # (float) image rotation (+/- deg)
+translate: 0.1  # (float) image translation (+/- fraction)
+scale: 0.5  # (float) image scale (+/- gain)
+shear: 0.0  # (float) image shear (+/- deg)
+perspective: 0.0  # (float) image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # (float) image flip up-down (probability)
+fliplr: 0.5  # (float) image flip left-right (probability)
+mosaic: 1.0  # (float) image mosaic (probability)
+mixup: 0.0  # (float) image mixup (probability)
+copy_paste: 0.0  # (float) segment copy-paste (probability)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: /mnt/data/coco8  # dataset root dir
+train: images/train  # train images (relative to 'path') 4 images
+val: images/val  # val images (relative to 'path') 4 images
+test:  # test images (optional)
+
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: airplane
+  5: bus
+  6: train
+  7: truck
+  8: boat
+  9: traffic light
+  10: fire hydrant
+  11: stop sign
+  12: parking meter
+  13: bench
+  14: bird
+  15: cat
+  16: dog
+  17: horse
+  18: sheep
+  19: cow
+  20: elephant
+  21: bear
+  22: zebra
+  23: giraffe
+  24: backpack
+  25: umbrella
+  26: handbag
+  27: tie
+  28: suitcase
+  29: frisbee
+  30: skis
+  31: snowboard
+  32: sports ball
+  33: kite
+  34: baseball bat
+  35: baseball glove
+  36: skateboard
+  37: surfboard
+  38: tennis racket
+  39: bottle
+  40: wine glass
+  41: cup
+  42: fork
+  43: knife
+  44: spoon
+  45: bowl
+  46: banana
+  47: apple
+  48: sandwich
+  49: orange
+  50: broccoli
+  51: carrot
+  52: hot dog
+  53: pizza
+  54: donut
+  55: cake
+  56: chair
+  57: couch
+  58: potted plant
+  59: bed
+  60: dining table
+  61: toilet
+  62: tv
+  63: laptop
+  64: mouse
+  65: remote
+  66: keyboard
+  67: cell phone
+  68: microwave
+  69: oven
+  70: toaster
+  71: sink
+  72: refrigerator
+  73: book
+  74: clock
+  75: vase
+  76: scissors
+  77: teddy bear
+  78: hair drier
+  79: toothbrush
+
+# Download script/URL (optional)
+download: |
+  from pathlib import Path
+  import zipfile
+  import os
+  data_cfg['path'] = Path(data_cfg['path'])
+  os.makedirs(data_cfg["path"], exist_ok=True)
+  os.system(f"wget https://ultralytics.com/assets/coco8.zip -O {os.path.join(data_cfg['path'], 'coco8.zip')}")
+  with zipfile.ZipFile(os.path.join(data_cfg['path'], 'coco8.zip'), 'r') as zip_ref:
+      zip_ref.extractall(data_cfg['path'].parent)
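
COCO8 is the 8-image smoke-test split (4 train, 4 val) that ultralytics distributes for quick pipeline checks. A small sanity check one might run after the download snippet above has populated /mnt/data/coco8; the images/ and labels/ layout assumed here is the standard ultralytics one:

    from pathlib import Path

    root = Path("/mnt/data/coco8")  # must match 'path' above
    for split in ("train", "val"):
        imgs = sorted((root / "images" / split).glob("*.jpg"))
        lbls = sorted((root / "labels" / split).glob("*.txt"))
        # each split should report 4 images, each with a YOLO-format label file
        print(f"{split}: {len(imgs)} images, {len(lbls)} label files")
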
diff --git a/recipes/objection_detection/cfg/yolo_phinet.py b/recipes/objection_detection/cfg/yolo_phinet.py
new file mode 100644
index 0000000..4d7e5c8
--- /dev/null
+++ b/recipes/objection_detection/cfg/yolo_phinet.py
@@ -0,0 +1,20 @@
+"""
+YOLOPhiNet training configuration.
+
+Authors:
+    - Matteo Beltrami, 2023
+    - Francesco Paissan, 2023
+"""
+# Data configuration
+batch_size = 8
+data_cfg = "cfg/data/coco.yaml"
+
+# Model configuration
+input_shape = (3, 672, 672)
+alpha = 3
+num_layers = 7
+beta = 0.75
+t_zero = 6
+divisor = 8
+downsampling_layers = [5, 7]
+return_layers = [4, 6, 7]
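
Note that train.py (later in this diff) also reads hparams.model_path for the ImageNet-pretrained PhiNet checkpoint, as well as hparams.output_folder, hparams.experiment_name and hparams.debug. micromind's parse_configuration may supply defaults for the last three, but model_path is not defined above. A hedged sketch of the extra entries one would append to this file; the names come from train.py, the values are placeholders only:

    model_path = "phinet_imagenet.ckpt"  # assumed filename of the pretrained backbone weights
    output_folder = "results"            # root folder for experiment checkpoints and logs
    experiment_name = "yolo_phinet"      # run name inside output_folder
    debug = False                        # forwarded to MicroMind.train()
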
diff --git a/recipes/objection_detection/prepare_data.py b/recipes/objection_detection/prepare_data.py
new file mode 100644
index 0000000..bb3b570
--- /dev/null
+++ b/recipes/objection_detection/prepare_data.py
@@ -0,0 +1,88 @@
+"""
+Data preparation script for YOLO training. Parses ultralytics YAML files
+and, if needed, downloads the corresponding data to disk.
+
+Authors:
+    - Matteo Beltrami, 2023
+    - Francesco Paissan, 2023
+"""
+from typing import Dict
+import os
+
+from torch.utils.data import DataLoader, ConcatDataset
+from ultralytics.data import build_yolo_dataset
+
+
+def create_loaders(m_cfg: Dict, data_cfg: Dict, batch_size: int):
+    """Creates DataLoaders for the dataset specified in the configuration file.
+    Refer to ... for how to select the proper configuration.
+
+    Arguments
+    ---------
+    m_cfg : Dict
+        Contains information about the training process (e.g., data augmentation).
+    data_cfg : Dict
+        Contains details about the data configurations (e.g., image size, etc.).
+    batch_size : int
+        Batch size for the training process.
+
+    """
+    if "download" in data_cfg and not os.path.exists(data_cfg["path"]):
+        # download data if it's not there
+        exec(data_cfg["download"])
+
+    mode = "train"
+    if isinstance(data_cfg["train"], list):
+        train_set = []
+        for p in data_cfg["train"]:
+            train_set.append(
+                build_yolo_dataset(
+                    m_cfg,
+                    p,
+                    batch_size,
+                    data_cfg,
+                    mode=mode,
+                    rect=mode == "val",
+                )
+            )
+        train_set = ConcatDataset(train_set)
+    else:
+        train_set = build_yolo_dataset(
+            m_cfg,
+            data_cfg["train"],
+            batch_size,
+            data_cfg,
+            mode=mode,
+            rect=mode == "val",
+        )
+
+    train_loader = DataLoader(
+        train_set,
+        batch_size,
+        shuffle=True,
+        num_workers=16,
+        persistent_workers=True,
+        pin_memory=True,
+        collate_fn=getattr(train_set, "collate_fn", None),
+    )
+
+    mode = "val"
+    val_set = build_yolo_dataset(
+        m_cfg,
+        data_cfg["val"],
+        batch_size,
+        data_cfg,
+        mode=mode,
+        rect=mode == "val",
+    )
+
+    val_loader = DataLoader(
+        val_set,
+        batch_size,
+        shuffle=False,
+        num_workers=16,
+        persistent_workers=True,
+        pin_memory=True,
+        collate_fn=getattr(val_set, "collate_fn", None),
+    )
+
+    return train_loader, val_loader
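
A standalone usage sketch for create_loaders, mirroring the call made in train.py; the printed shape depends on imgsz and batch size and is only indicative:

    from micromind.utils.yolo import load_config
    from prepare_data import create_loaders

    # load_config returns the augmentation/hyperparameter namespace (m_cfg)
    # and the dataset dictionary (data_cfg) parsed from the YAML file.
    m_cfg, data_cfg = load_config("cfg/data/coco8.yaml")
    train_loader, val_loader = create_loaders(m_cfg, data_cfg, batch_size=8)

    batch = next(iter(train_loader))
    print(batch["img"].shape)  # e.g. torch.Size([8, 3, 640, 640])
    print(batch.keys())        # ultralytics collate format: img, cls, bboxes, batch_idx, ...
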
diff --git a/recipes/objection_detection/train.py b/recipes/objection_detection/train.py
new file mode 100644
index 0000000..51cc0c5
--- /dev/null
+++ b/recipes/objection_detection/train.py
@@ -0,0 +1,194 @@
+"""
+YOLO training.
+
+This code allows you to train an object detection model with the YOLOv8 neck and loss.
+
+To run this script, you can start it with:
+    python train.py cfg/yolo_phinet.py
+
+Authors:
+    - Matteo Beltrami, 2023
+    - Francesco Paissan, 2023
+"""
+
+import torch
+from prepare_data import create_loaders
+from torchinfo import summary
+from ultralytics.utils.ops import scale_boxes, xywh2xyxy
+from yolo_loss import Loss
+
+import micromind as mm
+from micromind.networks import PhiNet
+from micromind.networks.yolo import SPPF, DetectionHead, Yolov8Neck
+from micromind.utils import parse_configuration
+from micromind.utils.yolo import (
+    load_config,
+    mean_average_precision,
+    postprocess,
+)
+import sys
+
+
+class YOLO(mm.MicroMind):
+    def __init__(self, m_cfg, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.modules["phinet"] = PhiNet(
+            input_shape=hparams.input_shape,
+            alpha=hparams.alpha,
+            num_layers=hparams.num_layers,
+            beta=hparams.beta,
+            t_zero=hparams.t_zero,
+            include_top=False,
+            compatibility=False,
+            divisor=hparams.divisor,
+            downsampling_layers=hparams.downsampling_layers,
+            return_layers=hparams.return_layers,
+        )
+
+        # load ImageNet checkpoint
+        self.modules["phinet"].load_state_dict(
+            torch.load(hparams.model_path), strict=False
+        )
+
+        sppf_ch, neck_filters, up, head_filters = self.get_parameters()
+
+        self.modules["sppf"] = SPPF(*sppf_ch)
+        self.modules["neck"] = Yolov8Neck(filters=neck_filters, up=up)
+        self.modules["head"] = DetectionHead(filters=head_filters)
+
+        tot_params = 0
+        for m in self.modules.values():
+            temp = summary(m, verbose=0)
+            tot_params += temp.total_params
+
+        print(f"Total parameters of model: {tot_params * 1e-6:.2f} M")
+
+        self.m_cfg = m_cfg
+
+    def get_parameters(self):
+        """
+        Gets the parameters with which to initialize the network detection part
+        (SPPF block, Yolov8Neck, DetectionHead).
+        """
+        in_shape = self.modules["phinet"].input_shape
+        x = torch.randn(1, *in_shape)
+        y = self.modules["phinet"](x)
+
+        c1 = c2 = y[1][2].shape[1]
+        sppf = SPPF(c1, c2)
+        out_sppf = sppf(y[1][2])
+
+        neck_filters = [y[1][0].shape[1], y[1][1].shape[1], out_sppf.shape[1]]
+        up = [2, 2]
+        up[0] = y[1][1].shape[2] / out_sppf.shape[2]
+        up[1] = y[1][0].shape[2] / (up[0] * out_sppf.shape[2])
+        temp = """The layers you selected are not valid. \
+            Please choose only layers between which the spatial resolution \
+            doubles every time. If needed, you can achieve this by \
+            changing the downsampling layers."""
+
+        assert up == [2, 2], " ".join(temp.split())
+
+        neck = Yolov8Neck(filters=neck_filters, up=up)
+        out_neck = neck(y[1][0], y[1][1], out_sppf)
+
+        head_filters = (
+            out_neck[0].shape[1],
+            out_neck[1].shape[1],
+            out_neck[2].shape[1],
+        )
+        # head = DetectionHead(filters=head_filters)
+
+        return (c1, c2), neck_filters, up, head_filters
+
+    def preprocess_batch(self, batch):
+        """Preprocesses a batch of images by scaling and converting to float."""
+        preprocessed_batch = {}
+        preprocessed_batch["img"] = (
+            batch["img"].to(self.device, non_blocking=True).float() / 255
+        )
+        for k in batch:
+            if isinstance(batch[k], torch.Tensor) and k != "img":
+                preprocessed_batch[k] = batch[k].to(self.device)
+
+        return preprocessed_batch
+
+    def forward(self, batch):
+        """Runs the detection pipeline: backbone, SPPF, neck, detection head."""
+        preprocessed_batch = self.preprocess_batch(batch)
+        backbone = self.modules["phinet"](preprocessed_batch["img"].to(self.device))[1]
+        backbone[-1] = self.modules["sppf"](backbone[-1])
+        neck = self.modules["neck"](*backbone)
+        head = self.modules["head"](neck)
+
+        return head
+
+    def compute_loss(self, pred, batch):
+        """Computes the YOLOv8 loss (box, cls, dfl) on a batch of predictions."""
+        self.criterion = Loss(self.m_cfg, self.modules["head"], self.device)
+        preprocessed_batch = self.preprocess_batch(batch)
+
+        lossi_sum, lossi = self.criterion(
+            pred[1],
+            preprocessed_batch,
+        )
+
+        return lossi_sum
+
+    def configure_optimizers(self):
+        """Configures the SGD optimizer and the cosine annealing LR scheduler."""
+        opt = torch.optim.SGD(self.modules.parameters(), lr=1e-2, weight_decay=0.0005)
+        sched = torch.optim.lr_scheduler.CosineAnnealingLR(
+            opt, T_max=14000, eta_min=1e-3
+        )
+        return opt, sched
+
+    @torch.no_grad()
+    def mAP(self, pred, batch):
+        """Computes the mean average precision (mAP) of the predictions on a batch."""
+        preprocessed_batch = self.preprocess_batch(batch)
+        post_predictions = postprocess(
+            preds=pred[0], img=preprocessed_batch, orig_imgs=batch
+        )
+
+        batch_bboxes_xyxy = xywh2xyxy(batch["bboxes"])
+        dim = batch["resized_shape"][0][0]
+        batch_bboxes_xyxy[:, :4] *= dim
+
+        batch_bboxes = []
+        for i in range(len(batch["batch_idx"])):
+            for b in range(len(batch_bboxes_xyxy[batch["batch_idx"] == i, :])):
+                batch_bboxes.append(
+                    scale_boxes(
+                        batch["resized_shape"][i],
+                        batch_bboxes_xyxy[batch["batch_idx"] == i, :][b],
+                        batch["ori_shape"][i],
+                    )
+                )
+        batch_bboxes = torch.stack(batch_bboxes)
+        mmAP = mean_average_precision(post_predictions, batch, batch_bboxes)
+
+        return torch.Tensor([mmAP])
+
+
+if __name__ == "__main__":
+    assert len(sys.argv) > 1, "Please pass the configuration file to the script."
+    hparams = parse_configuration(sys.argv[1])
+
+    m_cfg, data_cfg = load_config(hparams.data_cfg)
+    train_loader, val_loader = create_loaders(m_cfg, data_cfg, hparams.batch_size)
+
+    exp_folder = mm.utils.checkpointer.create_experiment_folder(
+        hparams.output_folder, hparams.experiment_name
+    )
+
+    checkpointer = mm.utils.checkpointer.Checkpointer(exp_folder, key="loss")
+
+    yolo_mind = YOLO(m_cfg, hparams=hparams)
+
+    mAP = mm.Metric("mAP", yolo_mind.mAP, eval_only=True, eval_period=1)
+
+    yolo_mind.train(
+        epochs=200,
+        datasets={"train": train_loader, "val": val_loader},
+        metrics=[mAP],
+        checkpointer=checkpointer,
+        debug=hparams.debug,
+    )
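
The assertion in get_parameters is easiest to check outside the training loop. Below is a shape sanity check mirroring that method, built with the values from cfg/yolo_phinet.py; the exact channel counts depend on micromind's PhiNet implementation and are only printed here, not asserted:

    import torch
    from micromind.networks import PhiNet
    from micromind.networks.yolo import SPPF, Yolov8Neck, DetectionHead

    phinet = PhiNet(
        input_shape=(3, 672, 672), alpha=3, num_layers=7, beta=0.75, t_zero=6,
        include_top=False, compatibility=False, divisor=8,
        downsampling_layers=[5, 7], return_layers=[4, 6, 7],
    )
    feats = phinet(torch.randn(1, 3, 672, 672))[1]  # the three returned feature maps
    print([tuple(f.shape) for f in feats])          # spatial size should halve at each step

    sppf_out = SPPF(feats[2].shape[1], feats[2].shape[1])(feats[2])
    neck_out = Yolov8Neck(
        filters=[feats[0].shape[1], feats[1].shape[1], sppf_out.shape[1]], up=[2, 2]
    )(feats[0], feats[1], sppf_out)
    head = DetectionHead(filters=tuple(o.shape[1] for o in neck_out))
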
diff --git a/recipes/objection_detection/yolo_loss.py b/recipes/objection_detection/yolo_loss.py
new file mode 100644
index 0000000..29650a6
--- /dev/null
+++ b/recipes/objection_detection/yolo_loss.py
@@ -0,0 +1,137 @@
+"""
+Wrapper for the YOLO loss, from the ultralytics implementation.
+
+For a reference on the parameters, please refer to https://shorturl.at/gkrAO
+
+
+Authors:
+    - Matteo Beltrami, 2023
+    - Francesco Paissan, 2023
+"""
+import torch
+import torch.nn as nn
+from ultralytics.utils.loss import BboxLoss, v8DetectionLoss
+from ultralytics.utils.ops import xywh2xyxy
+from ultralytics.utils.tal import TaskAlignedAssigner, dist2bbox, make_anchors
+
+
+class Loss(v8DetectionLoss):
+    def __init__(self, h, m, device):  # model must be de-paralleled
+        self.bce = nn.BCEWithLogitsLoss(reduction="none")
+        self.hyp = h
+        self.stride = m.stride
+        self.nc = m.nc
+        self.no = m.no
+        self.reg_max = m.reg_max
+        self.device = device
+
+        self.use_dfl = m.reg_max > 1
+
+        self.assigner = TaskAlignedAssigner(
+            topk=10, num_classes=self.nc, alpha=0.5, beta=6.0
+        )
+        self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device)
+        self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)
+
+    def preprocess(self, targets, batch_size, scale_tensor):
+        """
+        Preprocesses the targets by grouping them per image and padding them into
+        a fixed-size tensor that matches the input batch size.
+        """
+        if targets.shape[0] == 0:
+            out = torch.zeros(batch_size, 0, 5, device=self.device)
+        else:
+            i = targets[:, 0]  # image index
+            _, counts = i.unique(return_counts=True)
+            counts = counts.to(dtype=torch.int32)
+            out = torch.zeros(batch_size, counts.max(), 5, device=self.device)
+            for j in range(batch_size):
+                matches = i == j
+                n = matches.sum()
+                if n:
+                    out[j, :n] = targets[matches, 1:]
+            out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))
+        return out
+
+    def bbox_decode(self, anchor_points, pred_dist):
+        """
+        Decode predicted object bounding box coordinates from anchor points and
+        distribution.
+        """
+        if self.use_dfl:
+            b, a, c = pred_dist.shape  # batch, anchors, channels
+            pred_dist = (
+                pred_dist.view(b, a, 4, c // 4)
+                .softmax(3)
+                .matmul(self.proj.type(pred_dist.dtype))
+            )
+        return dist2bbox(pred_dist, anchor_points, xywh=False)
+
+    def __call__(self, preds, batch):
+        """
+        Calculate the sum of the loss for box, cls and dfl multiplied by batch size.
+        """
+        loss = torch.zeros(3, device=self.device)  # box, cls, dfl
+        feats = preds[1] if isinstance(preds, tuple) else preds
+        pred_distri, pred_scores = torch.cat(
+            [xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2
+        ).split((self.reg_max * 4, self.nc), 1)
+
+        pred_scores = pred_scores.permute(0, 2, 1).contiguous()
+        pred_distri = pred_distri.permute(0, 2, 1).contiguous()
+
+        dtype = pred_scores.dtype
+        batch_size = pred_scores.shape[0]
+        imgsz = (
+            torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype)
+            * self.stride[0]
+        )  # image size (h,w)
+        anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)
+
+        # Targets
+        targets = torch.cat(
+            (batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]),
+            1,
+        )
+        targets = self.preprocess(
+            targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]]
+        )
+        gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxy
+        mask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)
+
+        # Pboxes
+        pred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)
+
+        _, target_bboxes, target_scores, fg_mask, _ = self.assigner(
+            pred_scores.detach().sigmoid(),
+            (pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),
+            anchor_points * stride_tensor,
+            gt_labels,
+            gt_bboxes,
+            mask_gt,
+        )
+
+        target_scores_sum = max(target_scores.sum(), 1)
+
+        # Cls loss
+        loss[1] = (
+            self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum
+        )  # BCE
+
+        # Bbox loss
+        if fg_mask.sum():
+            target_bboxes /= stride_tensor
+            loss[0], loss[2] = self.bbox_loss(
+                pred_distri,
+                pred_bboxes,
+                anchor_points,
+                target_bboxes,
+                target_scores,
+                target_scores_sum,
+                fg_mask,
+            )
+
+        loss[0] *= self.hyp.box  # box gain
+        loss[1] *= self.hyp.cls  # cls gain
+        loss[2] *= self.hyp.dfl  # dfl gain
+
+        return loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)
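
A worked example of the target scaling performed in preprocess and __call__ above: normalized xywh ground-truth boxes are multiplied by (w, h, w, h) of the network input and converted to xyxy, the format the TaskAlignedAssigner and BboxLoss expect. The numbers below are arbitrary and only illustrate the arithmetic:

    import torch
    from ultralytics.utils.ops import xywh2xyxy

    imgsz = torch.tensor([640.0, 640.0])               # (h, w), as computed from feats and stride
    scale_tensor = imgsz[[1, 0, 1, 0]]                 # (w, h, w, h)
    boxes_xywh = torch.tensor([[0.5, 0.5, 0.2, 0.3]])  # one normalized ground-truth box
    boxes_xyxy = xywh2xyxy(boxes_xywh * scale_tensor)
    print(boxes_xyxy)                                  # tensor([[256., 224., 384., 416.]])
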