Add object detection files to XiNet as well (#59)

* fix linters * fix credits * add coco training configuration files
micromind-toolkit · Nov 28, 2023 · 7460236 · 7460236
1 parent 93cdcc6
commit 7460236
Show file tree

Hide file tree

Showing 6 changed files with 734 additions and 0 deletions.
diff --git a/recipes/objection_detection/cfg/data/coco.yaml b/recipes/objection_detection/cfg/data/coco.yaml
@@ -0,0 +1,151 @@
+########
+# Data configuration file for COCO8 trainings.
+# Based on the ultralytics data conf.
+#
+# Adapted by:
+# - Matteo Beltrami, 2023
+# - Francesco Paissan, 2023
+########
+task: detect  # (str) YOLO task, i.e. detect, segment, classify, pose
+mode: train  # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
+
+# Train settings -------------------------------------------------------------------------------------------------------
+imgsz: 640  # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
+rect: False  # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
+cache: False  # (bool) True/ram, disk or False. Use cache for data loading
+single_cls: False  # (bool) train multi-class data as single-class
+fraction: 1.0  # (float) dataset fraction to train on (default is 1.0, all images in train set)
+
+# Segmentation
+overlap_mask: True  # (bool) masks should overlap during training (segment train only)
+mask_ratio: 4  # (int) mask downsample ratio (segment train only)
+
+# Prediction settings --------------------------------------------------------------------------------------------------
+classes:  # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
+
+# Hyperparameters ------------------------------------------------------------------------------------------------------
+box: 7.5  # (float) box loss gain
+cls: 0.5  # (float) cls loss gain (scale with pixels)
+dfl: 1.5  # (float) dfl loss gain
+
+hsv_h: 0.015  # (float) image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # (float) image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # (float) image HSV-Value augmentation (fraction)
+degrees: 0.0  # (float) image rotation (+/- deg)
+translate: 0.1  # (float) image translation (+/- fraction)
+scale: 0.5  # (float) image scale (+/- gain)
+shear: 0.0  # (float) image shear (+/- deg)
+perspective: 0.0  # (float) image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # (float) image flip up-down (probability)
+fliplr: 0.5  # (float) image flip left-right (probability)
+mosaic: 1.0  # (float) image mosaic (probability)
+mixup: 0.0  # (float) image mixup (probability)
+copy_paste: 0.0  # (float) segment copy-paste (probability)
+
+
+# Dataset location
+path: /mnt/data/coco  # dataset root dir
+train: train2017.txt  # train images (relative to 'path') 118287 images
+val: val2017.txt  # val images (relative to 'path') 5000 images
+test: test-dev2017.txt  # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
+
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: airplane
+  5: bus
+  6: train
+  7: truck
+  8: boat
+  9: traffic light
+  10: fire hydrant
+  11: stop sign
+  12: parking meter
+  13: bench
+  14: bird
+  15: cat
+  16: dog
+  17: horse
+  18: sheep
+  19: cow
+  20: elephant
+  21: bear
+  22: zebra
+  23: giraffe
+  24: backpack
+  25: umbrella
+  26: handbag
+  27: tie
+  28: suitcase
+  29: frisbee
+  30: skis
+  31: snowboard
+  32: sports ball
+  33: kite
+  34: baseball bat
+  35: baseball glove
+  36: skateboard
+  37: surfboard
+  38: tennis racket
+  39: bottle
+  40: wine glass
+  41: cup
+  42: fork
+  43: knife
+  44: spoon
+  45: bowl
+  46: banana
+  47: apple
+  48: sandwich
+  49: orange
+  50: broccoli
+  51: carrot
+  52: hot dog
+  53: pizza
+  54: donut
+  55: cake
+  56: chair
+  57: couch
+  58: potted plant
+  59: bed
+  60: dining table
+  61: toilet
+  62: tv
+  63: laptop
+  64: mouse
+  65: remote
+  66: keyboard
+  67: cell phone
+  68: microwave
+  69: oven
+  70: toaster
+  71: sink
+  72: refrigerator
+  73: book
+  74: clock
+  75: vase
+  76: scissors
+  77: teddy bear
+  78: hair drier
+  79: toothbrush
+
+
+# Download script/URL (optional)
+download: |
+  from ultralytics.utils.downloads import download
+  from pathlib import Path
+
+  # Download labels
+  segments = True  # segment or box labels
+  dir = Path(data_cfg['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
+  download(urls, dir=dir.parent)
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)
diff --git a/recipes/objection_detection/cfg/data/coco8.yaml b/recipes/objection_detection/cfg/data/coco8.yaml
@@ -0,0 +1,144 @@
+########
+# Data configuration file for COCO8 trainings.
+# Based on the ultralytics data conf.
+#
+# Adapted by:
+# - Matteo Beltrami, 2023
+# - Francesco Paissan, 2023
+########
+task: detect  # (str) YOLO task, i.e. detect, segment, classify, pose
+mode: train  # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
+
+# Train settings -------------------------------------------------------------------------------------------------------
+imgsz: 640  # (int | list) input images size as int for train and val modes, or list[w,h] for predict and export modes
+rect: False  # (bool) rectangular training if mode='train' or rectangular validation if mode='val'
+cache: False  # (bool) True/ram, disk or False. Use cache for data loading
+single_cls: False  # (bool) train multi-class data as single-class
+fraction: 1.0  # (float) dataset fraction to train on (default is 1.0, all images in train set)
+
+# Segmentation
+overlap_mask: True  # (bool) masks should overlap during training (segment train only)
+mask_ratio: 4  # (int) mask downsample ratio (segment train only)
+
+# Prediction settings --------------------------------------------------------------------------------------------------
+classes:  # (int | list[int], optional) filter results by class, i.e. classes=0, or classes=[0,2,3]
+
+# Hyperparameters ------------------------------------------------------------------------------------------------------
+box: 7.5  # (float) box loss gain
+cls: 0.5  # (float) cls loss gain (scale with pixels)
+dfl: 1.5  # (float) dfl loss gain
+
+hsv_h: 0.015  # (float) image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # (float) image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # (float) image HSV-Value augmentation (fraction)
+degrees: 0.0  # (float) image rotation (+/- deg)
+translate: 0.1  # (float) image translation (+/- fraction)
+scale: 0.5  # (float) image scale (+/- gain)
+shear: 0.0  # (float) image shear (+/- deg)
+perspective: 0.0  # (float) image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # (float) image flip up-down (probability)
+fliplr: 0.5  # (float) image flip left-right (probability)
+mosaic: 1.0  # (float) image mosaic (probability)
+mixup: 0.0  # (float) image mixup (probability)
+copy_paste: 0.0  # (float) segment copy-paste (probability)
+
+
+# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
+path: /mnt/data/coco8  # dataset root dir
+train: images/train  # train images (relative to 'path') 4 images
+val: images/val  # val images (relative to 'path') 4 images
+test:  # test images (optional)
+
+# Classes
+names:
+  0: person
+  1: bicycle
+  2: car
+  3: motorcycle
+  4: airplane
+  5: bus
+  6: train
+  7: truck
+  8: boat
+  9: traffic light
+  10: fire hydrant
+  11: stop sign
+  12: parking meter
+  13: bench
+  14: bird
+  15: cat
+  16: dog
+  17: horse
+  18: sheep
+  19: cow
+  20: elephant
+  21: bear
+  22: zebra
+  23: giraffe
+  24: backpack
+  25: umbrella
+  26: handbag
+  27: tie
+  28: suitcase
+  29: frisbee
+  30: skis
+  31: snowboard
+  32: sports ball
+  33: kite
+  34: baseball bat
+  35: baseball glove
+  36: skateboard
+  37: surfboard
+  38: tennis racket
+  39: bottle
+  40: wine glass
+  41: cup
+  42: fork
+  43: knife
+  44: spoon
+  45: bowl
+  46: banana
+  47: apple
+  48: sandwich
+  49: orange
+  50: broccoli
+  51: carrot
+  52: hot dog
+  53: pizza
+  54: donut
+  55: cake
+  56: chair
+  57: couch
+  58: potted plant
+  59: bed
+  60: dining table
+  61: toilet
+  62: tv
+  63: laptop
+  64: mouse
+  65: remote
+  66: keyboard
+  67: cell phone
+  68: microwave
+  69: oven
+  70: toaster
+  71: sink
+  72: refrigerator
+  73: book
+  74: clock
+  75: vase
+  76: scissors
+  77: teddy bear
+  78: hair drier
+  79: toothbrush
+
+# Download script/URL (optional)
+download: |
+    from pathlib import Path
+    import zipfile
+    import os
+    data_cfg['path'] = Path(data_cfg['path'])
+    os.makedirs(data_cfg["path"], exist_ok=True)
+    os.system(f"wget https://ultralytics.com/assets/coco8.zip -O {os.path.join(data_cfg['path'], 'coco8.zip')}")
+    with zipfile.ZipFile(os.path.join(data_cfg['path'], 'coco8.zip'), 'r') as zip_ref:
+        zip_ref.extractall(data_cfg['path'].parent)
diff --git a/recipes/objection_detection/cfg/yolo_phinet.py b/recipes/objection_detection/cfg/yolo_phinet.py
@@ -0,0 +1,20 @@
+"""
+YOLOPhiNet training configuration.
+
+Authors:
+    - Matteo Beltrami, 2023
+    - Francesco Paissan, 2023
+"""
+# Data configuration
+batch_size = 8
+data_cfg = "cfg/data/coco.yaml"
+
+# Model configuration
+input_shape = (3, 672, 672)
+alpha = 3
+num_layers = 7
+beta = 0.75
+t_zero = 6
+divisor = 8
+downsampling_layers = [5, 7]
+return_layers = [4, 6, 7]