add OCR Decoding support - WIP #39

Closed
wants to merge 28 commits
Changes from all commits (28 commits)
7c16e47
Resnet Variants (#9)
DrejcPesjak Feb 20, 2024
8e35f25
MLFlow Upload Fix (#10)
klemen1999 Feb 20, 2024
15bd923
CLI Source Option (#11)
kozlov721 Feb 21, 2024
2797278
Fix Removed Tensor Metadata (#12)
kozlov721 Feb 21, 2024
2c62a08
Forbid Extra Fields (#13)
kozlov721 Feb 22, 2024
0b51fa0
Automatic Inference of attach_index (#14)
kozlov721 Feb 24, 2024
bd67595
Backbone Fix (#15)
kozlov721 Feb 27, 2024
f42192c
Uploading logs to MLFlow (#16)
kozlov721 Feb 28, 2024
e1ab39b
Generate NN archive from training configs (#17)
jkbmrz Mar 20, 2024
b3b4e32
Extend NN Archive Generation Test Coverage (#18)
jkbmrz Mar 25, 2024
351e0c5
Upload All Checkpoints (#19)
kozlov721 Apr 11, 2024
9c4cadb
LuxonisML v0.1.0 (#20)
kozlov721 Apr 11, 2024
f425fdb
SIGTERM Handling (#21)
kozlov721 Apr 15, 2024
ca57063
Task Label Groups Support (#22)
kozlov721 Apr 24, 2024
d1d71f0
Tensor Core Float16 Precision (#24)
kozlov721 May 8, 2024
0830043
Metrics - Fixed Missing Reset (#25)
kozlov721 May 14, 2024
5a31f72
Deterministic Training Support (#23)
klemen1999 May 15, 2024
99b1857
Custom Loaders Support (#27)
kozlov721 May 21, 2024
b6b4688
enums handling (#31)
kozlov721 May 21, 2024
72afb72
GPUStatsMonitor (#29)
kozlov721 May 24, 2024
5893c3e
More Efficient Keypoint Export (#28)
JSabadin May 24, 2024
4110f78
Added active param to augmentations (#32)
klemen1999 May 24, 2024
36a92a6
Fix Archiver Pre-Processing (#34)
kozlov721 May 30, 2024
1d9998b
EfficientRep Variants (#33)
JSabadin May 31, 2024
c2e98b7
Support for LuxonisML - Annotation Refactor (#37)
kozlov721 Jun 6, 2024
abe7d3d
Changed Imports in Config (#38)
kozlov721 Jun 7, 2024
da0106e
adding OCR Decoding support
N950 Jun 10, 2024
b716f50
[Automated] Updated coverage badge
actions-user Jun 10, 2024
2 changes: 2 additions & 0 deletions .github/workflows/tests.yaml
@@ -50,6 +50,8 @@ jobs:
      run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml

    - name: Run tests [Windows, macOS]
+     env:
+       PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0
      if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10'
      run: pytest tests --junit-xml pytest.xml

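The new CI environment variable can also be mirrored when running the suite locally. A minimal sketch, assuming macOS with the PyTorch MPS backend (the variable is ignored elsewhere); the `subprocess` launch is illustrative and commented out:

```python
import os
import subprocess

# Setting the ratio to 0.0 disables the MPS allocator's high-watermark
# limit, which otherwise can abort memory-hungry test runs on Apple Silicon.
# PyTorch reads the variable at startup, so set it in the environment of the
# test process rather than inside an already-running interpreter.
env = {**os.environ, "PYTORCH_MPS_HIGH_WATERMARK_RATIO": "0.0"}
# subprocess.run(["pytest", "tests", "--junit-xml", "pytest.xml"], env=env)
```
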
36 changes: 19 additions & 17 deletions configs/README.md
@@ -142,23 +142,25 @@ To store and load the data we use LuxonisDataset and LuxonisLoader. For specific

Here you can change everything related to the actual training of the model.

| Key | Type | Default value | Description |
| ----------------------- | --------------------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
| batch_size | int | 32 | batch size used for training |
| accumulate_grad_batches | int | 1 | number of batches for gradient accumulation |
| use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks |
| epochs | int | 100 | number of training epochs |
| num_workers | int | 2 | number of workers for data loading |
| train_metrics_interval | int | -1 | frequency of computing metrics on train data, -1 if don't perform |
| validation_interval | int | 1 | frequency of computing metrics on validation data |
| num_log_images | int | 4 | maximum number of images to visualize and log |
| skip_last_batch | bool | True | whether to skip last batch while training |
| accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. |
| devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator |
| strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. |
| num_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. |
| profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis |
| verbose | bool | True | Print all intermediate results to console. |
| Key                     | Type                                           | Default value | Description                                                                                                                        |
| ----------------------- | ---------------------------------------------- | ------------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| seed                    | int                                            | None          | random seed for reproducibility                                                                                                     |
| batch_size              | int                                            | 32            | batch size used for training                                                                                                        |
| accumulate_grad_batches | int                                            | 1             | number of batches for gradient accumulation                                                                                         |
| use_weighted_sampler    | bool                                           | False         | whether to use WeightedRandomSampler for training; works only with classification tasks                                             |
| epochs                  | int                                            | 100           | number of training epochs                                                                                                           |
| num_workers             | int                                            | 2             | number of workers for data loading                                                                                                  |
| train_metrics_interval  | int                                            | -1            | frequency of computing metrics on train data; -1 disables it                                                                        |
| validation_interval     | int                                            | 1             | frequency of computing metrics on validation data                                                                                   |
| num_log_images          | int                                            | 4             | maximum number of images to visualize and log                                                                                       |
| skip_last_batch         | bool                                           | True          | whether to skip the last batch during training                                                                                      |
| accelerator             | Literal\["auto", "cpu", "gpu"\]                | "auto"        | which accelerator to use for training                                                                                               |
| devices                 | int \| list\[int\] \| str                      | "auto"        | how many devices to use (int), a list of specific devices, or "auto" for automatic configuration based on the selected accelerator  |
| matmul_precision        | Literal\["medium", "high", "highest"\] \| None | None          | internal precision of float32 matrix multiplications                                                                                |
| strategy                | Literal\["auto", "ddp"\]                       | "auto"        | which strategy to use for training                                                                                                  |
| num_sanity_val_steps    | int                                            | 2             | number of sanity validation steps performed before training                                                                         |
| profiler                | Literal\["simple", "advanced"\] \| None        | None          | PL profiler for GPU/CPU/RAM utilization analysis                                                                                    |
| verbose                 | bool                                           | True          | whether to print all intermediate results to the console                                                                            |
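
The two new rows, `seed` and `matmul_precision`, slot into the `trainer` section like any other key. A minimal sketch with illustrative values (the mapping of `matmul_precision` onto `torch.set_float32_matmul_precision` is an assumption based on the description above):

```yaml
trainer:
  seed: 42                  # fix RNG state for reproducible runs
  matmul_precision: medium  # trade float32 matmul accuracy for Tensor Core speed
```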

### Preprocessing

5 changes: 3 additions & 2 deletions configs/classification_model.yaml
@@ -15,8 +15,9 @@ model:
      thickness: 2
      include_plot: True

-dataset:
-  name: cifar10_test
+loader:
+  params:
+    dataset_name: cifar10_test

trainer:
  preprocessing:
8 changes: 4 additions & 4 deletions configs/coco_model.yaml
@@ -95,12 +95,14 @@ tracker:
  wandb_entity: luxonis
  is_mlflow: False

-dataset:
-  name: coco_test
+loader:
  train_view: train
  val_view: val
  test_view: test
+
+  params:
+    dataset_name: coco_test

trainer:
  accelerator: auto
  devices: auto

@@ -117,7 +119,6 @@ trainer:
  validation_interval: 10
  num_log_images: 8
  skip_last_batch: True
-  main_head_index: 0
  log_sub_losses: True
  save_top_k: 3

@@ -154,7 +155,6 @@ trainer:
        monitor: val/loss
        mode: min
        verbose: true
-    - name: DeviceStatsMonitor
    - name: ExportOnTrainEnd
    - name: TestOnTrainEnd
5 changes: 3 additions & 2 deletions configs/detection_model.yaml
@@ -10,8 +10,9 @@ model:
      params:
        use_neck: True

-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test

trainer:
  preprocessing:
5 changes: 3 additions & 2 deletions configs/example_export.yaml
@@ -12,8 +12,9 @@ model:
    backbone: MicroNet
    task: binary

-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test

trainer:
  preprocessing:
5 changes: 3 additions & 2 deletions configs/example_tuning.yaml
@@ -11,8 +11,9 @@ model:
    backbone: MicroNet
    task: binary

-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test

trainer:
  preprocessing:
5 changes: 3 additions & 2 deletions configs/keypoint_bbox_model.yaml
@@ -8,8 +8,9 @@ model:
  predefined_model:
    name: KeypointDetectionModel

-dataset:
-  name: coco_test
+loader:
+  params:
+    dataset_name: coco_test

trainer:
  preprocessing:
132 changes: 132 additions & 0 deletions configs/ocr_decoding.yaml
@@ -0,0 +1,132 @@
# An example configuration for OCR Decoding network.


model:
  name: ocr_decoding_test
  nodes:
    - name: OCRDecoderBackbone
      params:
        task: "text"
        num_characters: 37
        in_channels: 3
        dropout_rate: 0.1

    - name: OCRDecoderHead
      inputs:
        - OCRDecoderBackbone
      params:
        task: "text"
        num_characters: 37

  losses:
    - name: FocalCTC
      attached_to: OCRDecoderHead
      params:
        blank: 0

  metrics:
    - name: OCRAccuracy
      is_main_metric: true
      attached_to: OCRDecoderHead

  # visualizers:
  #   - name: MultiVisualizer
  #     attached_to: ImplicitKeypointBBoxHead
  #     params:
  #       visualizers:
  #         - name: KeypointVisualizer
  #           params:
  #             nonvisible_color: blue
  #         - name: BBoxVisualizer
  #           params:
  #             colors:
  #               person: "#FF5055"
  #   - name: SegmentationVisualizer
  #     attached_to: SegmentationHead
  #     params:
  #       colors: "#FF5055"
  #   - name: BBoxVisualizer
  #     attached_to: EfficientBBoxHead

tracker:
  project_name: ocr_example
  save_directory: ocr_output
  is_tensorboard: True
  is_wandb: False
  wandb_entity: luxonis
  is_mlflow: False

loader:
  train_view: train
  val_view: val
  test_view: test

  params:
    dataset_name: dataset_dev_0

trainer:
  accelerator: auto
  devices: auto
  strategy: auto

  num_sanity_val_steps: 1
  profiler: null
  verbose: True
  batch_size: 2
  accumulate_grad_batches: 1
  epochs: &epochs 200
  num_workers: 2
  train_metrics_interval: -1
  validation_interval: 1
  num_log_images: 1
  skip_last_batch: False
  log_sub_losses: True
  save_top_k: 3

  preprocessing:
    train_image_size: [&height 160, &width 320]
    keep_aspect_ratio: False
    train_rgb: True
    normalize:
      active: True
    augmentations:
      - name: OCRAugmentation
        params:
          image_size: [160, 320]
          is_rgb: True
          is_train: True

  callbacks:
    - name: LearningRateMonitor
      params:
        logging_interval: step
    - name: MetadataLogger
      params:
        hyperparams: ["trainer.epochs", trainer.batch_size]
    - name: TestOnTrainEnd

  optimizer:
    name: SGD
    params:
      lr: 0.0001
      momentum: 0.937
      nesterov: True
      weight_decay: 0.0005

  scheduler:
    name: CosineAnnealingLR
    params:
      T_max: *epochs
      eta_min: 0

exporter:
  onnx:
    opset_version: 11

tuner:
  params:
    trainer.optimizer.name_categorical: ["Adam", "SGD"]
    trainer.optimizer.params.lr_float: [0.0001, 0.001]
    trainer.batch_size_int: [4, 16, 4]
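
The `FocalCTC` loss with `blank: 0` implies the head emits per-timestep character logits that are decoded CTC-style at inference. A rough greedy-decoding sketch, not the repository's actual decoder; the 37-symbol alphabet (blank + 26 letters + 10 digits, matching `num_characters: 37`) is an assumption:

```python
import numpy as np

# Greedy (best-path) CTC decoding: take the argmax class at each time step,
# collapse consecutive repeats, then drop the blank token. BLANK = 0 matches
# the `blank: 0` setting of the FocalCTC loss above.
BLANK = 0
# Hypothetical 37-symbol alphabet: blank + 26 letters + 10 digits.
ALPHABET = [""] + list("abcdefghijklmnopqrstuvwxyz0123456789")

def ctc_greedy_decode(logits: np.ndarray) -> str:
    """Decode a (T, num_characters) array of per-timestep scores to a string."""
    best_path = logits.argmax(axis=1)
    chars = []
    prev = None
    for idx in best_path:
        # Emit a character only when it differs from the previous step
        # (collapse repeats) and is not the blank token.
        if idx != prev and idx != BLANK:
            chars.append(ALPHABET[idx])
        prev = idx
    return "".join(chars)
```

Repeated characters in the target string survive decoding only when separated by a blank step in the path, which is why the blank token exists in CTC.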
59 changes: 59 additions & 0 deletions configs/resnet_model.yaml
@@ -0,0 +1,59 @@

model:
  name: resnet50_classification
  nodes:
    - name: ResNet
      params:
        variant: "50"
        download_weights: True

    - name: ClassificationHead
      inputs:
        - ResNet

  losses:
    - name: CrossEntropyLoss
      attached_to: ClassificationHead

  metrics:
    - name: Accuracy
      is_main_metric: true
      attached_to: ClassificationHead

  visualizers:
    - name: ClassificationVisualizer
      attached_to: ClassificationHead
      params:
        font_scale: 0.5
        color: [255, 0, 0]
        thickness: 2
        include_plot: True

loader:
  params:
    dataset_name: cifar10_test

trainer:
  batch_size: 4
  epochs: &epochs 200
  num_workers: 4
  validation_interval: 10
  num_log_images: 8

  preprocessing:
    train_image_size: [&height 224, &width 224]
    keep_aspect_ratio: False
    normalize:
      active: True

  callbacks:
    - name: ExportOnTrainEnd
    - name: TestOnTrainEnd

  optimizer:
    name: SGD
    params:
      lr: 0.02

  scheduler:
    name: ConstantLR