Gen3: Crowdcounting #604

Open
wants to merge 16 commits into `gen3`
6 changes: 5 additions & 1 deletion neural-networks/README.md
@@ -145,9 +145,13 @@ LEGEND: ✅: available; ❌: not available; 🚧: work in progress

## Counting

| Crowd Counting |
| :----------------------------------------------------------------------------------------------------: |
| <img src="counting/crowdcounting/media/crowd-counting.gif" alt="crowd-counting" style="height:250px;"> |

| Name | HubAI Model | RVC2 | RVC4 (peripheral) | RVC4 (standalone) | Gen2 | Notes |
| ------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------- | ---- | ----------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------ |
| [crowdcounting](counting/crowdcounting/) | [dm-count](https://hub.luxonis.com/ai/models/16d01eab-dc2b-4422-907d-2634a940b52e) | 🚧 | 🚧 | 🚧 | [gen2-crowdcounting](https://github.com/luxonis/depthai-experiments/tree/master/gen2-crowdcounting) | |
| [crowd-counting](counting/crowdcounting/) | [dm-count](https://hub.luxonis.com/ai/models/16d01eab-dc2b-4422-907d-2634a940b52e) | | | | [gen2-crowdcounting](https://github.com/luxonis/depthai-experiments/tree/master/gen2-crowdcounting) | |
| [cumulative-object-counting](counting/cumulative-object-counting/) | [yolov6-nano](https://hub.luxonis.com/ai/models/face58c4-45ab-42a0-bafc-19f9fee8a034?view=page) | 🚧 | 🚧 | 🚧 | [gen2-cumulative-object-counting](https://github.com/luxonis/depthai-experiments/tree/master/gen2-cumulative-object-counting) | |
| [depth-people-counting](counting/depth-people-counting/) | | 🚧 | 🚧 | 🚧 | [gen2-depth-people-counting](https://github.com/luxonis/depthai-experiments/tree/master/gen2-depth-people-counting) | Experiment does not use any NN model |
| [people-counter](counting/people-counter/) | [scrfd-person-detection](https://hub.luxonis.com/ai/models/c3830468-3178-4de6-bc09-0543bbe28b1c?view=page) | 🚧 | 🚧 | 🚧 | [gen2-people-counter](https://github.com/luxonis/depthai-experiments/tree/master/gen2-people-counter) | |
63 changes: 46 additions & 17 deletions neural-networks/counting/crowdcounting/README.md
@@ -1,33 +1,62 @@
# \[Gen3\] Crowd Counting with density maps on DepthAI
# Overview

This example shows an implementation of Crowd Counting with density maps on DepthAI. We use the [DM-Count](https://github.com/cvlab-stonybrook/DM-Count) ([LICENSE](https://github.com/cvlab-stonybrook/DM-Count/blob/master/LICENSE)) model, which has a VGG-19 backbone and is trained on the Shanghai B dataset.
This experiment demonstrates how to build a DepthAI pipeline for crowd counting.
We use the [DM-Count](https://hub.luxonis.com/ai/models/16d01eab-dc2b-4422-907d-2634a940b52e?view=page) model to produce a crowd density map, from which the count is computed.

The model produces a density map from which the predicted count can be computed.
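
As a rough illustration (not part of this PR), the count is recovered by integrating, i.e. summing, the density map. A minimal NumPy sketch, with a synthetic map standing in for real model output:

```python
import numpy as np

def crowd_count(density_map: np.ndarray) -> float:
    # The predicted count is the integral (sum) of the density map.
    return float(density_map.sum())

# Synthetic 240x426 map standing in for real DM-Count output.
density = (np.random.rand(240, 426) * 1e-3).astype(np.float32)
print(f"Predicted count: {crowd_count(density):.1f}")
```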
## Demo

The input video is resized to 426 x 240 (W x H). Due to the relatively heavy model, the inference speed is around 1 FPS.
[![crowd counting](media/crowd-counting.gif)](media/crowd-counting.gif)

![Image example](imgs/example.gif)
<sup>[Source](https://www.pexels.com/video/time-lapse-video-of-people-at-subway-station-855749/)</sup>

![image](https://user-images.githubusercontent.com/32992551/171780142-5cd4f2a4-6c51-4dbc-9e3e-17062a9c6c6c.png)
## Installation

The example shows the input video with a density-map overlay. The example video is taken from the [VIRAT](https://viratdata.org/) dataset.
Running this example requires a **Luxonis OAK device** connected to your computer. You can find more information about the supported devices and the setup instructions in our [Documentation](https://rvc4.docs.luxonis.com/hardware).
Moreover, you need to prepare a **Python 3.10** environment with the [DepthAI](https://pypi.org/project/depthai/) and [DepthAI Nodes](https://pypi.org/project/depthai-nodes/) packages installed. You can do this by running:

## Installation
```bash
pip install -r requirements.txt
```

## Usage

You can run the experiment fully on device (`STANDALONE` mode) or using your computer as the host (`PERIPHERAL` mode).

### Peripheral Mode

```bash
python3 main.py -model <MODEL> -media <MEDIA> -fps <FPS_LIMIT> --device <DEVICE>
```
python3 -m pip install -r requirements.txt
python3 download.py

- `<MODEL>`: Reference to a DM-Count model variant on Luxonis HubAI (e.g. `luxonis/dm-count:shb-426x240`).
- `<MEDIA>` \[OPTIONAL\]: Path to the video file. Default: `None` - use camera input.
- `<FPS_LIMIT>` \[OPTIONAL\]: Limit of the video/camera FPS. Beware that if you provide a video file with a higher FPS than the limit, a slowed-down video will be shown (and a sped-up one if the file's FPS is lower). Default: `1`.
- `<DEVICE>` \[OPTIONAL\]: Device IP or ID. Default: `None` - use the first identified device connected to the host.

#### Examples

```bash
python main.py \
-model luxonis/dm-count:shb-426x240 \
-fps 5
```

## Usage
This will run the experiment using the SHB variant of the model with input size 426x240 on the default device and camera input at 5 FPS.

Run the application
### Standalone Mode

In [Standalone mode](https://rvc4.docs.luxonis.com/software/depthai/standalone/), the app runs entirely on the device. To run the example in this mode, first install the [oakctl](https://rvc4.docs.luxonis.com/software/tools/oakctl/) command-line tool (it enables host-device interaction) by running:

```bash
bash -c "$(curl -fsSL https://oakctl-releases.luxonis.com/oakctl-installer.sh)"
```
python3 main.py

optional arguments:
-h, --help show this help message and exit
-v VIDEO_PATH --video-path VIDEO_PATH
Path to the video input for inference. Default: /vids/virat.mp4
The app can then be run with:

```bash
oakctl connect <DEVICE_IP>
oakctl app run .
```

This will run the experiment using the SHB variant of the model with input size 426x240 on the specified device and camera input at 5 FPS.
1 change: 0 additions & 1 deletion neural-networks/counting/crowdcounting/host_node

This file was deleted.

Binary file not shown.
174 changes: 60 additions & 114 deletions neural-networks/counting/crowdcounting/main.py
@@ -1,137 +1,83 @@
import argparse
from os.path import isfile
from pathlib import Path

import depthai as dai
from depthai_nodes import ParsingNeuralNetwork
from download import download_vids
from host_node.host_depth_color_transform import DepthColorTransform
from host_node.overlay_frames import OverlayFrames
from host_node.visualize_detections_v2 import VisualizeDetectionsV2
from nn_configs import NN_CONFIGS
from visualize_crowd_count import VisualizeCrowdCount

device = dai.Device()
from utils.arguments import initialize_argparser
from utils.counter import CrowdCounter
from utils.density_map_transform import DensityMapToFrame
from utils.overlay import OverlayFrames

_, args = initialize_argparser()

parser = argparse.ArgumentParser()
parser.add_argument(
"-nn",
"--neural-network",
type=str,
choices=[
"sha_small",
"sha_medium",
"sha_large",
"sha_xlarge",
"shb_small",
"shb_medium",
"shb_large",
"shb_xlarge",
"qnrf_small",
"qnrf_medium",
"qnrf_large",
"qnrf_xlarge",
],
default="sha_medium",
help="Choose the neural network model used for crowd counting. Default: sha_medium",
)
parser.add_argument(
"-fps",
"--frames-per-second",
type=float,
help="Set the frames per second for the video. Default: 1",
default=1,
)
parser.add_argument(
"-v",
"--video-path",
type=str,
help="Path to the video input for inference. Default: vids/virat.mp4",
default="vids/vid4.mp4",
visualizer = dai.RemoteConnection(httpPort=8082)
device = dai.Device(dai.DeviceInfo(args.device_id)) if args.device_id else dai.Device()
platform = device.getPlatform().name
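# RVC2 models consume planar (BGR888p) frames, while RVC4 models use interleaved (BGR888i)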
frame_type = (
dai.ImgFrame.Type.BGR888p if platform == "RVC2" else dai.ImgFrame.Type.BGR888i
)
parser.add_argument(
"-cam",
"--camera",
action="store_true",
help="Use the camera for inference instead of video. Default: False",
)
args = parser.parse_args()

# Download test videos
if (
not isfile(Path("vids/virat.mp4").resolve().absolute())
or not isfile(Path("vids/vid1.mp4").resolve().absolute())
or not isfile(Path("vids/vid2.mp4").resolve().absolute())
or not isfile(Path("vids/vid3.mp4").resolve().absolute())
or not isfile(Path("vids/vid4.mp4").resolve().absolute())
):
download_vids()
video_source = Path(args.video_path).resolve().absolute()


nn_config = NN_CONFIGS[args.neural_network]

NN_SIZE = nn_config["nn_size"]
VIDEO_SIZE = (1280, 720)
FPS = args.frames_per_second

model_description = dai.NNModelDescription(
modelSlug=nn_config["model_slug"],
platform=device.getPlatform().name,
modelVersionSlug=nn_config["version_slug"],
)
archive_path = dai.getModelFromZoo(model_description, useCached=True)
nn_archive = dai.NNArchive(archive_path)

visualizer = dai.RemoteConnection()

with dai.Pipeline(device) as pipeline:
print("Creating pipeline...")
if args.camera:
cam = pipeline.create(dai.node.Camera).build(dai.CameraBoardSocket.CAM_A)
color_out = cam.requestOutput(VIDEO_SIZE, dai.ImgFrame.Type.BGR888p, fps=FPS)
else:
replay = pipeline.create(dai.node.ReplayVideo)
replay.setReplayVideoFile(video_source)
replay.setSize(VIDEO_SIZE)
replay.setOutFrameType(dai.ImgFrame.Type.BGR888p)
replay.setFps(FPS)
color_out = replay.out

manip = pipeline.create(dai.node.ImageManip)
manip.initialConfig.setResizeThumbnail(NN_SIZE)
manip.setMaxOutputFrameSize(NN_SIZE[0] * NN_SIZE[1] * 3)
manip.initialConfig.setFrameType(dai.ImgFrame.Type.BGR888p)
manip.inputImage.setBlocking(True)
color_out.link(manip.inputImage)
# Model NN Archive
cc_model_description = dai.NNModelDescription(args.crowd_counting_model)
cc_model_description.platform = platform
cc_model_nn_archive = dai.NNArchive(dai.getModelFromZoo(cc_model_description))
INPUT_WIDTH = cc_model_nn_archive.getInputWidth()
INPUT_HEIGHT = cc_model_nn_archive.getInputHeight()
STRIDE = (INPUT_WIDTH + 7) // 8 * 8 # Align width up to the nearest multiple of 8

# Video/Camera Input Node
if args.media_path:
replay = pipeline.create(dai.node.ReplayVideo)
replay.setReplayVideoFile(Path(args.media_path))
replay.setOutFrameType(dai.ImgFrame.Type.NV12)
replay.setLoop(True)
if args.fps_limit:
replay.setFps(args.fps_limit)
args.fps_limit = None # only want to set it once
imageManip = pipeline.create(dai.node.ImageManipV2)
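# size the output buffer for the 8-pixel-aligned row stride, not just the raw width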
imageManip.setMaxOutputFrameSize(STRIDE * INPUT_HEIGHT * 3)
imageManip.initialConfig.setOutputSize(INPUT_WIDTH, INPUT_HEIGHT)
imageManip.initialConfig.setFrameType(frame_type)
replay.out.link(imageManip.inputImage)
else:
cam = pipeline.create(dai.node.Camera).build()
input_node = imageManip.out if args.media_path else cam

nn = pipeline.create(ParsingNeuralNetwork).build(manip.out, nn_archive)
# Model Node
nn: ParsingNeuralNetwork = pipeline.create(ParsingNeuralNetwork).build(
input_node, cc_model_nn_archive, fps=args.fps_limit
)

visualize_crowd_count = pipeline.create(VisualizeCrowdCount).build(nn.out)
# Counter Node
crowd_counter_node = pipeline.create(CrowdCounter).build(nn.out)

visualize_detections = pipeline.create(VisualizeDetectionsV2).build(nn.out)
# Density Map Transform and Resize Nodes
density_map_transform_node = pipeline.create(DensityMapToFrame).build(nn.out)
density_map_resize_node = pipeline.create(dai.node.ImageManipV2)
density_map_resize_node.setMaxOutputFrameSize(STRIDE * INPUT_HEIGHT * 3)
density_map_resize_node.initialConfig.setOutputSize(INPUT_WIDTH, INPUT_HEIGHT)
density_map_resize_node.initialConfig.setFrameType(frame_type)
density_map_transform_node.output.link(density_map_resize_node.inputImage)

color_transform = pipeline.create(DepthColorTransform).build(
visualize_detections.output_mask
# Overlay Frames Node
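# blends the resized density map onto the model's passthrough frame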
overlay_frames = pipeline.create(OverlayFrames).build(
nn.passthrough, density_map_resize_node.out
)

map_resize = pipeline.create(dai.node.ImageManipV2)
map_resize.initialConfig.addResize(*VIDEO_SIZE)
map_resize.setMaxOutputFrameSize(VIDEO_SIZE[0] * VIDEO_SIZE[1] * 3)
color_transform.output.link(map_resize.inputImage)

overlay_frames = pipeline.create(OverlayFrames).build(color_out, map_resize.out)
# Visualizer
visualizer.addTopic("VideoOverlay", overlay_frames.output)
visualizer.addTopic("Count", crowd_counter_node.output)

visualizer.addTopic("Camera", color_out)
visualizer.addTopic("Segmentation", overlay_frames.output)
visualizer.addTopic("Predicted count", visualize_crowd_count.output)
print("Pipeline created.")

pipeline.start()

visualizer.registerPipeline(pipeline)

while pipeline.isRunning():
pipeline.processTasks()
key = visualizer.waitKey(1)
if key == ord("q"):
key_pressed = visualizer.waitKey(1)
if key_pressed == ord("q"):
pipeline.stop()
break
print("Pipeline finished.")
62 changes: 0 additions & 62 deletions neural-networks/counting/crowdcounting/nn_configs.py

This file was deleted.

12 changes: 12 additions & 0 deletions neural-networks/counting/crowdcounting/oakapp.toml
@@ -0,0 +1,12 @@
identifier = "com.luxonis.crowdcounting-example"

entrypoint = ["bash", "-c", "python3 /app/main.py -model luxonis/dm-count:shb-426x240 -fps 5"]

prepare_container = [
{ type = "RUN", command = "apt-get update" },
{ type = "RUN", command = "apt-get install -y python3 python3-pip libglib2.0-0 libgl1-mesa-glx wget git" },
]

build_steps = ["pip3 install -r /app/requirements.txt --break-system-packages"]

prepare_build_container = []
9 changes: 3 additions & 6 deletions neural-networks/counting/crowdcounting/requirements.txt
@@ -1,6 +1,3 @@
--extra-index-url https://artifacts.luxonis.com/artifactory/luxonis-python-snapshot-local
opencv-python==4.10.0.84
depthai==3.0.0a5.dev0+56ddb2a505ed37704ebf790ea9b09dde6fa30a6e
depthai-nodes==0.1.0
gdown==5.2.0
numpy~=2.0
--extra-index-url https://artifacts.luxonis.com/artifactory/luxonis-python-release-local/
depthai>=3.0.0a12
depthai-nodes>=0.1.2