refactor: update BehaviorClassifier to use MLP for frame-wise classification

- Replaced the Transformer encoder with an MLP architecture for simpler and more efficient classification.
- Integrated the MLP to classify features extracted from each video frame independently.
- Applied global average pooling across frame outputs to produce a single classification per video.
healthonrails · Nov 12, 2024 · 185b177 · 185b177
1 parent 56a12c3
commit 185b177
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/annolid/behavior/models/classifier.py b/annolid/behavior/models/classifier.py
@@ -50,15 +50,15 @@ class BehaviorClassifier(nn.Module):
 
     Args:
         feature_extractor (nn.Module): The feature extraction module.
-        d_model (int, optional): The embedding dimension. Defaults to 512.
+        d_model (int, optional): The embedding dimension. Defaults to 768.
         nhead (int, optional): The number of attention heads. Defaults to 8.
         num_layers (int, optional): The number of transformer encoder layers. Defaults to 6.
         dim_feedforward (int, optional): The dimension of the feedforward network. Defaults to 2048.
         dropout (float, optional): The dropout probability. Defaults to 0.1.
         num_classes (int, optional): The number of behavior classes. Defaults to 5.
     """
 
-    def __init__(self, feature_extractor: nn.Module, d_model: int = 512, nhead: int = 8,
+    def __init__(self, feature_extractor: nn.Module, d_model: int = 768, nhead: int = 8,
                  num_layers: int = 6, dim_feedforward: int = 2048, dropout: float = 0.1,
                  num_classes: int = 5):
         super().__init__()