AllenInstitute · anna-grim · Apr 13, 2024 · Apr 13, 2024
diff --git a/src/deep_neurographs/machine_learning/datasets.py b/src/deep_neurographs/machine_learning/datasets.py
@@ -279,13 +279,6 @@ def run(self, arr):
         return self.transform(arr)
 
 
-def get_lengths(neurograph):
-    lengths = []
-    for edge in neurograph.proposals.keys():
-        lengths.append(neurograph.proposal_length(edge))
-    return lengths
-
-
 # -- utils --
 def reformat(arr):
     """

diff --git a/src/deep_neurographs/machine_learning/feature_generation.py b/src/deep_neurographs/machine_learning/feature_generation.py
@@ -369,7 +369,7 @@ def generate_branch_features(neurograph, edges):
     for (i, j) in edges:
         edge = frozenset((i, j))
         features[edge] = np.zeros((31))
-        
+
         temp = np.concatenate(
             (
                 np.array([len(neurograph.edges[i, j]["xyz"])]),
@@ -402,7 +402,7 @@ def curvature(xyz_list):
 
 
 # -- Build feature matrix
-def get_feature_matrix(neurographs, features, model_type, block_ids=None):
+def get_matrix(neurographs, features, model_type, block_ids=None):
     assert model_type in SUPPORTED_MODELS, "Error! model_type not supported"
     if block_ids:
         return __multiblock_feature_matrix(

diff --git a/src/deep_neurographs/machine_learning/graph_datasets.py b/src/deep_neurographs/machine_learning/graph_datasets.py
@@ -6,12 +6,13 @@
 
 Custom datasets for training graph neural networks.
 
+TODO: explain the "branches" vs. "edges" terminology used in this module.
+
 """
 
 import networkx as nx
 import numpy as np
 import torch
-from torch.utils.data import Dataset
 from torch_geometric.data import Data as GraphData
 from torch_geometric.data import HeteroData as HeteroGraphData
 
@@ -25,13 +26,31 @@ def init(neurograph, branch_features, proposal_features, heterogeneous=False):
 
     Parameters
     ----------
-
+    neurograph : NeuroGraph
+        Graph that dataset is built from.
+    branch_features : dict
+        Feature vectors corresponding to branches such that the keys are a
+        frozenset of the node pair and values are the corresponding feature
+        vectors.
+    proposal_features : dict
+        Feature vectors corresponding to proposals such that the keys are a
+        frozenset of the node pair and values are the corresponding feature
+        vectors.
+    heterogeneous : bool
+        Indication of whether dataset should be stored as a heterogeneous
+        graph.
+
+    Returns
+    -------
+    GraphDataset, HeteroGraphDataset
+        Custom dataset.
+
     """
     # Extract features
-    x_branches, _, idxs_branches = feature_generation.get_feature_matrix(
+    x_branches, _, idxs_branches = feature_generation.get_matrix(
         neurograph, branch_features, "GraphNeuralNet"
     )
-    x_proposals, y_proposals, idxs_proposals = feature_generation.get_feature_matrix(
+    x_proposals, y_proposals, idxs_proposals = feature_generation.get_matrix(
         neurograph, proposal_features, "GraphNeuralNet"
     )
 
@@ -47,14 +66,18 @@ def init(neurograph, branch_features, proposal_features, heterogeneous=False):
             x_proposals,
             y_proposals,
             idxs_branches,
-            idxs_proposals
+            idxs_proposals,
         )
 
     return graph_dataset
 
 
 # Datasets
 class GraphDataset:
+    """
+    Custom dataset for homogeneous graphs.
+
+    """
     def __init__(
         self,
         neurograph,
@@ -79,6 +102,10 @@ def __init__(
 
 
 class HeteroGraphDataset:
+    """
+    Custom dataset for heterogeneous graphs.
+
+    """
     def __init__(
         self,
         neurograph,

diff --git a/src/deep_neurographs/machine_learning/graph_models.py b/src/deep_neurographs/machine_learning/graph_models.py
@@ -8,25 +8,33 @@
 
 """
 
-from torch.nn import ELU, Linear
-from torch_geometric.nn import GCNConv
-
 import torch
 import torch.nn.functional as F
+from torch.nn import ELU, Linear
+from torch_geometric.nn import GCNConv
 
 
 class GCN(torch.nn.Module):
     def __init__(self, input_channels):
         super().__init__()
         self.conv1 = GCNConv(input_channels, input_channels // 2)
-        self.conv2 = GCNConv(input_channels // 2, 1)
+        self.conv2 = GCNConv(input_channels // 2, input_channels // 2)
+        self.conv3 = GCNConv(input_channels // 2, 1)
         self.ELU = ELU()
 
     def forward(self, x, edge_index):
+        # Layer 1
         x = self.conv1(x, edge_index)
         x = self.ELU(x)
         x = F.dropout(x, p=0.25)
+
+        # Layer 2
         x = self.conv2(x, edge_index)
+        x = self.ELU(x)
+        x = F.dropout(x, p=0.25)
+
+        # Layer 3
+        x = self.conv3(x, edge_index)
         return x
 
 
@@ -37,7 +45,7 @@ def __init__(self, input_channels):
         self.linear2 = Linear(input_channels // 2, 1)
         self.ELU = ELU()
 
-    def forward(self, x):
+    def forward(self, x, edge_index):
         x = self.linear1(x)
         x = self.ELU(x)
         x = F.dropout(x, p=0.25)

diff --git a/src/deep_neurographs/machine_learning/graph_trainer.py b/src/deep_neurographs/machine_learning/graph_trainer.py
@@ -8,18 +8,19 @@
 
 """
 
-import torch
 from random import sample, shuffle
 
+import torch
+from torch.nn.functional import sigmoid
 
 LR = 1e-3
-N_EPOCHS = 100
+N_EPOCHS = 300
 TEST_PERCENT = 0.15
 WEIGHT_DECAY = 5e-4
 
 
 def run_on_graph(model, graph_data):
-      pass
+    pass
 
 
 def run_on_graphs(
@@ -31,54 +32,58 @@ def run_on_graphs(
     weight_decay=WEIGHT_DECAY,
 ):
     # Initializations
-    graph_ids = list(graph_datasets.keys())
-    model.train()
     model.to("cuda:0")
     optimizer = torch.optim.Adam(
         model.parameters(), lr=lr, weight_decay=weight_decay
     )
 
-    # Train
+    # Main
+    accuracy = []
     train_ids, test_ids = train_test_split(list(graph_datasets.keys()))
     for epoch in range(n_epochs):
-         for graph_id in train_ids:
+        # Train
+        model.train()
+        for graph_id in train_ids:
             loss, optimizer = train(
-                model,
-                criterion,
-                optimizer,
-                graph_datasets[graph_id].data,
+                model, criterion, optimizer, graph_datasets[graph_id].data
             )
+
+        # Test
+        model.eval()
+        accuracy_i = 0
+        for graph_id in test_ids:
+            accuracy_i += validate(model, graph_datasets[graph_id].data)
+        accuracy.append(accuracy_i / len(test_ids))
+        if epoch % 10 == 0:
+            print("Accuracy +/-:", accuracy[-1])
     return model
 
 
 def train(model, criterion, optimizer, graph_data):
-    # Move data to gpu
-    x = graph_data.x.to("cuda:0", dtype=torch.float32)
-    y = graph_data.y.to("cuda:0", dtype=torch.float32)
-    edge_index = graph_data.edge_index.to("cuda:0")
-
     # Forward pass
-    n = y.size(0)
+    x, y, edge_index = toGPU(graph_data)
     optimizer.zero_grad()
-    preds = model(x, edge_index)
-    loss = criterion(preds[0:n, 0], y)
+    hat_y = model(x, edge_index)
+    hat_y = truncate(hat_y, y)
 
     # Backward pass
+    loss = criterion(hat_y, y)
     loss.backward()
     optimizer.step()
     return loss, optimizer
 
 
 def validate(model, graph_data):
-    model.eval()
-            x = graph_data.x.to("cuda:0", dtype=torch.float32)
-    y = graph_data.y.to("cuda:0", dtype=torch.float32)
-    edge_index = graph_data.edge_index.to("cuda:0")
-      out = model(data.x, data.edge_index)
-      pred = out.argmax(dim=1)  # Use the class with highest probability.
-      correct = pred[mask] == data.y[mask]  # Check against ground-truth labels.
-      acc = int(correct.sum()) / int(mask.sum())  # Derive ratio of correct predictions.
-      return acc
+    # Initializations
+    x, y, edge_index = toGPU(graph_data)
+    hat_y = model(x, edge_index)
+    hat_y = truncate(hat_y, y)
+
+    # Compute accuracy
+    preds = get_predictions(hat_y)
+    correct = preds == y
+    acc = float(correct.sum()) / y.size(0)
+    return acc - y.sum() / y.size(0)
 
 
 # -- utils --
@@ -102,7 +107,40 @@ def shuffler(my_list):
 
 
 def train_test_split(graph_ids):
-    n_test_examples = int(len(graph_ids) * TEST_PERCENT)
+    n_test_examples = 1  # int(len(graph_ids) * TEST_PERCENT)
     test_ids = sample(graph_ids, n_test_examples)
     train_ids = list(set(graph_ids) - set(test_ids))
     return train_ids, test_ids
+
+
+def toGPU(graph_data):
+    x = graph_data.x.to("cuda:0", dtype=torch.float32)
+    y = graph_data.y.to("cuda:0", dtype=torch.float32)
+    edge_index = graph_data.edge_index.to("cuda:0")
+    return x, y, edge_index
+
+
+def truncate(hat_y, y):
+    """
+    Truncates "hat_y" so that this tensor has the same shape as "y". Note this
+    operation removes the predictions corresponding to branches so that loss
+    is computed over proposals.
+
+    Parameters
+    ----------
+    hat_y : torch.Tensor
+        Tensor to be truncated.
+    y : torch.Tensor
+        Tensor used as a reference.
+
+    Returns
+    -------
+    torch.Tensor
+        Truncated "hat_y".
+
+    """
+    return hat_y[0: y.size(0), 0]
+
+
+def get_predictions(hat_y, threshold=0.5):
+    return sigmoid(hat_y) > threshold
diff --git a/src/deep_neurographs/machine_learning/ml_utils.py b/src/deep_neurographs/machine_learning/ml_utils.py
@@ -154,7 +154,7 @@ def init_dataset(
     neurographs, features, model_type, block_ids=None, transform=False
 ):
     # Extract features
-    inputs, targets, idx_transforms = feature_generation.get_feature_matrix(
+    inputs, targets, idx_transforms = feature_generation.get_matrix(
         neurographs, features, model_type, block_ids=block_ids
     )
     lens = []
@@ -168,3 +168,10 @@ def init_dataset(
         "idx_to_edge": idx_transforms["idx_to_edge"],
     }
     return dataset
+
+
+def get_lengths(neurograph):
+    lengths = []
+    for edge in neurograph.proposals.keys():
+        lengths.append(neurograph.proposal_length(edge))
+    return lengths