diff --git a/src/deep_neurographs/machine_learning/datasets.py b/src/deep_neurographs/machine_learning/datasets.py index c7607e7..fe4bd6c 100644 --- a/src/deep_neurographs/machine_learning/datasets.py +++ b/src/deep_neurographs/machine_learning/datasets.py @@ -279,13 +279,6 @@ def run(self, arr): return self.transform(arr) -def get_lengths(neurograph): - lengths = [] - for edge in neurograph.proposals.keys(): - lengths.append(neurograph.proposal_length(edge)) - return lengths - - # -- utils -- def reformat(arr): """ diff --git a/src/deep_neurographs/machine_learning/feature_generation.py b/src/deep_neurographs/machine_learning/feature_generation.py index 58f7639..855da25 100644 --- a/src/deep_neurographs/machine_learning/feature_generation.py +++ b/src/deep_neurographs/machine_learning/feature_generation.py @@ -369,7 +369,7 @@ def generate_branch_features(neurograph, edges): for (i, j) in edges: edge = frozenset((i, j)) features[edge] = np.zeros((31)) - + temp = np.concatenate( ( np.array([len(neurograph.edges[i, j]["xyz"])]), @@ -402,7 +402,7 @@ def curvature(xyz_list): # -- Build feature matrix -def get_feature_matrix(neurographs, features, model_type, block_ids=None): +def get_matrix(neurographs, features, model_type, block_ids=None): assert model_type in SUPPORTED_MODELS, "Error! model_type not supported" if block_ids: return __multiblock_feature_matrix( diff --git a/src/deep_neurographs/machine_learning/graph_datasets.py b/src/deep_neurographs/machine_learning/graph_datasets.py index 5c1f0c2..bf22ef7 100644 --- a/src/deep_neurographs/machine_learning/graph_datasets.py +++ b/src/deep_neurographs/machine_learning/graph_datasets.py @@ -6,12 +6,13 @@ Custom datasets for training graph neural networks. +# explain branches vs edges terminology + """ import networkx as nx import numpy as np import torch -from torch.utils.data import Dataset from torch_geometric.data import Data as GraphData from torch_geometric.data import HeteroData as HeteroGraphData @@ -25,13 +26,31 @@ def init(neurograph, branch_features, proposal_features, heterogeneous=False): Parameters ---------- - + neurograph : NeuroGraph + Graph that dataset is built from. + branch_features : dict + Feature vectors corresponding to branches such that the keys are a + frozenset of the node pair and values are the corresponding feature + vectors. + proposal_features : dict + Feature vectors corresponding to proposals such that the keys are a + frozenset of the node pair and values are the corresponding feature + vectors. + heterogeneous : bool + Indication of whether dataset should be stored as a heterogeneous + graph. + + Returns + ------- + GraphDataset, HeteroGraphDataset + Custom dataset. + """ # Extract features - x_branches, _, idxs_branches = feature_generation.get_feature_matrix( + x_branches, _, idxs_branches = feature_generation.get_matrix( neurograph, branch_features, "GraphNeuralNet" ) - x_proposals, y_proposals, idxs_proposals = feature_generation.get_feature_matrix( + x_proposals, y_proposals, idxs_proposals = feature_generation.get_matrix( neurograph, proposal_features, "GraphNeuralNet" ) @@ -47,7 +66,7 @@ def init(neurograph, branch_features, proposal_features, heterogeneous=False): x_proposals, y_proposals, idxs_branches, - idxs_proposals + idxs_proposals, ) return graph_dataset @@ -55,6 +74,10 @@ def init(neurograph, branch_features, proposal_features, heterogeneous=False): # Datasets class GraphDataset: + """ + Custom dataset for homogenous graphs. + + """ def __init__( self, neurograph, @@ -79,6 +102,10 @@ def __init__( class HeteroGraphDataset: + """ + Custom dataset for heterogenous graphs. + + """ def __init__( self, neurograph, diff --git a/src/deep_neurographs/machine_learning/graph_models.py b/src/deep_neurographs/machine_learning/graph_models.py index 6a2def3..095645c 100644 --- a/src/deep_neurographs/machine_learning/graph_models.py +++ b/src/deep_neurographs/machine_learning/graph_models.py @@ -8,25 +8,33 @@ """ -from torch.nn import ELU, Linear -from torch_geometric.nn import GCNConv - import torch import torch.nn.functional as F +from torch.nn import ELU, Linear +from torch_geometric.nn import GCNConv class GCN(torch.nn.Module): def __init__(self, input_channels): super().__init__() self.conv1 = GCNConv(input_channels, input_channels // 2) - self.conv2 = GCNConv(input_channels // 2, 1) + self.conv2 = GCNConv(input_channels // 2, input_channels // 2) + self.conv3 = GCNConv(input_channels // 2, 1) self.ELU = ELU() def forward(self, x, edge_index): + # Layer 1 x = self.conv1(x, edge_index) x = self.ELU(x) x = F.dropout(x, p=0.25) + + # Layer 2 x = self.conv2(x, edge_index) + x = self.ELU(x) + x = F.dropout(x, p=0.25) + + # Layer 3 + x = self.conv3(x, edge_index) return x @@ -37,7 +45,7 @@ def __init__(self, input_channels): self.linear2 = Linear(input_channels // 2, 1) self.ELU = ELU() - def forward(self, x): + def forward(self, x, edge_index): x = self.linear1(x) x = self.ELU(x) x = F.dropout(x, p=0.25) diff --git a/src/deep_neurographs/machine_learning/graph_trainer.py b/src/deep_neurographs/machine_learning/graph_trainer.py index ec5c135..c95c7e0 100644 --- a/src/deep_neurographs/machine_learning/graph_trainer.py +++ b/src/deep_neurographs/machine_learning/graph_trainer.py @@ -8,18 +8,19 @@ """ -import torch from random import sample, shuffle +import torch +from torch.nn.functional import sigmoid LR = 1e-3 -N_EPOCHS = 100 +N_EPOCHS = 300 TEST_PERCENT = 0.15 WEIGHT_DECAY = 5e-4 def run_on_graph(model, graph_data): - pass + pass def run_on_graphs( @@ -31,54 +32,58 @@ def run_on_graphs( weight_decay=WEIGHT_DECAY, ): # Initializations - graph_ids = list(graph_datasets.keys()) - model.train() model.to("cuda:0") optimizer = torch.optim.Adam( model.parameters(), lr=lr, weight_decay=weight_decay ) - # Train + # Main + accuracy = [] train_ids, test_ids = train_test_split(list(graph_datasets.keys())) for epoch in range(n_epochs): - for graph_id in train_ids: + # Train + model.train() + for graph_id in train_ids: loss, optimizer = train( - model, - criterion, - optimizer, - graph_datasets[graph_id].data, + model, criterion, optimizer, graph_datasets[graph_id].data ) + + # Test + model.eval() + accuracy_i = 0 + for graph_id in test_ids: + accuracy_i += validate(model, graph_datasets[graph_id].data) + accuracy.append(accuracy_i / len(test_ids)) + if epoch % 10 == 0: + print("Accuracy +/-:", accuracy[-1]) return model def train(model, criterion, optimizer, graph_data): - # Move data to gpu - x = graph_data.x.to("cuda:0", dtype=torch.float32) - y = graph_data.y.to("cuda:0", dtype=torch.float32) - edge_index = graph_data.edge_index.to("cuda:0") - # Forward pass - n = y.size(0) + x, y, edge_index = toGPU(graph_data) optimizer.zero_grad() - preds = model(x, edge_index) - loss = criterion(preds[0:n, 0], y) + hat_y = model(x, edge_index) + hat_y = truncate(hat_y, y) # Backward pass + loss = criterion(hat_y, y) loss.backward() optimizer.step() return loss, optimizer def validate(model, graph_data): - model.eval() - x = graph_data.x.to("cuda:0", dtype=torch.float32) - y = graph_data.y.to("cuda:0", dtype=torch.float32) - edge_index = graph_data.edge_index.to("cuda:0") - out = model(data.x, data.edge_index) - pred = out.argmax(dim=1) # Use the class with highest probability. - correct = pred[mask] == data.y[mask] # Check against ground-truth labels. - acc = int(correct.sum()) / int(mask.sum()) # Derive ratio of correct predictions. - return acc + # Initializations + x, y, edge_index = toGPU(graph_data) + hat_y = model(x, edge_index) + hat_y = truncate(hat_y, y) + + # Compute accuracy + preds = get_predictions(hat_y) + correct = preds == y + acc = float(correct.sum()) / y.size(0) + return acc - y.sum() / y.size(0) # -- utils -- @@ -102,7 +107,40 @@ def shuffler(my_list): def train_test_split(graph_ids): - n_test_examples = int(len(graph_ids) * TEST_PERCENT) + n_test_examples = 1 # int(len(graph_ids) * TEST_PERCENT) test_ids = sample(graph_ids, n_test_examples) train_ids = list(set(graph_ids) - set(test_ids)) return train_ids, test_ids + + +def toGPU(graph_data): + x = graph_data.x.to("cuda:0", dtype=torch.float32) + y = graph_data.y.to("cuda:0", dtype=torch.float32) + edge_index = graph_data.edge_index.to("cuda:0") + return x, y, edge_index + + +def truncate(hat_y, y): + """ + Truncates "hat_y" so that this tensor has the same shape as "y". Note this + operation removes the predictions corresponding to branches so that loss + is computed over proposals. + + Parameters + ---------- + hat_y : torch.Tensor + Tensor to be truncated. + y : torch.Tensor + Tensor used as a reference. + + Returns + ------- + torch.Tensor + Truncated "hat_y". + + """ + return hat_y[0: y.size(0), 0] + + +def get_predictions(hat_y, threshold=0.5): + return sigmoid(hat_y) > threshold diff --git a/src/deep_neurographs/machine_learning/ml_utils.py b/src/deep_neurographs/machine_learning/ml_utils.py index f76d5e5..6cb09e5 100644 --- a/src/deep_neurographs/machine_learning/ml_utils.py +++ b/src/deep_neurographs/machine_learning/ml_utils.py @@ -154,7 +154,7 @@ def init_dataset( neurographs, features, model_type, block_ids=None, transform=False ): # Extract features - inputs, targets, idx_transforms = feature_generation.get_feature_matrix( + inputs, targets, idx_transforms = feature_generation.get_matrix( neurographs, features, model_type, block_ids=block_ids ) lens = [] @@ -168,3 +168,10 @@ def init_dataset( "idx_to_edge": idx_transforms["idx_to_edge"], } return dataset + + +def get_lengths(neurograph): + lengths = [] + for edge in neurograph.proposals.keys(): + lengths.append(neurograph.proposal_length(edge)) + return lengths