Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: graph training validation #116

Merged
merged 1 commit into from
Apr 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions src/deep_neurographs/machine_learning/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,13 +279,6 @@ def run(self, arr):
return self.transform(arr)


def get_lengths(neurograph):
    """
    Gets the length of every proposal in "neurograph".

    Parameters
    ----------
    neurograph : NeuroGraph
        Graph whose "proposals" mapping is iterated and whose
        "proposal_length" method is called on each key.

    Returns
    -------
    list
        Value returned by "neurograph.proposal_length" for each proposal,
        in the iteration order of "neurograph.proposals".

    """
    return [
        neurograph.proposal_length(edge)
        for edge in neurograph.proposals.keys()
    ]


# -- utils --
def reformat(arr):
"""
Expand Down
4 changes: 2 additions & 2 deletions src/deep_neurographs/machine_learning/feature_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ def generate_branch_features(neurograph, edges):
for (i, j) in edges:
edge = frozenset((i, j))
features[edge] = np.zeros((31))

temp = np.concatenate(
(
np.array([len(neurograph.edges[i, j]["xyz"])]),
Expand Down Expand Up @@ -402,7 +402,7 @@ def curvature(xyz_list):


# -- Build feature matrix
def get_feature_matrix(neurographs, features, model_type, block_ids=None):
def get_matrix(neurographs, features, model_type, block_ids=None):
assert model_type in SUPPORTED_MODELS, "Error! model_type not supported"
if block_ids:
return __multiblock_feature_matrix(
Expand Down
37 changes: 32 additions & 5 deletions src/deep_neurographs/machine_learning/graph_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@

Custom datasets for training graph neural networks.

# explain branches vs edges terminology

"""

import networkx as nx
import numpy as np
import torch
from torch.utils.data import Dataset
from torch_geometric.data import Data as GraphData
from torch_geometric.data import HeteroData as HeteroGraphData

Expand All @@ -25,13 +26,31 @@ def init(neurograph, branch_features, proposal_features, heterogeneous=False):

Parameters
----------

neurograph : NeuroGraph
Graph that dataset is built from.
branch_features : dict
Feature vectors corresponding to branches such that the keys are a
frozenset of the node pair and values are the corresponding feature
vectors.
proposal_features : dict
Feature vectors corresponding to proposals such that the keys are a
frozenset of the node pair and values are the corresponding feature
vectors.
heterogeneous : bool
Indication of whether dataset should be stored as a heterogeneous
graph.

Returns
-------
GraphDataset, HeteroGraphDataset
Custom dataset.

"""
# Extract features
x_branches, _, idxs_branches = feature_generation.get_feature_matrix(
x_branches, _, idxs_branches = feature_generation.get_matrix(
neurograph, branch_features, "GraphNeuralNet"
)
x_proposals, y_proposals, idxs_proposals = feature_generation.get_feature_matrix(
x_proposals, y_proposals, idxs_proposals = feature_generation.get_matrix(
neurograph, proposal_features, "GraphNeuralNet"
)

Expand All @@ -47,14 +66,18 @@ def init(neurograph, branch_features, proposal_features, heterogeneous=False):
x_proposals,
y_proposals,
idxs_branches,
idxs_proposals
idxs_proposals,
)

return graph_dataset


# Datasets
class GraphDataset:
"""
Custom dataset for homogeneous graphs.

"""
def __init__(
self,
neurograph,
Expand All @@ -79,6 +102,10 @@ def __init__(


class HeteroGraphDataset:
"""
Custom dataset for heterogeneous graphs.

"""
def __init__(
self,
neurograph,
Expand Down
18 changes: 13 additions & 5 deletions src/deep_neurographs/machine_learning/graph_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,33 @@

"""

from torch.nn import ELU, Linear
from torch_geometric.nn import GCNConv

import torch
import torch.nn.functional as F
from torch.nn import ELU, Linear
from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    """
    Three-layer graph convolutional network that outputs one channel per
    node.

    The first two layers produce "input_channels // 2" channels and are each
    followed by an ELU activation and dropout (p=0.25). The third layer
    projects to a single channel and its output is returned without an
    activation.

    """

    def __init__(self, input_channels):
        """
        Builds the three convolutional layers and the shared activation.

        Parameters
        ----------
        input_channels : int
            Number of channels in the node feature vectors passed to the
            first layer.

        """
        super().__init__()
        self.conv1 = GCNConv(input_channels, input_channels // 2)
        self.conv2 = GCNConv(input_channels // 2, input_channels // 2)
        self.conv3 = GCNConv(input_channels // 2, 1)
        self.ELU = ELU()

    def forward(self, x, edge_index):
        """
        Runs the network on a graph.

        Parameters
        ----------
        x : torch.Tensor
            Node feature matrix passed to the first layer.
        edge_index : torch.Tensor
            Graph connectivity passed unchanged to every layer.

        Returns
        -------
        torch.Tensor
            Output of the third layer.

        """
        # Layer 1
        x = self.conv1(x, edge_index)
        x = self.ELU(x)
        # "F.dropout" defaults to training=True, so the module's mode must be
        # passed explicitly or dropout stays active after "model.eval()".
        x = F.dropout(x, p=0.25, training=self.training)

        # Layer 2
        x = self.conv2(x, edge_index)
        x = self.ELU(x)
        x = F.dropout(x, p=0.25, training=self.training)

        # Layer 3
        x = self.conv3(x, edge_index)
        return x


Expand All @@ -37,7 +45,7 @@ def __init__(self, input_channels):
self.linear2 = Linear(input_channels // 2, 1)
self.ELU = ELU()

def forward(self, x):
def forward(self, x, edge_index):
x = self.linear1(x)
x = self.ELU(x)
x = F.dropout(x, p=0.25)
Expand Down
96 changes: 67 additions & 29 deletions src/deep_neurographs/machine_learning/graph_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@

"""

import torch
from random import sample, shuffle

import torch
from torch.nn.functional import sigmoid

LR = 1e-3
N_EPOCHS = 100
N_EPOCHS = 300
TEST_PERCENT = 0.15
WEIGHT_DECAY = 5e-4


def run_on_graph(model, graph_data):
    """
    Placeholder with no implementation; currently does nothing.

    Parameters
    ----------
    model : object
        Unused.
    graph_data : object
        Unused.

    Returns
    -------
    None

    """
    pass


def run_on_graphs(
Expand All @@ -31,54 +32,58 @@ def run_on_graphs(
weight_decay=WEIGHT_DECAY,
):
# Initializations
graph_ids = list(graph_datasets.keys())
model.train()
model.to("cuda:0")
optimizer = torch.optim.Adam(
model.parameters(), lr=lr, weight_decay=weight_decay
)

# Train
# Main
accuracy = []
train_ids, test_ids = train_test_split(list(graph_datasets.keys()))
for epoch in range(n_epochs):
for graph_id in train_ids:
# Train
model.train()
for graph_id in train_ids:
loss, optimizer = train(
model,
criterion,
optimizer,
graph_datasets[graph_id].data,
model, criterion, optimizer, graph_datasets[graph_id].data
)

# Test
model.eval()
accuracy_i = 0
for graph_id in test_ids:
accuracy_i += validate(model, graph_datasets[graph_id].data)
accuracy.append(accuracy_i / len(test_ids))
if epoch % 10 == 0:
print("Accuracy +/-:", accuracy[-1])
return model


def train(model, criterion, optimizer, graph_data):
    """
    Performs one optimization step of "model" on a single graph.

    Parameters
    ----------
    model : torch.nn.Module
        Model called as "model(x, edge_index)".
    criterion : callable
        Loss function called as "criterion(hat_y, y)".
    optimizer : torch.optim.Optimizer
        Optimizer that is zeroed before the forward pass and stepped after
        the backward pass.
    graph_data : object
        Graph passed to "toGPU" to obtain "x", "y" and "edge_index".

    Returns
    -------
    tuple
        Loss computed for this step and the optimizer.

    """
    # Forward pass
    x, y, edge_index = toGPU(graph_data)
    optimizer.zero_grad()
    hat_y = model(x, edge_index)
    # Keep only the rows of the output that have a label in "y"
    hat_y = truncate(hat_y, y)

    # Backward pass
    loss = criterion(hat_y, y)
    loss.backward()
    optimizer.step()
    return loss, optimizer


def validate(model, graph_data):
    """
    Scores "model" on a single graph.

    The caller is expected to have put the model in evaluation mode; this
    routine does not change the model's mode.

    Parameters
    ----------
    model : torch.nn.Module
        Model called as "model(x, edge_index)".
    graph_data : object
        Graph passed to "toGPU" to obtain "x", "y" and "edge_index".

    Returns
    -------
    torch.Tensor
        Accuracy of the thresholded predictions minus the mean of "y".

    """
    # Initializations
    x, y, edge_index = toGPU(graph_data)
    # No gradients are needed for scoring
    with torch.no_grad():
        hat_y = model(x, edge_index)
        hat_y = truncate(hat_y, y)

    # Compute accuracy
    preds = get_predictions(hat_y)
    correct = preds == y
    acc = float(correct.sum()) / y.size(0)
    # Reported relative to the mean label value
    return acc - y.sum() / y.size(0)


# -- utils --
Expand All @@ -102,7 +107,40 @@ def shuffler(my_list):


def train_test_split(graph_ids):
    """
    Splits graph ids into a training set and a held-out test set.

    Parameters
    ----------
    graph_ids : list
        Hashable ids to be split.

    Returns
    -------
    tuple
        List of training ids, in the same order as "graph_ids", and list of
        test ids.

    """
    # Exactly one graph is held out; the percentage-based count is disabled.
    n_test_examples = 1  # int(len(graph_ids) * TEST_PERCENT)
    test_ids = sample(graph_ids, n_test_examples)

    # Filter instead of taking a set difference so that the order of the
    # training ids is deterministic.
    held_out = set(test_ids)
    train_ids = [
        graph_id for graph_id in graph_ids if graph_id not in held_out
    ]
    return train_ids, test_ids


def toGPU(graph_data, device="cuda:0"):
    """
    Moves the tensors of a graph to "device".

    Parameters
    ----------
    graph_data : object
        Object with tensor attributes "x", "y" and "edge_index".
    device : str or torch.device, optional
        Device that the tensors are moved to. The default is "cuda:0".

    Returns
    -------
    tuple
        "x" and "y" cast to float32 on "device", and "edge_index" on
        "device" with its dtype unchanged.

    """
    x = graph_data.x.to(device, dtype=torch.float32)
    y = graph_data.y.to(device, dtype=torch.float32)
    edge_index = graph_data.edge_index.to(device)
    return x, y, edge_index


def truncate(hat_y, y):
    """
    Truncates "hat_y" to the first column of its leading "y.size(0)" rows.
    Note this operation removes the predictions corresponding to branches so
    that loss is computed over proposals.

    Parameters
    ----------
    hat_y : torch.Tensor
        Two-dimensional tensor to be truncated.
    y : torch.Tensor
        Tensor used as a reference; only the size of its first dimension is
        read.

    Returns
    -------
    torch.Tensor
        One-dimensional tensor containing the first column of the leading
        "y.size(0)" rows of "hat_y".

    """
    n_targets = y.size(0)
    return hat_y[:n_targets, 0]


def get_predictions(hat_y, threshold=0.5):
    """
    Converts raw model outputs into binary predictions.

    Parameters
    ----------
    hat_y : torch.Tensor
        Raw outputs that the sigmoid is applied to.
    threshold : float, optional
        Value that the sigmoid output must strictly exceed for a prediction
        to be True. The default is 0.5.

    Returns
    -------
    torch.Tensor
        Boolean tensor with the same shape as "hat_y".

    """
    return sigmoid(hat_y) > threshold
9 changes: 8 additions & 1 deletion src/deep_neurographs/machine_learning/ml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def init_dataset(
neurographs, features, model_type, block_ids=None, transform=False
):
# Extract features
inputs, targets, idx_transforms = feature_generation.get_feature_matrix(
inputs, targets, idx_transforms = feature_generation.get_matrix(
neurographs, features, model_type, block_ids=block_ids
)
lens = []
Expand All @@ -168,3 +168,10 @@ def init_dataset(
"idx_to_edge": idx_transforms["idx_to_edge"],
}
return dataset


def get_lengths(neurograph):
    """
    Gets the length of every proposal in "neurograph".

    Parameters
    ----------
    neurograph : NeuroGraph
        Graph whose "proposals" mapping is iterated and whose
        "proposal_length" method is called on each key.

    Returns
    -------
    list
        Value returned by "neurograph.proposal_length" for each proposal,
        in the iteration order of "neurograph.proposals".

    """
    return [
        neurograph.proposal_length(edge)
        for edge in neurograph.proposals.keys()
    ]
Loading