-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlocalization.py
109 lines (87 loc) · 4.41 KB
/
localization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import torch
import torch.nn as nn
import torch.nn.functional as F
class PositionalEncoding(nn.Module):
    """
    Sinusoidal positional encoding for the previous pose.

    Every input value is expanded into sine/cosine pairs at ``L`` frequencies
    (``2**i * pi`` for ``i = 0 .. L-1``). The result is combined with the CNN
    output in the fully connected layers.
    """

    def __init__(self, L=10):
        super().__init__()
        # Number of frequency components used by forward().
        self.L = L

    def forward(self, pose):
        """
        Encode ``pose`` and return the features joined along the last dimension.

        The output is ordered sin, cos for the first frequency, then sin, cos
        for the second frequency, and so on, so the last dimension becomes
        ``2 * L`` times as long as the input's last dimension.
        """
        # One scalar multiplier per frequency component: 2**k * pi.
        multipliers = ((2 ** k) * torch.pi for k in range(self.L))
        # For each frequency emit the sine features followed by the cosine features.
        features = [
            wave(multiplier * pose)
            for multiplier in multipliers
            for wave in (torch.sin, torch.cos)
        ]
        return torch.cat(features, dim=-1)
class LocalizationModel1D(nn.Module):
    """
    Localization module implementing a 1D CNN over a LiDAR scan.

    The scan passes through six Conv1d/ReLU/MaxPool1d stages, is flattened and
    compressed by a fully connected layer, concatenated with a positional
    encoding of the grid-snapped previous pose, and mapped to 3 output values.

    Because ``fc1`` expects ``512 * 16`` inputs and every stage halves the
    length, a scan must contain 1024 to 1087 samples. Because ``fc2`` expects
    60 encoded pose features (2 * 10 frequencies per value), a pose must
    contain 3 values per sample.

    Args:
        grid_resolution: Cell size used to snap the previous pose to a grid
            before it is encoded. The default of 1.0 rounds every pose value
            to the nearest integer.

    Raises:
        ValueError: If ``grid_resolution`` is not positive.
    """

    # Class-level default so instances restored from pickles created before
    # this attribute existed still resolve ``self.grid_resolution``.
    grid_resolution = 1.0

    def __init__(self, grid_resolution=1.0):
        super().__init__()
        if grid_resolution <= 0:
            raise ValueError("grid_resolution must be positive")
        self.grid_resolution = grid_resolution

        # Determine the device to be used (GPU if available, else CPU)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Initialize the positional encoder with 10 frequency components
        self.pos_encoder = PositionalEncoding(L=10)

        # CNN made of identical stages: Conv1d (length-preserving, kernel 5 with
        # padding 2) -> ReLU -> MaxPool1d (halves the length). The layers are
        # appended in the same order as a hand-written Sequential, so the
        # state_dict keys (simple_cnn.0, simple_cnn.3, ...) are unchanged.
        channels = [1, 16, 32, 64, 128, 256, 512]
        stages = []
        for in_channels, out_channels in zip(channels, channels[1:]):
            stages.append(
                nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=5, stride=1, padding=2)
            )
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool1d(kernel_size=2, stride=2))
        self.simple_cnn = nn.Sequential(*stages)

        # Fully connected layers to process the combined features
        self.fc1 = nn.Linear(512 * 16, 120)
        # 120 LiDAR features + 60 encoded pose features
        self.fc2 = nn.Linear(120 + 60, 60)
        self.fc3 = nn.Linear(60, 3)

        # Place every layer on the same device. Moving only the CNN would leave
        # the fully connected layers on the CPU and break forward() on a CUDA host.
        self.to(self.device)

    def forward(self, lidar_scan, previous_pose):
        """
        Performs the forward pass for inference or training of the Localization Model.

        The mode is selected from the input shape, so no code has to be
        commented in or out when switching between inference and training.

        Args:
            lidar_scan: Tensor holding either a single scan (1-D, inference) or
                a batch of scans (2-D, ``(batch, samples)``, training).
            previous_pose: Tensor holding the previous pose(s) for the scan(s);
                it must contain 3 values per scan.

        Returns:
            A 1-D tensor of 3 values for a single scan, or a ``(batch, 3)``
            tensor for a batch of scans.

        Raises:
            ValueError: If ``lidar_scan`` is neither 1-D nor 2-D.
        """
        # Use the device the weights actually live on; ``self.device`` can be
        # stale if the caller moved the model after construction.
        model_device = self.fc1.weight.device

        # Add the channel dimension (and the batch dimension for a single scan)
        single_scan = lidar_scan.dim() == 1
        if single_scan:
            scan = lidar_scan.unsqueeze(0).unsqueeze(0)
        elif lidar_scan.dim() == 2:
            scan = lidar_scan.unsqueeze(1)
        else:
            raise ValueError(
                "lidar_scan must be 1-D (single scan) or 2-D (batch, samples), "
                f"got {lidar_scan.dim()} dimensions"
            )
        scan = scan.float().to(model_device)

        # Pass through the CNN and flatten to (batch, features)
        scan_features = self.simple_cnn(scan)
        scan_features = scan_features.view(scan_features.size(0), -1)

        # Snap a detached copy of the previous pose to the grid
        pose = previous_pose.clone().detach().float().to(model_device)
        grid_representation = (
            torch.round(pose / self.grid_resolution) * self.grid_resolution
        )

        # Encode the grid representation and shape it to one row per scan
        encoded_pose = self.pos_encoder(grid_representation)
        encoded_pose = encoded_pose.view(scan_features.size(0), -1)

        # Compress the LiDAR features, then combine them with the encoded pose
        scan_features = F.relu(self.fc1(scan_features))
        combined_input = torch.cat((scan_features, encoded_pose), dim=1)
        combined_input = F.relu(self.fc2(combined_input))

        # Final output through the last fully connected layer
        output = self.fc3(combined_input)

        # Return the output to MIND-Stack for subsequent processing by the
        # Stanley controller; a single scan yields an unbatched result.
        return output[0] if single_scan else output