Swarm-SLAM  1.0.0
C-SLAM Framework
netvlad.py
Go to the documentation of this file.
1 import numpy as np
2 
3 import os
4 from os.path import join, exists, isfile, realpath, dirname
5 import torch
6 import torch.nn as nn
7 import torch.nn.functional as F
8 import torch.optim as optim
9 from torch.autograd import Variable
10 from torch.utils.data import DataLoader, SubsetRandomSampler
11 from torch.utils.data.dataset import Subset
12 import torchvision.transforms as transforms
13 from PIL import Image
14 from datetime import datetime
15 import torchvision.datasets as datasets
16 import torchvision.models as models
17 import numpy as np
18 import sys
19 import pickle
20 import sklearn
21 from sklearn.neighbors import NearestNeighbors
22 from ament_index_python.packages import get_package_share_directory
23 
# Per-channel mean and standard deviation handed to transforms.Normalize when
# keyframes are preprocessed in NetVLAD.__init__ (the usual ImageNet values).
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
26 
27 
class NetVLADLayer(nn.Module):
    """NetVLAD pooling layer.

    Soft-assigns every local descriptor of a feature map to ``num_clusters``
    centroids and accumulates the weighted residuals into a single,
    L2-normalized global descriptor of size ``num_clusters * dim``.

    Based on https://github.com/lyakaap/NetVLAD-pytorch/blob/master/netvlad.py
    """

    def __init__(self,
                 num_clusters=64,
                 dim=128,
                 normalize_input=True,
                 vladv2=False):
        """
        Args:
            num_clusters (int): The number of clusters.
            dim (int): Dimension of the local descriptors (input channels).
            normalize_input (bool): If true, descriptor-wise L2 normalization
                is applied to the input.
            vladv2 (bool): If true, use vladv2 (soft-assignment convolution
                with a bias), otherwise use vladv1 (no bias).
        """
        super().__init__()
        self.num_clusters = num_clusters
        self.dim = dim
        # Soft-assignment sharpness; larger means harder assignment.
        # It is not a constructor argument: init_params() computes it.
        self.alpha = 0
        self.vladv2 = vladv2
        self.normalize_input = normalize_input
        # 1x1 convolution producing one assignment score per cluster.
        self.conv = nn.Conv2d(dim,
                              num_clusters,
                              kernel_size=(1, 1),
                              bias=vladv2)
        self.centroids = nn.Parameter(torch.rand(num_clusters, dim))

    def init_params(self, clsts, traindescs):
        """Initialize centroids and the assignment convolution from data.

        Args:
            clsts (np.ndarray): cluster centres, one row per cluster.
            traindescs (np.ndarray): training descriptors, one row per
                descriptor, used to estimate ``alpha``.
        """
        if not self.vladv2:
            clsts_assign = clsts / np.linalg.norm(
                clsts, axis=1, keepdims=True)
            dots = np.dot(clsts_assign, traindescs.T)
            dots.sort(0)
            dots = dots[::-1, :]  # sort, descending

            # Pick alpha from the mean gap between the best and the
            # second-best cluster similarity of each descriptor.
            self.alpha = (-np.log(0.01) /
                          np.mean(dots[0, :] - dots[1, :])).item()
            self.centroids = nn.Parameter(torch.from_numpy(clsts))
            self.conv.weight = nn.Parameter(
                torch.from_numpy(self.alpha *
                                 clsts_assign).unsqueeze(2).unsqueeze(3))
            self.conv.bias = None
        else:
            knn = NearestNeighbors(n_jobs=-1)
            knn.fit(traindescs)
            del traindescs
            # kneighbors() returns (distances, indices). The squared
            # *distances* are needed here; the previous code squared the
            # index array (element [1]) by mistake.
            distances, _ = knn.kneighbors(clsts, 2)
            ds_sq = np.square(distances)
            del knn, distances
            self.alpha = (-np.log(0.01) /
                          np.mean(ds_sq[:, 1] - ds_sq[:, 0])).item()
            self.centroids = nn.Parameter(torch.from_numpy(clsts))
            del clsts, ds_sq

            self.conv.weight = nn.Parameter(
                (2.0 * self.alpha *
                 self.centroids).unsqueeze(-1).unsqueeze(-1))
            self.conv.bias = nn.Parameter(-self.alpha *
                                          self.centroids.norm(dim=1))

    def forward(self, x):
        """Forward pass through the NetVLAD layer.

        Args:
            x (torch.Tensor): feature map whose second dimension holds the
                descriptor channels; it is fed to a Conv2d with ``dim``
                input channels, i.e. a (N, dim, H, W) tensor.

        Returns:
            torch.Tensor: global descriptors of shape
                (N, num_clusters * dim), L2-normalized per row.
        """
        batch_size, channels = x.shape[:2]

        if self.normalize_input:
            x = F.normalize(x, p=2, dim=1)  # across descriptor dim

        # soft-assignment: (N, num_clusters, L) with L spatial locations
        soft_assign = self.conv(x).view(batch_size, self.num_clusters, -1)
        soft_assign = F.softmax(soft_assign, dim=1)

        x_flatten = x.view(batch_size, channels, -1)  # (N, C, L)

        # calculate residuals to each cluster
        vlad = torch.zeros([batch_size, self.num_clusters, channels],
                           dtype=x.dtype,
                           layout=x.layout,
                           device=x.device)
        # Looping is slower than the fully vectorized form, but uses far
        # less memory. The index is named `k` so that it no longer shadows
        # the channel count.
        for k in range(self.num_clusters):
            # (N, 1, C, L) - (1, 1, C, 1), broadcast over batch and location
            residual = x_flatten.unsqueeze(1) - \
                self.centroids[k].view(1, 1, channels, 1)
            residual *= soft_assign[:, k:k + 1, :].unsqueeze(2)
            vlad[:, k:k + 1, :] = residual.sum(dim=-1)

        vlad = F.normalize(vlad, p=2, dim=2)  # intra-normalization
        vlad = vlad.view(x.size(0), -1)  # flatten
        vlad = F.normalize(vlad, p=2, dim=1)  # L2 normalize

        return vlad
131 
132 
class NetVLAD(object):
    """NetVLAD matcher.

    Wraps a truncated VGG16 encoder followed by a NetVLADLayer and a
    pickled PCA transform, and turns keyframes into global descriptors.
    """

    def __init__(self, params, node):
        """Initialization

        Args:
            params (dict): parameters. Reads 'frontend.nn_checkpoint' and
                'frontend.image_crop_size'; when enabled, rewrites
                'frontend.nn_checkpoint' and sets
                'frontend.netvlad.pca_checkpoint' to paths inside the
                "cslam" package share directory.
            node: object exposing ``get_parameter(name).value``, used to
                read 'frontend.netvlad.pca_checkpoint'.
        """
        self.params = params
        self.node = node

        # Setting the checkpoint to 'disable' (any case) turns the network
        # off; compute_embedding() then returns random descriptors.
        self.enable = self.params['frontend.nn_checkpoint'].lower(
        ) != 'disable'
        if self.enable:
            pkg_folder = get_package_share_directory("cslam")
            self.params['frontend.nn_checkpoint'] = join(
                pkg_folder, self.params['frontend.nn_checkpoint'])
            self.params['frontend.netvlad.pca_checkpoint'] = join(
                pkg_folder,
                self.node.get_parameter(
                    'frontend.netvlad.pca_checkpoint').value)

            if torch.cuda.is_available():
                self.device = torch.device("cuda")
            else:
                self.device = torch.device("cpu")

            encoder_dim = 512
            encoder = models.vgg16(pretrained=True)
            # capture only feature part and remove last relu and maxpool
            layers = list(encoder.features.children())[:-2]
            # if using pretrained then only train conv5_1, conv5_2, and conv5_3
            for layer in layers[:-5]:
                for param in layer.parameters():
                    param.requires_grad = False

            encoder = nn.Sequential(*layers)
            self.model = nn.Module()
            self.model.add_module('encoder', encoder)
            netvlad_layer = NetVLADLayer(num_clusters=64,
                                         dim=encoder_dim,
                                         vladv2=False)
            self.model.add_module('pool', netvlad_layer)

            self.isParallel = False
            print('=> Number of CUDA devices = ' +
                  str(torch.cuda.device_count()))
            if torch.cuda.device_count() > 1:
                self.model.encoder = nn.DataParallel(self.model.encoder)
                self.model.pool = nn.DataParallel(self.model.pool)
                self.isParallel = True

            resume_ckpt = self.params['frontend.nn_checkpoint']
            if isfile(resume_ckpt):
                print("=> loading checkpoint '{}'".format(resume_ckpt))
                # Always deserialize storages on CPU first; the model is
                # moved to the selected device afterwards.
                checkpoint = torch.load(
                    resume_ckpt, map_location=lambda storage, loc: storage)
                self.model.load_state_dict(checkpoint['state_dict'])
                self.model = self.model.to(self.device)
                print("=> loaded checkpoint '{}' (epoch {})".format(
                    resume_ckpt, checkpoint['epoch']))
            else:
                print("Error: Checkpoint path is incorrect")

            self.model.eval()
            self.transform = transforms.Compose([
                transforms.CenterCrop(
                    self.params["frontend.image_crop_size"]),
                transforms.Resize(224, interpolation=3),
                transforms.ToTensor(),
                transforms.Normalize(IMAGENET_DEFAULT_MEAN,
                                     IMAGENET_DEFAULT_STD),
            ])
            # NOTE: pickle executes arbitrary code on load; the PCA file
            # must come from a trusted source (here: the package share dir).
            with open(self.params['frontend.netvlad.pca_checkpoint'],
                      'rb') as pca_file:
                self.pca = pickle.load(pca_file)

    def compute_embedding(self, keyframe):
        """Load image to device and extract the global image descriptor

        Args:
            keyframe (image): image to match, as an array accepted by
                ``PIL.Image.fromarray``

        Returns:
            np.array: global image descriptor (PCA-reduced and
                L2-normalized), or a random vector of length 128 when the
                network is disabled
        """
        if not self.enable:
            # Random descriptor if disabled
            # Use this option only for testing
            return np.random.rand(128)

        with torch.no_grad():
            image = Image.fromarray(keyframe)
            batch = self.transform(image)
            batch = torch.unsqueeze(batch, 0)  # add the batch dimension
            batch = batch.to(self.device)
            image_encoding = self.model.encoder(batch)
            vlad_encoding = self.model.pool(image_encoding)

            # Compute NetVLAD
            embedding = vlad_encoding.detach().cpu().numpy()

            # Run PCA transform
            reduced_embedding = self.pca.transform(embedding)
            normalized_embedding = sklearn.preprocessing.normalize(
                reduced_embedding)

            # Single-image batch: return its only row.
            return normalized_embedding[0]
def compute_embedding(self, keyframe)
Definition: netvlad.py:212
def __init__(self, params, node)
Definition: netvlad.py:137
def forward(self, x)
Definition: netvlad.py:94
def __init__(self, num_clusters=64, dim=128, normalize_input=True, vladv2=False)
Definition: netvlad.py:37
def init_params(self, clsts, traindescs)
Definition: netvlad.py:63