from os.path import join, exists, isfile, realpath, dirname

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.utils.data.dataset import Subset
import torchvision.transforms as transforms
from PIL import Image
from datetime import datetime
import torchvision.datasets as datasets
import torchvision.models as models

import pickle
import sklearn.preprocessing
from sklearn.neighbors import NearestNeighbors
from ament_index_python.packages import get_package_share_directory

IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
29 """ NetVLAD layer implementation
30 based on https://github.com/lyakaap/NetVLAD-pytorch/blob/master/netvlad.py
41 The number of clusters
43 Dimension of descriptors
45 Parameter of initialization. Larger value is harder assignment.
46 normalize_input : bool
47 If true, descriptor-wise L2 normalization is applied to input.
49 If true, use vladv2 otherwise use vladv1
57 self.
convconv = nn.Conv2d(dim,
61 self.
centroidscentroids = nn.Parameter(torch.rand(num_clusters, dim))
    def init_params(self, clsts, traindescs):
        if not self.vladv2:
            clstsAssign = clsts / np.linalg.norm(clsts, axis=1, keepdims=True)
            dots = np.dot(clstsAssign, traindescs.T)
            dots.sort(0)
            dots = dots[::-1, :]  # sort descending

            self.alpha = (-np.log(0.01) /
                          np.mean(dots[0, :] - dots[1, :])).item()
            self.centroids = nn.Parameter(torch.from_numpy(clsts))
            self.conv.weight = nn.Parameter(
                torch.from_numpy(self.alpha *
                                 clstsAssign).unsqueeze(2).unsqueeze(3))
            self.conv.bias = None
        else:
            knn = NearestNeighbors(n_jobs=-1)
            knn.fit(traindescs)
            dsSq = np.square(knn.kneighbors(clsts, 2)[1])
            self.alpha = (-np.log(0.01) /
                          np.mean(dsSq[:, 1] - dsSq[:, 0])).item()
            self.centroids = nn.Parameter(torch.from_numpy(clsts))
            self.conv.weight = nn.Parameter(
                (2.0 * self.alpha *
                 self.centroids).unsqueeze(-1).unsqueeze(-1))
            self.conv.bias = nn.Parameter(-self.alpha *
                                          self.centroids.norm(dim=1))
95 """Forward pass through the NetVLAD network
98 x (image): image to match
101 torch array: Global image descriptor
106 x = F.normalize(x, p=2, dim=1)
110 soft_assign = F.softmax(soft_assign, dim=1)
112 x_flatten = x.view(N, C, -1)
115 vlad = torch.zeros([N, self.
num_clustersnum_clusters, C],
121 residual = x_flatten.unsqueeze(0).permute(1, 0, 2, 3) - \
122 self.
centroidscentroids[C:C+1, :].expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0)
123 residual *= soft_assign[:, C:C + 1, :].unsqueeze(2)
124 vlad[:, C:C + 1, :] = residual.sum(dim=-1)
126 vlad = F.normalize(vlad, p=2, dim=2)
127 vlad = vlad.view(x.size(0), -1)
128 vlad = F.normalize(vlad, p=2, dim=1)
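
# Illustrative usage sketch (shapes and values chosen arbitrarily): the layer
# aggregates a dense feature map, e.g. the VGG16 conv5 output used below,
# into a single L2-normalized vector of length num_clusters * dim.
#
#   layer = NetVLADLayer(num_clusters=64, dim=512)
#   feature_map = torch.rand(1, 512, 30, 40)   # (N, dim, H, W)
#   descriptor = layer(feature_map)            # shape: (1, 64 * 512)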

class NetVLAD(object):
    """NetVLAD global image descriptor extractor."""

    def __init__(self, params, node):
        """Initialization

        Args:
            params (dict): parameters
            node: ROS 2 node handle
        """
        self.params = params
        self.node = node

        # Resolve checkpoint paths relative to the cslam package share folder
        pkg_folder = get_package_share_directory("cslam")
        self.params['frontend.nn_checkpoint'] = join(
            pkg_folder, self.params['frontend.nn_checkpoint'])
        self.params['frontend.netvlad.pca_checkpoint'] = join(
            pkg_folder,
            self.node.get_parameter(
                'frontend.netvlad.pca_checkpoint').value)

        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        # VGG16 convolutional layers serve as the local feature encoder
        encoder_dim = 512
        encoder = models.vgg16(pretrained=True)
        # Keep only the feature extractor, dropping the last ReLU and max-pool
        layers = list(encoder.features.children())[:-2]
        # Freeze everything except the last convolutional block
        for l in layers[:-5]:
            for p in l.parameters():
                p.requires_grad = False

        encoder = nn.Sequential(*layers)
        self.model = nn.Module()
        self.model.add_module('encoder', encoder)

        # NetVLAD pooling head; hyperparameters must match the loaded checkpoint
        netvlad_layer = NetVLADLayer(num_clusters=64, dim=encoder_dim)
        self.model.add_module('pool', netvlad_layer)

        print('=> Number of CUDA devices = ' +
              str(torch.cuda.device_count()))
        if torch.cuda.device_count() > 1:
            self.model.encoder = nn.DataParallel(self.model.encoder)
            self.model.pool = nn.DataParallel(self.model.pool)

        resume_ckpt = self.params['frontend.nn_checkpoint']
        if isfile(resume_ckpt):
            print("=> loading checkpoint '{}'".format(resume_ckpt))
            checkpoint = torch.load(
                resume_ckpt, map_location=lambda storage, loc: storage)
            start_epoch = checkpoint['epoch']
            best_metric = checkpoint['best_score']
            self.model.load_state_dict(checkpoint['state_dict'])
            self.model = self.model.to(self.device)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume_ckpt, checkpoint['epoch']))
        else:
            print("Error: Checkpoint path is incorrect")

        self.model.eval()

        self.transform = transforms.Compose([
            transforms.CenterCrop(self.params["frontend.image_crop_size"]),
            transforms.Resize(224, interpolation=3),
            transforms.ToTensor(),
            transforms.Normalize(IMAGENET_DEFAULT_MEAN,
                                 IMAGENET_DEFAULT_STD),
        ])

        # PCA model used to reduce the dimensionality of the NetVLAD descriptor
        self.pca = pickle.load(
            open(self.params['frontend.netvlad.pca_checkpoint'], 'rb'))
213 """Load image to device and extract the global image descriptor
216 keyframe (image): image to match
219 np.array: global image descriptor
222 with torch.no_grad():
223 image = Image.fromarray(keyframe)
225 input = torch.unsqueeze(input, 0)
226 input = input.to(self.
devicedevice)
227 image_encoding = self.
modelmodel.encoder(input)
228 vlad_encoding = self.
modelmodel.pool(image_encoding)
231 embedding = vlad_encoding.detach().cpu().numpy()
235 normalized_embedding = sklearn.preprocessing.normalize(
237 output = normalized_embedding[0]
239 del input, image_encoding, vlad_encoding, reduced_embedding, normalized_embedding, image

    def compute_dummy_embedding(self, keyframe):
        # Placeholder: return a random descriptor (the original enclosing
        # definition is not in this excerpt; this wrapper name is assumed).
        return np.random.rand(128)
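
# Illustrative usage sketch: the parameter keys mirror the ones read in
# __init__ (frontend.netvlad.pca_checkpoint is read from the node parameters),
# the checkpoint path and crop size below are placeholder values, and `node`
# is the rclpy node owning this component.
#
#   params = {
#       'frontend.nn_checkpoint': 'models/netvlad_checkpoint.pth.tar',
#       'frontend.image_crop_size': 376,
#   }
#   netvlad = NetVLAD(params, node)
#   descriptor = netvlad.compute_embedding(rgb_image)  # HxWx3 uint8 array
#   # `descriptor` is the PCA-reduced, L2-normalized global descriptor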