#################################################################################
# Copyright (c) 2018-2021, Texas Instruments Incorporated - http://www.ti.com
# All Rights Reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#################################################################################

import os
import numpy as np
import cv2
import json
import sys
import struct
from collections import OrderedDict
import warnings

import torch
from torch.utils import data

# sys.path.insert(0, './scripts_internal/')
# import write_desc
from pytorch_jacinto_ai import xnn

###########################################
# config settings
def get_config():
    dataset_config = xnn.utils.ConfigNode()
    dataset_config.image_folders = ('leftImg8bit',)
    dataset_config.input_offsets = None
    dataset_config.load_segmentation = True
    dataset_config.use_semseg_for_depth = False
    dataset_config.load_segmentation_flow_correction = False
    return dataset_config
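
# Example (sketch): the factory functions below merge caller overrides into these
# defaults via ConfigNode.merge_from(), as done in tiad_motion_multi_input() etc.
# A typical override for a two-frame (multi-input) loader might look like this;
# the values shown are illustrative, not required:
#
#     dataset_config = get_config()
#     dataset_config.image_folders = ('leftImg8bitPrevious', 'leftImg8bit')
#     dataset_config.load_segmentation = False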


class TiadBaseSegmentationLoader():
    """TiadBaseSegmentationLoader: Data is derived from CityScapes and can be downloaded from: https://www.cityscapes-dataset.com/downloads/
    Many thanks to @fvisin for the loader repo: https://github.com/fvisin/dataset_loaders/blob/master/dataset_loaders/images/cityscapes.py"""
    colors = [  # [0, 0, 0],
        [152, 251, 152], [0, 130, 180], [220, 20, 60], [3, 3, 251], [190, 153, 153], [0, 0, 0]]  # [0, 130, 180](sky) #[152, 251, 152](vegetation) #[102, 102, 156](vehicle)

    label_colours = dict(zip(range(5), colors))

    void_classes = [-1, 255]
    valid_classes = [0, 1, 2, 3, 4]
    class_names = ['road', 'sky', 'pedestrian', 'vehicle', 'background']

    ignore_index = 255
    class_map = dict(zip(valid_classes, range(5)))
    num_classes_ = 5

    class_weights_ = np.array([0.30594229, 1., 25.07696964, 2.59353056, 0.38336123], dtype=float)
    # class_weights_ = np.ones(num_classes_)

    @classmethod
    def decode_segmap(cls, temp):
        r = temp.copy()
        g = temp.copy()
        b = temp.copy()
        for l in range(0, cls.num_classes_):
            r[temp == l] = cls.label_colours[l][0]
            g[temp == l] = cls.label_colours[l][1]
            b[temp == l] = cls.label_colours[l][2]

        rgb = np.zeros((temp.shape[0], temp.shape[1], 3))
        rgb[:, :, 0] = r / 255.0
        rgb[:, :, 1] = g / 255.0
        rgb[:, :, 2] = b / 255.0
        return rgb

    @classmethod
    def encode_segmap(cls, mask):
        for _validc in cls.valid_classes:
            mask[mask == _validc] = cls.class_map[_validc]
        # put all void classes to 255
        for _voidc in cls.void_classes:
            mask[mask == _voidc] = cls.ignore_index
        return mask

    @classmethod
    def class_weights(cls):
        return cls.class_weights_
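
# Example (illustrative sketch): encode_segmap() maps raw label ids to train ids
# in place and decode_segmap() expands a train-id map into a float RGB image in
# [0, 1]. The 2x2 label array below is made up for illustration:
#
#     lbl = np.array([[0, 4], [255, 2]], dtype=np.uint8)
#     lbl = TiadBaseSegmentationLoader.encode_segmap(lbl)  # 255 (void) -> ignore_index
#     rgb = TiadBaseSegmentationLoader.decode_segmap(lbl)  # shape (2, 2, 3)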


class TiadBaseSemanticMotionLoader():
    """TiadBaseSemanticMotionLoader: Data is derived from CityScapes and can be downloaded from: https://www.cityscapes-dataset.com/downloads/
    Many thanks to @fvisin for the loader repo: https://github.com/fvisin/dataset_loaders/blob/master/dataset_loaders/images/cityscapes.py"""
    colors = [  # [0, 0, 0],
        [152, 251, 152], [0, 130, 180], [220, 20, 60], [3, 3, 251], [190, 153, 153], [220, 20, 60], [0, 0, 0]]  # [0, 130, 180](sky) #[152, 251, 152](vegetation) #[102, 102, 156](vehicle)

    label_colours = dict(zip(range(6), colors))

    void_classes = [-1, 255]
    valid_classes = [0, 1, 2, 3, 4, 5]
    class_names = ['road', 'sky', 'pedestrian', 'vehicle', 'background', 'vehicle_moving']

    ignore_index = 255
    class_map = dict(zip(valid_classes, range(6)))
    num_classes_ = 6

    class_weights_ = np.array([0.30594229, 1., 25.07696964, 5.18353056, 0.38336123, 5.18353056], dtype=float)

    @classmethod
    def decode_segmap(cls, temp):
        r = temp.copy()
        g = temp.copy()
        b = temp.copy()
        for l in range(0, cls.num_classes_):
            r[temp == l] = cls.label_colours[l][0]
            g[temp == l] = cls.label_colours[l][1]
            b[temp == l] = cls.label_colours[l][2]

        rgb = np.zeros((temp.shape[0], temp.shape[1], 3))
        rgb[:, :, 0] = r / 255.0
        rgb[:, :, 1] = g / 255.0
        rgb[:, :, 2] = b / 255.0
        return rgb

    @classmethod
    def encode_segmap(cls, mask):
        for _validc in cls.valid_classes:
            mask[mask == _validc] = cls.class_map[_validc]
        # put all void classes to 255
        for _voidc in cls.void_classes:
            mask[mask == _voidc] = cls.ignore_index
        return mask

    @classmethod
    def class_weights(cls):
        return cls.class_weights_


class TiadBaseMotionLoader():
    """TiadBaseMotionLoader: Data is derived from CityScapes and can be downloaded from: https://www.cityscapes-dataset.com/downloads/
    Many thanks to @fvisin for the loader repo: https://github.com/fvisin/dataset_loaders/blob/master/dataset_loaders/images/cityscapes.py"""
    colors = [
        [0, 0, 0], [128, 64, 128], [0, 0, 0]]

    label_colours = dict(zip(range(2), colors))

    void_classes = [2]
    valid_classes = [0, 255]  # 255 #1
    class_names = ['static', 'moving']
    ignore_index = 255
    class_map = dict(zip(valid_classes, range(2)))
    num_classes_ = 2
    class_weights_ = np.array([0.05, 0.95], dtype=float)  # [0.51222399, 20.95158417] #[0.02, 0.98] #[0.05, 0.95]

    @classmethod
    def decode_segmap(cls, temp):
        r = temp.copy()
        g = temp.copy()
        b = temp.copy()
        for l in range(0, cls.num_classes_):
            r[temp == l] = cls.label_colours[l][0]
            g[temp == l] = cls.label_colours[l][1]
            b[temp == l] = cls.label_colours[l][2]
        #
        rgb = np.zeros((temp.shape[0], temp.shape[1], 3))
        rgb[:, :, 0] = r / 255.0
        rgb[:, :, 1] = g / 255.0
        rgb[:, :, 2] = b / 255.0
        return rgb

    @classmethod
    def encode_segmap(cls, mask):
        # map valid classes to train ids, then put all void classes to ignore_index
        for _validc in cls.valid_classes:
            mask[mask == _validc] = cls.class_map[_validc]
        for _voidc in cls.void_classes:
            mask[mask == _voidc] = cls.ignore_index
        return mask

    @classmethod
    def class_weights(cls):
        return cls.class_weights_


class TiadDataLoader(data.Dataset):
    def __init__(self, root, split="train", gt="gtFine", transforms=None, image_folders=('leftImg8bit',),
                 search_images=False, load_segmentation=True, load_segmentation_flow_correction=False,
                 load_semantic_motion=False, load_depth=False, load_motion=False, load_flow=False,
                 load_interest_pt=False, inference=False, additional_info=False, start_offsets=None,
                 end_offsets=None, input_offsets=None, akaze_format='precomputed_bin', max_depth=20.0,
                 depth_scale=1, use_semseg_for_depth=False, train_depth_log=False):
        super().__init__()
        if split not in ['train', 'val', 'test']:
            warnings.warn(f'unknown split specified: {split}')
        self.root = root if not isinstance(root, (tuple, list)) else root[-1]
        self.gt = gt
        self.split = split
        self.transforms = transforms
        self.image_folders = image_folders
        self.search_images = search_images
        self.files = {}
        self.additional_info = additional_info
        self.start_offsets = start_offsets
        self.end_offsets = end_offsets

        self.load_segmentation = load_segmentation
        self.load_segmentation_flow_correction = load_segmentation_flow_correction
        self.load_semantic_motion = load_semantic_motion
        self.load_depth = load_depth
        self.load_motion = load_motion
        self.load_flow = load_flow
        self.load_interest_pt = load_interest_pt
        self.num_interest_pt_channels = 65

        self.inference = inference
        self.input_offsets = input_offsets

        self.image_suffix = '.png'
        self.segmentation_suffix = self.gt + '_labelTrainIds.png'
        self.semantic_motion_suffix = self.gt + '_labelTrainIds_semantic_motion.png'
        self.depth_suffix = '.png'
        self.motion_suffix = self.gt + '_labelTrainIds_motion.png'
        self.set_index_for_zero_mean()
        self.akaze_format = akaze_format
        self.max_depth = max_depth
        self.depth_scale = depth_scale
        self.train_depth_log = train_depth_log
        self.use_semseg_for_depth = use_semseg_for_depth

        self.image_base = os.path.join(self.root, image_folders[-1], self.split) if not isinstance(root, (list, tuple)) else self.root
        self.segmentation_base = os.path.join(self.root, gt, self.split)
        self.semantic_motion_base = os.path.join(self.root, gt, self.split)
        self.depth_base = os.path.join(self.root, 'depth', self.split)
        self.ss_label_base = os.path.join(self.root, 'seg_labels', self.split)
        self.cameracalib_base = os.path.join(self.root, 'camera', self.split)
        self.motion_base = os.path.join(self.root, gt, self.split)

        # pick the folder that the dataset is indexed by, depending on what is loaded
        if self.search_images:
            search_base, search_suffix = self.image_base, self.image_suffix
        elif self.load_segmentation:
            search_base, search_suffix = self.segmentation_base, self.segmentation_suffix
        elif self.load_semantic_motion:
            search_base, search_suffix = self.semantic_motion_base, self.semantic_motion_suffix
        elif self.load_motion:
            search_base, search_suffix = self.motion_base, self.motion_suffix
        elif self.load_depth:
            search_base, search_suffix = self.depth_base, self.depth_suffix
        else:
            raise Exception('> one of search_images or the load_* flags must be enabled')
        #
        self.files = sorted(xnn.utils.recursive_glob(rootdir=search_base, suffix=search_suffix))

        if not self.files:
            raise Exception("> No files for split=[%s] found in %s" % (split, search_base))
        #

        self.image_files = [None] * len(image_folders)
        for image_idx, image_folder in enumerate(image_folders):
            image_base = os.path.join(self.root, image_folder, self.split) if not isinstance(root, (list, tuple)) else root[image_idx]
            self.image_files[image_idx] = sorted(xnn.utils.recursive_glob(rootdir=image_base, suffix='.png'))
            # select only the required files
            if self.start_offsets and self.end_offsets:
                start_offset = self.start_offsets[image_idx]
                end_offset = self.end_offsets[image_idx]
                self.image_files[image_idx] = self.image_files[image_idx][start_offset:] \
                    if (end_offset == 0) else self.image_files[image_idx][start_offset:end_offset]
            #
            assert len(self.image_files[image_idx]) == len(self.image_files[0]), 'all folders should have same number of files'
        #
        if self.start_offsets and self.end_offsets:
            start_offset = self.start_offsets[-1]
            end_offset = self.end_offsets[-1]
            self.files = self.files[start_offset:] if (end_offset == 0) else self.files[start_offset:end_offset]
        #

        # cache the first item; it is returned later if corrupt data is encountered
        if self.additional_info:
            self.first_images, self.first_targets, self.first_images_path, self.first_targets_path = self.__getitem__(0)
        else:
            self.first_images, self.first_targets = self.__getitem__(0)

    def set_index_for_zero_mean(self):
        # descriptor channels (1 score + 64 descriptor values) split into channels that are
        # positive-only and channels that are symmetric around zero; the stored records carry
        # two leading location values (loc_x, loc_y), hence the +2 offset into the raw data
        self.pos_chan_idx = np.asarray([3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31, 32, 35, 36, 39, 40, 43, 44, 47, 48, 51,
                                        52, 55, 56, 59, 60, 63, 64])
        self.symmetric_chan_idx = np.asarray([1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30, 33, 34, 37, 38, 41, 42, 45, 46,
                                              49, 50, 53, 54, 57, 58, 61, 62])
        two_pos_for_loc = 2
        self.pos_chan_idx_in_data = self.pos_chan_idx + two_pos_for_loc
        self.symmetric_chan_idx_in_data = self.symmetric_chan_idx + two_pos_for_loc

    def __len__(self):
        return len(self.files)

    def __getitem__(self, index):
        if self.search_images:
            image_path = self.files[index].rstrip()
            self.check_file_exists(image_path)
            segmentation_path = image_path.replace(self.image_base, self.segmentation_base).replace(self.image_suffix, '_' + self.segmentation_suffix)
        elif self.load_segmentation or self.load_segmentation_flow_correction:
            segmentation_path = self.files[index].rstrip()
            self.check_file_exists(segmentation_path)
            image_path = segmentation_path.replace(self.segmentation_base, self.image_base).replace('_' + self.segmentation_suffix, self.image_suffix)
        elif self.load_semantic_motion:
            semantic_motion_path = self.files[index].rstrip()
            self.check_file_exists(semantic_motion_path)
            image_path = semantic_motion_path.replace(self.semantic_motion_base, self.image_base).replace('_' + self.semantic_motion_suffix, self.image_suffix)
        elif self.load_motion:
            motion_path = self.files[index].rstrip()
            self.check_file_exists(motion_path)
            image_path = motion_path.replace(self.motion_base, self.image_base).replace('_' + self.motion_suffix, self.image_suffix)
        elif self.load_depth:
            depth_path = self.files[index].rstrip()
            self.check_file_exists(depth_path)
            image_path = depth_path.replace(self.depth_base, self.image_base).replace('depth_', 'image_')
        elif self.load_interest_pt:
            image_path = self.files[index].rstrip()
            self.check_file_exists(image_path)
        #

        images = []
        images_path = []
        for image_idx, image_folder in enumerate(self.image_folders):
            this_image_path = self.image_files[image_idx][index].rstrip()
            if image_idx == (len(self.image_folders) - 1):
                assert this_image_path == image_path, 'image file name error'
            #
            self.check_file_exists(this_image_path)
            img_bgr = cv2.imread(this_image_path)  # keep the BGR copy: AKAZE descriptors are computed on it
            img = img_bgr
            if img is None:
                # read failed: return the pre-stored (transformed) first item instead
                if self.additional_info:
                    return self.first_images, self.first_targets, self.first_images_path, self.first_targets_path
                else:
                    return self.first_images, self.first_targets
            else:
                img = img[:, :, ::-1]  # BGR to RGB
            #
            if self.input_offsets is not None:
                img = img - self.input_offsets[image_idx]
            #
            images.append(img)
            images_path.append(this_image_path)
        #

        targets = []
        targets_path = []
        if self.load_flow and (not self.inference):
            flow_zero = np.zeros((images[0].shape[0], images[0].shape[1], 2), dtype=np.float32)
            targets.append(flow_zero)

        if self.load_depth and (not self.inference):
            depth_path = image_path.replace(self.image_base, self.depth_base).replace('image_', 'depth_')
            ss_label_path = ''
            if self.use_semseg_for_depth:
                ss_label_path = image_path.replace(self.image_base, self.ss_label_base).replace('image_', 'depth_')
                self.check_file_exists(ss_label_path)

            self.check_file_exists(depth_path)
            depth = self.depth_loader(depth_path, max_depth_afr_scale=self.max_depth, ss_label_path=ss_label_path,
                                      depth_scale=self.depth_scale, train_depth_log=self.train_depth_log)
            targets.append(depth)
        #
        if (self.load_segmentation or self.load_segmentation_flow_correction) and (not self.inference):
            lbl = cv2.imread(segmentation_path, 0).astype(np.uint8)
            lbl = TiadBaseSegmentationLoader.encode_segmap(np.array(lbl, dtype=np.uint8))
            if self.load_segmentation:
                targets.append(lbl)
                targets_path.append(segmentation_path)
            if self.load_segmentation_flow_correction:
                images[0][:, :, :-1][cv2.resize(lbl, dsize=(images[0].shape[1], images[0].shape[0]), interpolation=cv2.INTER_NEAREST) == 1] = 128
        #
        if self.load_semantic_motion and (not self.inference):
            lbl = cv2.imread(semantic_motion_path, 0).astype(np.uint8)
            lbl = TiadBaseSemanticMotionLoader.encode_segmap(np.array(lbl, dtype=np.uint8))
            targets.append(lbl)
            targets_path.append(semantic_motion_path)
        #
        if self.load_motion and (not self.inference):
            motion_path = image_path.replace(self.image_base, self.motion_base).replace(self.image_suffix, '_' + self.motion_suffix)
            self.check_file_exists(motion_path)
            motion = cv2.imread(motion_path, 0).astype(np.uint8)
            motion = TiadBaseMotionLoader.encode_segmap(np.array(motion, dtype=np.uint8))
            targets.append(motion)
        #
        if self.load_interest_pt and (not self.inference):
            interest_pt_descriptor = self.compute_interest_pt_descriptor(img_bgr, image_path)
            targets.append(interest_pt_descriptor)

        if self.transforms is not None:
            images, targets = self.transforms(images, targets)
        #
        if self.additional_info:
            return images, targets, images_path, targets_path
        else:
            return images, targets
        #

    def decode_segmap(self, lbl):
        if self.load_segmentation:
            return TiadBaseSegmentationLoader.decode_segmap(lbl)
        elif self.load_semantic_motion:
            return TiadBaseSemanticMotionLoader.decode_segmap(lbl)
        else:
            return TiadBaseMotionLoader.decode_segmap(lbl)
    #

    def check_file_exists(self, file_name):
        if not os.path.exists(file_name) or not os.path.isfile(file_name):
            raise Exception("{} is not a file; cannot open it with imread.".format(file_name))
    #

    # linearly stretch the given array to the range [min_val, max_val]
    def stretch_to_range(self, ip_array, min_val=1.0, max_val=255.0):
        ip_range = ip_array.max() - ip_array.min()
        new_range = max_val - min_val
        offset = min_val - (ip_array.min() * new_range / ip_range)
        op_array = (ip_array * new_range / ip_range) + offset
        return op_array

    def depth_loader(self, depth_path, max_depth_afr_scale=20, ss_label_path='', depth_scale=1.0, ignore_depth_val=-1,
                     train_depth_log=False):
        depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)

        if depth.dtype == 'uint16':
            ignore_depth_val = ignore_depth_val if ignore_depth_val != -1 else np.iinfo(np.uint16).max
            depth[depth == ignore_depth_val] = 0
            # dataset = 'sc-sfm'
            # if dataset == 'sc-sfm':
            #     depth[depth == (ignore_depth_val-1)] = 0

            if train_depth_log:
                # train on log10(depth)
                if not np.all(depth == 0):
                    depth = self.stretch_to_range(ip_array=depth, min_val=1.0, max_val=255.0)
                    depth = np.log10(depth)
                    depth = self.stretch_to_range(ip_array=depth, min_val=0.0, max_val=255.0)
                else:
                    depth = np.float64(depth)
            else:
                # depth/256.0 takes care of the storage scale; after scaling,
                # clamp so that depth never exceeds max_depth_afr_scale
                depth = (depth / 256.0) * depth_scale
                depth[depth > max_depth_afr_scale] = max_depth_afr_scale
        elif depth.dtype == 'uint8':
            ignore_depth_val = ignore_depth_val if ignore_depth_val != -1 else np.iinfo(np.uint8).max
            if depth_scale != 1:
                depth = np.uint8(depth * depth_scale)
            depth[depth == ignore_depth_val] = 0
            depth[depth > max_depth_afr_scale] = max_depth_afr_scale
        else:
            raise ValueError("unsupported data type for depth ground truth: {}".format(depth.dtype))

        # read the segmentation label to assign max depth to far-away regions such as sky
        ss_label = cv2.imread(ss_label_path, cv2.IMREAD_UNCHANGED) if ss_label_path != '' else None
        if ss_label_path != '':
            sky_label = 1
            depth[ss_label == sky_label] = max_depth_afr_scale
        return depth
    #
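
    # Worked example for depth_loader() above (illustrative numbers, assuming a
    # uint16 depth PNG that stores metres*256, with depth_scale=1 and
    # max_depth_afr_scale=20.0):
    #     raw 5120  -> 5120/256.0  = 20.0 (exactly at the clamp, kept)
    #     raw 12800 -> 12800/256.0 = 50.0 -> clamped to 20.0
    #     raw 65535 -> treated as the ignore value -> set to 0
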
    def num_classes(self):
        nc = []
        if self.load_flow:
            nc.append(2)
        if self.load_depth:
            nc.append(1)
        if self.load_segmentation:
            nc.append(TiadBaseSegmentationLoader.num_classes_)
        if self.load_semantic_motion:
            nc.append(TiadBaseSemanticMotionLoader.num_classes_)
        if self.load_motion:
            nc.append(TiadBaseMotionLoader.num_classes_)
        if self.load_interest_pt:
            nc.append(self.num_interest_pt_channels)
        #
        return nc
    #
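
    # Example (illustrative) for num_classes() above and class_weights() below:
    # with load_depth=True and load_motion=True, as used by
    # tiad_depth_semantic_motion_multi_input(), num_classes() returns [1, 2] and
    # class_weights() returns [None, TiadBaseMotionLoader.class_weights()], in the
    # same order in which targets are appended in __getitem__().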

    def class_weights(self):
        cw = []
        if self.load_depth:
            cw.append(None)
        if self.load_segmentation:
            cw.append(TiadBaseSegmentationLoader.class_weights())
        if self.load_semantic_motion:
            cw.append(TiadBaseSemanticMotionLoader.class_weights())
        if self.load_motion:
            cw.append(TiadBaseMotionLoader.class_weights())
        if self.load_interest_pt:
            cw.append(None)
        #
        return cw
    #

    def create_palette(self):
        palette = []
        if self.load_segmentation:
            palette.append(TiadBaseSegmentationLoader.colors)
        if self.load_semantic_motion:
            palette.append(TiadBaseSemanticMotionLoader.colors)
        if self.load_motion:
            palette.append(TiadBaseMotionLoader.colors)
        return palette

    def conv_desc_to_uniform_positive_range(self, descriptor=None, loc_x=0, loc_y=0, data=None, scale_score=127/0.001, scale_des=127):
        data_ary = np.asarray(data)
        descriptor[loc_y, loc_x, 0] = np.clip((data_ary[2] * scale_score), 0.0, 255.0)

        # actual range is 0 to 1.0, but very few samples are above 0.5, so treat the range as 0 to 0.5
        scale_des = 510.0
        descriptor[loc_y, loc_x, self.pos_chan_idx] = np.clip((data_ary[self.pos_chan_idx_in_data] * scale_des), 0.0, 255.0)

        # actual range is -1.0 to 1.0, but very few samples are above 0.5 or below -0.5, so treat the range as -0.5 to 0.5
        scale_des = 254.0
        descriptor[loc_y, loc_x, self.symmetric_chan_idx] = np.clip(data_ary[self.symmetric_chan_idx_in_data] * scale_des + 128, 0.0, 255.0)

    def conv_desc_to_uniform_range(self, descriptor=None, loc_x=0, loc_y=0, data=None, scale_score=127/0.001, scale_des=127):
        data_ary = np.asarray(data)
        descriptor[loc_y, loc_x, 0] = np.clip((data_ary[2] * scale_score) - 128.0, -128.0, 127.0)

        scale_des = 127.0 * 4.0
        descriptor[loc_y, loc_x, self.pos_chan_idx] = np.clip((data_ary[self.pos_chan_idx_in_data] * scale_des) - 128.0, -128.0, 127.0)

        scale_des = 127.0 * 2.0
        descriptor[loc_y, loc_x, self.symmetric_chan_idx] = np.clip(data_ary[self.symmetric_chan_idx_in_data] * scale_des, -128.0, 127.0)

    def read_akaze_score_desc_bin(self, image_name=None, img_shape=None, scale_score=127/0.001, scale_des=127, akaze_ds_fac=1,
                                  akaze_params=None):
        float_size = 4
        # each record is: loc_x, loc_y, score, desc[64]
        num_interest_pt_channels = 1 + 64
        num_el = 2 + num_interest_pt_channels
        skip_fac = 2

        filename = image_name.replace('.png', '.bin')
        num_bytes = os.path.getsize(filename)

        tot_kp = num_bytes // (num_el * float_size)
        descriptor = np.zeros((img_shape[0] // akaze_ds_fac, img_shape[1] // akaze_ds_fac, num_interest_pt_channels), dtype=np.float32)
        with open(filename, "rb") as bin_file:
            for index in range(0, tot_kp):
                data = struct.unpack('f' * num_el, bin_file.read(float_size * num_el))
                loc_x = int(data[0])
                loc_y = int(data[1])

                if not (loc_x % skip_fac) and not (loc_y % skip_fac):
                    if akaze_params.learn_scaled_values_interest_pt:
                        if akaze_params.make_score_zero_mean:
                            self.conv_desc_to_uniform_range(descriptor=descriptor, loc_x=loc_x, loc_y=loc_y, data=data, scale_score=scale_score, scale_des=scale_des)
                        elif akaze_params.uniform_positive_range:
                            self.conv_desc_to_uniform_positive_range(descriptor=descriptor, loc_x=loc_x, loc_y=loc_y, data=data, scale_score=scale_score, scale_des=scale_des)
                        else:
                            descriptor[loc_y, loc_x, 0] = np.clip(data[2] * scale_score, 0.0, 255.0)
                            descriptor[loc_y, loc_x, 1:] = np.asarray(data[3:]) * scale_des
                    else:
                        descriptor[loc_y, loc_x, 0] = data[2]
                        descriptor[loc_y, loc_x, 1:] = np.asarray(data[3:])

        return descriptor
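
    # Layout sketch of one record in the precomputed .bin file (all float32),
    # as implied by num_el and the struct.unpack() call in
    # read_akaze_score_desc_bin() above:
    #     word 0 : loc_x  -- keypoint column
    #     word 1 : loc_y  -- keypoint row
    #     word 2 : score  -- AKAZE response
    #     words 3..66 : the 64 KAZE descriptor values
    # A file with N keypoints is therefore exactly N * 67 * 4 bytes long.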

    def set_akaze_params(self):
        # attribute-style access works on OrderedDict instances, which is relied upon here
        akaze_params = OrderedDict()

        akaze_params.learn_scaled_values_interest_pt = True
        # 'precomputed_bin', 'precomputed_npy', 'compute', 'none'
        # akaze_params.akaze_format = 'none'
        akaze_params.make_score_zero_mean = False
        akaze_params.uniform_positive_range = True
        # -1 means: derive the scale from akaze_ds_fac when writing the localization format.
        # (assumed default; this attribute is read in compute_interest_pt_descriptor but was never set)
        akaze_params.scale_to_write_kp_loc_to_orig_res = -1
        return akaze_params

    def compute_interest_pt_descriptor(self, img_bgr, image_path):
        akaze_params = self.set_akaze_params()

        akaze_save_np_format = False
        # save in the format that localization can directly consume
        akaze_save_localization_format = False

        # to speed things up, compute AKAZE at low resolution instead of the original resolution
        AKAZE_ON_LOW_RES = False
        akaze_ds_fac = 2 if AKAZE_ON_LOW_RES else 1
        scale_score = 127.0 / 0.001
        scale_des = 127.0

        if self.akaze_format == 'precomputed_npy':
            filename = image_path.replace('.png', '.npz')
            npz_ary = np.load(filename)
            descriptor = npz_ary['arr_0']
        elif self.akaze_format == 'precomputed_bin':
            # bin format generated by the KazeGTTraining utility
            descriptor = self.read_akaze_score_desc_bin(image_name=image_path, img_shape=img_bgr.shape,
                                                        scale_score=scale_score, scale_des=scale_des,
                                                        akaze_ds_fac=akaze_ds_fac, akaze_params=akaze_params)
            xnn.utils.comp_hist_tensor3d(x=descriptor, en=False, dir='gt', name='ch', log=True)
        elif self.akaze_format == 'compute':
            if AKAZE_ON_LOW_RES:
                descriptor = np.zeros((img_bgr.shape[0] // akaze_ds_fac, img_bgr.shape[1] // akaze_ds_fac, self.num_interest_pt_channels), dtype=np.float32)
                img_bgr = cv2.resize(img_bgr, (descriptor.shape[1], descriptor.shape[0]), interpolation=cv2.INTER_AREA)
            else:
                descriptor = np.zeros((img_bgr.shape[0], img_bgr.shape[1], self.num_interest_pt_channels), dtype=np.float32)
            akaze_th = 0.0  # 0.001
            akaze = cv2.AKAZE_create(descriptor_type=cv2.AKAZE_DESCRIPTOR_KAZE, threshold=akaze_th)
            kpts, descs = akaze.detectAndCompute(img_bgr, None)

            if akaze_save_localization_format:
                filename = image_path.replace('.png', '.txt')
                scale_to_write_kp_loc_to_orig_res = akaze_ds_fac if akaze_params.scale_to_write_kp_loc_to_orig_res == -1 else akaze_params.scale_to_write_kp_loc_to_orig_res
                # write_desc.write_immediate_score_desc_as_text(kpts=kpts, descs=descs, scale_to_write_kp_loc_to_orig_res=scale_to_write_kp_loc_to_orig_res,
                #                                               txt_file_name=filename, fract_loc=True)
            if len(kpts) > 0:
                for idx, (kpt, desc) in enumerate(zip(kpts, descs)):
                    pt = np.round(kpt.pt)
                    if akaze_params.learn_scaled_values_interest_pt:
                        descriptor[int(pt[1]), int(pt[0]), 0] = np.clip(kpt.response * scale_score, 0.0, 255.0)
                        descriptor[int(pt[1]), int(pt[0]), 1:] = desc * scale_des
                    else:
                        descriptor[int(pt[1]), int(pt[0]), 0] = np.clip(kpt.response, 0.0, 0.002)
                        descriptor[int(pt[1]), int(pt[0]), 1:] = desc

            if akaze_save_np_format:
                filename = image_path.replace('.png', '.npz')
                np.savez_compressed(filename, descriptor)
        else:  # 'none'
            descriptor = np.zeros((img_bgr.shape[0], img_bgr.shape[1], self.num_interest_pt_channels), dtype=np.float32)

        return descriptor


##########################################
def tiad_segmentation(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']  # the passed-in split is ignored; both splits are always created
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0])
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1])
        else:
            pass
        #
    return train_split, val_split
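
# Example (sketch with assumed paths/transforms): wiring the factory output into
# torch DataLoaders. `my_transforms` stands in for a (train, val) pair of callables
# with the (images, targets) -> (images, targets) signature used by TiadDataLoader:
#
#     train_ds, val_ds = tiad_segmentation(get_config(), './data/tiad', transforms=my_transforms)
#     train_loader = torch.utils.data.DataLoader(train_ds, batch_size=8, shuffle=True)
#     images, targets = next(iter(train_loader))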


def tiad_depth(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], load_segmentation=False, load_depth=True,
                                         max_depth=dataset_config.max_depth_bfr_scaling, depth_scale=dataset_config.depth_scale,
                                         use_semseg_for_depth=dataset_config.use_semseg_for_depth, train_depth_log=dataset_config.train_depth_log)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], load_segmentation=False, load_depth=True,
                                       max_depth=dataset_config.max_depth_bfr_scaling, depth_scale=dataset_config.depth_scale,
                                       use_semseg_for_depth=dataset_config.use_semseg_for_depth, train_depth_log=dataset_config.train_depth_log)
        else:
            pass
        #
    return train_split, val_split


def tiad_interest_pt_descriptor(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], search_images=True,
                                         load_segmentation=False, load_interest_pt=True)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], search_images=True,
                                       load_segmentation=False, load_interest_pt=True)
        else:
            pass
        #
    return train_split, val_split


def tiad_depth_segmentation(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], load_depth=True)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], load_depth=True)
        else:
            pass
        #
    return train_split, val_split


##########################################
def tiad_motion_multi_input(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'),
                            input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], load_segmentation=False, load_motion=True,
                                         image_folders=dataset_config.image_folders, input_offsets=input_offsets)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], load_segmentation=False, load_motion=True,
                                       image_folders=dataset_config.image_folders, input_offsets=input_offsets)
        else:
            pass
        #
    return train_split, val_split


##########################################
def tiad_semantic_motion_single_task_multi_input(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'),
                                                 input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], load_segmentation=False, load_semantic_motion=True,
                                         image_folders=dataset_config.image_folders, input_offsets=input_offsets)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], load_segmentation=False, load_semantic_motion=True,
                                       image_folders=dataset_config.image_folders, input_offsets=input_offsets)
        else:
            pass
        #
    return train_split, val_split


##########################################
# joint motion semantic training
def tiad_motion_semantic_multi_input(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'), input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], load_motion=True,
                                         image_folders=dataset_config.image_folders, input_offsets=input_offsets)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], load_motion=True,
                                       image_folders=dataset_config.image_folders, input_offsets=input_offsets)
        else:
            pass
        #
    return train_split, val_split


############################################
def tiad_depth_semantic_motion_multi_input(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'),
                                           input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], load_depth=True,
                                         load_motion=True, image_folders=dataset_config.image_folders, input_offsets=input_offsets,
                                         max_depth=dataset_config.max_depth_bfr_scaling, train_depth_log=dataset_config.train_depth_log,
                                         use_semseg_for_depth=dataset_config.use_semseg_for_depth,
                                         load_segmentation_flow_correction=dataset_config.load_segmentation_flow_correction,
                                         depth_scale=dataset_config.depth_scale)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], load_depth=True,
                                       load_motion=True, image_folders=dataset_config.image_folders, input_offsets=input_offsets,
                                       max_depth=dataset_config.max_depth_bfr_scaling, train_depth_log=dataset_config.train_depth_log,
                                       use_semseg_for_depth=dataset_config.use_semseg_for_depth,
                                       load_segmentation_flow_correction=dataset_config.load_segmentation_flow_correction,
                                       depth_scale=dataset_config.depth_scale)
        else:
            pass
        #
    return train_split, val_split


############################################
def tiad_depth_semantic_motion_descriptor_multi_input(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'),
                                                      input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']

    # akaze_format = 'compute'
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], load_depth=True,
                                         load_motion=True, load_interest_pt=True, image_folders=dataset_config.image_folders,
                                         input_offsets=input_offsets)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], load_depth=True,
                                       load_motion=True, load_interest_pt=True, image_folders=dataset_config.image_folders,
                                       input_offsets=input_offsets)
        else:
            pass
        #
    return train_split, val_split


def tiad_semantic_motion_descriptor_multi_input(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'),
                                                input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    train_split = val_split = None
    split = ['train', 'val']

    # akaze_format = 'compute'
    for split_name in split:
        if split_name == 'train':
            train_split = TiadDataLoader(root, split_name, gt, transforms=transforms[0], load_depth=False,
                                         load_motion=True, load_interest_pt=True, image_folders=dataset_config.image_folders,
                                         input_offsets=input_offsets)
        elif split_name == 'val':
            val_split = TiadDataLoader(root, split_name, gt, transforms=transforms[1], load_depth=False,
                                       load_motion=True, load_interest_pt=True, image_folders=dataset_config.image_folders,
                                       input_offsets=input_offsets)
        else:
            pass
        #
    return train_split, val_split


# semantic segmentation inference
def tiad_segmentation_infer(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    split_name = 'val'  # 'train', 'val', ''
    infer_split = TiadDataLoader(root, split_name, gt, transforms=transforms, image_folders=('leftImg8bit',),
                                 load_segmentation=True, search_images=True, inference=True, additional_info=True)
    return infer_split


def tiad_segmentation_measure(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    split_name = 'val'
    infer_split = TiadDataLoader(root, split_name, gt, transforms=transforms, image_folders=('leftImg8bit',),
                                 load_segmentation=True, search_images=True, inference=False, additional_info=True)
    return infer_split


def tiad_depth_infer(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    split_name = dataset_config.split_name  # 'train' or 'val'
    infer_split = TiadDataLoader(root, split_name, gt, transforms=transforms, load_segmentation=False, load_depth=True,
                                 search_images=True, inference=True, additional_info=True, max_depth=dataset_config.max_depth_bfr_scaling,
                                 depth_scale=dataset_config.depth_scale, use_semseg_for_depth=dataset_config.use_semseg_for_depth,
                                 train_depth_log=dataset_config.train_depth_log)
    #
    return infer_split


def tiad_depth_measure(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    split_name = 'val'  # 'train' or 'val'
    infer_split = TiadDataLoader(root, split_name, gt, transforms=transforms, load_segmentation=False, load_depth=True,
                                 search_images=True, inference=False, additional_info=True, max_depth=dataset_config.max_depth_bfr_scaling,
                                 depth_scale=dataset_config.depth_scale, train_depth_log=dataset_config.train_depth_log)
    #
    return infer_split


# motion inference
def tiad_motion_multi_input_infer(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'), input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    split_name = 'val'
    val_split = TiadDataLoader(root, split_name, gt, transforms=transforms, load_segmentation=False, load_motion=True,
                               image_folders=dataset_config.image_folders, search_images=True, inference=True, additional_info=True, input_offsets=input_offsets)
    # --
    return val_split


def tiad_motion_multi_input_measure(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'), input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    split_name = 'val'  # 'val' or 'train'
    val_split = TiadDataLoader(root, split_name, gt, transforms=transforms, load_segmentation=False, load_motion=True,
                               image_folders=dataset_config.image_folders, search_images=True, inference=False, additional_info=True, input_offsets=input_offsets)
    # --
    return val_split


# single task semantic motion measure
def tiad_semantic_motion_single_task_multi_input_measure(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'), input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    split_name = dataset_config.split_name
    val_split = TiadDataLoader(root, split_name, gt, transforms=transforms, load_segmentation=False, load_semantic_motion=True,
                               image_folders=dataset_config.image_folders, additional_info=True, input_offsets=input_offsets)
    # --
    return val_split


# single task semantic motion inference
def tiad_semantic_motion_single_task_multi_input_infer(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'), input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    split_name = dataset_config.split_name
    val_split = TiadDataLoader(root, split_name, gt, transforms=transforms, load_segmentation=False, load_semantic_motion=True,
                               image_folders=dataset_config.image_folders, search_images=True, inference=True, additional_info=True, input_offsets=input_offsets)
    # --
    return val_split


# joint motion semantic inference
def tiad_motion_semantic_multi_input_infer(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'), input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    split_name = 'val'
    val_split = TiadDataLoader(root, split_name, gt, transforms=transforms, load_motion=True,
                               image_folders=dataset_config.image_folders, search_images=True, inference=True, additional_info=True, input_offsets=input_offsets)
    #
    return val_split


def tiad_motion_semantic_multi_input_measure(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'), input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    split_name = 'val'
    val_split = TiadDataLoader(root, split_name, gt, transforms=transforms, load_motion=True,
                               image_folders=dataset_config.image_folders, search_images=True, inference=False, additional_info=True, input_offsets=input_offsets)
    #
    return val_split


def tiad_depth_semantic_motion_multi_input_measure(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'),
                                                   input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    split = dataset_config.split_name
    val_split = TiadDataLoader(root, split, gt, transforms=transforms, load_depth=True, search_images=True, inference=False, additional_info=True,
                               load_motion=True, image_folders=dataset_config.image_folders, input_offsets=input_offsets)
    #
    return val_split


############################################
def tiad_depth_semantic_motion_multi_input_infer(dataset_config, root, split=None, transforms=None, image_folders=('leftImg8bitPrevious', 'leftImg8bit'),
                                                 input_offsets=None):
    dataset_config = get_config().merge_from(dataset_config)
    gt = "gtFine"
    split = dataset_config.split_name
    val_split = TiadDataLoader(root, split, gt, transforms=transforms, load_depth=True, search_images=True, inference=True, additional_info=True,
                               load_motion=True, image_folders=image_folders, input_offsets=input_offsets)
    #
    return val_split


# interest_pt inference
def tiad_interest_pt_infer(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    # split_name = 'val'  # 'train' or 'val'
    split_name = dataset_config.split_name
    infer_split = TiadDataLoader(root, split_name, gt, transforms=transforms, image_folders=('leftImg8bit',),
                                 load_segmentation=False, load_interest_pt=True, search_images=True, inference=True, additional_info=True)
    return infer_split


def tiad_interest_pt_measure(dataset_config, root, split=None, transforms=None):
    gt = "gtFine"
    # split_name = 'val'  # 'train' or 'val'
    split_name = dataset_config.split_name
    # 'precomputed_bin', 'precomputed_npy', 'compute', 'none'
    akaze_format = 'none' if dataset_config.write_desc_type == 'PRED' else 'precomputed_bin'

    infer_split = TiadDataLoader(root, split_name, gt, transforms=transforms, image_folders=('leftImg8bit',),
                                 load_segmentation=False, load_interest_pt=True, search_images=True, inference=False,
                                 additional_info=True, akaze_format=akaze_format)
    return infer_split