release commit
author     Manu Mathew <a0393608@ti.com>
           Tue, 28 Jan 2020 05:02:56 +0000 (10:32 +0530)
committer  Manu Mathew <a0393608@ti.com>
           Tue, 28 Jan 2020 05:02:56 +0000 (10:32 +0530)
12 files changed:
modules/pytorch_jacinto_ai/engine/infer_pixel2pixel.py
modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/__init__.py
modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/a2d2.py [new file with mode: 0644]
modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/calculate_class_weights.py
scripts/evaluate_segmentation_main.py
scripts/infer_segmentation_main.py
scripts/test_classification_main.py
scripts/train_classification_main.py
scripts/train_depth_main.py
scripts/train_motion_segmentation_main.py
scripts/train_pixel2pixel_multitask_main.py
scripts/train_segmentation_main.py

index d17decf8a110d177614af1ec0e749b0fbdde469d..81e1ec5aff9e5533d3efd50d55b44e08ab1edfee 100644 (file)
@@ -564,7 +564,7 @@ def validate(args, val_dataset, val_loader, model, epoch, infer_path):
                     viz_depth(prediction = prediction, args=args, output_name = output_name, input_name=input_path[-1][task_index])
                     print('{}/{}'.format((args.batch_size * iter + index), len(val_dataset)))
 
-                if args.blend[task_index]:
+                if args.viz_op_type[task_index] == 'blend':
                     prediction_size = (prediction.shape[0], prediction.shape[1], 3)
                     output_image = args.palette[task_index-1][prediction.ravel()].reshape(prediction_size)
                     input_bgr = cv2.imread(input_path[-1][index]) #Read the actual RGB image
@@ -573,8 +573,13 @@ def validate(args, val_dataset, val_loader, model, epoch, infer_path):
                     output_name = os.path.join(infer_path[task_index], input_path[-1][index].split('/')[-4] + '_' + input_path[-1][index].split('/')[-3] + '_' +os.path.basename(input_path[-1][index]))
                     cv2.imwrite(output_name, output_image)
                     print('{}/{}'.format((args.batch_size*iter+index), len(val_dataset)))
+                elif args.viz_op_type[task_index] == 'color':
+                    prediction_size = (prediction.shape[0], prediction.shape[1], 3)
+                    output_image = args.palette[task_index-1][prediction.ravel()].reshape(prediction_size)
+                    output_name = os.path.join(infer_path[task_index], input_path[-1][index].split('/')[-4] + '_' + input_path[-1][index].split('/')[-3] + '_' +os.path.basename(input_path[-1][index]))
+                    cv2.imwrite(output_name, output_image)
+                    print('{}/{}'.format((args.batch_size*iter+index), len(val_dataset)))
                 #
-
                 if args.car_mask:   # generating car_mask (required for localization)
                     car_mask = np.logical_or(prediction == 13, prediction == 14, prediction == 16, prediction == 17)
                     prediction[car_mask] = 255
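
[Review note] On the unchanged car-mask lines above: `np.logical_or()` is a binary ufunc, so calling it with four arrays treats the third positional argument as `out` (and a fourth is rejected with a TypeError); only classes 13 and 14 are actually OR-ed as intended. A minimal sketch of the likely intent, using `np.isin()` with the class ids taken from the original line:

```python
import numpy as np

# dummy prediction map standing in for the model output
prediction = np.array([[13, 0], [16, 17]])

# mask every car-related class id in one call instead of chaining logical_or
car_mask = np.isin(prediction, (13, 14, 16, 17))
prediction[car_mask] = 255
print(prediction)   # [[255   0]
                    #  [255 255]]
```
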
index 7679729f0bb0ac374c945f27c6e25c33177fcdf5..76402e3ba5284ce06c99a6603986377ba9816181 100644 (file)
@@ -5,6 +5,8 @@ from .kitti_depth import *
 from .mpisintel import *
 from .segmentation import *
 from .cityscapes_plus import *
+from .a2d2 import *
+from .calculate_class_weights import *
 
 try: from .tiad_dataset_internal import *
 except: pass
diff --git a/modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/a2d2.py b/modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/a2d2.py
new file mode 100644 (file)
index 0000000..2c55bc9
--- /dev/null
@@ -0,0 +1,561 @@
+'''
+Dataset loader for the A2D2 (Audi Autonomous Driving Dataset):
+https://www.a2d2.audi/
+This loader is adapted from the Cityscapes loader; the Cityscapes dataset itself is described in
+M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson, U. Franke, S. Roth, and B. Schiele,
+"The Cityscapes Dataset for Semantic Urban Scene Understanding," in Proc. of the IEEE Conference on
+Computer Vision and Pattern Recognition (CVPR), 2016.
+'''
+
+import os
+import numpy as np
+import cv2
+import json
+from torch.utils import data
+import sys
+import warnings
+from .. import utils
+from .... import xnn
+
+###########################################
+# config settings
+def get_config():
+    dataset_config = xnn.utils.ConfigNode()
+    dataset_config.image_folders = ('leftImg8bit',)
+    dataset_config.input_offsets = None
+    dataset_config.load_segmentation = True
+    dataset_config.load_segmentation_five_class = False
+    return dataset_config
+
+
+###########################################
+class A2D2BaseSegmentationLoader():
+    """A2D2Loader: Data is derived from A2D2, and can be downloaded from here: https://www.A2D2-dataset.com/downloads/
+    Many Thanks to @fvisin for the loader repo: https://github.com/fvisin/dataset_loaders/blob/master/dataset_loaders/images/A2D2.py"""
+    
+    colors = [
+        [255,0,0],[182,89,6],[204,153,255],[255,128,0],[0,255,0],[0,128,255],[0,255,255],[255,255,0],[233,100,0],[110,110,0],[128,128,0],[255,193,37],[64,0,64],[185,122,87],[0,0,100],[139,99,108],[210,50,115],[255,0,128],[255,246,143],[150,0,150],[204,255,153],[238,162,173],[33,44,177],[180,50,180],[255,70,185],[238,233,191],[147,253,194],[150,150,200],[180,150,200],[72,209,204],[200,125,210],[159,121,238],[128,0,255],[255,0,255],[135,206,255],[241,230,255],[96,69,143],[53,46,82], [0, 0, 0]]
+
+    num_classes_ = 38
+    label_colours = dict(zip(range(num_classes_), colors))
+    
+    void_classes = []
+    
+    valid_classes = range(0,num_classes_)
+    class_names = ['Car  0','Bicycle  1','Pedestrian  2','Truck  3','Small vehicles  4','Traffic signal  5','Traffic sign  6','Utility vehicle  7','Sidebars 8','Speed bumper 9','Curbstone 10','Solid line 11','Irrelevant signs 12','Road blocks 13','Tractor 14','Non-drivable street 15','Zebra crossing 16','Obstacles / trash 17','Poles 18','RD restricted area 19','Animals 20','Grid structure 21','Signal corpus 22','Drivable cobbleston 23','Electronic traffic 24','Slow drive area 25','Nature object 26','Parking area 27','Sidewalk 28','Ego car 29','Painted driv. instr. 30','Traffic guide obj. 31','Dashed line 32','RD normal street 33','Sky 34','Buildings 35','Blurred area 36','Rain dirt 37']
+
+    ignore_index = 255
+    class_map = dict(zip(valid_classes, range(num_classes_)))
+
+    class_weights_ = np.ones(num_classes_)
+    # set the weights of high-frequency categories low so they do not overpower the other categories
+    # Nature object 26
+    # RD normal street 33
+    # Sky 34
+    # Buildings 35
+
+    cat_with_high_freq = [26, 33, 34, 35]
+    for cat_idx in cat_with_high_freq:
+        class_weights_[cat_idx] = 0.05
+
+    @classmethod
+    def decode_segmap(cls, temp):
+        r = temp.copy()
+        g = temp.copy()
+        b = temp.copy()
+        for l in range(0, cls.num_classes_):
+            r[temp == l] = cls.label_colours[l][0]
+            g[temp == l] = cls.label_colours[l][1]
+            b[temp == l] = cls.label_colours[l][2]
+
+        rgb = np.zeros((temp.shape[0], temp.shape[1], 3))
+        rgb[:, :, 0] = r / 255.0
+        rgb[:, :, 1] = g / 255.0
+        rgb[:, :, 2] = b / 255.0
+        return rgb
+
+
+    @classmethod
+    def encode_segmap(cls, mask):
+        # Put all void classes to zero
+        for _voidc in cls.void_classes:
+            mask[mask == _voidc] = cls.ignore_index
+        for _validc in cls.valid_classes:
+            mask[mask == _validc] = cls.class_map[_validc]
+        return mask
+
+
+    @classmethod
+    def class_weights(cls):
+        return cls.class_weights_
+
+###########################################
+class A2D2BaseMotionLoader():
+    """A2D2Loader: Data is derived from A2D2, and can be downloaded from here: https://www.A2D2-dataset.com/downloads/
+    Many Thanks to @fvisin for the loader repo: https://github.com/fvisin/dataset_loaders/blob/master/dataset_loaders/images/A2D2.py"""
+    colors = [  # [  0,   0,   0],
+        [0, 0, 0], [119, 11, 32]]
+
+    label_colours = dict(zip(range(2), colors))
+
+    void_classes = []
+    valid_classes = [0, 255]
+    class_names = ['static', 'moving']
+    ignore_index = 255
+    class_map = dict(zip(valid_classes, range(2)))
+    num_classes_ = 2
+    class_weights_ = np.array([0.05, 0.95], dtype=float)    # weights calculated based on median_frequency = [0.51520306, 16.94405377]
+
+    @classmethod
+    def decode_segmap(cls, temp):
+        r = temp.copy()
+        g = temp.copy()
+        b = temp.copy()
+        for l in range(0, cls.num_classes_):
+            r[temp == l] = cls.label_colours[l][0]
+            g[temp == l] = cls.label_colours[l][1]
+            b[temp == l] = cls.label_colours[l][2]
+        #
+        rgb = np.zeros((temp.shape[0], temp.shape[1], 3))
+        rgb[:, :, 0] = r / 255.0
+        rgb[:, :, 1] = g / 255.0
+        rgb[:, :, 2] = b / 255.0
+        return rgb
+
+    @classmethod
+    def encode_segmap(cls, mask):
+        for _validc in cls.valid_classes:
+            mask[mask == _validc] = cls.class_map[_validc]
+        # Put all void classes to zero
+        for _voidc in cls.void_classes:
+            mask[mask == _voidc] = cls.ignore_index
+        return mask
+
+    @classmethod
+    def class_weights(cls):
+        return cls.class_weights_
+
+
+###########################################
+class A2D2DataLoader(data.Dataset):
+    def __init__(self, dataset_config, root, split="train", gt="gtFine", transforms=None, image_folders=('leftImg8bit',),
+                 search_images=False, load_segmentation=True, load_depth=False, load_motion=False, load_flow=False,
+                 load_segmentation_five_class=False, inference=False, additional_info=False, input_offsets=None):
+        super().__init__()
+        if split not in ['train', 'val', 'test']:
+            warnings.warn(f'unknown split specified: {split}')
+        #
+        self.root = root
+        self.gt = gt
+        self.split = split
+        self.transforms = transforms
+        self.image_folders = image_folders
+        self.search_images = search_images
+        self.files = {}
+
+        self.additional_info = additional_info
+        self.load_segmentation = load_segmentation
+        self.load_segmentation_five_class = load_segmentation_five_class
+        self.load_depth = load_depth
+        self.load_motion = load_motion
+        self.load_flow = load_flow
+        self.inference = inference
+        self.input_offsets = input_offsets
+
+        #self.image_suffix = (self.image_folders[-1]+'.png') #'.png'
+        #self.image_suffix = self.image_suffix.replace('leftImg8bit_sequence.png', 'leftImg8bit.png')
+        self.image_suffix = '.png'
+        #self.segmentation_suffix = self.gt+'_labelIds.png'  #'.png'
+        self.segmentation_suffix = '.png'  #'.png'
+        if self.load_segmentation_five_class:
+            self.segmentation_suffix = self.gt+'_labelTrainIds.png'
+        self.disparity_suffix = 'disparity.png'
+        self.motion_suffix =  self.gt+'_labelTrainIds_motion.png' #'.png'
+
+        self.image_base = os.path.join(self.root, image_folders[-1], self.split)
+        self.segmentation_base = os.path.join(self.root, gt, self.split)
+        self.disparity_base = os.path.join(self.root, 'disparity', self.split)
+        self.cameracalib_base = os.path.join(self.root, 'camera', self.split)
+        self.motion_base = os.path.join(self.root, gt, self.split)
+
+        if self.search_images:
+            self.files = xnn.utils.recursive_glob(rootdir=self.image_base, suffix=self.image_suffix)
+        else:
+            self.files = xnn.utils.recursive_glob(rootdir=self.segmentation_base, suffix=self.segmentation_suffix)
+        #
+        self.files = sorted(self.files)
+        
+        if not self.files:
+            raise Exception("> No files for split=[%s] found in %s" % (split, self.segmentation_base))
+        #
+        
+        self.image_files = [None] * len(image_folders)
+        for image_idx, image_folder in enumerate(image_folders):
+            image_base = os.path.join(self.root, image_folder, self.split)
+            self.image_files[image_idx] = sorted(xnn.utils.recursive_glob(rootdir=image_base, suffix='.png'))
+            assert len(self.image_files[image_idx]) == len(self.image_files[0]), 'all folders should have same number of files'
+        #
+        
+        
+    def __len__(self):
+        return len(self.files)
+
+
+    def __getitem__(self, index):
+        if self.search_images:
+            image_path = self.files[index].rstrip()
+            self.check_file_exists(image_path)
+            segmentation_path = image_path.replace(self.image_base, self.segmentation_base).replace(self.image_suffix, self.segmentation_suffix)
+        else:
+            segmentation_path = self.files[index].rstrip()
+            self.check_file_exists(segmentation_path)
+            image_path = segmentation_path.replace(self.segmentation_base, self.image_base).replace(self.segmentation_suffix, self.image_suffix)
+        #
+
+        images = []
+        images_path = []
+        for image_idx, image_folder in enumerate(self.image_folders):
+            sys.stdout.flush()
+            this_image_path =  self.image_files[image_idx][index].rstrip()
+            if image_idx == (len(self.image_folders)-1):
+                assert this_image_path == image_path, 'image file name error'
+            #
+            self.check_file_exists(this_image_path)
+
+            img = cv2.imread(this_image_path)[:,:,::-1]
+            if self.input_offsets is not None:
+                img = img - self.input_offsets[image_idx]
+            #
+            images.append(img)
+            images_path.append(this_image_path)
+        #
+
+        targets = []
+        targets_path = []
+        if self.load_flow and (not self.inference):
+            flow_zero = np.zeros((images[0].shape[0],images[0].shape[1],2), dtype=np.float32)
+            targets.append(flow_zero)
+
+        if self.load_depth and (not self.inference):
+            disparity_path = image_path.replace(self.image_base, self.disparity_base).replace(self.image_suffix, self.disparity_suffix)
+            self.check_file_exists(disparity_path)
+            depth = self.depth_loader(disparity_path)
+            targets.append(depth)
+        #
+
+        if self.load_segmentation and (not self.inference):
+            lbl = cv2.imread(segmentation_path,0)
+            lbl = A2D2BaseSegmentationLoader.encode_segmap(np.array(lbl, dtype=np.uint8))
+            targets.append(lbl)
+            targets_path.append(segmentation_path)
+        #
+
+        elif self.load_segmentation_five_class and (not self.inference):
+            lbl = cv2.imread(segmentation_path,0)
+            lbl = A2D2BaseSegmentationLoaderFiveClasses.encode_segmap(np.array(lbl, dtype=np.uint8))
+            targets.append(lbl)
+            targets_path.append(segmentation_path)
+
+        if self.load_motion and (not self.inference):
+            motion_path = image_path.replace(self.image_base, self.motion_base).replace(self.image_suffix, self.motion_suffix)
+            self.check_file_exists(motion_path)
+            motion = cv2.imread(motion_path,0)
+            motion = A2D2BaseMotionLoader.encode_segmap(np.array(motion, dtype=np.uint8))
+            targets.append(motion)
+        #
+
+        #targets = np.stack(targets, axis=2)
+
+        if (self.transforms is not None):
+            images, targets = self.transforms(images, targets)
+        #
+
+        if self.additional_info:
+            return images, targets, images_path, targets_path
+        else:
+            return images, targets
+    #
+
+
+    def decode_segmap(self, lbl):
+        if self.load_segmentation:
+            return A2D2BaseSegmentationLoader.decode_segmap(lbl)
+        elif self.load_segmentation_five_class:
+            return A2D2BaseSegmentationLoaderFiveClasses.decode_segmap(lbl)
+        else:
+            return A2D2BaseMotionLoader.decode_segmap(lbl)
+    #
+
+
+    def check_file_exists(self, file_name):
+        if not os.path.exists(file_name) or not os.path.isfile(file_name):
+            raise Exception("{} is not a file, can not open with imread.".format(file_name))
+    #
+
+
+    def depth_loader(self, disparity_path):
+        eps = (1e-6)
+        disparity_range = (eps, 255.0)
+        depth_range = (1.0, 255.0)
+
+        disp = cv2.imread(disparity_path, cv2.IMREAD_UNCHANGED)
+        disp_valid = (disp > 0)       # 0 values have to be ignored
+        disp = ((disp - 1.0)/256.0)
+
+        # convert to depth
+        calib_path = disparity_path.replace(self.disparity_base, self.cameracalib_base).replace(self.disparity_suffix, 'camera.json')
+        with open(calib_path) as fp:
+            cameracalib = json.load(fp)
+            extrinsic = cameracalib['extrinsic']
+            intrinsic = cameracalib['intrinsic']
+            focal_len = intrinsic['fx']
+            proj = (focal_len * extrinsic['baseline'])
+            depth = np.divide(proj, disp, out=np.zeros_like(disp), where=(disp!=0))
+            d_out = np.clip(depth, depth_range[0], depth_range[1]) * disp_valid
+        #
+        return d_out
+    #
+
+
+    def num_classes(self):
+        nc = []
+        if self.load_flow:
+            nc.append(2)
+        if self.load_depth:
+            nc.append(1)
+        if self.load_segmentation:
+            nc.append(A2D2BaseSegmentationLoader.num_classes_)
+        elif self.load_segmentation_five_class:
+            nc.append(A2D2BaseSegmentationLoaderFiveClasses.num_classes_)
+        if self.load_motion:
+            nc.append(A2D2BaseMotionLoader.num_classes_)
+        #
+        return nc
+    #
+
+
+    def class_weights(self):
+        cw = []
+        if self.load_flow:
+            cw.append(None)
+        if self.load_depth:
+            cw.append(None)
+        if self.load_segmentation:
+            cw.append(A2D2BaseSegmentationLoader.class_weights())
+        elif self.load_segmentation_five_class:
+            cw.append(A2D2BaseSegmentationLoaderFiveClasses.class_weights())
+        if self.load_motion:
+            cw.append(A2D2BaseMotionLoader.class_weights())
+        #
+        return cw
+    #
+
+
+    def create_palette(self):
+        palette = []
+        if self.load_segmentation:
+            palette.append(A2D2BaseSegmentationLoader.colors)
+        if self.load_segmentation_five_class:
+            palette.append(A2D2BaseSegmentationLoaderFiveClasses.colors)
+        if self.load_motion:
+            palette.append(A2D2BaseMotionLoader.colors)
+        return palette
+
+
+##########################################
+def a2d2_segmentation_train(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    transform = transforms[0] if isinstance(transforms, (list,tuple)) else transforms
+    train_split = A2D2DataLoader(dataset_config, root, 'train', gt, transforms=transform,
+                                            load_segmentation=dataset_config.load_segmentation,
+                                            load_segmentation_five_class=dataset_config.load_segmentation_five_class)
+    return train_split
+
+
+def a2d2_segmentation(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    train_split = val_split = None
+    split = ['train', 'val']
+    for split_name in split:
+        if split_name == 'train':
+            train_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[0],
+                                            load_segmentation=dataset_config.load_segmentation,
+                                            load_segmentation_five_class=dataset_config.load_segmentation_five_class)
+        elif split_name == 'val':
+            val_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[1],
+                                            load_segmentation=dataset_config.load_segmentation,
+                                            load_segmentation_five_class=dataset_config.load_segmentation_five_class)
+        else:
+            pass
+    #
+    return train_split, val_split
+
+
+def a2d2_depth_train(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    train_split = A2D2DataLoader(dataset_config, root, 'train', gt, transforms=transforms[0], load_segmentation=False, load_depth = True)
+    return train_split
+
+
+def a2d2_depth(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    train_split = val_split = None
+    split = ['train', 'val']
+    for split_name in split:
+        if split_name == 'train':
+            train_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[0], load_segmentation=False, load_depth = True)
+        elif split_name == 'val':
+            val_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[1], load_segmentation=False, load_depth = True)
+        else:
+            pass
+    #
+    return train_split, val_split
+
+
+#################################################################
+# semantic inference
+def a2d2_segmentation_infer(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    split_name = 'val'
+    infer_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms, image_folders=dataset_config.image_folders,
+                                       load_segmentation=dataset_config.load_segmentation,
+                                       load_segmentation_five_class=dataset_config.load_segmentation_five_class,
+                                       search_images=True, inference=True, additional_info=True)
+    return  infer_split
+
+
+def a2d2_segmentation_measure(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    split_name = 'val'
+    infer_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms, image_folders=dataset_config.image_folders,
+                                       load_segmentation=dataset_config.load_segmentation,
+                                       load_segmentation_five_class=dataset_config.load_segmentation_five_class,
+                                       search_images=True, inference=False, additional_info=True)
+    return infer_split
+
+def a2d2_segmentation_infer_dir(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    split_name = 'val'
+    infer_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms, image_folders=dataset_config.image_folders,
+                                       load_segmentation=dataset_config.load_segmentation,
+                                       load_segmentation_five_class=dataset_config.load_segmentation_five_class,
+                                       search_images=True, inference=True, additional_info=True)
+    return infer_split
+
+
+#############################################################################################################
+# dual stream ip
+
+def a2d2_segmentation_multi_input(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    train_split = val_split = None
+    split = ['train', 'val']
+    for split_name in split:
+        if split_name == 'train':
+            train_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[0],
+                                               image_folders=dataset_config.image_folders, input_offsets=dataset_config.input_offsets,
+                                               load_segmentation=dataset_config.load_segmentation,
+                                               load_segmentation_five_class=dataset_config.load_segmentation_five_class)
+        elif split_name == 'val':
+            val_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[1],
+                                             image_folders=dataset_config.image_folders, input_offsets=dataset_config.input_offsets,
+                                             load_segmentation=dataset_config.load_segmentation,
+                                             load_segmentation_five_class=dataset_config.load_segmentation_five_class)
+        else:
+            pass
+    #
+    return train_split, val_split
+
+def a2d2_motion_multi_input(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    train_split = val_split = None
+    split = ['train', 'val']
+    for split_name in split:
+        if split_name == 'train':
+            train_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[0], load_segmentation=False, load_motion = True,
+                                               image_folders=dataset_config.image_folders, input_offsets=dataset_config.input_offsets)
+        elif split_name == 'val':
+            val_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[1], load_segmentation=False, load_motion = True,
+                                             image_folders=dataset_config.image_folders, input_offsets=dataset_config.input_offsets)
+        else:
+            pass
+    #
+    return train_split, val_split
+
+
+def a2d2_depth_semantic_motion_multi_input(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    train_split = val_split = None
+    split = ['train', 'val']
+    for split_name in split:
+        if split_name == 'train':
+            train_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[0], load_depth = True,
+                                               load_motion=True, image_folders=dataset_config.image_folders, input_offsets=dataset_config.input_offsets,
+                                               load_segmentation=dataset_config.load_segmentation,
+                                               load_segmentation_five_class=dataset_config.load_segmentation_five_class)
+        elif split_name == 'val':
+            val_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms[1], load_depth = True,
+                                             load_motion=True, image_folders=dataset_config.image_folders, input_offsets=dataset_config.input_offsets,
+                                             load_segmentation=dataset_config.load_segmentation,
+                                             load_segmentation_five_class=dataset_config.load_segmentation_five_class)
+        else:
+            pass
+    #
+    return train_split, val_split
+
+#############################################################################################################
+# inference dual stream
+def a2d2_segmentation_multi_input_measure(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    split_name = 'val'
+    infer_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms, image_folders=dataset_config.image_folders,
+                                       load_segmentation=dataset_config.load_segmentation,
+                                       load_segmentation_five_class=dataset_config.load_segmentation_five_class,
+                                       search_images=True, inference=False, additional_info=True, input_offsets=dataset_config.input_offsets)
+    return infer_split
+
+
+
+# motion inference
+def a2d2_motion_multi_input_infer(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    dataset_config.image_folders = ('leftImg8bit_flow_confidence', 'leftImg8bit')
+    gt = "gtFine"
+    split_name = 'val'
+    val_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms, load_segmentation=False,
+                                     image_folders=dataset_config.image_folders, search_images=True, inference=True, additional_info=True,
+                                     input_offsets=dataset_config.input_offsets)
+    #
+    return val_split
+
+
+def a2d2_motion_multi_input_measure(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    split_name = 'val'
+    val_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms, load_segmentation=False, load_motion = True,
+                                     image_folders=dataset_config.image_folders, search_images=True, inference=False, additional_info=True,
+                                     input_offsets=dataset_config.input_offsets)
+    #
+    return val_split
+
+
+def a2d2_depth_semantic_motion_multi_input_infer(dataset_config, root, split=None, transforms=None):
+    dataset_config = get_config().merge_from(dataset_config)
+    gt = "gtFine"
+    split_name = 'val'
+    val_split = A2D2DataLoader(dataset_config, root, split_name, gt, transforms=transforms, load_depth = True,
+                                     load_motion=True, image_folders=dataset_config.image_folders,search_images=True,
+                                     inference=True, additional_info=True, input_offsets = dataset_config.input_offsets)
+    #
+    return val_split
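
[Review note] A minimal usage sketch for the new dataset module (not part of the commit), assuming `modules/` is on `sys.path` and `root` points at an A2D2 tree laid out as `leftImg8bit/{train,val}` and `gtFine/{train,val}`; the root path below is hypothetical. One caution: the `load_segmentation_five_class` branches reference `A2D2BaseSegmentationLoaderFiveClasses`, which is not defined anywhere in the file as shown, so enabling that flag would raise a NameError.

```python
from pytorch_jacinto_ai.vision.datasets.pixel2pixel import a2d2

# start from the module's own defaults and build train/val splits
dataset_config = a2d2.get_config()
train_split, val_split = a2d2.a2d2_segmentation(
    dataset_config, root='./data/datasets/a2d2', transforms=(None, None))

print(len(train_split), train_split.num_classes())   # e.g. N [38]
images, targets = train_split[0]                     # lists of numpy arrays
```
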
index c22ffd6c5a44e08737d82f6bb10095c20710f261..db2adecc5f27ac3c36e43bb35f9334a7f122a6b5 100644 (file)
@@ -2,10 +2,12 @@ import numpy as np
 import os
 import scipy.misc as misc
 import sys
+import cv2
+#__package__ = "pytorch_jacinto_ai.vision.datasets.pixel2pixel"
+from .... import xnn
 
-from  .... import xnn
 from .cityscapes_plus import CityscapesBaseSegmentationLoader, CityscapesBaseMotionLoader
-
+from .a2d2 import A2D2BaseSegmentationLoader, A2D2BaseMotionLoader
 
 def calc_median_frequency(classes, present_num):
     """
@@ -16,10 +18,9 @@ def calc_median_frequency(classes, present_num):
         c is present, and median_freq is the median of these frequencies.'
     """
     class_freq = classes / present_num
-    median_freq = np.median(class_freq)
+    median_freq = np.median(class_freq[classes != 1.0])
     return median_freq / class_freq
 
-
 def calc_log_frequency(classes, value=1.02):
     """Class balancing by ERFNet method.
        prob = each_sum_pixel / each_sum_pixel.max()
@@ -30,25 +31,41 @@ def calc_log_frequency(classes, value=1.02):
     # print(np.log(value + class_freq))
     return 1 / np.log(value + class_freq)
 
-
-def calc_weights():
+def print_stats(classes = [], class_weight = []):
+    print("class_freq \n","-"*32)
+    for idx, class_freq in enumerate(classes):
+        print("{} : {:.0f}".format(idx, class_freq))
+    print("-"*32)
+    print("class_freq in % \n","-"*32)
+    for idx, class_freq in enumerate(classes):
+        print("{} : {:.2f}".format(idx, class_freq*100.0/np.sum(classes)))
+    print("-"*32)
+
+    print("-"*32)
+    print("class weights \n","-"*32)
+    for idx, class_wt in enumerate(class_weight):
+        print("{} : {:.08}".format(idx, class_wt))
+
+def calc_weights():    
     method = "median"
-    result_path = "/afs/cg.cs.tu-bs.de/home/zhang/SEDPShuffleNet/datasets"
+    result_path = "/data/ssd/datasets/a2d2_v2/info/"
 
     traval = "gtFine"
-    imgs_path = "./data/tiad/data/leftImg8bit/train"    #"./data/cityscapes/data/leftImg8bit/train"   #"./data/TIAD/data/leftImg8bit/train"
-    lbls_path = "./data/tiad/data/gtFine/train"         #"./data/cityscapes/data/gtFine/train"   # "./data/tiad/data/gtFine/train"  #"./data/cityscapes_frame_pair/data/gtFine/train"
-    labels = xnn.utils.recursive_glob(rootdir=lbls_path, suffix='labelTrainIds_motion.png')  #'labelTrainIds_motion.png'  #'labelTrainIds.png'
+    #imgs_path = "/data/ssd/datasets/a2d2_v1_full/leftImg8bit/train"    #"./data/cityscapes/data/leftImg8bit/train"   #"./data/TIAD/data/leftImg8bit/train"
+    lbls_path = "/data/ssd/datasets/a2d2_v2/gtFine/train/"         #"./data/cityscapes/data/gtFine/train"   # "./data/tiad/data/gtFine/train"  #"./data/cityscapes_frame_pair/data/gtFine/train"
+    #labels = xnn.utils.recursive_glob(rootdir=lbls_path, suffix='labelTrainIds_motion.png')  #'labelTrainIds_motion.png'  #'labelTrainIds.png'
+    labels = xnn.utils.recursive_glob(rootdir=lbls_path, suffix='.png')  #'labelTrainIds_motion.png'  #'labelTrainIds.png'
 
-    num_classes = 2       #5  #2
+    num_classes = 38       #5  #2
 
     local_path = "./data/checkpoints"
-    dst = CityscapesBaseMotionLoader() #TiadBaseSegmentationLoader()  #CityscapesBaseSegmentationLoader()  #CityscapesBaseMotionLoader()
+    dst = A2D2BaseSegmentationLoader() #TiadBaseSegmentationLoader()  #CityscapesBaseSegmentationLoader()  #CityscapesBaseMotionLoader(), #A2D2BaseSegmentationLoader()
 
     classes, present_num = ([0 for i in range(num_classes)] for i in range(2))
 
     for idx, lbl_path in enumerate(labels):
-        lbl = misc.imread(lbl_path)
+        print("lbl_path: ", lbl_path)
+        lbl = cv2.imread(lbl_path, 0)
         lbl = dst.encode_segmap(np.array(lbl, dtype=np.uint8))
 
         for nc in range(num_classes):
@@ -57,10 +74,11 @@ def calc_weights():
                 classes[nc] += num_pixel
                 present_num[nc] += 1
 
-    if 0 in classes:
-        raise Exception("Some classes are not found")
-
     classes = np.array(classes, dtype="f")
+    
+    # if any class has zero occurrences, set its count to 1 to avoid divide-by-zero errors
+    classes[classes==0] = 1
+
     presetn_num = np.array(classes, dtype="f")
     if method == "median":
         class_weight = calc_median_frequency(classes, present_num)
@@ -68,8 +86,9 @@ def calc_weights():
         class_weight = calc_log_frequency(classes)
     else:
         raise Exception("Please assign method to 'mean' or 'log'")
+    
+    print_stats(classes = classes, class_weight = class_weight)
 
-    print("class weight", class_weight)
     print("Done!")
 
 
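[Review note] For reference, a small worked example of the median-frequency balancing used here; the modified calc_median_frequency() also excludes classes whose pixel counts were clamped to 1 from the median, so absent classes do not skew it. Note also the unchanged context line `presetn_num = np.array(classes, dtype="f")`: besides the misspelling, it converts `classes` rather than `present_num`, so the float conversion of `present_num` never actually happens; the code still works only because NumPy broadcasts against the plain Python list.

```python
import numpy as np

classes = np.array([9e6, 1e6, 5e5], dtype="f")    # total pixels per class (made-up)
present_num = np.array([100, 80, 50], dtype="f")  # images in which each class occurs

class_freq = classes / present_num                # [90000. 12500. 10000.]
weights = np.median(class_freq) / class_freq      # rare classes get larger weights
print(weights)                                    # ~[0.139 1.    1.25 ]
```
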
index 82364b9833d06af9070d1ef4a0a06beda55c3e82..6bc8552c54be7b78a160e7c6424ea4134358f035 100755 (executable)
@@ -1,10 +1,11 @@
+#!/usr/bin/env python
+
 import sys
-import cv2
 import os
-import torch
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
+import cv2
+import argparse
+import datetime
+import numpy as np
 
 ################################
 from pytorch_jacinto_ai.engine import evaluate_pixel2pixel
index ec912aad66c0a53b082e2fbc1b76657d81fbed13..0e99ec66b1e690d4066fe87aedd2955a8259fb23 100755 (executable)
@@ -4,10 +4,8 @@ import sys
 import os
 import cv2
 import argparse
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
-
+import datetime
+import numpy as np
 
 ################################
 from pytorch_jacinto_ai.xnn.utils import str2bool
@@ -45,6 +43,8 @@ cv2.setNumThreads(0)
 
 
 ################################
+# torch must be imported after CUDA_VISIBLE_DEVICES is set, for the setting to take effect
+import torch
 from pytorch_jacinto_ai.engine import infer_pixel2pixel
 
 # Create the parse and set default arguments
@@ -54,12 +54,14 @@ args = infer_pixel2pixel.get_config()
 #Modify arguments
 args.model_name = "deeplabv3lite_mobilenetv2_tv" #"deeplabv3lite_mobilenetv2_relu" #"deeplabv3lite_mobilenetv2_relu_x1p5" #"deeplabv3plus"
 
-args.dataset_name = 'cityscapes_segmentation_measure' #'tiad_segmentation_infer'   #'cityscapes_segmentation_infer' #'tiad_segmentation'  #'cityscapes_segmentation_measure'
+args.dataset_name = 'a2d2_segmentation_measure' #'tiad_segmentation_infer'   #'cityscapes_segmentation_infer' #'tiad_segmentation'  #'cityscapes_segmentation_measure'
 
 #args.save_path = './data/checkpoints'
-args.data_path = './data/datasets/cityscapes/data'   #'/data/hdd/datasets/cityscapes_leftImg8bit_sequence_trainvaltest/' #'./data/datasets/cityscapes/data'  #'./data/tiad/data/demoVideo/sequence0021'  #'./data/tiad/data/demoVideo/sequence0025'   #'./data/tiad/data/demoVideo/sequence0001_2017'
+args.data_path = '/data/ssd/datasets/a2d2_v2/' #'./data/datasets/cityscapes/data'   #'/data/hdd/datasets/cityscapes_leftImg8bit_sequence_trainvaltest/' #'./data/datasets/cityscapes/data'  #'./data/tiad/data/demoVideo/sequence0021'  #'./data/tiad/data/demoVideo/sequence0025'   #'./data/tiad/data/demoVideo/sequence0001_2017'
 #args.pretrained = './data/modelzoo/semantic_segmentation/cityscapes/deeplabv3lite-mobilenetv2/cityscapes_segmentation_deeplabv3lite-mobilenetv2_2019-06-26-08-59-32.pth'
-args.pretrained = './data/checkpoints/tiad_segmentation/2019-10-18_00-50-03_tiad_segmentation_deeplabv3lite_mobilenetv2_ericsun_resize768x384_traincrop768x384_float/checkpoint.pth.tar'
+#args.pretrained = './data/checkpoints/tiad_segmentation/2019-10-18_00-50-03_tiad_segmentation_deeplabv3lite_mobilenetv2_ericsun_resize768x384_traincrop768x384_float/checkpoint.pth.tar'
+
+args.pretrained = '/data/files/work/bitbucket_TI/pytorch-jacinto-models/data/checkpoints/a2d2_segmentation/2020-01-25_13-06-18_a2d2_segmentation_deeplabv3lite_mobilenetv2_tv_resize768x384_traincrop768x384/training/model_best_ep172.pth.tar'
 
 args.model_config.input_channels = (3,)
 args.model_config.output_type = ['segmentation']
@@ -71,7 +73,10 @@ args.frame_IOU =  False # Print mIOU for each frame
 args.shuffle = False
 
 args.num_images = 30000   # Max number of images to run inference on
-args.blend = [False]
+#args.blend = [False]
+#'color', 'blend'
+args.viz_op_type = ['color']
+args.visualize_gt = False
 args.car_mask = False  # False   #True
 args.label = [True]    # False   #True
 args.label_infer = [True]
@@ -87,7 +92,7 @@ args.iter_size = 1                      #2
 args.batch_size = 1 #80                  #12 #16 #32 #64
 args.img_resize = (384, 768)         #(256,512) #(512,512) # #(1024, 2048) #(512,1024)  #(720, 1280)
 
-args.output_size = (384, 768)          #(1024, 2048)
+args.output_size = (1208, 1920)          #(1024, 2048)
 #args.rand_scale = (1.0, 2.0)            #(1.0,2.0) #(1.0,1.5) #(1.0,1.25)
 
 args.quantize = True
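
[Review note] The deferred `import torch` added throughout this commit encodes a real ordering constraint: the CUDA runtime reads CUDA_VISIBLE_DEVICES when the CUDA context is first initialized, so the variable must be set before anything triggers that initialization. A minimal sketch (device ids are illustrative):

```python
import os

# must happen before torch initializes CUDA
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

import torch  # noqa: E402  (deliberately imported after the env var is set)

# torch now sees only the two requested GPUs
print(torch.cuda.device_count())   # 2 on a machine with >= 2 GPUs
```
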
index f10f3730fbf42d31f81f5012fbdd985acc25e0eb..6b17e6ae43f0774372ed5402015af84d17a85af5 100755 (executable)
@@ -1,15 +1,11 @@
 #!/usr/bin/env python
 
 import sys
-import cv2
 import os
-import datetime
+import cv2
 import argparse
-
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
-
+import datetime
+import numpy as np
 
 ################################
 from pytorch_jacinto_ai.xnn.utils import str2bool
index 7b1964b73c2d8e8955f2c68cfb5cdec8ff22ebe6..d7317b47fec6272e7668d65cedc81aca6f79c1e9 100755 (executable)
@@ -1,15 +1,11 @@
 #!/usr/bin/env python
 
 import sys
-import cv2
 import os
-import datetime
+import cv2
 import argparse
-
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
-
+import datetime
+import numpy as np
 
 ################################
 from pytorch_jacinto_ai.xnn.utils import str2bool
@@ -19,6 +15,7 @@ parser.add_argument('--gpus', type=int, nargs='*', default=None, help='Base lear
 parser.add_argument('--batch_size', type=int, default=None, help='Batch size')
 parser.add_argument('--strides', type=int, nargs='*', default=None, help='strides in the model')
 parser.add_argument('--lr', type=float, default=None, help='Base learning rate')
+parser.add_argument('--lr_clips', type=float, default=None, help='Learning rate for clips in PAct2')
 parser.add_argument('--lr_calib', type=float, default=None, help='Learning rate for calibration')
 parser.add_argument('--model_name', type=str, default=None, help='model name')
 parser.add_argument('--dataset_name', type=str, default=None, help='dataset name')
@@ -61,6 +58,8 @@ if 'gpus' in vars(cmds):
 cv2.setNumThreads(0)
 
 ################################
+# torch must be imported after CUDA_VISIBLE_DEVICES is set, for the setting to take effect
+import torch
 from pytorch_jacinto_ai.engine import train_classification
 
 #Create the parse and set default arguments
@@ -158,8 +157,8 @@ if 'training' in args.phase and (not args.quantize):
     args.pretrained = os.path.join(save_path, 'model_best.pth.tar')
     args.phase = 'training_quantize'
     args.quantize = True
-    args.lr = 5e-5
-    args.epochs = min(args.epochs,25)
+    args.lr = 1e-5
+    args.epochs = 25
     # quantized training will use only one GPU in the engine - so reduce the batch_size
     num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(','))
     args.batch_size = args.batch_size//num_gpus
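
[Review note] This script's quantization block (unchanged in the two lines above) still indexes `os.environ["CUDA_VISIBLE_DEVICES"]` unconditionally, which raises a KeyError when `--gpus` was never passed; the pixel2pixel scripts later in this commit add a guard, sketched below with an illustrative batch size:

```python
import os

num_gpus = (len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(','))
            if "CUDA_VISIBLE_DEVICES" in os.environ else None)

batch_size = 64   # illustrative value
batch_size = (batch_size // num_gpus) if num_gpus is not None else batch_size
```
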
index ae179ac6c10271ff446db94734c200cbad30847a..c9e7453efcb4ea57de0769b03650e23ccca493e0 100755 (executable)
@@ -5,10 +5,7 @@ import os
 import cv2
 import argparse
 import datetime
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
-
+import numpy as np
 
 ################################
 from pytorch_jacinto_ai.xnn.utils import str2bool
@@ -53,12 +50,15 @@ if 'gpus' in vars(cmds):
         os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(v) for v in value])
 #
 
+################################
 # to avoid hangs in data loader with multi threads
 # this was observed after using cv2 image processing functions
 # https://github.com/pytorch/pytorch/issues/1355
 cv2.setNumThreads(0)
 
 ################################
+# torch must be imported after CUDA_VISIBLE_DEVICES is set, for the setting to take effect
+import torch
 from pytorch_jacinto_ai.engine import train_pixel2pixel
 
 # Create the parser and set default arguments
@@ -127,6 +127,7 @@ args.date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 
 
 ################################
+# set other args
 for key in vars(cmds):
     if key == 'gpus':
         pass # already taken care above, since this has to be done before importing pytorch
@@ -146,14 +147,14 @@ train_pixel2pixel.main(args)
 # In addition run a quantization aware training, starting from the trained model
 if 'training' in args.phase and (not args.quantize):
     save_path = train_pixel2pixel.get_save_path(args)
-    args.pretrained = os.path.join(save_path, 'model_best.pth.tar')
+    args.pretrained = os.path.join(save_path, 'model_best.pth.tar') if (args.epochs>0) else args.pretrained
     args.phase = 'training_quantize'
     args.quantize = True
-    args.lr = 5e-5
-    args.epochs = min(args.epochs,25)
+    args.lr = 1e-5
+    args.epochs = 25
     # quantized training will use only one GPU in the engine - so reduce the batch_size
-    num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(','))
-    args.batch_size = args.batch_size//num_gpus
+    num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(',')) if ("CUDA_VISIBLE_DEVICES" in os.environ) else None
+    args.batch_size = (args.batch_size//num_gpus) if (num_gpus is not None) else args.batch_size
     train_pixel2pixel.main(args)
 #
 
index 43c114cf519f98621c1487f7cd6fb55cce48c290..89717df7eb2d4513c49a338da43a257019e056d8 100755 (executable)
@@ -2,18 +2,17 @@ import sys
 import os
 import cv2
 import argparse
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
-
+import datetime
+import numpy as np
 
 ################################
 from pytorch_jacinto_ai.xnn.utils import str2bool, splitstr2bool
 parser = argparse.ArgumentParser()
+parser.add_argument('--save_path', type=str, default=None, help='checkpoint save folder')
 parser.add_argument('--gpus', type=int, nargs='*', default=None, help='Base learning rate')
 parser.add_argument('--batch_size', type=int, default=None, help='Batch size')
 parser.add_argument('--lr', type=float, default=None, help='Base learning rate')
-parser.add_argument('--lr_bias', type=float, default=None, help='Learning rate for bias parameters. If not specified, lr will be used.')
+parser.add_argument('--lr_clips', type=float, default=None, help='Learning rate for clips in PAct2')
 parser.add_argument('--lr_calib', type=float, default=None, help='Learning rate for calibration')
 parser.add_argument('--model_name', type=str, default=None, help='model name')
 parser.add_argument('--dataset_name', type=str, default=None, help='dataset name')
@@ -29,13 +28,18 @@ parser.add_argument('--rand_scale', type=float, nargs=2, default=None, help='ran
 parser.add_argument('--rand_crop', type=int, nargs=2, default=None, help='random crop for training')
 parser.add_argument('--output_size', type=int, nargs=2, default=None, help='output size of the evaluation - prediction/groundtruth. this is not used while training as it blows up memory requirement')
 parser.add_argument('--pretrained', type=str, default=None, help='pretrained model')
+parser.add_argument('--resume', type=str, default=None, help='resume an unfinished training from this model')
+parser.add_argument('--phase', type=str, default=None, help='training/calibration/validation')
+parser.add_argument('--evaluate_start', type=str2bool, default=None, help='Whether to run validation before the training')
 #
 parser.add_argument('--quantize', type=str2bool, default=None, help='Quantize the model')
-parser.add_argument('--resume', type=str, default=None, help='resume an unfinished training from this model')
+parser.add_argument('--histogram_range', type=str2bool, default=None, help='use histogram based range estimation for quantization')
+parser.add_argument('--per_channel_q', type=str2bool, default=None, help='use per-channel quantization')
+parser.add_argument('--bias_calibration', type=str2bool, default=None, help='use bias calibration during quantization')
 parser.add_argument('--bitwidth_weights', type=int, default=None, help='bitwidth for weight quantization')
 parser.add_argument('--bitwidth_activations', type=int, default=None, help='bitwidth for activation quantization')
 #
-
+parser.add_argument('--freeze_bn', type=str2bool, default=None, help='freeze the bn stats or not')
 cmds = parser.parse_args()
 
 ################################
@@ -46,6 +50,7 @@ if 'gpus' in vars(cmds):
         os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(v) for v in value])
 #
 
+################################
 # to avoid hangs in data loader with multi threads
 # this was observed after using cv2 image processing functions
 # https://github.com/pytorch/pytorch/issues/1355
@@ -53,6 +58,8 @@ cv2.setNumThreads(0)
 
 
 ################################
+# torch must be imported after CUDA_VISIBLE_DEVICES is set, for the setting to take effect
+import torch
 from pytorch_jacinto_ai.engine import train_pixel2pixel
 
 #Create the parse and set default arguments
@@ -132,5 +139,30 @@ for key in vars(cmds):
 #
 
 ################################
-#Run the training
-train_pixel2pixel.main(args)
\ No newline at end of file
+# Run the given phase
+train_pixel2pixel.main(args)
+
+################################
+# In addition run a quantization aware training, starting from the trained model
+if 'training' in args.phase and (not args.quantize):
+    save_path = train_pixel2pixel.get_save_path(args)
+    args.pretrained = os.path.join(save_path, 'model_best.pth.tar') if (args.epochs>0) else args.pretrained
+    args.phase = 'training_quantize'
+    args.quantize = True
+    args.lr = 1e-5
+    args.epochs = 25
+    # quantized training will use only one GPU in the engine - so reduce the batch_size
+    num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(',')) if ("CUDA_VISIBLE_DEVICES" in os.environ) else None
+    args.batch_size = (args.batch_size//num_gpus) if (num_gpus is not None) else args.batch_size
+    train_pixel2pixel.main(args)
+#
+
+################################
+# In addition run a separate validation
+if 'training' in args.phase or 'calibration' in args.phase:
+    save_path = train_pixel2pixel.get_save_path(args)
+    args.pretrained = os.path.join(save_path, 'model_best.pth.tar')
+    args.phase = 'validation'
+    args.quantize = True
+    train_pixel2pixel.main(args)
+#
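
[Review note] Taken together, the blocks added above make each training script run up to three phases by mutating one shared args object between calls to main(); a condensed sketch (not from the commit, but using the repo's own entry points):

```python
import os
from pytorch_jacinto_ai.engine import train_pixel2pixel

args = train_pixel2pixel.get_config()
args.phase, args.quantize = 'training', False
train_pixel2pixel.main(args)                               # 1. float training

save_path = train_pixel2pixel.get_save_path(args)
args.pretrained = os.path.join(save_path, 'model_best.pth.tar')
args.phase, args.quantize = 'training_quantize', True
args.lr, args.epochs = 1e-5, 25
train_pixel2pixel.main(args)                               # 2. quantization-aware training

args.phase = 'validation'
train_pixel2pixel.main(args)                               # 3. quantized-accuracy check
```
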
index d37d86fb7b96e66a0dc61e94c70dd4e325a0e612..3d31769ef21d1243daacd0d983d24a7c55e7bb7f 100755 (executable)
@@ -2,10 +2,8 @@ import sys
 import os
 import cv2
 import argparse
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
-
+import datetime
+import numpy as np
 
 ################################
 from pytorch_jacinto_ai.xnn.utils import str2bool
@@ -14,7 +12,7 @@ parser.add_argument('--save_path', type=str, default=None, help='checkpoint save
 parser.add_argument('--gpus', type=int, nargs='*', default=None, help='Base learning rate')
 parser.add_argument('--batch_size', type=int, default=None, help='Batch size')
 parser.add_argument('--lr', type=float, default=None, help='Base learning rate')
-parser.add_argument('--lr_bias', type=float, default=None, help='Learning rate for bias parameters. If not specified, lr will be used.')
+parser.add_argument('--lr_clips', type=float, default=None, help='Learning rate for clips in PAct2')
 parser.add_argument('--lr_calib', type=float, default=None, help='Learning rate for calibration')
 parser.add_argument('--model_name', type=str, default=None, help='model name')
 parser.add_argument('--dataset_name', type=str, default=None, help='dataset name')
@@ -27,11 +25,15 @@ parser.add_argument('--img_resize', type=int, nargs=2, default=None, help='img_r
 parser.add_argument('--rand_scale', type=float, nargs=2, default=None, help='random scale factors for training')
 parser.add_argument('--rand_crop', type=int, nargs=2, default=None, help='random crop for training')
 parser.add_argument('--output_size', type=int, nargs=2, default=None, help='output size of the evaluation - prediction/groundtruth. this is not used while training as it blows up memory requirement')
-#
-parser.add_argument('--quantize', type=str2bool, default=None, help='Quantize the model')
 parser.add_argument('--pretrained', type=str, default=None, help='pretrained model')
 parser.add_argument('--resume', type=str, default=None, help='resume an unfinished training from this model')
 parser.add_argument('--phase', type=str, default=None, help='training/calibration/validation')
+parser.add_argument('--evaluate_start', type=str2bool, default=None, help='Whether to run validation before the training')
+#
+parser.add_argument('--quantize', type=str2bool, default=None, help='Quantize the model')
+parser.add_argument('--histogram_range', type=str2bool, default=None, help='use histogram based range estimation for quantization')
+parser.add_argument('--per_channel_q', type=str2bool, default=None, help='use per-channel quantization')
+parser.add_argument('--bias_calibration', type=str2bool, default=None, help='use bias calibration during quantization')
 parser.add_argument('--bitwidth_weights', type=int, default=None, help='bitwidth for weight quantization')
 parser.add_argument('--bitwidth_activations', type=int, default=None, help='bitwidth for activation quantization')
 #
@@ -46,6 +48,7 @@ if 'gpus' in vars(cmds):
         os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(v) for v in value])
 #
 
+################################
 # to avoid hangs in data loader with multi threads
 # this was observed after using cv2 image processing functions
 # https://github.com/pytorch/pytorch/issues/1355
@@ -53,6 +56,8 @@ cv2.setNumThreads(0)
 
 
 ################################
+# torch must be imported after CUDA_VISIBLE_DEVICES is set, for the setting to take effect
+import torch
 from pytorch_jacinto_ai.engine import train_pixel2pixel
 
 #Create the parse and set default arguments
@@ -118,8 +123,8 @@ args.model_config.aspp_dil = (2, 4, 6)
 
 args.generate_onnx = False
 #args.phase = 'validation'
-# args.quantize = True
-# args.model_surgery = 'pact2'
+#args.quantize = True
+
 args.model_config.normalize_gradients = True
 
 args.pivot_task_idx = 2
@@ -137,6 +142,30 @@ for key in vars(cmds):
 #
 
 ################################
-#Run the training
+# Run the given phase
 train_pixel2pixel.main(args)
 
+################################
+# In addition run a quantization aware training, starting from the trained model
+if 'training' in args.phase and (not args.quantize):
+    save_path = train_pixel2pixel.get_save_path(args)
+    args.pretrained = os.path.join(save_path, 'model_best.pth.tar') if (args.epochs>0) else args.pretrained
+    args.phase = 'training_quantize'
+    args.quantize = True
+    args.lr = 1e-5
+    args.epochs = 25
+    # quantized training will use only one GPU in the engine - so reduce the batch_size
+    num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(',')) if ("CUDA_VISIBLE_DEVICES" in os.environ) else None
+    args.batch_size = (args.batch_size//num_gpus) if (num_gpus is not None) else args.batch_size
+    train_pixel2pixel.main(args)
+#
+
+################################
+# In addition run a separate validation
+if 'training' in args.phase or 'calibration' in args.phase:
+    save_path = train_pixel2pixel.get_save_path(args)
+    args.pretrained = os.path.join(save_path, 'model_best.pth.tar')
+    args.phase = 'validation'
+    args.quantize = True
+    train_pixel2pixel.main(args)
+#
index 94c18aab8de0106b32fc630e7f06799e96a60745..271d4d40e34c1b7534c0ec568fdb1ff791305a35 100755 (executable)
@@ -5,10 +5,7 @@ import os
 import cv2
 import argparse
 import datetime
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
-
+import numpy as np
 
 ################################
 from pytorch_jacinto_ai.xnn.utils import str2bool
@@ -53,12 +50,15 @@ if 'gpus' in vars(cmds):
         os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(v) for v in value])
 #
 
+################################
 # to avoid hangs in data loader with multi threads
 # this was observed after using cv2 image processing functions
 # https://github.com/pytorch/pytorch/issues/1355
 cv2.setNumThreads(0)
 
 ################################
+# torch must be imported after CUDA_VISIBLE_DEVICES is set, for the setting to take effect
+import torch
 from pytorch_jacinto_ai.engine import train_pixel2pixel
 
 # Create the parser and set default arguments
@@ -69,9 +69,10 @@ args = train_pixel2pixel.get_config()
 args.model_name = 'deeplabv3lite_mobilenetv2_tv' #'deeplabv3lite_mobilenetv2_tv' #'fpn_pixel2pixel_aspp_mobilenetv2_tv' #'fpn_pixel2pixel_aspp_resnet50'
 args.dataset_name = 'cityscapes_segmentation' #'cityscapes_segmentation' #'voc_segmentation'
 
-#args.save_path = './data/checkpoints'
 args.data_path = './data/datasets/cityscapes/data' #'./data/datasets/cityscapes/data' #'./data/datasets/voc'
 
+#args.save_path = './data/checkpoints'
+
 args.pretrained = 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'
                         # 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'
                         # './data/modelzoo/pretrained/pytorch/imagenet_classification/ericsun99/MobileNet-V2-Pytorch/mobilenetv2_Top1_71.806_Top2_90.410.pth.tar'
@@ -110,23 +111,23 @@ args.transform_rotation = 5                                 #rotation degrees
 #args.image_mean = [123.675, 116.28, 103.53]
 #args.image_scale = [0.017125, 0.017507, 0.017429]
 
-#args.phase = 'validation'
-#args.quantize = True
+#args.parallel_model=False
 #args.print_model = True
 #args.generate_onnx = False
 #args.run_soon = False
 #args.evaluate_start = False
 args.print_freq = 10
 
+#args.phase = 'validation' #'training'
 #args.quantize = True
 #args.per_channel_q = True
-#args.phase = 'validation'
-#args.parallel_model=False
+
 
 # defining date from outside can help to write multiple phases into the same folder
 args.date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
 
 ################################
+# set other args
 for key in vars(cmds):
     if key == 'gpus':
         pass # already taken care above, since this has to be done before importing pytorch
@@ -146,14 +147,14 @@ train_pixel2pixel.main(args)
 # In addition run a quantization aware training, starting from the trained model
 if 'training' in args.phase and (not args.quantize):
     save_path = train_pixel2pixel.get_save_path(args)
-    args.pretrained = os.path.join(save_path, 'model_best.pth.tar')
+    args.pretrained = os.path.join(save_path, 'model_best.pth.tar') if (args.epochs>0) else args.pretrained
     args.phase = 'training_quantize'
     args.quantize = True
-    args.lr = 5e-5
-    args.epochs = min(args.epochs,25)
+    args.lr = 1e-5
+    args.epochs = 25
     # quantized training will use only one GPU in the engine - so reduce the batch_size
-    num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(','))
-    args.batch_size = args.batch_size//num_gpus
+    num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(',')) if ("CUDA_VISIBLE_DEVICES" in os.environ) else None
+    args.batch_size = (args.batch_size//num_gpus) if (num_gpus is not None) else args.batch_size
     train_pixel2pixel.main(args)
 #