release commit
authorManu Mathew <a0393608@ti.com>
Fri, 17 Jan 2020 11:39:50 +0000 (17:09 +0530)
committerManu Mathew <a0393608@ti.com>
Fri, 17 Jan 2020 11:39:50 +0000 (17:09 +0530)
docs/Semantic_Segmentation.md
modules/pytorch_jacinto_ai/engine/train_classification.py
modules/pytorch_jacinto_ai/engine/train_pixel2pixel.py
modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/segmentation.py
modules/pytorch_jacinto_ai/vision/models/pixel2pixel/fpn_pixel2pixel.py
run_segmentation.sh

index 1d4826f3a2c87f56cee5e11479efb7263c404a17..7cf06964f1515cfd3fdc87692d8bcb48d589e744 100644 (file)
@@ -38,6 +38,8 @@ Whether to use multiple inputs or how many decoders to use are fully configurabl
     ```
     cd VOCdevkit/VOC2012/ImageSets/Segmentation
     ls -1 SegmentationClassAug | sed s/.png// > trainaug.txt
+    wget http://home.bharathh.info/pubs/codes/SBD/train_noval.txt  
+    mv train_noval.txt trainaug_noval.txt 
     ```
 
 ## Training
index be27d8f994d1d6373164daf6c2f4efe9535f65c8..7ba0f2aea453e471135745f9fa89ed7cb970f60d 100644 (file)
@@ -228,8 +228,10 @@ def main(args):
     #
 
     # load pretrained
-    xnn.utils.load_weights_check(get_model_orig(model), pretrained=pretrained_data, change_names_dict=change_names_dict)
-
+    if pretrained_data is not None:
+        xnn.utils.load_weights_check(get_model_orig(model), pretrained=pretrained_data, change_names_dict=change_names_dict)
+    #
+    
     #################################################
     if args.count_flops:
         count_flops(args, model)
index 11e0bf6deecd918088912efb145969b428bc7fe2..c5a000708550b80384a6db5082314f5837f1ab60 100644 (file)
@@ -341,7 +341,9 @@ def main(args):
     #
 
     # load pretrained model
-    xnn.utils.load_weights_check(get_model_orig(model), pretrained=pretrained_data, change_names_dict=change_names_dict)
+    if pretrained_data is not None:
+        xnn.utils.load_weights_check(get_model_orig(model), pretrained=pretrained_data, change_names_dict=change_names_dict)
+    #
 
     #################################################
     if args.count_flops:
index c37296a8d83b74275b84309778827ec6c051399c..9b173b8d1cf1736c1d9f07e65a76b7a40f4cf53e 100644 (file)
@@ -30,15 +30,16 @@ def cityscales_instance(dataset_config, root, split=('train', 'val'), transforms
 
 #####################################################################
 def voc_segmentation(dataset_config, root, split=None, transforms=None, target_type=None):
-    split = ('trainaug', 'val') if split is None else split
+    split = ('trainaug_noval', 'val') if split is None else split
     warnings_str = '''
     Note: 'trainaug' set of VOC2012 Segmentation has images in 'val' as well, so validation results won't be indicative of the test results.
     To get test results when using 'trainaug' for training, submit for testing on Pascal VOC 2012 test server.
     However, when training with 'trainaug_noval', validation results are correct, and expected to be indicative of the test.
-    But since 'trainaug_noval', has fewer images, the results may be be poorer. Using 'trainaug' as default for now.
+    But since 'trainaug_noval' has fewer images, the results may be poorer. Using 'trainaug_noval' as default for now.
     For more details, see here: https://github.com/DrSleep/tensorflow-deeplab-resnet
     And here: http://home.bharathh.info/pubs/codes/SBD/download.html
     And here: https://github.com/tensorflow/models/blob/master/research/deeplab/train.py 
+    The list of images for trainaug_noval is available here: http://home.bharathh.info/pubs/codes/SBD/train_noval.txt
     '''
     if split[0] == 'trainaug':
         warnings.warn(warnings_str)
index 939ad4b82901ceb169cd4185980dc87c3bcab317..571e70feaa40c81325db5b46c2f3c5843ad4787b 100644 (file)
@@ -56,25 +56,28 @@ def get_config_fpnp2p_mnv2():
 
 ###########################################
 class FPNPyramid(torch.nn.Module):
-    def __init__(self, model_config, upsample, current_channels, decoder_channels, shortcut_strides, shortcut_channels, inloop_fpn=False, all_outputs=False):
+    def __init__(self, current_channels, decoder_channels, shortcut_strides, shortcut_channels, activation, kernel_size_smooth, interpolation_type, interpolation_mode, inloop_fpn=False, all_outputs=False):
         super().__init__()
-        self.model_config = model_config
+        self.inloop_fpn = inloop_fpn
         self.shortcut_strides = shortcut_strides
         self.shortcut_channels = shortcut_channels
-        activation = model_config.activation
-        self.upsample = upsample
         self.smooth_convs = torch.nn.ModuleList()
         self.shortcuts = torch.nn.ModuleList([self.create_shortcut(current_channels, decoder_channels, activation)])
+        self.upsamples = torch.nn.ModuleList()
+
+        upstride = 2
         for idx, (s_stride, feat_chan) in enumerate(zip(shortcut_strides, shortcut_channels)):
             shortcut = self.create_shortcut(feat_chan, decoder_channels, activation)
             self.shortcuts.append(shortcut)
             is_last = (idx == len(shortcut_channels)-1)
             if inloop_fpn or (all_outputs or is_last):
-                smooth_conv = xnn.layers.ConvDWSepNormAct2d(decoder_channels, decoder_channels, kernel_size=model_config.kernel_size_smooth, activation=(activation,activation))
+                smooth_conv = xnn.layers.ConvDWSepNormAct2d(decoder_channels, decoder_channels, kernel_size=kernel_size_smooth, activation=(activation,activation))
             else:
                 smooth_conv = None
             #
             self.smooth_convs.append(smooth_conv)
+            upsample = xnn.layers.UpsampleTo(decoder_channels, decoder_channels, upstride, interpolation_type, interpolation_mode)
+            self.upsamples.append(upsample)
         #
     #
 
@@ -87,14 +90,14 @@ class FPNPyramid(torch.nn.Module):
         x = x_list[-1]
         x = self.shortcuts[0](x)
         outputs = [x]
-        for idx, (shortcut, smooth_conv, s_stride, short_chan) in enumerate(zip(self.shortcuts[1:], self.smooth_convs, self.shortcut_strides, self.shortcut_channels)):
+        for idx, (shortcut, smooth_conv, s_stride, short_chan, upsample) in enumerate(zip(self.shortcuts[1:], self.smooth_convs, self.shortcut_strides, self.shortcut_channels, self.upsamples)):
             shape_s = xnn.utils.get_shape_with_stride(in_shape, s_stride)
             shape_s[1] = short_chan
             x_s = xnn.utils.get_blob_from_list(x_list, shape_s)
             x_s = shortcut(x_s)
-            x = self.upsample((x,x_s))
+            x = upsample((x,x_s))
             x = x + x_s
-            if self.model_config.inloop_fpn:
+            if self.inloop_fpn:
                 x = smooth_conv(x)
                 outputs.append(x)
             elif (smooth_conv is not None):
@@ -106,8 +109,8 @@ class FPNPyramid(torch.nn.Module):
 
 
 class InLoopFPNPyramid(FPNPyramid):
-    def __init__(self, model_config, upsample, input_channels, decoder_channels, shortcut_strides, shortcut_channels, inloop_fpn=True, all_outputs=False):
-        super().__init__(model_config, upsample, input_channels, decoder_channels, shortcut_strides, shortcut_channels, inloop_fpn, all_outputs)
+    def __init__(self, input_channels, decoder_channels, shortcut_strides, shortcut_channels, activation, kernel_size_smooth, interpolation_type, interpolation_mode, inloop_fpn=True, all_outputs=False):
+        super().__init__(input_channels, decoder_channels, shortcut_strides, shortcut_channels, activation, kernel_size_smooth, interpolation_type, interpolation_mode, inloop_fpn=inloop_fpn, all_outputs=all_outputs)
 
 
 ###########################################
@@ -119,10 +122,8 @@ class FPNPixel2PixelDecoder(torch.nn.Module):
         self.output_type = model_config.output_type
         self.decoder_channels = decoder_channels = round(self.model_config.decoder_chan*self.model_config.decoder_factor)
 
-        upstride1 = 2
-        upstride2 = self.model_config.shortcut_strides[0]
-        self.upsample1 = xnn.layers.UpsampleTo(decoder_channels, decoder_channels, upstride1, model_config.interpolation_type, model_config.interpolation_mode)
-        self.upsample2 = xnn.layers.UpsampleTo(model_config.output_channels, model_config.output_channels, upstride2, model_config.interpolation_type, model_config.interpolation_mode)
+        upstride_final = self.model_config.shortcut_strides[0]
+        self.upsample = xnn.layers.UpsampleTo(model_config.output_channels, model_config.output_channels, upstride_final, model_config.interpolation_type, model_config.interpolation_mode)
 
         self.rfblock = None
         if self.model_config.use_aspp:
@@ -145,7 +146,8 @@ class FPNPixel2PixelDecoder(torch.nn.Module):
         shortcut_strides = self.model_config.shortcut_strides[::-1][1:]
         shortcut_channels = self.model_config.shortcut_channels[::-1][1:]
         FPNType = InLoopFPNPyramid if model_config.inloop_fpn else FPNPyramid
-        self.fpn = FPNType(self.model_config, self.upsample1, current_channels, decoder_channels, shortcut_strides, shortcut_channels)
+        self.fpn = FPNType(current_channels, decoder_channels, shortcut_strides, shortcut_channels, self.model_config.activation, self.model_config.kernel_size_smooth,
+                           self.model_config.interpolation_type, self.model_config.interpolation_mode)
 
         # prediction
         if self.model_config.final_prediction:
@@ -177,7 +179,7 @@ class FPNPixel2PixelDecoder(torch.nn.Module):
 
             # final prediction is the upsampled one
             if self.model_config.final_upsample:
-                x = self.upsample2((x,x_input))
+                x = self.upsample((x,x_input))
 
             if (not self.training) and (self.output_type == 'segmentation'):
                 x = torch.argmax(x, dim=1, keepdim=True)
index 4e3e5ea87f828a2d3fb6f65bea8ac35987ab9113..9a30e760b8a82fe7ee2116d8e623465e26b02229 100755 (executable)
@@ -8,6 +8,10 @@
 #python ./scripts/train_segmentation_main.py --dataset_name cityscapes_segmentation --model_name deeplabv3lite_mobilenetv2_tv --data_path ./data/datasets/cityscapes/data --img_resize 384 768 --output_size 1024 2048 --gpus 0 1 \
 #--pretrained https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
 
+#### Cityscapes Semantic Segmentation - Training with MobileNetV2+FPNPixel2Pixel(ASPP)
+#python ./scripts/train_segmentation_main.py --dataset_name cityscapes_segmentation --model_name fpn_pixel2pixel_aspp_mobilenetv2_tv --data_path ./data/datasets/cityscapes/data --img_resize 384 768 --output_size 1024 2048 --gpus 0 1 \
+#--pretrained https://download.pytorch.org/models/mobilenet_v2-b0353104.pth
+
 #### Cityscapes Semantic Segmentation - original fpn - no aspp model, stride 64 model - Low Complexity Model
 #python ./scripts/train_segmentation_main.py --dataset_name cityscapes_segmentation --model_name fpn_pixel2pixel_aspp_mobilenetv2_tv_fd --data_path ./data/datasets/cityscapes/data --img_resize 384 768 --output_size 1024 2048 --gpus 0 1 \
 #--pretrained https://download.pytorch.org/models/mobilenet_v2-b0353104.pth