release commit
author Manu Mathew <a0393608@ti.com>
Thu, 16 Jan 2020 08:24:02 +0000 (13:54 +0530)
committer Manu Mathew <a0393608@ti.com>
Thu, 16 Jan 2020 08:24:02 +0000 (13:54 +0530)
README.md
modules/pytorch_jacinto_ai/vision/models/pixel2pixel/fpn_pixel2pixel.py

index 3949672b4cd1ec86ddc358a0305f86a3d9a6549d..97bd27e8c141528df2e7587d307fed119a2d9353 100644
--- a/README.md
+++ b/README.md
@@ -11,9 +11,7 @@ This code provides a set of low complexity deep learning examples and models for
 
 This code also includes tools for **Post Training Calibration and Trained Quantization (a.k.a Quantization Aware Training)** that can output an 8-bit Quantization friendly model - these tools can be used to improve the quantized accuracy and bring it near floating point accuracy. For more details, please refer to the section on [Quantization](docs/Quantization.md).
 
-Our expectation is that these Deep Learning examples, models and tools will find application in a variety of problems, and the users will be able to build upon the **building blocks** that we have provided. 
-
-**Several of these models have been verified to work on [TI's Jacinto7 Automotive Processors](http://www.ti.com/processors/automotive-processors/tdax-adas-socs/overview.html).** This code is primarily intended for learning and research. 
+**Several of these models have been verified to work on [TI's Jacinto7 Automotive Processors](http://www.ti.com/processors/automotive-processors/tdax-adas-socs/overview.html).** These tools and software are primarily intended as examples for learning and research.
 
 ## Installation Instructions
 - These instructions are for installation on **Ubuntu 18.04**. 
index cde3b4d04425ccf0321e9d886f04f81c229195e4..939ad4b82901ceb169cd4185980dc87c3bcab317 100644
--- a/modules/pytorch_jacinto_ai/vision/models/pixel2pixel/fpn_pixel2pixel.py
+++ b/modules/pytorch_jacinto_ai/vision/models/pixel2pixel/fpn_pixel2pixel.py
@@ -14,6 +14,102 @@ __all__ = ['FPNPixel2PixelASPP', 'FPNPixel2PixelDecoder',
            'fpn_pixel2pixel_aspp_resnet50', 'fpn_pixel2pixel_aspp_resnet50_fd',
            ]
 
+# config settings for mobilenetv2 backbone
+def get_config_fpnp2p_mnv2():
+    model_config = xnn.utils.ConfigNode()
+    model_config.num_classes = None
+    model_config.num_decoders = None
+    model_config.intermediate_outputs = True
+    model_config.use_aspp = True
+    model_config.use_extra_strides = False
+    model_config.groupwise_sep = False
+    model_config.fastdown = False
+
+    model_config.strides = (2,2,2,2,2)
+    encoder_stride = np.prod(model_config.strides)
+    model_config.shortcut_strides = (4,8,16,encoder_stride)
+    model_config.shortcut_channels = (24,32,96,320) # this is for mobilenetv2 - change for other networks
+    model_config.decoder_chan = 256
+    model_config.aspp_chan = 256
+    model_config.aspp_dil = (6,12,18)
+
+    model_config.inloop_fpn = False # inloop_fpn means the smooth convs are in the loop, after upsample
+
+    model_config.kernel_size_smooth = 3
+    model_config.interpolation_type = 'upsample'
+    model_config.interpolation_mode = 'bilinear'
+
+    model_config.final_prediction = True
+    model_config.final_upsample = True
+
+    model_config.normalize_input = False
+    model_config.split_outputs = False
+    model_config.decoder_factor = 1.0
+    model_config.activation = xnn.layers.DefaultAct2d
+    model_config.linear_dw = False
+    model_config.normalize_gradients = False
+    model_config.freeze_encoder = False
+    model_config.freeze_decoder = False
+    model_config.multi_task = False
+    return model_config
+
+
+###########################################
+class FPNPyramid(torch.nn.Module):
+    def __init__(self, model_config, upsample, current_channels, decoder_channels, shortcut_strides, shortcut_channels, inloop_fpn=False, all_outputs=False):
+        super().__init__()
+        self.model_config = model_config
+        self.shortcut_strides = shortcut_strides
+        self.shortcut_channels = shortcut_channels
+        activation = model_config.activation
+        self.inloop_fpn = inloop_fpn  # remember the flag so forward() matches how the smooth convs were built
+        self.upsample = upsample
+        self.smooth_convs = torch.nn.ModuleList()
+        self.shortcuts = torch.nn.ModuleList([self.create_shortcut(current_channels, decoder_channels, activation)])
+        for idx, (s_stride, feat_chan) in enumerate(zip(shortcut_strides, shortcut_channels)):
+            shortcut = self.create_shortcut(feat_chan, decoder_channels, activation)
+            self.shortcuts.append(shortcut)
+            is_last = (idx == len(shortcut_channels)-1)
+            if inloop_fpn or (all_outputs or is_last):
+                smooth_conv = xnn.layers.ConvDWSepNormAct2d(decoder_channels, decoder_channels, kernel_size=model_config.kernel_size_smooth, activation=(activation,activation))
+            else:
+                smooth_conv = None
+            #
+            self.smooth_convs.append(smooth_conv)
+        #
+    #
+
+    def create_shortcut(self, inch, outch, activation):
+        shortcut = xnn.layers.ConvNormAct2d(inch, outch, kernel_size=1, activation=activation)
+        return shortcut
+    #
+
+    def forward(self, x_list, in_shape):
+        x = x_list[-1]
+        x = self.shortcuts[0](x)
+        outputs = [x]
+        for shortcut, smooth_conv, s_stride, short_chan in zip(self.shortcuts[1:], self.smooth_convs, self.shortcut_strides, self.shortcut_channels):
+            shape_s = xnn.utils.get_shape_with_stride(in_shape, s_stride)
+            shape_s[1] = short_chan
+            x_s = xnn.utils.get_blob_from_list(x_list, shape_s)
+            x_s = shortcut(x_s)
+            x = self.upsample((x,x_s))
+            x = x + x_s
+            if self.inloop_fpn:
+                x = smooth_conv(x)
+                outputs.append(x)
+            elif (smooth_conv is not None):
+                y = smooth_conv(x)
+                outputs.append(y)
+            #
+        #
+        return outputs[::-1]
+
+
+class InLoopFPNPyramid(FPNPyramid):
+    def __init__(self, model_config, upsample, input_channels, decoder_channels, shortcut_strides, shortcut_channels, inloop_fpn=True, all_outputs=False):
+        super().__init__(model_config, upsample, input_channels, decoder_channels, shortcut_strides, shortcut_channels, inloop_fpn, all_outputs)
+
 
 ###########################################
 class FPNPixel2PixelDecoder(torch.nn.Module):
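The `FPNPyramid` added above implements the standard FPN top-down pathway: a 1x1 shortcut projection per level, size-based upsampling, elementwise addition, and an optional 3x3 depthwise-separable smoothing conv. The sketch below is a simplified, self-contained restatement of that loop, not the devkit code: it assumes identity laterals (channels already matched), a single shared plain `Conv2d` in place of the per-level `xnn.layers.ConvDWSepNormAct2d`, and `F.interpolate` in place of `xnn.layers.UpsampleTo`. It shows what the `inloop_fpn` flag changes: whether the smoothed tensor feeds the next level or only the output branch.

```python
import torch
import torch.nn.functional as F

def fpn_topdown_sketch(features, smooth, inloop_smooth=False):
    # features: encoder maps ordered shallow-to-deep, already projected to a
    # common channel count (the 1x1 shortcut convs do this in FPNPyramid)
    x = features[-1]                   # start at the deepest level
    outputs = [x]
    for x_s in reversed(features[:-1]):
        x = F.interpolate(x, size=x_s.shape[2:], mode='bilinear', align_corners=False)
        x = x + x_s
        if inloop_smooth:
            x = smooth(x)              # InLoopFPNPyramid: smoothed map feeds the next level
            outputs.append(x)
        else:
            outputs.append(smooth(x))  # FPNPyramid: smoothing only on the output branch
    return outputs[::-1]               # finest resolution first, as forward() returns

feats = [torch.randn(1, 256, 64, 64),  # stride 4
         torch.randn(1, 256, 32, 32),  # stride 8
         torch.randn(1, 256, 16, 16),  # stride 16
         torch.randn(1, 256, 8, 8)]    # stride 32
smooth = torch.nn.Conv2d(256, 256, kernel_size=3, padding=1)
print([tuple(o.shape) for o in fpn_topdown_sketch(feats, smooth, inloop_smooth=True)])
```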
@@ -22,76 +117,59 @@ class FPNPixel2PixelDecoder(torch.nn.Module):
         self.model_config = model_config
         activation = self.model_config.activation
         self.output_type = model_config.output_type
-
         self.decoder_channels = decoder_channels = round(self.model_config.decoder_chan*self.model_config.decoder_factor)
 
+        upstride1 = 2
+        upstride2 = self.model_config.shortcut_strides[0]
+        self.upsample1 = xnn.layers.UpsampleTo(decoder_channels, decoder_channels, upstride1, model_config.interpolation_type, model_config.interpolation_mode)
+        self.upsample2 = xnn.layers.UpsampleTo(model_config.output_channels, model_config.output_channels, upstride2, model_config.interpolation_type, model_config.interpolation_mode)
+
         self.rfblock = None
         if self.model_config.use_aspp:
             current_channels = self.model_config.shortcut_channels[-1]
             aspp_channels = round(self.model_config.aspp_chan * self.model_config.decoder_factor)
-            self.rfblock = xnn.layers.DWASPPLiteBlock(current_channels, aspp_channels, decoder_channels, dilation=self.model_config.aspp_dil,
-                                              avg_pool=False, activation=activation)
+            self.rfblock = xnn.layers.DWASPPLiteBlock(current_channels, aspp_channels, decoder_channels, dilation=self.model_config.aspp_dil, avg_pool=False, activation=activation)
+            current_channels = decoder_channels
         elif self.model_config.use_extra_strides:
             # a low complexity pyramid
             current_channels = self.model_config.shortcut_channels[-3]
             self.rfblock = torch.nn.Sequential(xnn.layers.ConvDWSepNormAct2d(current_channels, current_channels, kernel_size=3, stride=2, activation=(activation, activation)),
                                                xnn.layers.ConvDWSepNormAct2d(current_channels, decoder_channels, kernel_size=3, stride=2, activation=(activation, activation)))
+            current_channels = decoder_channels
         else:
             current_channels = self.model_config.shortcut_channels[-1]
             self.rfblock = xnn.layers.ConvNormAct2d(current_channels, decoder_channels, kernel_size=1, stride=1)
-
-        current_channels = decoder_channels
-
-        self.shortcuts = torch.nn.ModuleList()
-        self.smooth_convs = torch.nn.ModuleList()
-
-        for s_stride, feat_chan in zip(self.model_config.shortcut_strides[::-1][1:], self.model_config.shortcut_channels[::-1][1:]):
-            shortcut = xnn.layers.ConvNormAct2d(feat_chan, decoder_channels, kernel_size=1, activation=activation)
-            self.shortcuts.append(shortcut)
-            if self.model_config.smooth_conv:
-                smooth_conv = xnn.layers.ConvDWSepNormAct2d(current_channels, decoder_channels, kernel_size=self.model_config.kernel_size_smooth, activation=(activation,activation))
-            else:
-                smooth_conv = xnn.layers.BypassBlock()
-
-            self.smooth_convs.append(smooth_conv)
             current_channels = decoder_channels
+        #
+
+        shortcut_strides = self.model_config.shortcut_strides[::-1][1:]
+        shortcut_channels = self.model_config.shortcut_channels[::-1][1:]
+        FPNType = InLoopFPNPyramid if model_config.inloop_fpn else FPNPyramid
+        self.fpn = FPNType(self.model_config, self.upsample1, current_channels, decoder_channels, shortcut_strides, shortcut_channels)
 
         # prediction
         if self.model_config.final_prediction:
             self.pred = xnn.layers.ConvDWSepNormAct2d(current_channels, self.model_config.output_channels, kernel_size=3, normalization=(True,False), activation=(False,False))
+        #
 
-        upstride1 = 2
-        upstride2 = self.model_config.shortcut_strides[0]
-        self.upsample1 = xnn.layers.UpsampleTo(decoder_channels, decoder_channels, upstride1, model_config.interpolation_type, model_config.interpolation_mode)
-        self.upsample2 = xnn.layers.UpsampleTo(model_config.output_channels, model_config.output_channels, upstride2, model_config.interpolation_type, model_config.interpolation_mode)
-
-
-    # the upsampling is using functional form to support size based upsampling for odd sizes
-    # that are not a perfect ratio (eg. 257x513), which seem to be popular for segmentation
-    def forward(self, x, x_features, x_list):
-        assert isinstance(x, (list,tuple)) and len(x)<=2, 'incorrect input'
-        x_input = x[0]
+    def forward(self, x_input, x, x_list):
+        assert isinstance(x_input, (list,tuple)) and len(x_input)<=2, 'incorrect input'
+        assert x is x_list[-1], 'the features must be the last one in x_list'
+        x_input = x_input[0]
         in_shape = x_input.shape
 
-        # rfblock at output stride
-        if self.model_config.use_aspp:
-            x = self.rfblock(x_features)
-        elif self.model_config.use_extra_strides:
-            x = x_features
+        if self.model_config.use_extra_strides:
             for blk in self.rfblock:
                 x = blk(x)
                 x_list += [x]
-        else:
-            x = self.rfblock(x_features)
+            #
+        elif self.rfblock is not None:
+            x = self.rfblock(x)
+            x_list[-1] = x
+        #
 
-        for s_stride, shortcut, smooth_conv, short_chan in zip(self.model_config.shortcut_strides[::-1][1:], self.shortcuts, self.smooth_convs, self.model_config.shortcut_channels[::-1][1:]):
-            shape_s = xnn.utils.get_shape_with_stride(in_shape, s_stride)
-            shape_s[1] = short_chan
-            x_s = xnn.utils.get_blob_from_list(x_list, shape_s)
-            x_s = shortcut(x_s)
-            x = self.upsample1((x,x_s))
-            x = x + x_s
-            x = smooth_conv(x)
+        x_list = self.fpn(x_list, in_shape)
+        x = x_list[0]
 
         if self.model_config.final_prediction:
             # prediction
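The comment removed from `forward` above explained why the upsampling here is size-based rather than scale-based: segmentation inputs such as 257x513 are not exact multiples of the encoder stride, so multiplying a feature map by a fixed scale factor overshoots. A short self-contained demonstration (the 19 channels are an illustrative assumption, e.g. Cityscapes classes):

```python
import torch
import torch.nn.functional as F

# A 257x513 input reduced by stride 4 (with ceil rounding from conv padding)
# gives a 65x129 map; scaling back up by a fixed factor of 4 cannot recover 257x513.
x = torch.randn(1, 19, 65, 129)
by_scale = F.interpolate(x, scale_factor=4, mode='bilinear', align_corners=False)
by_size  = F.interpolate(x, size=(257, 513), mode='bilinear', align_corners=False)
print(by_scale.shape)  # torch.Size([1, 19, 260, 516]) -- overshoots the input size
print(by_size.shape)   # torch.Size([1, 19, 257, 513]) -- matches the input exactly
```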
@@ -116,45 +194,6 @@ class FPNPixel2PixelASPP(Pixel2PixelNet):
 
 
 ###########################################
-# config settings for mobilenetv2 backbone
-def get_config_fpnp2p_mnv2():
-    model_config = xnn.utils.ConfigNode()
-    model_config.num_classes = None
-    model_config.num_decoders = None
-    model_config.intermediate_outputs = True
-    model_config.use_aspp = True
-    model_config.use_extra_strides = False
-    model_config.groupwise_sep = False
-    model_config.fastdown = False
-
-    model_config.strides = (2,2,2,2,2)
-    encoder_stride = np.prod(model_config.strides)
-    model_config.shortcut_strides = (4,8,16,encoder_stride)
-    model_config.shortcut_channels = (24,32,96,320) # this is for mobilenetv2 - change for other networks
-    model_config.smooth_conv = True
-    model_config.decoder_chan = 256
-    model_config.aspp_chan = 256
-    model_config.aspp_dil = (6,12,18)
-
-    model_config.kernel_size_smooth = 3
-    model_config.interpolation_type = 'upsample'
-    model_config.interpolation_mode = 'bilinear'
-
-    model_config.final_prediction = True
-    model_config.final_upsample = True
-
-    model_config.normalize_input = False
-    model_config.split_outputs = False
-    model_config.decoder_factor = 1.0
-    model_config.activation = xnn.layers.DefaultAct2d
-    model_config.linear_dw = False
-    model_config.normalize_gradients = False
-    model_config.freeze_encoder = False
-    model_config.freeze_decoder = False
-    model_config.multi_task = False
-    return model_config
-
-
 def fpn_pixel2pixel_aspp_mobilenetv2_tv(model_config, pretrained=None):
     model_config = get_config_fpnp2p_mnv2().merge_from(model_config)
     # encoder setup
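A hedged usage sketch for the constructor above: it merges user overrides onto the `get_config_fpnp2p_mnv2()` defaults via `merge_from`, as its first line shows. The import paths and the concrete field values below are assumptions for illustration, not a documented entry point.

```python
# Sketch only: import paths and field values are assumptions inferred
# from this file, not the devkit's documented API.
from pytorch_jacinto_ai import xnn                                   # assumed layout
from pytorch_jacinto_ai.vision.models.pixel2pixel.fpn_pixel2pixel import (
    fpn_pixel2pixel_aspp_mobilenetv2_tv)                             # assumed path

config = xnn.utils.ConfigNode()
config.output_type = 'segmentation'   # field read by FPNPixel2PixelDecoder
config.output_channels = 19           # e.g. Cityscapes classes (assumption)
config.inloop_fpn = True              # selects InLoopFPNPyramid instead of FPNPyramid
model = fpn_pixel2pixel_aspp_mobilenetv2_tv(config, pretrained=None)
```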