depth - doc and script update

[jacinto-ai/pytorch-jacinto-ai-devkit.git] / scripts / train_depth_main.py
diff --git a/scripts/train_depth_main.py b/scripts/train_depth_main.py

index 63ce6177ecbe1f073c01d1a2b46977ab675c8c68..4d11dcb47335b731d8af6668a3d11234912d64d6 100755 (executable)
--- a/scripts/train_depth_main.py
+++ b/scripts/train_depth_main.py
@@ -5,10 +5,7 @@ import os
  import cv2
  import argparse
  import datetime
-
-################################
-#sys.path.insert(0, os.path.abspath('./modules'))
-
+import numpy as np
  
  ################################
  from pytorch_jacinto_ai.xnn.utils import str2bool
@@ -53,12 +50,15 @@ if 'gpus' in vars(cmds):
          os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(v) for v in value])
  #
  
+################################
  # to avoid hangs in data loader with multi threads
  # this was observed after using cv2 image processing functions
  # https://github.com/pytorch/pytorch/issues/1355
  cv2.setNumThreads(0)
  
  ################################
+# torch must be imported only after CUDA_VISIBLE_DEVICES has been set, otherwise the setting does not take effect
+import torch
  from pytorch_jacinto_ai.engine import train_pixel2pixel
  
  # Create the parser and set default arguments
@@ -70,19 +70,22 @@ args.model_name = 'deeplabv3lite_mobilenetv2_tv' #'deeplabv3lite_mobilenetv2_tv'
  
  args.dataset_name = 'kitti_depth' #'kitti_depth' #'kitti_depth' #'kitti_depth2'
  
-#args.save_path = './data/checkpoints'
-
  args.data_path = './data/datasets/kitti/kitti_depth/data'
  args.split_files = (args.data_path+'/train.txt', args.data_path+'/val.txt')
  
+#args.save_path = './data/checkpoints'
+
  args.pretrained = './data/modelzoo/semantic_segmentation/cityscapes/deeplabv3lite-mobilenetv2/cityscapes_segmentation_deeplabv3lite-mobilenetv2_2019-06-26-08-59-32.pth'
                                      # 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'
                                      # './data/modelzoo/pretrained/pytorch/imagenet_classification/ericsun99/MobileNet-V2-Pytorch/mobilenetv2_Top1_71.806_Top2_90.410.pth.tar'
                                      # 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
  
-args.model_config.input_channels = (3,)      # [3,3]
+args.model_config.input_channels = (3,)     # [3,3]
  args.model_config.output_type = ['depth']
  args.model_config.output_channels = [1]
+args.model_config.output_range = [(0,128)]  # important note: set this output_range parameter in the inference script as well
+                                            # this is an important difference from the semantic segmentation script.
+
  args.losses = [['supervised_loss', 'scale_loss', 'supervised_error_var']] #[['supervised_loss', 'scale_loss']]
  args.loss_mult_factors = [[0.125, 0.125, 4.0]]
  
@@ -127,6 +130,7 @@ args.date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
  
  
  ################################
+# set other args
  for key in vars(cmds):
      if key == 'gpus':
          pass # already taken care above, since this has to be done before importing pytorch
@@ -143,24 +147,26 @@ for key in vars(cmds):
  train_pixel2pixel.main(args)
  
  ################################
-# In addition run a quantized calibration, starting from the trained model
-if args.phase == 'training' and (not args.quantize):
+# In addition, run quantization-aware training, starting from the trained model
+if 'training' in args.phase and (not args.quantize):
      save_path = train_pixel2pixel.get_save_path(args)
-    args.pretrained = os.path.join(save_path, 'model_best.pth.tar')
-    args.phase = 'calibration'
+    args.pretrained = os.path.join(save_path, 'model_best.pth.tar') if (args.epochs>0) else args.pretrained
+    args.phase = 'training_quantize'
      args.quantize = True
-    args.epochs = 1
-    args.epoch_size = 100
+    args.lr = 1e-5
+    args.epochs = 25
+    # quantized training uses only one GPU in the engine, so reduce batch_size to the per-GPU share
+    num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(',')) if ("CUDA_VISIBLE_DEVICES" in os.environ) else None
+    args.batch_size = (args.batch_size//num_gpus) if (num_gpus is not None) else args.batch_size
      train_pixel2pixel.main(args)
  #
  
  ################################
-# In addition run a separate validation, starting from the calibrated model - to estimate the quantized accuracy accurately
-if args.phase == 'training' or args.phase == 'calibration':
+# In addition, run a separate validation pass, starting from the best saved model
+if 'training' in args.phase or 'calibration' in args.phase:
      save_path = train_pixel2pixel.get_save_path(args)
      args.pretrained = os.path.join(save_path, 'model_best.pth.tar')
      args.phase = 'validation'
      args.quantize = True
      train_pixel2pixel.main(args)
  #
-