1 #!/usr/bin/env python
3 import sys
4 import os
5 import cv2
6 import argparse
7 import datetime
8 import numpy as np
10 ################################
11 from pytorch_jacinto_ai.xnn.utils import str2bool
# Command line interface: every option defaults to None so that, later in the
# script, only explicitly supplied values override the config defaults.
# Fix: several help strings were copy-paste errors from other options
# ('--gpus' claimed to be a learning rate; the quantization flags all said
# 'run only evaluation and no training') - corrected below.
parser = argparse.ArgumentParser()
parser.add_argument('--save_path', type=str, default=None, help='checkpoint save folder')
parser.add_argument('--gpus', type=int, nargs='*', default=None, help='gpu ids to use (sets CUDA_VISIBLE_DEVICES before torch is imported)')
parser.add_argument('--batch_size', type=int, default=None, help='Batch size')
parser.add_argument('--lr', type=float, default=None, help='Base learning rate')
parser.add_argument('--lr_clips', type=float, default=None, help='Learning rate for clips in PAct2')
parser.add_argument('--lr_calib', type=float, default=None, help='Learning rate for calibration')
parser.add_argument('--model_name', type=str, default=None, help='model name')
parser.add_argument('--dataset_name', type=str, default=None, help='dataset name')
parser.add_argument('--data_path', type=str, default=None, help='data path')
parser.add_argument('--epochs', type=int, default=None, help='number of epochs')
parser.add_argument('--warmup_epochs', type=int, default=None, help='number of epochs for the learning rate to increase and reach base value')
parser.add_argument('--milestones', type=int, nargs='*', default=None, help='change lr at these milestones')
parser.add_argument('--img_resize', type=int, nargs=2, default=None, help='img_resize size. for training this will be modified according to rand_scale')
parser.add_argument('--rand_scale', type=float, nargs=2, default=None, help='random scale factors for training')
parser.add_argument('--rand_crop', type=int, nargs=2, default=None, help='random crop for training')
parser.add_argument('--output_size', type=int, nargs=2, default=None, help='output size of the evaluation - prediction/groundtruth. this is not used while training as it blows up memory requirement')
parser.add_argument('--pretrained', type=str, default=None, help='pretrained model')
parser.add_argument('--resume', type=str, default=None, help='resume an unfinished training from this model')
parser.add_argument('--phase', type=str, default=None, help='training/calibration/validation')
parser.add_argument('--evaluate_start', type=str2bool, default=None, help='Whether to run validation before the training')
parser.add_argument('--workers', type=int, default=None, help='number of workers for dataloading')
parser.add_argument('--save_onnx', type=str2bool, default=None, help='Whether to export onnx model or not')
#
parser.add_argument('--quantize', type=str2bool, default=None, help='Quantize the model')
parser.add_argument('--histogram_range', type=str2bool, default=None, help='whether to use histogram based range estimation for quantization')
parser.add_argument('--per_channel_q', type=str2bool, default=None, help='whether to use per channel quantization')
parser.add_argument('--bias_calibration', type=str2bool, default=None, help='whether to use bias calibration for quantization')
parser.add_argument('--bitwidth_weights', type=int, default=None, help='bitwidth for weight quantization')
parser.add_argument('--bitwidth_activations', type=int, default=None, help='bitwidth for activation quantization')
#
parser.add_argument('--freeze_bn', type=str2bool, default=None, help='freeze the bn stats or not')
#
parser.add_argument('--shuffle', type=str2bool, default=None, help='whether to shuffle the training set or not')
parser.add_argument('--shuffle_val', type=str2bool, default=None, help='whether to shuffle the validation set or not')
parser.add_argument('--epoch_size', type=float, default=None, help='epoch size. options are: 0, fraction or number. '
                                                                   '0 will use the full epoch. '
                                                                   'using a number will cause the epoch to have that many images. '
                                                                   'using a fraction will reduce the number of images used for one epoch. ')
parser.add_argument('--epoch_size_val', type=float, default=None, help='epoch size for validation. options are: 0, fraction or number. '
                                                                   '0 will use the full epoch. '
                                                                   'using a number will cause the epoch to have that many images. '
                                                                   'using a fraction will reduce the number of images used for one epoch. ')
#
cmds = parser.parse_args()
58 ################################
59 # taken care first, since this has to be done before importing pytorch
60 if 'gpus' in vars(cmds):
61 value = getattr(cmds, 'gpus')
62 if (value is not None) and ("CUDA_VISIBLE_DEVICES" not in os.environ):
63 os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(v) for v in value])
64 #
65 #
67 ################################
68 # to avoid hangs in data loader with multi threads
69 # this was observed after using cv2 image processing functions
70 # https://github.com/pytorch/pytorch/issues/1355
71 cv2.setNumThreads(0)
73 ################################
74 #import of torch should be after CUDA_VISIBLE_DEVICES for it to take effect
75 import torch
76 from pytorch_jacinto_ai.engine import train_pixel2pixel
78 # Create the parser and set default arguments
79 args = train_pixel2pixel.get_config()
81 ################################
82 #Modify arguments
83 args.model_name = 'deeplabv3lite_mobilenetv2_tv' #'deeplabv3lite_mobilenetv2_tv' #'fpnlite_pixel2pixel_aspp_mobilenetv2_tv' #'fpnlite_pixel2pixel_aspp_resnet50'
84 args.dataset_name = 'cityscapes_segmentation' #'cityscapes_segmentation' #'voc_segmentation'
86 args.data_path = './data/datasets/cityscapes/data' #'./data/datasets/cityscapes/data' #'./data/datasets/voc'
88 #args.save_path = './data/checkpoints'
90 args.pretrained = 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'
91 # 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth'
92 # './data/modelzoo/pretrained/pytorch/imagenet_classification/ericsun99/MobileNet-V2-Pytorch/mobilenetv2_Top1_71.806_Top2_90.410.pth.tar'
93 # 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
95 #args.resume = './data/checkpoints/cityscapes_segmentation/2019-04-11-05-35-55_cityscapes_segmentation_deeplabv3lite_mobilenetv2_relu_resize768x384_traincrop768x384/checkpoint.pth.tar'
97 args.model_config.input_channels = (3,)
98 args.model_config.output_type = ['segmentation']
99 args.model_config.output_channels = None
100 args.model_config.output_range = None
101 args.model_config.num_decoders = None #0, 1, None
103 args.losses = [['segmentation_loss']]
104 args.metrics = [['segmentation_metrics']]
106 args.solver = 'adam' #'sgd' #'adam'
107 args.epochs = 250 #200
108 args.epoch_size = 0 #0 #0.5
109 args.epoch_size_val = 0 #0 #0.5
110 args.scheduler = 'step' #'poly' #'step'
111 args.multistep_gamma = 0.25 #0.5 #only for step scheduler
112 args.milestones = (100, 200) #only for step scheduler
113 args.polystep_power = 0.9 #only for poly scheduler
114 args.iter_size = 1 #2
116 args.lr = 4e-4 #1e-4 #0.01 #7e-3 #1e-4 #2e-4
117 args.batch_size = 12 #12 #16 #32 #64
118 args.weight_decay = 1e-4 #1e-4 #4e-5 #1e-5
120 args.img_resize = (384, 768) #(384, 768) (512, 1024) #(1024, 2048)
121 args.output_size = (1024, 2048) #target output size for evaluation
123 args.transform_rotation = 5 #rotation degrees
125 #args.image_mean = [123.675, 116.28, 103.53]
126 #args.image_scale = [0.017125, 0.017507, 0.017429]
128 #args.parallel_model=False
129 #args.print_model = True
130 #args.save_onnx = False
131 #args.run_soon = False
132 #args.evaluate_start = False
133 args.print_freq = 10
135 #args.phase = 'validation' #'training'
136 #args.quantize = True
137 #args.per_channel_q = True
140 # defining date from outside can help to write multiple phases into the same folder
141 args.date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
143 ################################
144 # set other args
145 for key in vars(cmds):
146 if key == 'gpus':
147 pass # already taken care above, since this has to be done before importing pytorch
148 elif hasattr(args, key):
149 value = getattr(cmds, key)
150 if value != 'None' and value is not None:
151 setattr(args, key, value)
152 else:
153 assert False, f'invalid argument {key}'
154 #
156 ################################
157 # Run the given phase
158 train_pixel2pixel.main(args)
160 ################################
161 # In addition run a quantization aware training, starting from the trained model
162 if 'training' in args.phase and (not args.quantize):
163 save_path = train_pixel2pixel.get_save_path(args)
164 args.pretrained = os.path.join(save_path, 'model_best.pth') if (args.epochs>0) else args.pretrained
165 args.phase = 'training_quantize'
166 args.quantize = True
167 args.lr = 1e-5
168 args.epochs = 50
169 train_pixel2pixel.main(args)
170 #
172 ################################
173 # In addition run a separate validation
174 if 'training' in args.phase or 'calibration' in args.phase:
175 save_path = train_pixel2pixel.get_save_path(args)
176 args.pretrained = os.path.join(save_path, 'model_best.pth')
177 if 'training' in args.phase:
178 # DataParallel isn't enabled for QuantCalibrateModule and QuantTestModule.
179 # If the previous phase was training, then it is likely that the batch_size was high and won't fit in a single gpu - reduce it.
180 num_gpus = len(str(os.environ["CUDA_VISIBLE_DEVICES"]).split(',')) if ("CUDA_VISIBLE_DEVICES" in os.environ) else None
181 args.batch_size = max(args.batch_size//num_gpus, 1) if (num_gpus is not None) else args.batch_size
182 #
183 args.phase = 'validation'
184 args.quantize = True
185 train_pixel2pixel.main(args)
186 #