renamed pytorch_jacinto_ai.vision to pytorch_jacinto_ai.xvision
author Manu Mathew <a0393608@ti.com>
Mon, 20 Jul 2020 20:04:52 +0000 (01:34 +0530)
committer Manu Mathew <a0393608@ti.com>
Mon, 20 Jul 2020 20:14:41 +0000 (01:44 +0530)
120 files changed:
docs/Quantization.md
examples/quantization_example.py
examples/write_onnx_model_example.py
modules/pytorch_jacinto_ai/engine/infer_classification_onnx_rt.py
modules/pytorch_jacinto_ai/engine/infer_pixel2pixel.py
modules/pytorch_jacinto_ai/engine/infer_pixel2pixel_onnx_rt.py
modules/pytorch_jacinto_ai/engine/test_classification.py
modules/pytorch_jacinto_ai/engine/test_pixel2pixel_onnx.py
modules/pytorch_jacinto_ai/engine/train_classification.py
modules/pytorch_jacinto_ai/engine/train_pixel2pixel.py
modules/pytorch_jacinto_ai/xnn/layers/conv_blocks.py
modules/pytorch_jacinto_ai/xnn/quantize/quant_train_module.py
modules/pytorch_jacinto_ai/xvision/__init__.py [moved from modules/pytorch_jacinto_ai/vision/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/__init__.py [moved from modules/pytorch_jacinto_ai/vision/datasets/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/caltech.py [moved from modules/pytorch_jacinto_ai/vision/datasets/caltech.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/celeba.py [moved from modules/pytorch_jacinto_ai/vision/datasets/celeba.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/cifar.py [moved from modules/pytorch_jacinto_ai/vision/datasets/cifar.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/cityscapes.py [moved from modules/pytorch_jacinto_ai/vision/datasets/cityscapes.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/classification/__init__.py [moved from modules/pytorch_jacinto_ai/vision/datasets/classification/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/coco.py [moved from modules/pytorch_jacinto_ai/vision/datasets/coco.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/fakedata.py [moved from modules/pytorch_jacinto_ai/vision/datasets/fakedata.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/flickr.py [moved from modules/pytorch_jacinto_ai/vision/datasets/flickr.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/folder.py [moved from modules/pytorch_jacinto_ai/vision/datasets/folder.py with 99% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/hmdb51.py [moved from modules/pytorch_jacinto_ai/vision/datasets/hmdb51.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/imagenet.py [moved from modules/pytorch_jacinto_ai/vision/datasets/imagenet.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/kinetics.py [moved from modules/pytorch_jacinto_ai/vision/datasets/kinetics.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/lsun.py [moved from modules/pytorch_jacinto_ai/vision/datasets/lsun.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/mnist.py [moved from modules/pytorch_jacinto_ai/vision/datasets/mnist.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/omniglot.py [moved from modules/pytorch_jacinto_ai/vision/datasets/omniglot.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/phototour.py [moved from modules/pytorch_jacinto_ai/vision/datasets/phototour.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/__init__.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/a2d2.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/a2d2.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/calculate_class_weights.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/calculate_class_weights.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/cityscapes_plus.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/cityscapes_plus.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/dataset_utils.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/dataset_utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/flyingchairs.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/flyingchairs.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/kitti_depth.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/kitti_depth.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/kitti_sceneflow.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/kitti_sceneflow.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/mpisintel.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/mpisintel.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/pixel2pixel/segmentation.py [moved from modules/pytorch_jacinto_ai/vision/datasets/pixel2pixel/segmentation.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/sbd.py [moved from modules/pytorch_jacinto_ai/vision/datasets/sbd.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/sbu.py [moved from modules/pytorch_jacinto_ai/vision/datasets/sbu.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/semeion.py [moved from modules/pytorch_jacinto_ai/vision/datasets/semeion.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/stl10.py [moved from modules/pytorch_jacinto_ai/vision/datasets/stl10.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/svhn.py [moved from modules/pytorch_jacinto_ai/vision/datasets/svhn.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/ucf101.py [moved from modules/pytorch_jacinto_ai/vision/datasets/ucf101.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/usps.py [moved from modules/pytorch_jacinto_ai/vision/datasets/usps.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/utils.py [moved from modules/pytorch_jacinto_ai/vision/datasets/utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/video_utils.py [moved from modules/pytorch_jacinto_ai/vision/datasets/video_utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/vision.py [moved from modules/pytorch_jacinto_ai/vision/datasets/vision.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/datasets/voc.py [moved from modules/pytorch_jacinto_ai/vision/datasets/voc.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/extension.py [moved from modules/pytorch_jacinto_ai/vision/extension.py with 79% similarity]
modules/pytorch_jacinto_ai/xvision/io/__init__.py [moved from modules/pytorch_jacinto_ai/vision/io/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/io/video.py [moved from modules/pytorch_jacinto_ai/vision/io/video.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/losses/__init__.py [moved from modules/pytorch_jacinto_ai/vision/losses/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/losses/basic_loss.py [moved from modules/pytorch_jacinto_ai/vision/losses/basic_loss.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/losses/flow_loss.py [moved from modules/pytorch_jacinto_ai/vision/losses/flow_loss.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/losses/interest_pt_loss.py [moved from modules/pytorch_jacinto_ai/vision/losses/interest_pt_loss.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/losses/loss_utils.py [moved from modules/pytorch_jacinto_ai/vision/losses/loss_utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/losses/norm_loss.py [moved from modules/pytorch_jacinto_ai/vision/losses/norm_loss.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/losses/scale_loss.py [moved from modules/pytorch_jacinto_ai/vision/losses/scale_loss.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/losses/segmentation_loss.py [moved from modules/pytorch_jacinto_ai/vision/losses/segmentation_loss.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/__init__.py [moved from modules/pytorch_jacinto_ai/vision/models/__init__.py with 93% similarity]
modules/pytorch_jacinto_ai/xvision/models/_utils.py [moved from modules/pytorch_jacinto_ai/vision/models/_utils.py with 93% similarity]
modules/pytorch_jacinto_ai/xvision/models/alexnet.py [moved from modules/pytorch_jacinto_ai/vision/models/alexnet.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/classification/__init__.py [moved from modules/pytorch_jacinto_ai/vision/models/classification/__init__.py with 82% similarity]
modules/pytorch_jacinto_ai/xvision/models/densenet.py [moved from modules/pytorch_jacinto_ai/vision/models/densenet.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/__init__.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/_utils.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/_utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/backbone_utils.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/backbone_utils.py with 96% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/faster_rcnn.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/faster_rcnn.py with 96% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/generalized_rcnn.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/generalized_rcnn.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/image_list.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/image_list.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/keypoint_rcnn.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/keypoint_rcnn.py with 96% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/mask_rcnn.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/mask_rcnn.py with 96% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/roi_heads.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/roi_heads.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/rpn.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/rpn.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/detection/transform.py [moved from modules/pytorch_jacinto_ai/vision/models/detection/transform.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/mnasnet.py [moved from modules/pytorch_jacinto_ai/vision/models/mnasnet.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/mobilenetv1.py [moved from modules/pytorch_jacinto_ai/vision/models/mobilenetv1.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/mobilenetv2.py [moved from modules/pytorch_jacinto_ai/vision/models/mobilenetv2.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/multi_input_net.py [moved from modules/pytorch_jacinto_ai/vision/models/multi_input_net.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/pixel2pixel/__init__.py [moved from modules/pytorch_jacinto_ai/vision/models/pixel2pixel/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/pixel2pixel/deeplabv3lite.py [moved from modules/pytorch_jacinto_ai/vision/models/pixel2pixel/deeplabv3lite.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/pixel2pixel/fpnlite_pixel2pixel.py [moved from modules/pytorch_jacinto_ai/vision/models/pixel2pixel/fpnlite_pixel2pixel.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/pixel2pixel/pixel2pixelnet.py [moved from modules/pytorch_jacinto_ai/vision/models/pixel2pixel/pixel2pixelnet.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/pixel2pixel/pixel2pixelnet_utils.py [moved from modules/pytorch_jacinto_ai/vision/models/pixel2pixel/pixel2pixelnet_utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/pixel2pixel/unetlite_pixel2pixel.py [moved from modules/pytorch_jacinto_ai/vision/models/pixel2pixel/unetlite_pixel2pixel.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/resnet.py [moved from modules/pytorch_jacinto_ai/vision/models/resnet.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/segmentation/__init__.py [moved from modules/pytorch_jacinto_ai/vision/models/segmentation/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/segmentation/_utils.py [moved from modules/pytorch_jacinto_ai/vision/models/segmentation/_utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/segmentation/deeplabv3.py [moved from modules/pytorch_jacinto_ai/vision/models/segmentation/deeplabv3.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/segmentation/fcn.py [moved from modules/pytorch_jacinto_ai/vision/models/segmentation/fcn.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/segmentation/segmentation.py [moved from modules/pytorch_jacinto_ai/vision/models/segmentation/segmentation.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/shufflenetv2.py [moved from modules/pytorch_jacinto_ai/vision/models/shufflenetv2.py with 89% similarity]
modules/pytorch_jacinto_ai/xvision/models/squeezenet.py [moved from modules/pytorch_jacinto_ai/vision/models/squeezenet.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/utils.py [moved from modules/pytorch_jacinto_ai/vision/models/utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/vgg.py [moved from modules/pytorch_jacinto_ai/vision/models/vgg.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/video/__init__.py [moved from modules/pytorch_jacinto_ai/vision/models/video/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/models/video/resnet.py [moved from modules/pytorch_jacinto_ai/vision/models/video/resnet.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/ops/__init__.py [moved from modules/pytorch_jacinto_ai/vision/ops/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/ops/_utils.py [moved from modules/pytorch_jacinto_ai/vision/ops/_utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/ops/boxes.py [moved from modules/pytorch_jacinto_ai/vision/ops/boxes.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/ops/feature_pyramid_network.py [moved from modules/pytorch_jacinto_ai/vision/ops/feature_pyramid_network.py with 98% similarity]
modules/pytorch_jacinto_ai/xvision/ops/misc.py [moved from modules/pytorch_jacinto_ai/vision/ops/misc.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/ops/poolers.py [moved from modules/pytorch_jacinto_ai/vision/ops/poolers.py with 98% similarity]
modules/pytorch_jacinto_ai/xvision/ops/roi_align.py [moved from modules/pytorch_jacinto_ai/vision/ops/roi_align.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/ops/roi_pool.py [moved from modules/pytorch_jacinto_ai/vision/ops/roi_pool.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/transforms/__init__.py [moved from modules/pytorch_jacinto_ai/vision/transforms/__init__.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/transforms/functional.py [moved from modules/pytorch_jacinto_ai/vision/transforms/functional.py with 99% similarity]
modules/pytorch_jacinto_ai/xvision/transforms/image_transform_utils.py [moved from modules/pytorch_jacinto_ai/vision/transforms/image_transform_utils.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/transforms/image_transforms.py [moved from modules/pytorch_jacinto_ai/vision/transforms/image_transforms.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/transforms/image_transforms_xv12.py [moved from modules/pytorch_jacinto_ai/vision/transforms/image_transforms_xv12.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/transforms/transforms.py [moved from modules/pytorch_jacinto_ai/vision/transforms/transforms.py with 100% similarity]
modules/pytorch_jacinto_ai/xvision/utils.py [moved from modules/pytorch_jacinto_ai/vision/utils.py with 100% similarity]
requirements.txt
requirements_conda.txt
scripts/train_pixel2pixel_multitask_main.py
setup.py
setup.sh

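This commit is essentially a mechanical package rename: every reference to pytorch_jacinto_ai.vision becomes pytorch_jacinto_ai.xvision, plus a few functional tweaks visible in the diffs below (conv_blocks.py, quant_train_module.py and train_classification.py). For downstream code, only the import path changes. A minimal sketch of the migration (the commented model lookup is a hypothetical usage, not taken from this commit):

```python
# Before this commit:
#   from pytorch_jacinto_ai import vision as xvision
#   from pytorch_jacinto_ai.vision import models

# After this commit:
from pytorch_jacinto_ai import xvision
from pytorch_jacinto_ai.xvision import models

# Usage is unchanged; only the package path differs, e.g. (hypothetical model name):
# model = models.__dict__['mobilenetv2_x1'](pretrained=True)
```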
diff --git a/docs/Quantization.md b/docs/Quantization.md
index 83cbffe28faa3de87b15be66a252e82827ff9d83..f999b5d124688952642d2fcf52f21f0927c7354e 100644 (file)
@@ -25,7 +25,7 @@ To get best accuracy at the quantization stage, it is important that the model i
 
 ## Implementation Notes, Limitations & Recommendations
 - **Please read carefully** - closely following these recommendations can save hours or days of debug related to quantization accuracy issues.
-- **The same module should not be re-used multiple times within the module** in order that the feature map range estimation is correct. Unfortunately, in the torchvision ResNet models, the ReLU module in the BasicBlock and BottleneckBlock are re-used multiple times. We have corrected this by defining separate ReLU modules. This change is minor and **does not** affect the loading of existing pretrained weights. See the [our modified ResNet model definition here](../modules/pytorch_jacinto_ai/vision/models/resnet.py).<br>
+- **The same module should not be re-used multiple times within the module** in order that the feature map range estimation is correct. Unfortunately, in the torchvision ResNet models, the ReLU module in the BasicBlock and BottleneckBlock are re-used multiple times. We have corrected this by defining separate ReLU modules. This change is minor and **does not** affect the loading of existing pretrained weights. See the [our modified ResNet model definition here](../modules/pytorch_jacinto_ai/xvision/models/resnet.py).<br>
 - **Use Modules instead of functionals or tensor operations** (by Module we mean classes derived from torch.nn.Module). We make use of Modules heavily in our quantization tools - in order to do range collection, in order to merge Convolution/BatchNorm/ReLU in order to decide whether to quantize a certain tensor and so on. For example use torch.nn.ReLU instead of torch.nn.functional.relu(), torch.nn.AdaptiveAvgPool2d() instead of torch.nn.functional.adaptive_avg_pool2d(), torch.nn.Flatten() instead of torch.nn.functional.flatten() etc.<br>
 - Other notable modules provided are: [xnn.layers.AddBlock](../modules/pytorch_jacinto_ai/xnn/layers/common_blocks.py) to do elementwise addition and [xnn.layers.CatBlock](../modules/pytorch_jacinto_ai/xnn/layers/common_blocks.py) to do concatenation of tensors. Use these in the models instead of tensor operations. Note that if there are multiple element wise additions in a model, each of them should use a different instance of xnn.layers.AddBlock (since the same module should not be re-used multiple times - see above). The same restriction applies for xnn.layers.CatBlock or any other module as well.
 - **Interpolation/Upsample/Resize** has been tricky in PyTorch in the sense that the ONNX graph generated used to be unnecessarily complicated. Recent versions of PyTorch has fixed it - but the right options must be used to get the clean graph. We have provided a functional form as well as a module form of this operator with the capability to export a clean ONNX graph [xnn.layers.resize_with, xnn.layers.ResizeWith](../modules/pytorch_jacinto_ai/xnn/layers/resize_blocks.py)
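To make the recommendations above concrete, here is a minimal sketch of a quantization-friendly block written with plain torch.nn modules: one module instance per use (relu1 and relu2 are separate objects), and Module forms instead of functionals. The repository recommends xnn.layers.AddBlock for the elementwise add; its exact call convention is not shown in this diff, so the sketch keeps a plain '+' and only marks where a dedicated AddBlock instance would go.

```python
import torch

class QuantFriendlyBlock(torch.nn.Module):
    """Sketch of the 'one module instance per use' rule from Quantization.md."""
    def __init__(self, channels):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn1 = torch.nn.BatchNorm2d(channels)
        self.relu1 = torch.nn.ReLU()          # separate instance ...
        self.conv2 = torch.nn.Conv2d(channels, channels, 3, padding=1, bias=False)
        self.bn2 = torch.nn.BatchNorm2d(channels)
        self.relu2 = torch.nn.ReLU()          # ... not self.relu1 re-used

    def forward(self, x):
        y = self.relu1(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        # the repo recommends one xnn.layers.AddBlock instance per addition here,
        # instead of the tensor operation below
        y = y + x
        return self.relu2(y)
```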
diff --git a/examples/quantization_example.py b/examples/quantization_example.py
index 8c7e22667ccd1a2ed1b305bcf47179fa16964137..7d8649644a12f3064a86d9f50bec4bd366b3b9ca 100644 (file)
@@ -25,12 +25,12 @@ import torchvision.transforms as transforms
 import torchvision.datasets as datasets
 
 # some of the default torchvision models need some minor tweaks to be friendly for
-# quantization aware training. so use models from pytorch_jacinto_ai.vision insead
+# quantization aware training. so use models from pytorch_jacinto_ai.xvision insead
 #import torchvision.models as models
 
 from pytorch_jacinto_ai import xnn
-from pytorch_jacinto_ai import vision as xvision
-from pytorch_jacinto_ai.vision import models as models
+from pytorch_jacinto_ai import xvision as xvision
+from pytorch_jacinto_ai.xvision import models as models
 
 model_names = sorted(name for name in models.__dict__
     if name.islower() and not name.startswith("__")
diff --git a/examples/write_onnx_model_example.py b/examples/write_onnx_model_example.py
index fe55e4b4817243ad06c34c7610fe2d44d6e44446..6c8362253ef907abb9e1925dc591e5a6526c7300 100644 (file)
@@ -1,8 +1,8 @@
 import os
 import torch
 import datetime
-import torchvision as vision
-# from pytorch_jacinto_ai import vision
+import torchvision as xvision
+# from pytorch_jacinto_ai import xvision
 
 # dependencies
 # Anaconda Python 3.7 for Linux - download and install from: https://www.anaconda.com/distribution/
@@ -28,7 +28,7 @@ rand_input = torch.rand(1, 3, rand_crop[0], rand_crop[1])
 
 for model_name in model_names:
     # create the model - replace with your model
-    model = vision.models.__dict__[model_name](pretrained=True)
+    model = xvision.models.__dict__[model_name](pretrained=True)
     model.eval()
 
     # write pytorch model
diff --git a/modules/pytorch_jacinto_ai/engine/infer_classification_onnx_rt.py b/modules/pytorch_jacinto_ai/engine/infer_classification_onnx_rt.py
index 15a48ec2d2400bcfdf2599cd421e82793a84b941..2249e25755e94be08e2f9623588428ca68857d03 100644 (file)
@@ -22,7 +22,7 @@ import onnx
 import onnxruntime
 
 from .. import xnn
-from .. import vision
+from .. import xvision
 
 
 # ################################################
@@ -304,17 +304,17 @@ def get_epoch_size(args, loader, args_epoch_size):
 
 def get_data_loaders(args):
     # Data loading code
-    normalize = vision.transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) \
+    normalize = xvision.transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) \
                         if (args.image_mean is not None and args.image_scale is not None) else None
 
     # pass tuple to Resize() to resize to exact size without respecting aspect ratio (typical caffe style)
-    val_transform = vision.transforms.Compose([vision.transforms.Resize(size=args.img_resize),
-                                               vision.transforms.CenterCrop(size=args.img_crop),
-                                               vision.transforms.ToFloat(),
-                                               vision.transforms.ToTensor(),
+    val_transform = xvision.transforms.Compose([xvision.transforms.Resize(size=args.img_resize),
+                                               xvision.transforms.CenterCrop(size=args.img_crop),
+                                               xvision.transforms.ToFloat(),
+                                               xvision.transforms.ToTensor(),
                                                normalize])
 
-    val_dataset = vision.datasets.classification.__dict__[args.dataset_name](args.dataset_config, args.data_path, transforms=val_transform)
+    val_dataset = xvision.datasets.classification.__dict__[args.dataset_name](args.dataset_config, args.data_path, transforms=val_transform)
 
     val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=args.shuffle, num_workers=args.workers,
                                              pin_memory=True, drop_last=False)
diff --git a/modules/pytorch_jacinto_ai/engine/infer_pixel2pixel.py b/modules/pytorch_jacinto_ai/engine/infer_pixel2pixel.py
index 397422225bc771a7421a0f8319a71fec4d611ae8..e56f474f4d5bec64b537ffaf8d4803d2825ffc74 100644 (file)
@@ -17,7 +17,7 @@ import cv2
 import matplotlib.pyplot as plt
 
 from .. import xnn
-from .. import vision
+from .. import xvision
 from .engine_utils import *
 
 # ################################################
@@ -224,7 +224,7 @@ def main(args):
     if args.dataset is not None:
         dataset = args.dataset
     else:
-        dataset = vision.datasets.pixel2pixel.__dict__[args.dataset_name](args.dataset_config, args.data_path, split=split_arg, transforms=transforms)
+        dataset = xvision.datasets.pixel2pixel.__dict__[args.dataset_name](args.dataset_config, args.data_path, split=split_arg, transforms=transforms)
     #
 
     # if a pair is given, take the second one
@@ -257,7 +257,7 @@ def main(args):
             pretrained_data = args.pretrained
         else:
             if args.pretrained.startswith('http://') or args.pretrained.startswith('https://'):
-                pretrained_file = vision.datasets.utils.download_url(args.pretrained, './data/downloads')
+                pretrained_file = xvision.datasets.utils.download_url(args.pretrained, './data/downloads')
             else:
                 pretrained_file = args.pretrained
             #
@@ -272,7 +272,7 @@ def main(args):
         model, change_names_dict = args.model if isinstance(args.model, (list, tuple)) else (args.model, None)
         assert isinstance(model, torch.nn.Module), 'args.model, if provided must be a valid torch.nn.Module'
     else:
-        model = vision.models.pixel2pixel.__dict__[args.model_name](args.model_config)
+        model = xvision.models.pixel2pixel.__dict__[args.model_name](args.model_config)
         # check if we got the model as well as parameters to change the names in pretrained
         model, change_names_dict = model if isinstance(model, (list,tuple)) else (model,None)
     #
@@ -316,7 +316,7 @@ def main(args):
     for task_dx, task_losses in enumerate(args.losses):
         for loss_idx, loss_fn in enumerate(task_losses):
             kw_args = {}
-            loss_args = vision.losses.__dict__[loss_fn].args()
+            loss_args = xvision.losses.__dict__[loss_fn].args()
             for arg in loss_args:
                 #if arg == 'weight':
                 #    kw_args.update({arg:args.class_weights[task_dx]})
@@ -326,7 +326,7 @@ def main(args):
                     kw_args.update({arg:args.sparse})
                 #
             #
-            loss_fn = vision.losses.__dict__[loss_fn](**kw_args)
+            loss_fn = xvision.losses.__dict__[loss_fn](**kw_args)
             loss_fn = loss_fn.cuda()
             args.loss_modules[task_dx][loss_idx] = loss_fn
 
@@ -334,7 +334,7 @@ def main(args):
     for task_dx, task_metrics in enumerate(args.metrics):
         for midx, metric_fn in enumerate(task_metrics):
             kw_args = {}
-            loss_args = vision.losses.__dict__[metric_fn].args()
+            loss_args = xvision.losses.__dict__[metric_fn].args()
             for arg in loss_args:
                 if arg == 'weight':
                     kw_args.update({arg:args.class_weights[task_dx]})
@@ -344,7 +344,7 @@ def main(args):
                     kw_args.update({arg:args.sparse})
                 #
             #
-            metric_fn = vision.losses.__dict__[metric_fn](**kw_args)
+            metric_fn = xvision.losses.__dict__[metric_fn](**kw_args)
             metric_fn = metric_fn.cuda()
             args.metric_modules[task_dx][midx] = metric_fn
 
@@ -700,18 +700,18 @@ def get_transforms(args):
     # image normalization can be at the beginning of transforms or at the end
     args.image_mean = np.array(args.image_mean, dtype=np.float32)
     args.image_scale = np.array(args.image_scale, dtype=np.float32)
-    image_prenorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if args.image_prenorm else None
-    image_postnorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if (not image_prenorm) else None
+    image_prenorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if args.image_prenorm else None
+    image_postnorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if (not image_prenorm) else None
 
     #target size must be according to output_size. prediction will be resized to output_size before evaluation.
-    test_transform = vision.transforms.image_transforms.Compose([
+    test_transform = xvision.transforms.image_transforms.Compose([
         image_prenorm,
-        vision.transforms.image_transforms.AlignImages(),
-        vision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
-        vision.transforms.image_transforms.CropRect(args.img_border_crop),
-        vision.transforms.image_transforms.Scale(args.img_resize, target_size=args.output_size, is_flow=args.is_flow),
+        xvision.transforms.image_transforms.AlignImages(),
+        xvision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
+        xvision.transforms.image_transforms.CropRect(args.img_border_crop),
+        xvision.transforms.image_transforms.Scale(args.img_resize, target_size=args.output_size, is_flow=args.is_flow),
         image_postnorm,
-        vision.transforms.image_transforms.ConvertToTensor()
+        xvision.transforms.image_transforms.ConvertToTensor()
         ])
 
     return test_transform
diff --git a/modules/pytorch_jacinto_ai/engine/infer_pixel2pixel_onnx_rt.py b/modules/pytorch_jacinto_ai/engine/infer_pixel2pixel_onnx_rt.py
index 0a02316259cbe491b36073d3d40049bcc3b6234f..4532440c8417cd7b0e838d474a4cfe25a622a7b0 100644 (file)
@@ -23,7 +23,7 @@ from onnx import helper
 
 
 from .. import xnn
-from .. import vision
+from .. import xvision
 
 #sys.path.insert(0, '../devkit-datasets/TI/')
 #from fisheye_calib import r_fish_to_theta_rect
@@ -219,7 +219,7 @@ def main(args):
     print("=> fetching img pairs in '{}'".format(args.data_path))
     split_arg = args.split_file if args.split_file else (args.split_files if args.split_files else args.split_value)
 
-    val_dataset = vision.datasets.pixel2pixel.__dict__[args.dataset_name](args.dataset_config, args.data_path, split=split_arg, transforms=transforms)
+    val_dataset = xvision.datasets.pixel2pixel.__dict__[args.dataset_name](args.dataset_config, args.data_path, split=split_arg, transforms=transforms)
 
     print('=> {} val samples found'.format(len(val_dataset)))
     val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size,
@@ -270,7 +270,7 @@ def main(args):
     for task_dx, task_losses in enumerate(args.losses):
         for loss_idx, loss_fn in enumerate(task_losses):
             kw_args = {}
-            loss_args = vision.losses.__dict__[loss_fn].args()
+            loss_args = xvision.losses.__dict__[loss_fn].args()
             for arg in loss_args:
                 #if arg == 'weight':
                 #    kw_args.update({arg:args.class_weights[task_dx]})
@@ -280,7 +280,7 @@ def main(args):
                     kw_args.update({arg:args.sparse})
                 #
             #
-            loss_fn = vision.losses.__dict__[loss_fn](**kw_args)
+            loss_fn = xvision.losses.__dict__[loss_fn](**kw_args)
             loss_fn = loss_fn.cuda()
             args.loss_modules[task_dx][loss_idx] = loss_fn
 
@@ -288,7 +288,7 @@ def main(args):
     for task_dx, task_metrics in enumerate(args.metrics):
         for midx, metric_fn in enumerate(task_metrics):
             kw_args = {}
-            loss_args = vision.losses.__dict__[metric_fn].args()
+            loss_args = xvision.losses.__dict__[metric_fn].args()
             for arg in loss_args:
                 if arg == 'weight':
                     kw_args.update({arg:args.class_weights[task_dx]})
@@ -298,7 +298,7 @@ def main(args):
                     kw_args.update({arg:args.sparse})
                 #
             #
-            metric_fn = vision.losses.__dict__[metric_fn](**kw_args)
+            metric_fn = xvision.losses.__dict__[metric_fn](**kw_args)
             metric_fn = metric_fn.cuda()
             args.metric_modules[task_dx][midx] = metric_fn
 
@@ -659,18 +659,18 @@ def get_transforms(args):
     # image normalization can be at the beginning of transforms or at the end
     args.image_mean = np.array(args.image_mean, dtype=np.float32)
     args.image_scale = np.array(args.image_scale, dtype=np.float32)
-    image_prenorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if args.image_prenorm else None
-    image_postnorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if (not image_prenorm) else None
+    image_prenorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if args.image_prenorm else None
+    image_postnorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if (not image_prenorm) else None
 
     #target size must be according to output_size. prediction will be resized to output_size before evaluation.
-    test_transform = vision.transforms.image_transforms.Compose([
+    test_transform = xvision.transforms.image_transforms.Compose([
         image_prenorm,
-        vision.transforms.image_transforms.AlignImages(),
-        vision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
-        vision.transforms.image_transforms.CropRect(args.img_border_crop),
-        vision.transforms.image_transforms.Scale(args.img_resize, target_size=args.output_size, is_flow=args.is_flow),
+        xvision.transforms.image_transforms.AlignImages(),
+        xvision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
+        xvision.transforms.image_transforms.CropRect(args.img_border_crop),
+        xvision.transforms.image_transforms.Scale(args.img_resize, target_size=args.output_size, is_flow=args.is_flow),
         image_postnorm,
-        vision.transforms.image_transforms.ConvertToTensor()
+        xvision.transforms.image_transforms.ConvertToTensor()
         ])
 
     return test_transform
diff --git a/modules/pytorch_jacinto_ai/engine/test_classification.py b/modules/pytorch_jacinto_ai/engine/test_classification.py
index 7732097354afdc0a07e7c5b073dd804de832fcd4..2a6e8069f8fe04c4dd02ca0c14a22b829dd3b1ec 100644 (file)
@@ -19,7 +19,7 @@ import torch.utils.data
 import torch.utils.data.distributed
 
 from .. import xnn
-from .. import vision
+from .. import xvision
 
 
 # ################################################
@@ -125,7 +125,7 @@ def main(args):
     model_surgery_quantize = False
     if args.pretrained and args.pretrained != "None":
         if args.pretrained.startswith('http://') or args.pretrained.startswith('https://'):
-            pretrained_file = vision.datasets.utils.download_url(args.pretrained, './data/downloads')
+            pretrained_file = xvision.datasets.utils.download_url(args.pretrained, './data/downloads')
         else:
             pretrained_file = args.pretrained
         #
@@ -137,7 +137,7 @@ def main(args):
     ################################
     # create model
     print("=> creating model '{}'".format(args.model_name))
-    model = vision.models.classification.__dict__[args.model_name](args.model_config)
+    model = xvision.models.classification.__dict__[args.model_name](args.model_config)
 
     # check if we got the model as well as parameters to change the names in pretrained
     model, change_names_dict = model if isinstance(model, (list,tuple)) else (model,None)
@@ -384,17 +384,17 @@ def get_epoch_size(args, loader, args_epoch_size):
 
 def get_data_loaders(args):
     # Data loading code
-    normalize = vision.transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) \
+    normalize = xvision.transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) \
                         if (args.image_mean is not None and args.image_scale is not None) else None
 
     # pass tuple to Resize() to resize to exact size without respecting aspect ratio (typical caffe style)
-    val_transform = vision.transforms.Compose([vision.transforms.Resize(size=args.img_resize),
-                                               vision.transforms.CenterCrop(size=args.img_crop),
-                                               vision.transforms.ToFloat(),
-                                               vision.transforms.ToTensor(),
+    val_transform = xvision.transforms.Compose([xvision.transforms.Resize(size=args.img_resize),
+                                               xvision.transforms.CenterCrop(size=args.img_crop),
+                                               xvision.transforms.ToFloat(),
+                                               xvision.transforms.ToTensor(),
                                                normalize])
 
-    train_dataset, val_dataset = vision.datasets.classification.__dict__[args.dataset_name](args.dataset_config, args.data_path, transforms=(None,val_transform))
+    train_dataset, val_dataset = xvision.datasets.classification.__dict__[args.dataset_name](args.dataset_config, args.data_path, transforms=(None,val_transform))
 
     val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=args.shuffle, num_workers=args.workers,
                                              pin_memory=True, drop_last=False)
diff --git a/modules/pytorch_jacinto_ai/engine/test_pixel2pixel_onnx.py b/modules/pytorch_jacinto_ai/engine/test_pixel2pixel_onnx.py
index 84978fefaafb5975f49b98e7f7ea5655e68f22d9..5dde0194d62b5a9e1e6773e7b46a4408901bc2cd 100644 (file)
@@ -19,7 +19,7 @@ import caffe2
 import caffe2.python.onnx.backend
 
 from .. import xnn
-from .. import vision
+from .. import xvision
 
 
 
@@ -161,7 +161,7 @@ def main(args):
 
     print("=> fetching img pairs in '{}'".format(args.data_path))
     split_arg = args.split_file if args.split_file else (args.split_files if args.split_files else args.split_value)
-    val_dataset = vision.datasets.pixel2pixel.__dict__[args.dataset_name](args.dataset_config, args.data_path, split=split_arg, transforms=transforms)
+    val_dataset = xvision.datasets.pixel2pixel.__dict__[args.dataset_name](args.dataset_config, args.data_path, split=split_arg, transforms=transforms)
 
     print('=> {} val samples found'.format(len(val_dataset)))
     
@@ -306,18 +306,18 @@ def get_transforms(args):
     # image normalization can be at the beginning of transforms or at the end
     args.image_mean = np.array(args.image_mean, dtype=np.float32)
     args.image_scale = np.array(args.image_scale, dtype=np.float32)
-    image_prenorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if args.image_prenorm else None
-    image_postnorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if (not image_prenorm) else None
+    image_prenorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if args.image_prenorm else None
+    image_postnorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) if (not image_prenorm) else None
 
     #target size must be according to output_size. prediction will be resized to output_size before evaluation.
-    test_transform = vision.transforms.image_transforms.Compose([
+    test_transform = xvision.transforms.image_transforms.Compose([
         image_prenorm,
-        vision.transforms.image_transforms.AlignImages(),
-        vision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
-        vision.transforms.image_transforms.CropRect(args.img_border_crop),
-        vision.transforms.image_transforms.Scale(args.img_resize, target_size=args.output_size, is_flow=args.is_flow),
+        xvision.transforms.image_transforms.AlignImages(),
+        xvision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
+        xvision.transforms.image_transforms.CropRect(args.img_border_crop),
+        xvision.transforms.image_transforms.Scale(args.img_resize, target_size=args.output_size, is_flow=args.is_flow),
         image_postnorm,
-        vision.transforms.image_transforms.ConvertToTensor()
+        xvision.transforms.image_transforms.ConvertToTensor()
         ])
 
     return test_transform
diff --git a/modules/pytorch_jacinto_ai/engine/train_classification.py b/modules/pytorch_jacinto_ai/engine/train_classification.py
index 1747931b9abdab99496b1cb1587f957471569c6d..06d418bb3460fb2c345a72f5b54c2ed105e6d9bd 100644 (file)
@@ -24,7 +24,7 @@ import onnx
 from onnx import shape_inference
 
 from .. import xnn
-from .. import vision
+from .. import xvision
 
 
 #################################################
@@ -211,7 +211,7 @@ def main(args):
     model_surgery_quantize = False
     if args.pretrained and args.pretrained != "None":
         if args.pretrained.startswith('http://') or args.pretrained.startswith('https://'):
-            pretrained_file = vision.datasets.utils.download_url(args.pretrained, './data/downloads')
+            pretrained_file = xvision.datasets.utils.download_url(args.pretrained, './data/downloads')
         else:
             pretrained_file = args.pretrained
         #
@@ -231,7 +231,7 @@ def main(args):
         model = xnn.onnx.import_onnx(args.model)
         is_onnx_model = True
     else:
-        model = vision.models.classification.__dict__[args.model_name](args.model_config)
+        model = xvision.models.classification.__dict__[args.model_name](args.model_config)
     #
 
     # check if we got the model as well as parameters to change the names in pretrained
@@ -513,7 +513,8 @@ def train(args, train_loader, model, criterion, optimizer, epoch):
         loss = criterion(output, target) / args.iter_size
 
         # measure accuracy and record loss
-        prec1, prec5 = accuracy(output, target, topk=(1, 5))
+        topk_cat = min(5, args.model_config.num_classes)
+        prec1, prec5 = accuracy(output, target, topk=(1, topk_cat))
         losses.update(loss.item(), input_size[0])
         top1.update(prec1[0], input_size[0])
         top5.update(prec5[0], input_size[0])
@@ -605,7 +606,8 @@ def validate(args, val_loader, model, criterion, epoch):
             loss = criterion(output, target)
 
             # measure accuracy and record loss
-            prec1, prec5 = accuracy(output, target, topk=(1, 5))
+            topk_cat = min(5, args.model_config.num_classes)
+            prec1, prec5 = accuracy(output, target, topk=(1, topk_cat))
             losses.update(loss.item(), input_size[0])
             top1.update(prec1[0], input_size[0])
             top5.update(prec5[0], input_size[0])
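
The clamp on the top-k argument in train() and validate() above guards configurations with fewer than five classes, where a fixed topk=(1, 5) would make torch.topk fail. A minimal standalone illustration (the shapes are hypothetical, not taken from this diff):

```python
import torch

logits = torch.randn(4, 2)               # batch of 4, only 2 classes
k = min(5, logits.shape[1])              # mirrors min(5, args.model_config.num_classes)
values, indices = logits.topk(k, dim=1)  # works; logits.topk(5, dim=1) would raise
```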
@@ -746,32 +748,32 @@ def get_dataset_sampler(dataset_object, epoch_size, balanced_sampler=False):
     
 
 def get_train_transform(args):
-    normalize = vision.transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) \
+    normalize = xvision.transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) \
         if (args.image_mean is not None and args.image_scale is not None) else None
-    multi_color_transform = vision.transforms.MultiColor(args.multi_color_modes) if (args.multi_color_modes is not None) else None
+    multi_color_transform = xvision.transforms.MultiColor(args.multi_color_modes) if (args.multi_color_modes is not None) else None
 
-    train_resize_crop_transform = vision.transforms.RandomResizedCrop(size=args.img_crop, scale=args.rand_scale) \
-        if args.rand_scale else vision.transforms.RandomCrop(size=args.img_crop)
-    train_transform = vision.transforms.Compose([train_resize_crop_transform,
-                                                 vision.transforms.RandomHorizontalFlip(),
+    train_resize_crop_transform = xvision.transforms.RandomResizedCrop(size=args.img_crop, scale=args.rand_scale) \
+        if args.rand_scale else xvision.transforms.RandomCrop(size=args.img_crop)
+    train_transform = xvision.transforms.Compose([train_resize_crop_transform,
+                                                 xvision.transforms.RandomHorizontalFlip(),
                                                  multi_color_transform,
-                                                 vision.transforms.ToFloat(),
-                                                 vision.transforms.ToTensor(),
+                                                 xvision.transforms.ToFloat(),
+                                                 xvision.transforms.ToTensor(),
                                                  normalize])
     return train_transform
 
 def get_validation_transform(args):
-    normalize = vision.transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) \
+    normalize = xvision.transforms.NormalizeMeanScale(mean=args.image_mean, scale=args.image_scale) \
         if (args.image_mean is not None and args.image_scale is not None) else None
-    multi_color_transform = vision.transforms.MultiColor(args.multi_color_modes) if (args.multi_color_modes is not None) else None
+    multi_color_transform = xvision.transforms.MultiColor(args.multi_color_modes) if (args.multi_color_modes is not None) else None
 
     # pass tuple to Resize() to resize to exact size without respecting aspect ratio (typical caffe style)
-    val_resize_crop_transform = vision.transforms.Resize(size=args.img_resize) if args.img_resize else vision.transforms.Bypass()
-    val_transform = vision.transforms.Compose([val_resize_crop_transform,
-                                               vision.transforms.CenterCrop(size=args.img_crop),
+    val_resize_crop_transform = xvision.transforms.Resize(size=args.img_resize) if args.img_resize else xvision.transforms.Bypass()
+    val_transform = xvision.transforms.Compose([val_resize_crop_transform,
+                                               xvision.transforms.CenterCrop(size=args.img_crop),
                                                multi_color_transform,
-                                               vision.transforms.ToFloat(),
-                                               vision.transforms.ToTensor(),
+                                               xvision.transforms.ToFloat(),
+                                               xvision.transforms.ToTensor(),
                                                normalize])
     return val_transform
 
@@ -786,7 +788,7 @@ def get_transforms(args):
 def get_data_loaders(args):
     train_transform, val_transform = get_transforms(args)
 
-    train_dataset, val_dataset = vision.datasets.classification.__dict__[args.dataset_name](args.dataset_config, args.data_path, transforms=(train_transform,val_transform))
+    train_dataset, val_dataset = xvision.datasets.classification.__dict__[args.dataset_name](args.dataset_config, args.data_path, transforms=(train_transform,val_transform))
 
     if args.distributed:
         train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
diff --git a/modules/pytorch_jacinto_ai/engine/train_pixel2pixel.py b/modules/pytorch_jacinto_ai/engine/train_pixel2pixel.py
index 0602f3296b024e9d9a5ed54cad5f113e9da0d95e..d87a850117a12ae66fd829e2752a2d2fd95b1945 100644 (file)
@@ -23,7 +23,7 @@ from packaging import version
 import warnings
 
 from .. import xnn
-from .. import vision
+from .. import xvision
 from . infer_pixel2pixel import compute_accuracy
 
 
@@ -275,7 +275,7 @@ def main(args):
 
     print("=> fetching images in '{}'".format(args.data_path))
     split_arg = args.split_file if args.split_file else (args.split_files if args.split_files else args.split_value)
-    train_dataset, val_dataset = vision.datasets.pixel2pixel.__dict__[args.dataset_name](args.dataset_config, args.data_path, split=split_arg, transforms=transforms)
+    train_dataset, val_dataset = xvision.datasets.pixel2pixel.__dict__[args.dataset_name](args.dataset_config, args.data_path, split=split_arg, transforms=transforms)
 
     #################################################
     print('=> {} samples found, {} train samples and {} test samples '.format(len(train_dataset)+len(val_dataset),
@@ -324,7 +324,7 @@ def main(args):
                 p_data = p
             else:
                 if p.startswith('http://') or p.startswith('https://'):
-                    p_file = vision.datasets.utils.download_url(p, './data/downloads')
+                    p_file = xvision.datasets.utils.download_url(p, './data/downloads')
                 else:
                     p_file = p
                 #
@@ -346,7 +346,7 @@ def main(args):
         is_onnx_model = True
     else:
         xnn.utils.print_yellow("=> creating model '{}'".format(args.model_name))
-        model = vision.models.pixel2pixel.__dict__[args.model_name](args.model_config)
+        model = xvision.models.pixel2pixel.__dict__[args.model_name](args.model_config)
         # check if we got the model as well as parameters to change the names in pretrained
         model, change_names_dict = model if isinstance(model, (list,tuple)) else (model,None)
     #
@@ -422,7 +422,7 @@ def main(args):
     for task_dx, task_losses in enumerate(args.losses):
         for loss_idx, loss_fn in enumerate(task_losses):
             kw_args = {}
-            loss_args = vision.losses.__dict__[loss_fn].args()
+            loss_args = xvision.losses.__dict__[loss_fn].args()
             for arg in loss_args:
                 if arg == 'weight' and (args.class_weights is not None):
                     kw_args.update({arg:args.class_weights[task_dx]})
@@ -432,7 +432,7 @@ def main(args):
                     kw_args.update({arg:args.sparse})
                 #
             #
-            loss_fn_raw = vision.losses.__dict__[loss_fn](**kw_args)
+            loss_fn_raw = xvision.losses.__dict__[loss_fn](**kw_args)
             if args.parallel_criterion:
                 loss_fn = torch.nn.DataParallel(loss_fn_raw).cuda() if args.parallel_criterion else loss_fn_raw.cuda()
                 loss_fn.info = loss_fn_raw.info
@@ -447,7 +447,7 @@ def main(args):
     for task_dx, task_metrics in enumerate(args.metrics):
         for midx, metric_fn in enumerate(task_metrics):
             kw_args = {}
-            loss_args = vision.losses.__dict__[metric_fn].args()
+            loss_args = xvision.losses.__dict__[metric_fn].args()
             for arg in loss_args:
                 if arg == 'weight':
                     kw_args.update({arg:args.class_weights[task_dx]})
@@ -456,7 +456,7 @@ def main(args):
                 elif arg == 'sparse':
                     kw_args.update({arg:args.sparse})
 
-            metric_fn_raw = vision.losses.__dict__[metric_fn](**kw_args)
+            metric_fn_raw = xvision.losses.__dict__[metric_fn](**kw_args)
             if args.parallel_criterion:
                 metric_fn = torch.nn.DataParallel(metric_fn_raw).cuda()
                 metric_fn.info = metric_fn_raw.info
@@ -978,7 +978,7 @@ def write_output(args, prefix, val_epoch_size, iter_id, epoch, dataset, output_w
     for img_idx, img in enumerate(input_images):
         if args.model_config.input_nv12:
             #convert NV12 to BGR for tensorboard
-            input_image = vision.transforms.image_transforms_xv12.nv12_to_bgr_image(Y = input_images[img_idx][0][b_index], UV = input_images[img_idx][1][b_index],
+            input_image = xvision.transforms.image_transforms_xv12.nv12_to_bgr_image(Y = input_images[img_idx][0][b_index], UV = input_images[img_idx][1][b_index],
                                    image_scale=args.image_scale, image_mean=args.image_mean)
         else:
             input_image = input_images[img_idx][b_index].cpu().numpy().transpose((1, 2, 0))
@@ -1127,25 +1127,25 @@ def get_train_transform(args):
     # image normalization can be at the beginning of transforms or at the end
     image_mean = np.array(args.image_mean, dtype=np.float32)
     image_scale = np.array(args.image_scale, dtype=np.float32)
-    image_prenorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=image_mean, scale=image_scale) if args.image_prenorm else None
-    image_postnorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=image_mean, scale=image_scale) if (not image_prenorm) else None
+    image_prenorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=image_mean, scale=image_scale) if args.image_prenorm else None
+    image_postnorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=image_mean, scale=image_scale) if (not image_prenorm) else None
 
     # crop size used only for training
-    image_train_output_scaling = vision.transforms.image_transforms.Scale(args.rand_resize, target_size=args.rand_output_size, is_flow=args.is_flow) \
+    image_train_output_scaling = xvision.transforms.image_transforms.Scale(args.rand_resize, target_size=args.rand_output_size, is_flow=args.is_flow) \
         if (args.rand_output_size is not None and args.rand_output_size != args.rand_resize) else None
-    train_transform = vision.transforms.image_transforms.Compose([
+    train_transform = xvision.transforms.image_transforms.Compose([
         image_prenorm,
-        vision.transforms.image_transforms.AlignImages(),
-        vision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
-        vision.transforms.image_transforms.CropRect(args.img_border_crop),
-        vision.transforms.image_transforms.RandomRotate(args.transform_rotation, is_flow=args.is_flow) if args.transform_rotation else None,
-        vision.transforms.image_transforms.RandomScaleCrop(args.rand_resize, scale_range=args.rand_scale, is_flow=args.is_flow),
-        vision.transforms.image_transforms.RandomHorizontalFlip(is_flow=args.is_flow),
-        vision.transforms.image_transforms.RandomCrop(args.rand_crop),
-        vision.transforms.image_transforms.RandomColor2Gray(is_flow=args.is_flow, random_threshold=0.5) if 'tiad' in args.dataset_name else None,
+        xvision.transforms.image_transforms.AlignImages(),
+        xvision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
+        xvision.transforms.image_transforms.CropRect(args.img_border_crop),
+        xvision.transforms.image_transforms.RandomRotate(args.transform_rotation, is_flow=args.is_flow) if args.transform_rotation else None,
+        xvision.transforms.image_transforms.RandomScaleCrop(args.rand_resize, scale_range=args.rand_scale, is_flow=args.is_flow),
+        xvision.transforms.image_transforms.RandomHorizontalFlip(is_flow=args.is_flow),
+        xvision.transforms.image_transforms.RandomCrop(args.rand_crop),
+        xvision.transforms.image_transforms.RandomColor2Gray(is_flow=args.is_flow, random_threshold=0.5) if 'tiad' in args.dataset_name else None,
         image_train_output_scaling,
         image_postnorm,
-        vision.transforms.image_transforms.ConvertToTensor()
+        xvision.transforms.image_transforms.ConvertToTensor()
         ])
     return train_transform
 
@@ -1154,18 +1154,18 @@ def get_validation_transform(args):
     # image normalization can be at the beginning of transforms or at the end
     image_mean = np.array(args.image_mean, dtype=np.float32)
     image_scale = np.array(args.image_scale, dtype=np.float32)
-    image_prenorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=image_mean, scale=image_scale) if args.image_prenorm else None
-    image_postnorm = vision.transforms.image_transforms.NormalizeMeanScale(mean=image_mean, scale=image_scale) if (not image_prenorm) else None
+    image_prenorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=image_mean, scale=image_scale) if args.image_prenorm else None
+    image_postnorm = xvision.transforms.image_transforms.NormalizeMeanScale(mean=image_mean, scale=image_scale) if (not image_prenorm) else None
 
     # prediction is resized to output_size before evaluation.
-    val_transform = vision.transforms.image_transforms.Compose([
+    val_transform = xvision.transforms.image_transforms.Compose([
         image_prenorm,
-        vision.transforms.image_transforms.AlignImages(),
-        vision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
-        vision.transforms.image_transforms.CropRect(args.img_border_crop),
-        vision.transforms.image_transforms.Scale(args.img_resize, target_size=args.output_size, is_flow=args.is_flow),
+        xvision.transforms.image_transforms.AlignImages(),
+        xvision.transforms.image_transforms.MaskTarget(args.target_mask, 0),
+        xvision.transforms.image_transforms.CropRect(args.img_border_crop),
+        xvision.transforms.image_transforms.Scale(args.img_resize, target_size=args.output_size, is_flow=args.is_flow),
         image_postnorm,
-        vision.transforms.image_transforms.ConvertToTensor()
+        xvision.transforms.image_transforms.ConvertToTensor()
         ])
     return val_transform
 
diff --git a/modules/pytorch_jacinto_ai/xnn/layers/conv_blocks.py b/modules/pytorch_jacinto_ai/xnn/layers/conv_blocks.py
index 4160918cc3e5bc32965ef07cbfdaff57f9da7630..28077be6ce65ff477d2662c6b558a2014fea2291 100644 (file)
@@ -114,7 +114,7 @@ def ConvDWTripletNormAct2d(in_planes, out_planes, kernel_size=None, stride=1,
 
 
 class ConvDWTripletRes2d(torch.nn.Module):
-    def __init__(self, *args, with_residual=True, force_residual=False, activation_after_residual=True, **kwargs):
+    def __init__(self, *args, with_residual=True, always_residual=False, activation_after_residual=True, **kwargs):
         super().__init__()
 
         in_planes = args[0]
@@ -131,8 +131,8 @@ class ConvDWTripletRes2d(torch.nn.Module):
         assert isinstance(activation, (list, tuple)) and len(activation) == 3, \
             'activation must be a list/tuple with length 3'
 
-        is_shape_same = (in_planes == out_planes) or (stride == 1)
-        self.use_residual = (with_residual and is_shape_same) or force_residual
+        is_shape_same = (in_planes == out_planes) and (stride == 1)
+        self.use_residual = (with_residual and is_shape_same) or always_residual
 
         if self.use_residual:
             if activation_after_residual:
@@ -171,3 +171,9 @@ class ConvDWTripletRes2d(torch.nn.Module):
             y = self.act(y) if self.act is not None else y
         #
         return y
+
+
+class ConvDWTripletResAlways2d(ConvDWTripletRes2d):
+    def __init__(self, *args, with_residual=True, activation_after_residual=True, **kwargs):
+        super().__init__(*args, with_residual=with_residual, always_residual=True,
+                         activation_after_residual=activation_after_residual, **kwargs)
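
The change to is_shape_same above is a correctness fix: a residual skip connection is only shape-compatible when the input and output channel counts match and the stride is 1, so the two conditions must be combined with 'and' rather than 'or'. A minimal sketch of the corrected predicate, with simplified argument names standing in for the full block signature:

    # minimal sketch (not part of the patch); argument names are illustrative
    def use_residual(in_planes, out_planes, stride, with_residual=True, always_residual=False):
        # the skip path is only shape-safe when channels match AND stride is 1
        is_shape_same = (in_planes == out_planes) and (stride == 1)
        return (with_residual and is_shape_same) or always_residual

    # use_residual(32, 32, 1) -> True
    # use_residual(32, 64, 1) -> False  (channel change breaks the skip path)
    # use_residual(32, 32, 2) -> False  (stride 2 changes the spatial size)
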
index 679261e842ab2da9428a8a3ddabb6ffc24d6b5d8..3c3cfedf1395e1065e9fcd5e8bf4c3aa0db6e368 100644 (file)
@@ -355,9 +355,13 @@ class QuantTrainPAct2(layers.PAct2):
             with torch.no_grad():
                 y.data.copy_(yq.data)
             #
-        elif self.training and (self.quantized_estimation_type == QuantEstimationType.ALPHA_BLENDING_ESTIMATION):
-            # TODO: vary the alpha blending factor over the epochs
-            y = y * (1.0-self.alpha_blending_estimation_factor) + yq * self.alpha_blending_estimation_factor
+        elif (self.quantized_estimation_type == QuantEstimationType.ALPHA_BLENDING_ESTIMATION):
+            if self.training:
+                # TODO: vary the alpha blending factor over the epochs
+                y = y * (1.0-self.alpha_blending_estimation_factor) + yq * self.alpha_blending_estimation_factor
+            else:
+                y = yq
+            #
         elif (self.quantized_estimation_type == QuantEstimationType.QUANTIZED_THROUGH_ESTIMATION):
             # pass on the quantized output - the backward gradients also flow through quantization.
             # however, note the gradients of round and ceil operators are forced to be unity (1.0).
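
The reworked branch above blends the float activation y with its quantized counterpart yq during training, and now falls back to the purely quantized value at evaluation time. A minimal sketch of that behaviour on plain tensors, assuming a scalar blending factor (the module reads it from self.alpha_blending_estimation_factor):

    import torch

    # minimal sketch (not part of the patch); the rounding below is a stand-in for the module's quantizer
    def alpha_blend_estimate(y_float, y_quant, alpha, training):
        if training:
            # gradients see a convex mix of the float and quantized activations
            return y_float * (1.0 - alpha) + y_quant * alpha
        # at evaluation time only the quantized activation is passed on
        return y_quant

    y = torch.randn(2, 8)
    yq = torch.round(y * 16.0) / 16.0   # stand-in quantizer with a 1/16 step size
    out_train = alpha_blend_estimate(y, yq, alpha=0.5, training=True)
    out_eval = alpha_blend_estimate(y, yq, alpha=0.5, training=False)
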
similarity index 99%
rename from modules/pytorch_jacinto_ai/vision/datasets/folder.py
rename to modules/pytorch_jacinto_ai/xvision/datasets/folder.py
index 88ef557658e2a3270271efaa0a61d90c2995995d..c920e8f25dbf8b3c10e13680bd66e74004da6eb4 100644 (file)
@@ -170,7 +170,7 @@ def accimage_loader(path):
 
 
 def default_loader(path):
-    from ...vision import get_image_backend
+    from ...xvision import get_image_backend
     if get_image_backend() == 'accimage':
         return accimage_loader(path)
     else:
similarity index 79%
rename from modules/pytorch_jacinto_ai/vision/extension.py
rename to modules/pytorch_jacinto_ai/xvision/extension.py
index 25f6069a6b11849dea22f23c8ba3a6b7c814c911..f4cafbab9b2df5ed386d16553ab703e1b31d9dac 100644 (file)
@@ -3,13 +3,13 @@ _C = None
 
 def _lazy_import():
     """
-    Make sure that CUDA versions match between the pytorch install and pytorch_jacinto_ai.vision install
+    Make sure that CUDA versions match between the pytorch install and pytorch_jacinto_ai.xvision install
     """
     global _C
     if _C is not None:
         return _C
     import torch
-    from pytorch_jacinto_ai.vision import _C as C
+    from pytorch_jacinto_ai.xvision import _C as C
     _C = C
     if hasattr(_C, "CUDA_VERSION") and torch.version.cuda is not None:
         tv_version = str(_C.CUDA_VERSION)
@@ -24,8 +24,8 @@ def _lazy_import():
         t_major = int(t_version[0])
         t_minor = int(t_version[1])
         if t_major != tv_major or t_minor != tv_minor:
-            raise RuntimeError("Detected that PyTorch and pytorch_jacinto_ai.vision were compiled with different CUDA versions. "
-                               "PyTorch has CUDA Version={}.{} and pytorch_jacinto_ai.vision has CUDA Version={}.{}. "
-                               "Please reinstall the pytorch_jacinto_ai.vision that matches your PyTorch install."
+            raise RuntimeError("Detected that PyTorch and pytorch_jacinto_ai.xvision were compiled with different CUDA versions. "
+                               "PyTorch has CUDA Version={}.{} and pytorch_jacinto_ai.xvision has CUDA Version={}.{}. "
+                               "Please reinstall the pytorch_jacinto_ai.xvision that matches your PyTorch install."
                                .format(t_major, t_minor, tv_major, tv_minor))
     return _C
similarity index 93%
rename from modules/pytorch_jacinto_ai/vision/models/__init__.py
rename to modules/pytorch_jacinto_ai/xvision/models/__init__.py
index 0977ce1d03e620f324c646d3df696de49b68e267..27eb480f9cf3b7eb7ece77789da71945d8feffbf 100644 (file)
@@ -30,5 +30,5 @@ except: pass
 
 @property
 def name():
-    return 'pytorch_jacinto_ai.vision.models'
+    return 'pytorch_jacinto_ai.xvision.models'
 #
\ No newline at end of file
similarity index 93%
rename from modules/pytorch_jacinto_ai/vision/models/_utils.py
rename to modules/pytorch_jacinto_ai/xvision/models/_utils.py
index b951a7edbcff1fec8398313da63c9c882f6ae7e0..c89346dfb324724624d982bc9587b2a92259f62f 100644 (file)
@@ -26,9 +26,9 @@ class IntermediateLayerGetter(nn.ModuleDict):
 
     Examples::
 
-        >>> m = pytorch_jacinto_ai.vision.models.resnet18(pretrained=True)
+        >>> m = pytorch_jacinto_ai.xvision.models.resnet18(pretrained=True)
         >>> # extract layer1 and layer3, giving as names `feat1` and `feat2`
-        >>> new_m = pytorch_jacinto_ai.vision.models._utils.IntermediateLayerGetter(m,
+        >>> new_m = pytorch_jacinto_ai.xvision.models._utils.IntermediateLayerGetter(m,
         >>>     {'layer1': 'feat1', 'layer3': 'feat2'})
         >>> out = new_m(torch.rand(1, 3, 224, 224))
         >>> print([(k, v.shape) for k, v in out.items()])
similarity index 82%
rename from modules/pytorch_jacinto_ai/vision/models/classification/__init__.py
rename to modules/pytorch_jacinto_ai/xvision/models/classification/__init__.py
index cf4437a81434979eeed1239f9c63c55bade216df..af1f6ccff959ce5827b0b2830eb2e0fa13c1ebb9 100644 (file)
@@ -1,10 +1,16 @@
 from .. import mobilenetv2
 from .. import mobilenetv1
 from .. import resnet
+from ..shufflenetv2 import shufflenet_v2_x1_0
+from ..shufflenetv2 import shufflenet_v2_x1_5
+from ..shufflenetv2 import shufflenet_v2_x2_0
 
 try: from .. import mobilenetv2_internal
 except: pass
 
+try: from .. import mobilenetv2_densenas_internal
+except: pass
+
 try: from .. import mobilenetv2_ericsun_internal
 except: pass
 
@@ -24,7 +30,8 @@ __all__ = ['mobilenetv1_x1', 'mobilenetv2_tv_x1', 'mobilenetv2_x1', 'mobilenetv2
            'resnet50_x1', 'resnet50_xp5', 'resnet18_x1',
            # experimental
            'mobilenetv2_ericsun_x1', 'mobilenetv2_shicai_x1',
-           'mobilenetv2_tv_gws_x1', 'flownetslite_base_x1', 'mobilenetv1_multi_label_x1']
+           'mobilenetv2_tv_gws_x1', 'flownetslite_base_x1', 'mobilenetv1_multi_label_x1',
+           'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', 'mobilenetv2_tv_dense_nas_x1']
 
 
 #####################################################################
@@ -106,6 +113,13 @@ def mobilenetv2_tv_gws_x1(model_config=None, pretrained=None):
         model = xnn.utils.load_weights(model, pretrained)
     return model
 
+def mobilenetv2_tv_dense_nas_x1(model_config, pretrained=None):
+    model_config = mobilenetv2_densenas_internal.get_config_mnetv2_dense_nas().merge_from(model_config)
+    model = mobilenetv2_densenas_internal.MobileNetV2TVDenseNAS(model_config=model_config)
+    if pretrained:
+        model = xnn.utils.load_weights(model, pretrained)
+    return model
+
 
 #####################################################################
 def mobilenetv2_ericsun_x1(model_config=None, pretrained=None):
@@ -123,3 +137,12 @@ def mobilenetv2_shicai_x1(model_config=None, pretrained=None):
         model = xnn.utils.load_weights(model, pretrained)
     return model
 
+
+def flownetslite_base_x1(model_config, pretrained=None):
+    model_config = flownetbase_internal.get_config().merge_from(model_config)
+    model = flownetbase_internal.flownetslite_base(model_config, pretrained=pretrained)
+    if pretrained:
+        model = xnn.utils.load_weights(model, pretrained)
+    return model
+
+    #####################################################################
similarity index 96%
rename from modules/pytorch_jacinto_ai/vision/models/detection/backbone_utils.py
rename to modules/pytorch_jacinto_ai/xvision/models/detection/backbone_utils.py
index f97eddc793056bc8bb4bba3a27adf67a6c1827d9..8119236c40f337ada31ced0e4383da14d29230e9 100644 (file)
@@ -11,7 +11,7 @@ class BackboneWithFPN(nn.Sequential):
     """
     Adds a FPN on top of a model.
 
-    Internally, it uses pytorch_jacinto_ai.vision.models._utils.IntermediateLayerGetter to
+    Internally, it uses pytorch_jacinto_ai.xvision.models._utils.IntermediateLayerGetter to
     extract a submodel that returns the feature maps specified in return_layers.
     The same limitations of IntermediateLayerGetter apply here.
 
similarity index 96%
rename from modules/pytorch_jacinto_ai/vision/models/detection/faster_rcnn.py
rename to modules/pytorch_jacinto_ai/xvision/models/detection/faster_rcnn.py
index db1ca0bc2ac533a3937ac7e24240f949c24a122e..a90623682020f12e522de1b81bcbafc8b64353ed 100644 (file)
@@ -100,12 +100,12 @@ class FasterRCNN(GeneralizedRCNN):
     Example::
 
         >>> import torch
-        >>> import pytorch_jacinto_ai.vision
-        >>> from pytorch_jacinto_ai.vision.models.detection import FasterRCNN
-        >>> from pytorch_jacinto_ai.vision.models.detection.rpn import AnchorGenerator
+        >>> import pytorch_jacinto_ai.xvision
+        >>> from pytorch_jacinto_ai.xvision.models.detection import FasterRCNN
+        >>> from pytorch_jacinto_ai.xvision.models.detection.rpn import AnchorGenerator
         >>> # load a pre-trained model for classification and return
         >>> # only the features
-        >>> backbone = pytorch_jacinto_ai.vision.models.mobilenet_v2(pretrained=True).features
+        >>> backbone = pytorch_jacinto_ai.xvision.models.mobilenet_v2(pretrained=True).features
         >>> # FasterRCNN needs to know the number of
         >>> # output channels in a backbone. For mobilenet_v2, it's 1280
         >>> # so we need to add it here
@@ -126,7 +126,7 @@ class FasterRCNN(GeneralizedRCNN):
         >>> # be [0]. More generally, the backbone should return an
         >>> # OrderedDict[Tensor], and in featmap_names you can choose which
         >>> # feature maps to use.
-        >>> roi_pooler = pytorch_jacinto_ai.vision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>> roi_pooler = pytorch_jacinto_ai.xvision.ops.MultiScaleRoIAlign(featmap_names=[0],
         >>>                                                 output_size=7,
         >>>                                                 sampling_ratio=2)
         >>>
@@ -317,7 +317,7 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
 
     Example::
 
-        >>> model = pytorch_jacinto_ai.vision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+        >>> model = pytorch_jacinto_ai.xvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
similarity index 96%
rename from modules/pytorch_jacinto_ai/vision/models/detection/keypoint_rcnn.py
rename to modules/pytorch_jacinto_ai/xvision/models/detection/keypoint_rcnn.py
index 5b930db4979d073dc328523f43d2f482c7e34daf..512b478e13470c28ace4e34902a2eedc76c2afcc 100644 (file)
@@ -101,13 +101,13 @@ class KeypointRCNN(FasterRCNN):
 
     Example::
 
-        >>> import pytorch_jacinto_ai.vision
-        >>> from pytorch_jacinto_ai.vision.models.detection import KeypointRCNN
-        >>> from pytorch_jacinto_ai.vision.models.detection.rpn import AnchorGenerator
+        >>> import pytorch_jacinto_ai.xvision
+        >>> from pytorch_jacinto_ai.xvision.models.detection import KeypointRCNN
+        >>> from pytorch_jacinto_ai.xvision.models.detection.rpn import AnchorGenerator
         >>>
         >>> # load a pre-trained model for classification and return
         >>> # only the features
-        >>> backbone = pytorch_jacinto_ai.vision.models.mobilenet_v2(pretrained=True).features
+        >>> backbone = pytorch_jacinto_ai.xvision.models.mobilenet_v2(pretrained=True).features
         >>> # KeypointRCNN needs to know the number of
         >>> # output channels in a backbone. For mobilenet_v2, it's 1280
         >>> # so we need to add it here
@@ -128,11 +128,11 @@ class KeypointRCNN(FasterRCNN):
         >>> # be [0]. More generally, the backbone should return an
         >>> # OrderedDict[Tensor], and in featmap_names you can choose which
         >>> # feature maps to use.
-        >>> roi_pooler = pytorch_jacinto_ai.vision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>> roi_pooler = pytorch_jacinto_ai.xvision.ops.MultiScaleRoIAlign(featmap_names=[0],
         >>>                                                 output_size=7,
         >>>                                                 sampling_ratio=2)
         >>>
-        >>> keypoint_roi_pooler = pytorch_jacinto_ai.vision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>> keypoint_roi_pooler = pytorch_jacinto_ai.xvision.ops.MultiScaleRoIAlign(featmap_names=[0],
         >>>                                                          output_size=14,
         >>>                                                          sampling_ratio=2)
         >>> # put the pieces together inside a FasterRCNN model
@@ -296,7 +296,7 @@ def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
 
     Example::
 
-        >>> model = pytorch_jacinto_ai.vision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
+        >>> model = pytorch_jacinto_ai.xvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
similarity index 96%
rename from modules/pytorch_jacinto_ai/vision/models/detection/mask_rcnn.py
rename to modules/pytorch_jacinto_ai/xvision/models/detection/mask_rcnn.py
index 1da76f1d7cf9997017bf16e77a590896dd0896b3..be0b9e8c4b0a4792884d587fd8f708240e4ba451 100644 (file)
@@ -104,13 +104,13 @@ class MaskRCNN(FasterRCNN):
 
     Example::
 
-        >>> import pytorch_jacinto_ai.vision
-        >>> from pytorch_jacinto_ai.vision.models.detection import MaskRCNN
-        >>> from pytorch_jacinto_ai.vision.models.detection.rpn import AnchorGenerator
+        >>> import pytorch_jacinto_ai.xvision
+        >>> from pytorch_jacinto_ai.xvision.models.detection import MaskRCNN
+        >>> from pytorch_jacinto_ai.xvision.models.detection.rpn import AnchorGenerator
         >>>
         >>> # load a pre-trained model for classification and return
         >>> # only the features
-        >>> backbone = pytorch_jacinto_ai.vision.models.mobilenet_v2(pretrained=True).features
+        >>> backbone = pytorch_jacinto_ai.xvision.models.mobilenet_v2(pretrained=True).features
         >>> # MaskRCNN needs to know the number of
         >>> # output channels in a backbone. For mobilenet_v2, it's 1280
         >>> # so we need to add it here
@@ -131,11 +131,11 @@ class MaskRCNN(FasterRCNN):
         >>> # be [0]. More generally, the backbone should return an
         >>> # OrderedDict[Tensor], and in featmap_names you can choose which
         >>> # feature maps to use.
-        >>> roi_pooler = pytorch_jacinto_ai.vision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>> roi_pooler = pytorch_jacinto_ai.xvision.ops.MultiScaleRoIAlign(featmap_names=[0],
         >>>                                                 output_size=7,
         >>>                                                 sampling_ratio=2)
         >>>
-        >>> mask_roi_pooler = pytorch_jacinto_ai.vision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>> mask_roi_pooler = pytorch_jacinto_ai.xvision.ops.MultiScaleRoIAlign(featmap_names=[0],
         >>>                                                      output_size=14,
         >>>                                                      sampling_ratio=2)
         >>> # put the pieces together inside a FasterRCNN model
@@ -296,7 +296,7 @@ def maskrcnn_resnet50_fpn(pretrained=False, progress=True,
 
     Example::
 
-        >>> model = pytorch_jacinto_ai.vision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
+        >>> model = pytorch_jacinto_ai.xvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
similarity index 89%
rename from modules/pytorch_jacinto_ai/vision/models/shufflenetv2.py
rename to modules/pytorch_jacinto_ai/xvision/models/shufflenetv2.py
index 3a45fa198fdabf2a6257e380f82a886ca8116cc4..74a408d0ed7765c2471f55b26cf57b91c5745c44 100644 (file)
@@ -82,7 +82,7 @@ class InvertedResidual(nn.Module):
 
 
 class ShuffleNetV2(nn.Module):
-    def __init__(self, stages_repeats, stages_out_channels, num_classes=1000):
+    def __init__(self, model_config, stages_repeats, stages_out_channels, num_classes=1000):
         super(ShuffleNetV2, self).__init__()
         self.num_classes = num_classes
 
@@ -95,19 +95,18 @@ class ShuffleNetV2(nn.Module):
         input_channels = 3
         output_channels = self._stage_out_channels[0]
         conv1 = nn.Sequential(
-            nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),
+            nn.Conv2d(input_channels, output_channels, 3, model_config.strides[0], 1, bias=False),
             nn.BatchNorm2d(output_channels),
             nn.ReLU(inplace=True),
         )
         input_channels = output_channels
 
-        maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        maxpool = nn.MaxPool2d(kernel_size=3, stride=model_config.strides[1], padding=1)
         layers = [('conv1',conv1), ('maxpool',maxpool)]
 
         stage_names = ['stage{}'.format(i) for i in [2, 3, 4]]
-        for name, repeats, output_channels in zip(
-                stage_names, stages_repeats, self._stage_out_channels[1:]):
-            seq = [InvertedResidual(input_channels, output_channels, 2)]
+        for idx, (name, repeats, output_channels) in enumerate(zip(stage_names, stages_repeats, self._stage_out_channels[1:])):
+            seq = [InvertedResidual(input_channels, output_channels, model_config.strides[2+idx])]
             for i in range(repeats - 1):
                 seq.append(InvertedResidual(output_channels, output_channels, 1))
             layers += [(name,nn.Sequential(*seq))]
@@ -152,8 +151,8 @@ class ShuffleNetV2(nn.Module):
         return self, change_names_dict
 
 
-def _shufflenetv2(arch, pretrained, progress, *args, **kwargs):
-    model = ShuffleNetV2(*args, **kwargs)
+def _shufflenetv2(arch, pretrained, progress, model_config, *args, **kwargs):
+    model = ShuffleNetV2(model_config, *args, **kwargs)
     if pretrained is True:
         change_names_dict = kwargs.get('change_names_dict', None)
         model_url = model_urls[arch]
@@ -171,7 +170,7 @@ def _shufflenetv2(arch, pretrained, progress, *args, **kwargs):
     return model
 
 
-def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs):
+def shufflenet_v2_x0_5(model_config, pretrained=False, progress=True, **kwargs):
     """
     Constructs a ShuffleNetV2 with 0.5x output channels, as described in
     `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
@@ -185,7 +184,7 @@ def shufflenet_v2_x0_5(pretrained=False, progress=True, **kwargs):
                          [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs)
 
 
-def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs):
+def shufflenet_v2_x1_0(model_config, pretrained=False, progress=True, **kwargs):
     """
     Constructs a ShuffleNetV2 with 1.0x output channels, as described in
     `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
@@ -199,7 +198,7 @@ def shufflenet_v2_x1_0(pretrained=False, progress=True, **kwargs):
                          [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs)
 
 
-def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs):
+def shufflenet_v2_x1_5(model_config, pretrained=False, progress=True, **kwargs):
     """
     Constructs a ShuffleNetV2 with 1.5x output channels, as described in
     `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
@@ -213,7 +212,7 @@ def shufflenet_v2_x1_5(pretrained=False, progress=True, **kwargs):
                          [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs)
 
 
-def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs):
+def shufflenet_v2_x2_0(model_config, pretrained=False, progress=True, **kwargs):
     """
     Constructs a ShuffleNetV2 with 2.0x output channels, as described in
     `"ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design"
@@ -223,5 +222,5 @@ def shufflenet_v2_x2_0(pretrained=False, progress=True, **kwargs):
         pretrained (bool): If True, returns a model pre-trained on ImageNet
         progress (bool): If True, displays a progress bar of the download to stderr
     """
-    return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress,
-                         [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs)
+    return _shufflenetv2('shufflenetv2_x2.0', pretrained, progress, model_config,
+                         [4, 8, 4], [24, 244, 488, 976, 2048],  **kwargs)
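
The ShuffleNetV2 changes above replace the hard-coded strides of 2 with per-stage values read from model_config.strides. A minimal sketch of the assumed layout of that attribute, following the indexing used in the modified constructor:

    # minimal sketch (not part of the patch): strides[0] -> conv1, strides[1] -> maxpool,
    # strides[2..4] -> first InvertedResidual block of stage2..stage4
    class DummyModelConfig:
        strides = (2, 2, 2, 2, 2)   # these defaults reproduce the original fixed-stride network

    # e.g. ShuffleNetV2(DummyModelConfig(), [4, 8, 4], [24, 116, 232, 464, 1024])
    # lowering the later entries to 1 keeps a larger final feature map, the usual reason
    # for making backbone strides configurable
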
similarity index 98%
rename from modules/pytorch_jacinto_ai/vision/ops/feature_pyramid_network.py
rename to modules/pytorch_jacinto_ai/xvision/ops/feature_pyramid_network.py
index 5ebca27fbe88d085da5562bf74b8c6b38bb0ce78..6c6c65bb4f2bc26a8adcb4038a57123c47486cbd 100644 (file)
@@ -27,7 +27,7 @@ class FeaturePyramidNetwork(nn.Module):
 
     Examples::
 
-        >>> m = pytorch_jacinto_ai.vision.ops.FeaturePyramidNetwork([10, 20, 30], 5)
+        >>> m = pytorch_jacinto_ai.xvision.ops.FeaturePyramidNetwork([10, 20, 30], 5)
         >>> # get some dummy data
         >>> x = OrderedDict()
         >>> x['feat0'] = torch.rand(1, 10, 64, 64)
similarity index 98%
rename from modules/pytorch_jacinto_ai/vision/ops/poolers.py
rename to modules/pytorch_jacinto_ai/xvision/ops/poolers.py
index d20f8a4327eb673dfaa4c9096e47fb80f8b4204a..13d1a7806320fff4a1d360d77a95d2c1d850162c 100644 (file)
@@ -54,7 +54,7 @@ class MultiScaleRoIAlign(nn.Module):
 
     Examples::
 
-        >>> m = pytorch_jacinto_ai.vision.ops.MultiScaleRoIAlign(['feat1', 'feat3'], 3, 2)
+        >>> m = pytorch_jacinto_ai.xvision.ops.MultiScaleRoIAlign(['feat1', 'feat3'], 3, 2)
         >>> i = OrderedDict()
         >>> i['feat1'] = torch.rand(1, 5, 64, 64)
         >>> i['feat2'] = torch.rand(1, 5, 32, 32)  # this feature won't be used in the pooling
similarity index 99%
rename from modules/pytorch_jacinto_ai/vision/transforms/functional.py
rename to modules/pytorch_jacinto_ai/xvision/transforms/functional.py
index 469f458ca29635a86381f29c73feb60833dc5264..b09ee1aae1a5648909bc0e6ca421e4430d77c9b4 100644 (file)
@@ -104,7 +104,7 @@ def to_tensor(pic):
 def to_pil_image(pic, mode=None):
     """Convert a tensor or an ndarray to PIL Image.
 
-    See :class:`~pytorch_jacinto_ai.vision.transforms.ToPILImage` for more details.
+    See :class:`~pytorch_jacinto_ai.xvision.transforms.ToPILImage` for more details.
 
     Args:
         pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
@@ -194,7 +194,7 @@ def normalize(tensor, mean, std, inplace=False):
     .. note::
         This transform acts out of place by default, i.e., it does not mutate the input tensor.
 
-    See :class:`~pytorch_jacinto_ai.vision.transforms.Normalize` for more details.
+    See :class:`~pytorch_jacinto_ai.xvision.transforms.Normalize` for more details.
 
     Args:
         tensor (Tensor): Tensor image of size (C, H, W) to be normalized.
@@ -381,7 +381,7 @@ def center_crop(img, output_size):
 def resized_crop(img, i, j, h, w, size, interpolation=Image.BILINEAR):
     """Crop the given PIL Image and resize it to desired size.
 
-    Notably used in :class:`~pytorch_jacinto_ai.vision.transforms.RandomResizedCrop`.
+    Notably used in :class:`~pytorch_jacinto_ai.xvision.transforms.RandomResizedCrop`.
 
     Args:
         img (PIL Image): Image to be cropped.
index 8546ff7dbc6c58fe246ff7ad55a866b4befd5a31..3b5ac5e0809a2e2fbd542f180c31bd545716b9e8 100644 (file)
@@ -6,8 +6,10 @@ pillow
 colorama
 progiter
 protobuf
-torch
-torchvision
+opencv-python
+torch>=1.5
+torchvision>=0.6
 tensorboard
 onnx
 packaging
+
index c552abf956fb837d52c7823fa374115e0be7f339..27fc920a42ab107644aab21826b2148e937ebb15 100644 (file)
@@ -1,3 +1,2 @@
 h5py
-hdf5
-opencv
\ No newline at end of file
+hdf5
\ No newline at end of file
index 802b5ffc5e12136c256ac9c7bd8fe9aa951fca9c..3ad2b40ff0beebe34979e9834c81043cc4bd2bde 100755 (executable)
@@ -81,7 +81,7 @@ args.pretrained = './data/modelzoo/pytorch/semantic_segmentation/cityscapes/jaci
                             #'./data/checkpoints/cityscapes_depth_semantic_five_class_motion_image_dof_conf/0p9_release/2019-06-27-13-50-10_cityscapes_depth_semantic_five_class_motion_image_dof_conf_deeplabv3lite_mobilenetv2_ericsun_mi4_resize768x384_traincrop768x384/model_best.pth.tar'
                             #'./data/modelzoo/pretrained/pytorch/cityscapes_segmentation/v0.9-2018-12-07-19:38:26_cityscapes_segmentation_deeplabv3lite_mobilenetv2_relu_resize768x384_traincrop768x384_(68.9%)/model_best.pth.tar'
                             #'./data/checkpoints/store/saved/cityscapes_segmentation/v0.7-2018-10-25-13:07:38_cityscapes_segmentation_deeplabv3lite_mobilenetv2_relu_resize1024x512_traincrop512x512_(71.5%)/model_best.pth.tar'
-                            #'./data/modelzoo/pretrained/pytorch/imagenet_classification/pytorch_jacinto_ai.vision/resnet50-19c8e357.pth'
+                            #'./data/modelzoo/pretrained/pytorch/imagenet_classification/pytorch_jacinto_ai.xvision/resnet50-19c8e357.pth'
                             #'./data/modelzoo/pretrained/pytorch/imagenet_classification/ericsun99/MobileNet-V2-Pytorch/mobilenetv2_Top1_71.806_Top2_90.410.pth.tar'
 
 # args.resume = '/user/a0132471/Files/pytorch/pytorch-jacinto-models/checkpoints/cityscapes_depth_semantic_five_class_motion_image_dof_conf/2019-08-13-13-49-29_cityscapes_depth_semantic_five_class_motion_image_dof_conf_deeplabv3lite_mobilenetv2_ericsun_mi4_resize768x384_traincrop768x384/checkpoint.pth.tar'
index 7b2378178a96f7249699a3972d78f2509583a803..cd623a2fca140c39f9aac1b0cf8074c3b3f831df 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@ if __name__ == '__main__':
         long_description_content_type = 'text/markdown',
         url = 'http://git.ti.com/jacinto-ai/pytorch-jacinto-ai',
         author = 'Jacinto AI DevKit Team',
-        author_email = 'jacinto-ai@list.ti.com',
+        author_email = 'jacinto-ai-devkit@list.ti.com',
         classifiers = [
            'Development Status :: 4 - Beta',
            'Programming Language :: Python :: 3.7'
index abfcf245e9e0b3bb355e5eafcca72385247e28a0..8e821e251c5796e273f478562b228208cc9020ad 100755 (executable)
--- a/setup.sh
+++ b/setup.sh
@@ -14,6 +14,7 @@ echo 'Installing python packages...'
 conda install --yes --file requirements_conda.txt
 pip install -r requirements.txt
 
+
 ######################################################################
 #NOTE: THIS STEP INSTALLS THE EDITABLE LOCAL MODULE pytorch-jacinto-ai
 #NOTE: THIS IS THE MOST IMPORTANT STEP WITHOUT WHICH NONE OF THE SCRIPTS WILL WORK