git.TI.com/gitweb - jacinto-ai/caffe-jacinto.git/commitdiff
GS enabling fix
author     Sergei Nikolaev <snikolaev@nvidia.com>
Mon, 12 Mar 2018 22:26:51 +0000 (15:26 -0700)
committer  Sergei Nikolaev <snikolaev@nvidia.com>
Mon, 12 Mar 2018 22:26:51 +0000 (15:26 -0700)
include/caffe/data_reader.hpp
include/caffe/net.hpp
models/modelBuilder/build_resnet.py
models/modelBuilder/layers.py
src/caffe/net.cpp
src/caffe/util/gpu_memory.cpp

diff --git a/include/caffe/data_reader.hpp b/include/caffe/data_reader.hpp
index 9c2fccc48a47ee5195bdb5ea33dc7306d6ffb10e..cc53b5c807d6b667d5ac4f5c93ef042915c6ed7a 100644 (file)
--- a/include/caffe/data_reader.hpp
+++ b/include/caffe/data_reader.hpp
@@ -122,9 +122,7 @@ class DataReader : public InternalThread {
   }
 
   void free_push(size_t queue_id, const shared_ptr<DatumType>& datum) {
-    if (!sample_only_) {
-      free_[queue_id]->push(datum);
-    }
+    free_[queue_id]->push(datum);
   }
 
   shared_ptr<DatumType> free_pop(size_t queue_id) {
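
The hunk above makes free_push() return the datum to the free queue unconditionally instead of only when sample_only_ is unset. As background, the free_/full_ naming suggests the usual producer-consumer buffer-recycling pattern; the sketch below illustrates only that pattern and is not the repository's DataReader or BlockingQueue code (Datum and the queue class here are stand-ins, and the starvation comment is one plausible motivation, not taken from the commit).

// Illustrative sketch of a free/full buffer-recycling queue pair, in the
// spirit of free_push()/free_pop(); NOT the repository's implementation.
#include <condition_variable>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>

struct Datum { int label = 0; };  // stand-in for the serialized sample type

class BlockingQueue {
 public:
  void push(const std::shared_ptr<Datum>& d) {
    { std::lock_guard<std::mutex> lk(m_); q_.push(d); }
    cv_.notify_one();
  }
  std::shared_ptr<Datum> pop() {
    std::unique_lock<std::mutex> lk(m_);
    cv_.wait(lk, [this] { return !q_.empty(); });
    auto d = q_.front();
    q_.pop();
    return d;
  }
 private:
  std::queue<std::shared_ptr<Datum>> q_;
  std::mutex m_;
  std::condition_variable cv_;
};

int main() {
  BlockingQueue free_q, full_q;
  free_q.push(std::make_shared<Datum>());  // pre-allocated empty buffer
  auto d = free_q.pop();                   // producer takes an empty buffer
  d->label = 7;                            // ...fills it...
  full_q.push(d);                          // ...and hands it to the consumer
  auto used = full_q.pop();                // consumer processes it
  // Returning the buffer unconditionally (as in the hunk above) keeps the
  // free queue from running dry; one plausible reason for the change.
  free_q.push(used);
  std::cout << "recycled label " << used->label << std::endl;
  return 0;
}
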
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 1349c3c023527095b6a5663401f9c9cdd1f16374..d609ed0bcfe3d8cffa5e4204593f14831e6f0fc4 100644 (file)
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -281,7 +281,7 @@ class Net {
   }
 
   bool global_grad_scale_enabled() const {
-    return global_grad_scale_param_ > 1.F;
+    return has_global_grad_scale_param_ && global_grad_scale_param_ > 0.F;
   }
 
   void update_grad_scale();
@@ -432,7 +432,7 @@ class Net {
   size_t infer_count_;
   std::atomic_llong wgrad_sq_;
   float global_grad_scale_coeff_, global_grad_scale_param_;
-  bool global_grad_scale_adaptive_;
+  bool has_global_grad_scale_param_, global_grad_scale_adaptive_;
   /// Inner net runs on singe GPU (see recurrent layers)
   const bool inner_net_;
 
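Taken together, the two net.hpp hunks change when global gradient scaling counts as enabled: previously only a value strictly greater than 1 turned it on; now any explicitly set, positive global_grad_scale does. The snippet below is a minimal illustration of that difference; GradScaleConfig, has_param and param are made-up stand-ins for NetParameter::has_global_grad_scale()/global_grad_scale(), not the repository's API.

// Contrast of the old and new enabling checks for global gradient scaling.
// This is a sketch, not code from net.hpp.
#include <iostream>

struct GradScaleConfig {
  bool has_param = false;  // was global_grad_scale set explicitly?
  float param = 1.0f;      // configured global_grad_scale value

  // Old check: only values strictly above 1 enabled scaling.
  bool enabled_old() const { return param > 1.0f; }

  // New check: any explicitly set, positive value enables scaling.
  bool enabled_new() const { return has_param && param > 0.0f; }
};

int main() {
  GradScaleConfig c;
  c.has_param = true;
  c.param = 1.0f;  // e.g. an explicit "global_grad_scale: 1" in the prototxt
  std::cout << "old: " << c.enabled_old()   // 0 -> scaling silently off
            << " new: " << c.enabled_new()  // 1 -> scaling enabled
            << std::endl;
  return 0;
}
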
diff --git a/models/modelBuilder/build_resnet.py b/models/modelBuilder/build_resnet.py
index 49995df52f5aae42b86b43a18a8428a8c0578fd6..8effdc5e01f3fdc9ae52107966cba4d920141b59 100755 (executable)
--- a/models/modelBuilder/build_resnet.py
+++ b/models/modelBuilder/build_resnet.py
@@ -181,5 +181,14 @@ def main():
     fp = open("resnet_152.prototxt", 'w')
     fp.write(model)
 
+    netConfig = numpy.matrix([
+        [ 64, 3, 1, 0],
+        [128, 8, 1, 1],
+        [256, 52, 1, 1],
+        [512, 3, 1, 1]])
+    model = buildResidualModel(netConfig, name="Resnet200", net_type="large")
+    fp = open("resnet_200.prototxt", 'w')
+    fp.write(model)
+
 if __name__ == '__main__':
     main()
diff --git a/models/modelBuilder/layers.py b/models/modelBuilder/layers.py
index c07a6bfa7e2bc34e4168f623211d822e211aa223..6fabb49632ffd5f765d647227ec8462ac6f15f52 100755 (executable)
--- a/models/modelBuilder/layers.py
+++ b/models/modelBuilder/layers.py
@@ -584,7 +584,7 @@ layer {{
   bottom: "{bottom_2}"
   top: "{top}"
   accuracy_param {{ top_k: {k} }}
-#  include {{ phase: TEST }}
+  include {{ phase: TEST }}
 }}'''.format(name=name, top=name, bottom_1=bottom_1, bottom_2=bottom_2, k=k)
     model += layer
     return model, name
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 3dad99374b93efba3828a5fd8dc248581494e04e..e1e7042f7705fa952e740fe009117d54f495f5b0 100644 (file)
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -106,6 +106,7 @@ void Net::Init(const NetParameter& in_param) {
 
   wgrad_sq_.store(0LL);
   global_grad_scale_coeff_ = 1.F;
+  has_global_grad_scale_param_ = in_param.has_global_grad_scale(); 
   global_grad_scale_param_ = in_param.global_grad_scale();
   global_grad_scale_adaptive_ = in_param.global_grad_scale_adaptive();
 
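For context on what is being enabled here: global gradient (loss) scaling is the standard mixed-precision technique of multiplying the loss by a scale factor so that small gradients do not underflow in FP16, then dividing the gradients by the same factor before the weight update. The snippet below only illustrates that arithmetic with made-up numbers; it is not update_grad_scale() or any other code from net.cpp.

// Generic illustration of loss/gradient scaling; not this repository's code.
#include <cstdio>

int main() {
  const float grad_scale = 128.0f;  // e.g. global_grad_scale from the prototxt
  const float loss = 0.0042f;

  // Backprop from a scaled loss implicitly multiplies every gradient by
  // grad_scale, keeping small values representable in FP16.
  const float scaled_loss = loss * grad_scale;
  const float scaled_grad = 0.00031f * grad_scale;  // some raw gradient, scaled

  // Before the solver applies the update, gradients are unscaled again.
  const float grad = scaled_grad / grad_scale;

  std::printf("scaled loss %.4f, restored gradient %.5f\n", scaled_loss, grad);
  return 0;
}
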
diff --git a/src/caffe/util/gpu_memory.cpp b/src/caffe/util/gpu_memory.cpp
index 516f1cd141ff82459256de9a94b700adead30985..c8c7857f40b719598afa3d545615a386ea1812a7 100644 (file)
--- a/src/caffe/util/gpu_memory.cpp
+++ b/src/caffe/util/gpu_memory.cpp
@@ -148,6 +148,14 @@ bool GPUMemory::Manager::try_allocate(void** ptr, size_t size, int device,
     // Clean Cache & Retry logic is inside now
     status = cub_allocator_->DeviceAllocate(device, ptr, size, pstream->get(), size_allocated);
     if (status == cudaSuccess && device > INVALID_DEVICE) {
+//      if (device == 0) {
+//        DevInfo dev_info;
+//        CUDA_CHECK(cudaMemGetInfo(&dev_info.free_, &dev_info.total_));
+//        size_t allocated = dev_info.total_ - dev_info.free_;
+//        size_t pcent = 100UL* allocated / dev_info.total_;
+//        std::string bar(pcent, '*');
+//        std::cout << bar << " " << pcent << "%" << std::endl;
+//      }
       if (size_allocated > 0) {
         if (dev_info_[device].free_ < update_thresholds_[device]) {
           update_dev_info(device);
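
The block added above is committed commented out. A standalone, compilable version of the same idea (querying device memory with cudaMemGetInfo() and printing a usage bar) might look like the sketch below; this is a rough sketch with simplified error handling, not code from gpu_memory.cpp, and the device index 0 only mirrors the commented-out condition.

// Standalone sketch of the commented-out usage bar: query free/total device
// memory with cudaMemGetInfo() and print a percentage bar. Compile with nvcc.
#include <cuda_runtime.h>
#include <iostream>
#include <string>

int main() {
  size_t free_bytes = 0, total_bytes = 0;
  if (cudaSetDevice(0) != cudaSuccess ||
      cudaMemGetInfo(&free_bytes, &total_bytes) != cudaSuccess) {
    std::cerr << "CUDA query failed" << std::endl;
    return 1;
  }
  const size_t allocated = total_bytes - free_bytes;
  const size_t pcent = 100UL * allocated / total_bytes;
  const std::string bar(pcent, '*');
  std::cout << bar << " " << pcent << "%" << std::endl;
  return 0;
}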