summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 0710704)
raw | patch | inline | side by side (parent: 0710704)
author | Sergei Nikolaev <snikolaev@nvidia.com> | Wed, 17 May 2017 20:28:14 +0000 (13:28 -0700)
committer | Sergei Nikolaev <snikolaev@nvidia.com> | Wed, 17 May 2017 22:02:55 +0000 (15:02 -0700)
index b18fc26cfd8d471ec0b753e16450fd69297f3729..0c1f993fb1e32cfa7c5ca198894f2d3f47517d79 100644 (file)
source: "examples/mnist/mnist_train_lmdb"
batch_size: 64
backend: LMDB
+
+cache: true
+shuffle: true
+
+#threads: 1
+#parser_threads: 1
+
}
}
layer {
diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp
index 67fdd46e8c793691cb559f7a490ae623e4636b0e..2b9e8c6550186e95fe219889b9960c125d1a17ba 100644 (file)
--- a/src/caffe/parallel.cpp
+++ b/src/caffe/parallel.cpp
ncclSum,
nccl_comm_,
comm_stream_->get()));
+ syncCommStream();
#endif // USE_NCCL
#endif // CPU_ONLY
}
#ifdef USE_NCCL
NCCL_CHECK(ncclAllReduce(bucket, bucket, count, nccl::nccl_type(type),
ncclSum, nccl_comm_, comm_stream_->get()));
+ syncCommStream();
#endif // USE_NCCL
#endif // CPU_ONLY
}
index 88a8d9c559e3571b23f48221a42f78f25798142d..8e75f886ceb849f3b62c1a25335f6713dc0d54b7 100644 (file)
(N, reinterpret_cast<__half*>(g), reinterpret_cast<__half*>(w),
reinterpret_cast<__half*>(h), reinterpret_cast<__half*>(h2),
momentum, delta, local_rate, local_decay, reg_type == "L2", clear_grads);
-CUDA_POST_KERNEL_CHECK;
-CUDA_CHECK(cudaStreamSynchronize(stream));
+ CUDA_POST_KERNEL_CHECK;
+ CUDA_CHECK(cudaStreamSynchronize(stream));
}
template void adadelta_reg_update_and_clear_gpu<float16, float>(int, float16*, float*, float*,
index 7b723ff1c6f31c347a1e5b97f047b45ee9b26efa..9d6e18c1d572f8fffbcfe5d63e2ec07ef87dad37 100644 (file)
g, w, h,
momentum, local_rate, local_decay, reg_type == "L2", clear_grads);
CUDA_POST_KERNEL_CHECK;
- if (synced) {
+// if (synced) {
CUDA_CHECK(cudaStreamSynchronize(stream));
- }
+// }
}
template void sgd_reg_update_all_and_clear_gpu<float16, double>(int, float16*, double*, double*,
reinterpret_cast<__half*>(g), reinterpret_cast<__half*>(w), reinterpret_cast<__half*>(h),
momentum, local_rate, local_decay, reg_type == "L2", clear_grads);
CUDA_POST_KERNEL_CHECK;
- if (synced) {
+// if (synced) {
CUDA_CHECK(cudaStreamSynchronize(stream));
- }
+// }
}
template<>
index 951d75f02f72bef8d630775e5b5b669f87ae02d4..c20b0f0e10916e60220dfa5e036c0df0218cb37f 100644 (file)
cudaStream_t stream;
CUBLAS_CHECK(cublasGetStream(cublas_handle, &stream));
CUBLAS_CHECK(cublasSscal(cublas_handle, N, &alpha, X, 1));
- if (sync) {
+// if (sync) {
CUDA_CHECK(cudaStreamSynchronize(stream));
- }
+// }
}
template<>
cudaStream_t stream;
CUBLAS_CHECK(cublasGetStream(cublas_handle, &stream));
CUBLAS_CHECK(cublasDscal(cublas_handle, N, &alpha, X, 1));
- if (sync) {
+// if (sync) {
CUDA_CHECK(cudaStreamSynchronize(stream));
- }
+// }
}
template<>
@@ -272,9 +272,9 @@ void caffe_gpu_scal_float16(const int n, const float16 alpha, float16* x, cudaSt
scale_in_place_kernel <<<CAFFE_GET_BLOCKS_HALF(n2), CAFFE_CUDA_NUM_THREADS_HALF, 0, stream>>>
(n2, alpha2, reinterpret_cast<__half2*>(x));
CUDA_POST_KERNEL_CHECK;
- if (sync) {
+// if (sync) {
CUDA_CHECK(cudaStreamSynchronize(stream));
- }
+// }
}
template<>
scale_in_place_kernel_fp16<<<CAFFE_GET_BLOCKS_HALF(n2), CAFFE_CUDA_NUM_THREADS_HALF, 0, stream>>>
(n2, alpha, reinterpret_cast<__half2*>(x));
CUDA_POST_KERNEL_CHECK;
- if (sync) {
+// if (sync) {
CUDA_CHECK(cudaStreamSynchronize(stream));
- }
+// }
}
template<>