summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 183b046)
raw | patch | inline | side by side (parent: 183b046)
author | Sergei Nikolaev <snikolaev@nvidia.com> | |
Sat, 16 Dec 2017 09:11:57 +0000 (01:11 -0800) | ||
committer | Sergei Nikolaev <snikolaev@nvidia.com> | |
Sat, 16 Dec 2017 09:11:57 +0000 (01:11 -0800) |
18 files changed:
index 08e7c019e130f1ac5f9f2f74909cc16e577995ef..6659f8270930e4d4f5d1b075a176be9a8b759705 100644 (file)
namespace caffe {
class Batch {
-public:
+ public:
shared_ptr<Blob> data_;
shared_ptr<Blob> label_;
}
DISABLE_COPY_MOVE_AND_ASSIGN(Batch);
-private:
+
+ private:
size_t id_;
Packing data_packing_;
};
void resize(bool skip_to_next);
-private:
+ private:
BBQ processed_full_;
BBQ processed_free_;
TBlob<Btype> tmp_;
index 20ae5fd230894d1281082b48b89e82b9eb506494..2a9d79bb85243c8e4f878f30f85b963ba03cc0b5 100644 (file)
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
virtual void PrintRate(float rate = 0) {}
virtual float ApplyUpdate(int param_id, void* handle, bool clear_grads) = 0;
-protected:
+ protected:
string SnapshotFilename(const string extension);
string SnapshotToBinaryProto();
string SnapshotToHDF5();
index ddb2f6ed2916f8e2b921189a7a994fe52c0538c2..4c4709c939cfa1735a8e377f4e1b6f98d1c9aff9 100644 (file)
template<>
class dataType<float> {
-public:
+ public:
static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
static const cudnnDataType_t conv_type = CUDNN_DATA_FLOAT;
static float oneval, zeroval;
template<>
class dataType<double> {
-public:
+ public:
static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
static const cudnnDataType_t conv_type = CUDNN_DATA_DOUBLE;
static double oneval, zeroval;
template<>
class dataType<float16> {
-public:
+ public:
static const cudnnDataType_t type = CUDNN_DATA_HALF;
static const cudnnDataType_t conv_type = CUDNN_DATA_HALF;
static float oneval, zeroval;
@@ -168,7 +168,7 @@ inline void setTensor4dDesc(cudnnTensorDescriptor_t *desc, cudnnDataType_t type,
stride_h = w * stride_w;
stride_c = h * stride_h;
stride_n = c * stride_c;
- } else if (packing == NHWC){
+ } else if (packing == NHWC) {
stride_c = 1;
stride_w = c * stride_c;
stride_h = w * stride_w;
index bbcc4f4b9b6677857481b1fd5cfaf5203d668ad2..d85081a5f51ac4771a7f574d448d23ccea3d78c7 100644 (file)
char content[N]; \
}
-#define SHMEMN(N,FN) \
+#define SHMEMN(N, FN) \
template<> struct __dyn_shmem_##FN##__<n_bytes<N>> { \
__device__ n_bytes<N>* getPtr() { \
extern __shared__ n_bytes<N> FN##ptr##N[]; \
}
#define CAFFE_GPU_SHMEM(FN) \
-SHMEMN(2,FN); \
-SHMEMN(4,FN); \
-SHMEMN(6,FN); \
-SHMEMN(8,FN); \
-SHMEMN(10,FN); \
-SHMEMN(12,FN); \
-SHMEMN(14,FN); \
-SHMEMN(16,FN)
+SHMEMN(2, FN); \
+SHMEMN(4, FN); \
+SHMEMN(6, FN); \
+SHMEMN(8, FN); \
+SHMEMN(10, FN); \
+SHMEMN(12, FN); \
+SHMEMN(14, FN); \
+SHMEMN(16, FN)
#define REDUCTION_GROUP_AMAX 0
#define REDUCTION_GROUP_ASUM 1
index a1dd96e1feb9771709c36308df24d73b749012f5..8c78210e5f885172be75eab6e0334fc9f1ce535d 100644 (file)
@@ -236,9 +236,9 @@ void FloatCVMatToBuf(const cv::Mat& cv_img, size_t buf_len, Dtype* buf, bool rep
}
} else {
if (cv_img.depth() == CV_32F && tp<Dtype>() == FLOAT) {
- std::memcpy(buf, cv_img.ptr<float>(0), img_size * sizeof(float));
+ std::memcpy(buf, cv_img.ptr<float>(0), img_size * sizeof(float)); // NOLINT(caffe/alt_fn)
} else if (cv_img.depth() == CV_64F && tp<Dtype>() == DOUBLE) {
- std::memcpy(buf, cv_img.ptr<double>(0), img_size * sizeof(double));
+ std::memcpy(buf, cv_img.ptr<double>(0), img_size * sizeof(double)); // NOLINT(caffe/alt_fn)
} else {
if (cv_img.depth() == CV_8U) {
for (size_t i = 0UL; i < img_size; ++i) {
index 6fbe5eac555af0dfc94be8ed18695b8d61e8dddb..41a6f01fc521b065b0b85947683a6673d5435d5b 100644 (file)
queues_num_(queues_num),
next_batch_queue_(0UL),
transform_param_(transform_param),
- gpu_transform_(gpu_transform){
+ gpu_transform_(gpu_transform) {
shared_ptr<Batch> processed = make_shared<Batch>(tp<Ftype>(), tp<Ftype>());
processed_free_.push(processed);
resize(false);
batch->set_id((size_t) -1L);
prefetches_free_[next_batch_queue_]->push(batch);
next_batch_queue();
-
}
}catch (boost::thread_interrupted&) {
}
-
}
template<typename Ftype, typename Btype>
diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp
index 5aaba201167776f884ec8115665587f07c49e0c5..a2869cc79f642c6d9b53710a91d7f1039e6299a3 100644 (file)
--- a/src/caffe/blob.cpp
+++ b/src/caffe/blob.cpp
Tensor::copy_helper(is_gpu, count_,
is_gpu ? src->gpu_data() : src->cpu_data(), src_type,
is_gpu ? dst->mutable_gpu_data() : dst->mutable_cpu_data(), dst_type);
- } while(false);
+ } while (false);
#ifndef CPU_ONLY
} else {
CHECK(srct != dstt);
index 47361a740f084511dffb4f2181c930a4506c7907..d0454efa15b86422af967d16cd4106572f57a34c 100644 (file)
vertical_stretch_upper_(param_.vertical_stretch_upper()),
horizontal_stretch_lower_(param_.horizontal_stretch_lower()),
horizontal_stretch_upper_(param_.horizontal_stretch_upper()),
- allow_upscale_(param_.allow_upscale()){
+ allow_upscale_(param_.allow_upscale()) {
// check if we want to use mean_file
if (param_.has_mean_file()) {
CHECK_EQ(param_.mean_value_size(), 0)
index dcb8f58db62e3a6e696aae8cf45a43b7c66668d1..1b66d60c27fe9b2c7f95dc6b62d93345fb182927 100644 (file)
if (has_mean_file) {
CHECK_EQ(datum_channels, data_mean_.channels());
mean = data_mean_.gpu_data();
- }
- else if (has_mean_values) {
+ } else if (has_mean_values) {
if (mean_values_gpu_.empty()) {
CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels)
<< "Specify either 1 mean_value or as many as channels: "
index 98a92c5900a988e0d9c8bcaf0430ee7158522f9f..872678c82ae9e0f273eeef63d77a57862287f1b5 100644 (file)
@@ -110,20 +110,13 @@ void BasePrefetchingDataLayer<Ftype, Btype>::InternalThreadEntryN(size_t thread_
try {
while (!must_stop(thread_id)) {
const size_t qid = this->queue_id(thread_id);
-#ifndef CPU_ONLY
shared_ptr<Batch> batch = batch_transformer_->prefetched_pop_free(qid);
-
CHECK_EQ((size_t) -1L, batch->id());
load_batch(batch.get(), thread_id, qid);
if (must_stop(thread_id)) {
break;
}
batch_transformer_->prefetched_push_full(qid, batch);
-#else
- shared_ptr<Batch> batch = prefetches_free_[qid]->pop();
- load_batch(batch.get(), thread_id, qid);
- prefetches_full_[qid]->push(batch);
-#endif
if (iter0) {
if (this->net_iteration0_flag_ != nullptr) {
this->net_iteration0_flag_->wait();
index fa4c24e5602365f112b143bdd22e4cf77840c609..bae3a33c3a780319c8ae2694db09ec1d8796d820 100644 (file)
DataLayer<Ftype, Btype>::DataLayerSetUp(const vector<Blob*>& bottom, const vector<Blob*>& top) {
const LayerParameter& param = this->layer_param();
const int batch_size = param.data_param().batch_size();
- const bool use_gpu_transform = this->is_gpu_transform();
const bool cache = cache_ && this->phase_ == TRAIN;
const bool shuffle = cache && shuffle_ && this->phase_ == TRAIN;
@@ -170,13 +169,15 @@ DataLayer<Ftype, Btype>::DataLayerSetUp(const vector<Blob*>& bottom, const vecto
top[0]->safe_reshape_mode(true);
top[0]->Reshape(top_shape);
- if (use_gpu_transform) {
+#ifndef CPU_ONLY
+ if (this->is_gpu_transform()) {
LOG(INFO) << this->print_current_device() << " Transform on GPU enabled";
tmp_gpu_buffer_.resize(this->threads_num());
for (int i = 0; i < this->tmp_gpu_buffer_.size(); ++i) {
this->tmp_gpu_buffer_[i] = make_shared<GPUMemory::Workspace>();
}
}
+#endif
// label
vector<int> label_shape(1, batch_size);
if (this->output_labels_) {
@@ -264,11 +265,18 @@ void DataLayer<Ftype, Btype>::load_batch(Batch* batch, int thread_id, size_t que
Ftype* top_label = this->output_labels_ ?
batch->label_->template mutable_cpu_data_c<Ftype>(false) : nullptr;
+#ifndef CPU_ONLY
void* dst_gptr = nullptr;
+#endif
+ Btype* dst_cptr = nullptr;
if (use_gpu_transform) {
+#ifndef CPU_ONLY
size_t holder_size = top_shape[0] * top_shape[1] * init_datum_height * init_datum_width;
tmp_gpu_buffer_[thread_id]->safe_reserve(holder_size);
dst_gptr = tmp_gpu_buffer_[thread_id]->data();
+#endif
+ } else {
+ dst_cptr = batch->data_->template mutable_cpu_data_c<Btype>(false);
}
size_t current_batch_id = 0UL;
@@ -295,7 +303,8 @@ void DataLayer<Ftype, Btype>::load_batch(Batch* batch, int thread_id, size_t que
src_ptr = datum->data().size() > 0 ?
&datum->data().front() :
reinterpret_cast<const char*>(&datum->float_data().Get(0));
- std::memcpy(src_buf.data() + src_buf_pos * datum_size, src_ptr, datum_size);
+ std::memcpy(src_buf.data() + // NOLINT(caffe/alt_fn)
+ src_buf_pos * datum_size, src_ptr, datum_size); // NOLINT(caffe/alt_fn)
}
++src_buf_pos;
if (src_buf_pos == src_buf_items) {
@@ -315,7 +324,6 @@ void DataLayer<Ftype, Btype>::load_batch(Batch* batch, int thread_id, size_t que
// Get data offset for this datum to hand off to transform thread
const size_t offset = batch->data_->offset(item_id);
CHECK_EQ(0, offset % buf_len);
- Btype* dst_cptr =batch->data_->template mutable_cpu_data_c<Btype>(false);
vector<int> shape = this->dt(thread_id)->Transform(datum.get(), dst_cptr + offset,
buf_len, packing, false);
CHECK_EQ(top_shape[1], shape[1]) << "Number of channels can't vary in the same batch";
@@ -331,54 +339,8 @@ void DataLayer<Ftype, Btype>::load_batch(Batch* batch, int thread_id, size_t que
CUDA_CHECK(cudaMemcpyAsync(
reinterpret_cast<char*>(dst_gptr) + last_item_id * datum_size,
src_buf.data(), src_buf_pos * datum_size, cudaMemcpyHostToDevice, stream));
-// CUDA_CHECK(cudaStreamSynchronize(stream));
+ CUDA_CHECK(cudaStreamSynchronize(stream));
}
- }
-// else {
-// const size_t gpu_holder_size = sizeof(Btype) *
-// top_shape[0] * top_shape[1] * top_shape[2] * top_shape[3];
-//
-// // LOG(INFO) << this->print_current_device() << " ********** " << top_->at(0)->to_string();
-//
-//// tmp_gpu_holder_[thread_id]->safe_reserve(gpu_holder_size);
-//// //tmp_cpu_holder_[thread_id].size());
-//// dst_gptr = tmp_gpu_holder_[thread_id]->data();
-//
-// CUDA_CHECK(cudaMemcpyAsync(dst_gptr, dst_cptr, gpu_holder_size,
-// cudaMemcpyHostToDevice, stream));
-// //batch->data_->template mutable_gpu_data_c<Btype>(false);
-// }
-
-//// if (needs_repack) {
-//// void* repack_dst_gptr =
-// cudnnHandle_t handle = Caffe::cudnn_handle();
-// cudnnTensorDescriptor_t src_desc, dst_desc;
-// CUDNN_CHECK(cudnnCreateTensorDescriptor(&src_desc));
-// CUDNN_CHECK(cudnnCreateTensorDescriptor(&dst_desc));
-// cudnn::setTensor4dDesc(&src_desc,
-// use_gpu_transform ? CUDNN_DATA_INT8 : cudnn_dt<Btype>(),
-// packing, batch->data_->shape());
-// cudnn::setTensor4dDesc(&dst_desc, cudnn_dt<Ftype>(), this->transform_param_.forward_packing(),
-// batch->data_->shape());
-//
-// CUDNN_CHECK(cudnnTransformTensor(handle,
-// cudnn::one(tp<float>()),
-// src_desc, dst_gptr,
-// cudnn::zero(tp<Ftype>()),
-// dst_desc, batch->data_->template mutable_gpu_data_c<Ftype>(false)));
-// //tmp_gpu_buffer_[thread_id]->data()));
-// CUDA_CHECK(cudaStreamSynchronize(stream));
-// CUDNN_CHECK(cudnnDestroyTensorDescriptor(src_desc));
-// CUDNN_CHECK(cudnnDestroyTensorDescriptor(dst_desc));
-//
-//// dst_gptr = tmp_gpu_holder_[thread_id]->data();
-//// batch->data_->Reshape(top_shape);
-//// datum_sizeof_element = sizeof(Ftype);
-//
-// packing = this->transform_param_.forward_packing();
-//// }
-
- if (use_gpu_transform) {
this->dt(thread_id)->TransformGPU(top_shape[0], top_shape[1],
init_datum_height, // non-crop
init_datum_width, // non-crop
index 05b21330d69012ad64e78b2b997d18c8eb0e3ddb..5dab005f62858282ffc1d552a3ac50f55fb9dde8 100644 (file)
outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_,
reinterpret_cast<half*>(counts));
} else {
+ // NOLINT_NEXT_LINE(whitespace/operators)
SoftmaxLossForwardGPU<<<CAFFE_GET_BLOCKS(nthreads),
CAFFE_CUDA_NUM_THREADS, 0, stream>>> (nthreads, prob_data, label, loss_data,
outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts);
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index 5abd69ffc9f454441d1345a835ff33f537a76455..90842d645194744343ef98a08f46aec4ca5d0792 100644 (file)
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
const int top = *au_ids.rbegin();
id_from_ret = -1;
const std::map<size_t, std::set<int>>& ltop = ltop_[type_id];
- for (auto lit = ltop.rbegin(); lit != ltop.rend(); ++lit) { //size_t layer_id = top_layer_id; layer_id > 0UL; --layer_id) {
+ for (auto lit = ltop.rbegin(); lit != ltop.rend(); ++lit) {
if (lit->second.empty() || *lit->second.begin() > top) {
continue;
}
cb->allreduce(type_id, param_id);
cb->reduce_barrier(type_id);
}
- this->learnable_params()[param_id]->scale_diff(1.F / (Caffe::solver_count() * global_grad_scale()),
+ this->learnable_params()[param_id]->
+ scale_diff(1.F / (Caffe::solver_count() * global_grad_scale()),
Caffe::cublas_handle());
// Also need to barrier to make sure lock isn't undone
// until all have completed, but the current nature of
}
}
+#endif
+
const vector<Type>& Net::learnable_types(bool reset) {
if (reset || learnable_types_.empty()) {
learnable_types_.clear();
return learnable_types_;
}
-#endif
-
} // namespace caffe
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 8245381116e0a44c3623e0717514f9b5a1d10fd2..a74b80ed96ba100372010020eeaf8feeda2fd48d 100644 (file)
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
iteration_start_signal(0);
#endif
for (int i = 0; i < param_.iter_size(); ++i) {
-
loss += net_->ForwardBackward(i + 1 == param_.iter_size());
-
if (i == 0) {
if (first_loop) {
iter0_flag_.set();
index fee1d794d303142ef3727b9a6901a999df1ce252..5d8e3f10a096a0f5fa10ff6a2ba474a1d5e93b01 100644 (file)
template class BlockingQueue<shared_ptr<caffe::TBlob<float>>>;
template class BlockingQueue<shared_ptr<caffe::TBlob<double>>>;
+#ifndef CPU_ONLY
template class BlockingQueue<shared_ptr<caffe::TBlob<float16>>>;
+#endif
} // namespace caffe
index 1d990594bb0cc583c121618756e9545f64569968..ade5f94eb5eda03c70b991a2a70cd1f0094f3994 100644 (file)
}
void GPUMemory::Manager::GetInfo(size_t* free_mem, size_t* total_mem, bool with_update) {
- CHECK(cub_allocator_) << "Forgot to add 'caffe::GPUMemory::Scope gpu_memory_scope(gpus);' to your main()?";
+ CHECK(cub_allocator_) <<
+ "Forgot to add 'caffe::GPUMemory::Scope gpu_memory_scope(gpus);' to your main()?";
int cur_device;
CUDA_CHECK(cudaGetDevice(&cur_device));
if (with_update) {
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index 3d7b914d8a45cf96c803080d7c158546d1211b4a..632b5fa55eaa4005320b88cd937bc5915080ff8d 100644 (file)
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
LOG(ERROR) << "Could not decode datum";
}
CHECK_EQ(cv_img.channels() * cv_img.rows * cv_img.cols, buf_len);
- std::memcpy(buf, cv_img.data, buf_len);
+ std::memcpy(buf, cv_img.data, buf_len); // NOLINT(caffe/alt_fn)
}
}
index 348c907485e25ce548757ee3b5f1d84fdb3cf841..3e67cc1c8d95ff12ed0eeda9c38152dc069575e1 100644 (file)
half ha;
ha.setx(alpha.getx());
// use cublasHscal when it will become available
+ // NOLINT_NEXT_LINE(whitespace/operators)
scale_in_place_kernel <<<CAFFE_GET_BLOCKS_HALF(n), CAFFE_CUDA_NUM_THREADS_HALF, 0, stream>>>
(n, ha, reinterpret_cast<half*>(x));
CUDA_POST_KERNEL_CHECK;