author     Manu Mathew   2018-07-23 08:48:41 -0500
committer  Manu Mathew   2018-07-23 08:50:30 -0500
commit     9b18bfa7613139c4a88cd78034e61e8c46fd4a1a (patch)
tree       ac8fbae8dba3c3edd736b3f8cec4efbe8062d51d
parent     5603c2c50fcb384f147d5cd1bd780e240409aa7c (diff)
quantization - bugfixes
-rw-r--r--  include/caffe/quantized_layer.hpp      4
-rw-r--r--  src/caffe/layers/conv_layer.cu         2
-rw-r--r--  src/caffe/layers/cudnn_conv_layer.cu   2
-rw-r--r--  src/caffe/net.cpp                     12
-rw-r--r--  src/caffe/quantized_layer.cpp         16
-rw-r--r--  src/caffe/quantized_layer.cu          55
6 files changed, 71 insertions, 20 deletions
diff --git a/include/caffe/quantized_layer.hpp b/include/caffe/quantized_layer.hpp
index 9ce4c00d..18873e87 100644
--- a/include/caffe/quantized_layer.hpp
+++ b/include/caffe/quantized_layer.hpp
@@ -38,9 +38,9 @@ public:
    * @param fl The number of bits in the fractional part.
    */
   void Trim2FixedPoint_cpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth,
-      const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip);
+      const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip, bool roundup);
   void Trim2FixedPoint_gpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth,
-      const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip);
+      const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip, bool roundup);
 
   /**
    * @brief Generate random number in [0,1) range.
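
Note on the new parameter (a hedged stub, not text from the commit): judging from the .cpp and .cu hunks below, roundup selects between two NEAREST rounding rules, and the callers pass false for weights and true for layer inputs/outputs. A sketch of how it could be documented in the header's own Doxygen style, with wording that is inferred rather than taken from the source:

    /**
     * @param roundup  If true, NEAREST rounding uses the fixed-point
     *                 round-half-up form ((int(x * 4096) + 2048) >> 12);
     *                 if false, values round half away from zero.
     *                 (Inferred from the callers below: false for weights,
     *                 true for activations.)
     */
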
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
index 50ef113e..791e5d97 100644
--- a/src/caffe/layers/conv_layer.cu
+++ b/src/caffe/layers/conv_layer.cu
@@ -7,7 +7,7 @@ namespace caffe {
 template <typename Ftype, typename Btype>
 void ConvolutionLayer<Ftype, Btype>::Forward_gpu(const vector<Blob*>& bottom,
     const vector<Blob*>& top) {
-  //this->Quantize_gpu(bottom, top);
+  this->Quantize_gpu(bottom, top);
   const Ftype* weight = this->blobs_[0]->template gpu_data<Ftype>();
   for (int i = 0; i < bottom.size(); ++i) {
     const Ftype* bottom_data = bottom[i]->gpu_data<Ftype>();
diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu
index f42c210d..0c907b30 100644
--- a/src/caffe/layers/cudnn_conv_layer.cu
+++ b/src/caffe/layers/cudnn_conv_layer.cu
@@ -11,7 +11,7 @@ namespace caffe {
 template<typename Ftype, typename Btype>
 void CuDNNConvolutionLayer<Ftype, Btype>::Forward_gpu(const vector<Blob*>& bottom,
     const vector<Blob*>& top) {
-  //this->Quantize_gpu(bottom, top);
+  this->Quantize_gpu(bottom, top);
   const Ftype* weight = this->blobs_[0]->template gpu_data<Ftype>();
   shared_ptr<GPUMemory::Workspace>& ws = GPUMemory::workspace_[Caffe::current_device()];
   if (use_v7grouping()) {
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index a4bab339..4edba6db 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
@@ -2161,7 +2161,10 @@ void Net::EnableQuantizationForSelectedLayers() {
   std::transform(layer_type_lower.begin(), layer_type_lower.end(), layer_type_lower.begin(),
       [](unsigned char c) {return std::tolower(c);}
   );
-  if(layer_type_lower.find("data") != string::npos) {
+  if(layer_type_lower.find("Data") != string::npos) {
+    max_blobs_to_quantize = 1;
+  }
+  if(layer_type_lower.find("Input") != string::npos) {
     max_blobs_to_quantize = 1;
   }
 
@@ -2172,13 +2175,16 @@ void Net::EnableQuantizationForSelectedLayers() {
       layer_type == "Bias" || layer_type == "Pooling") {
     is_quantized_layer_type = true;
   }
-  if(layer_type_lower.find("data") != string::npos) {
+  if(layer_type_lower.find("Data") != string::npos) {
+    is_quantized_layer_type = true;
+  }
+  if(layer_type_lower.find("Input") != string::npos) {
     is_quantized_layer_type = true;
   }
 
   //quantize weights
   if(net_qparam.quantize_weights()) {
-    if(is_quantized_layer_type && (!is_merged_layer) && (!is_ignored_layer_name)) {
+    if(is_quantized_layer_type /*&& (!is_merged_layer)*/ && (!is_ignored_layer_name)) {
       if(layer_type == "Convolution" || layer_type == "InnerProduct" || layer_type == "Deconvolution") {
         QuantizationParameter& quantization_param = *layers_[layer_id]->mutable_layer_param().mutable_quantization_param();
         for(int blob_id=0; blob_id<layers_[layer_id]->blobs().size(); blob_id++) {
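
A caveat on the two net.cpp hunks above: layer_type_lower has already been lowercased by the std::transform shown, so find("Data") and find("Input") appear unable to match anything, whereas the replaced find("data") form did match. A minimal standalone C++ sketch of a check that works regardless of case (the helper name contains_ci is hypothetical, not from caffe-jacinto):

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <string>

    // Lowercase both strings, then do a plain substring search.
    static bool contains_ci(std::string haystack, std::string needle) {
      auto lower = [](std::string& s) {
        std::transform(s.begin(), s.end(), s.begin(),
                       [](unsigned char c) { return std::tolower(c); });
      };
      lower(haystack);
      lower(needle);
      return haystack.find(needle) != std::string::npos;
    }

    int main() {
      std::cout << contains_ci("AnnotatedData", "Data") << "\n";  // 1
      std::cout << contains_ci("INPUT", "input") << "\n";         // 1
      std::cout << contains_ci("Convolution", "Data") << "\n";    // 0
    }
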
diff --git a/src/caffe/quantized_layer.cpp b/src/caffe/quantized_layer.cpp
index e686d76c..8407b43e 100644
--- a/src/caffe/quantized_layer.cpp
+++ b/src/caffe/quantized_layer.cpp
@@ -51,7 +51,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeWeights_cpu(Ftype* data, const int bl
   case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT:
     Trim2FixedPoint_cpu(data, count, param.power2_scale_weights(), qparam_w.bitwidth(),
         param.rounding_scheme(), qparam_w.fracbits(), qparam_w.scale_target(),
-        qparam_w.offset(), qparam_w.unsigned_quant(), clip);
+        qparam_w.offset(), qparam_w.unsigned_quant(), clip, false);
     break;
   case QuantizationParameter_Precision_FLOAT:
     break;
@@ -72,7 +72,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerInputs_cpu(Ftype* data, const in
   case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT:
     Trim2FixedPoint_cpu(data, count, param.power2_scale_activations(), qparam_in.bitwidth(),
         param.rounding_scheme(), qparam_in.fracbits(), qparam_in.scale_target(),
-        qparam_in.offset(), qparam_in.unsigned_quant(), true);
+        qparam_in.offset(), qparam_in.unsigned_quant(), true, true);
     break;
   case QuantizationParameter_Precision_FLOAT:
     break;
@@ -91,7 +91,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerOutputs_cpu(
   case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT:
     Trim2FixedPoint_cpu(data, count, param.power2_scale_activations(), qparam_out.bitwidth(),
         param.rounding_scheme(), qparam_out.fracbits(), qparam_out.scale_target(),
-        qparam_out.offset(), qparam_out.unsigned_quant(), true);
+        qparam_out.offset(), qparam_out.unsigned_quant(), true, true);
     break;
   case QuantizationParameter_Precision_FLOAT:
     break;
@@ -103,7 +103,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerOutputs_cpu(
 
 template<typename Ftype, typename Btype>
 void QuantizedLayer<Ftype, Btype>::Trim2FixedPoint_cpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth,
-    const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip) {
+    const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip, bool roundup) {
   float inv_scale = 1.0f/scale;
 
   int qrange = unsigned_quant? bitwidth : (bitwidth - 1);
@@ -121,7 +121,13 @@ void QuantizedLayer<Ftype, Btype>::Trim2FixedPoint_cpu(Ftype* data, const int cn
     // Round data
     switch (rounding) {
     case QuantizationParameter_Rounding_NEAREST:
-      data[index] = round(data[index]);
+      //data[index] = round(data[index]);
+      if(roundup) {
+        //data[index] = int(data[index]+0.5);
+        data[index] = (int(data[index] * 4096) + 2048)>>12;
+      } else {
+        data[index] = int(data[index] >= 0? (data[index]+0.5) : (data[index]-0.5));
+      }
       break;
     case QuantizationParameter_Rounding_STOCHASTIC:
       data[index] = floor(data[index] + RandUniform_cpu());
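
For a concrete feel of the two NEAREST variants introduced above: the roundup branch adds 0.5 in Q12 fixed point and shifts the fraction away (approximately floor(x + 0.5), so -2.5 rounds up to -2), while the other branch rounds half away from zero (-2.5 goes to -3). A minimal standalone sketch, assuming the scaled values fit in an int after multiplying by 2^12 and an arithmetic right shift for negatives, as on common platforms:

    #include <cstdio>

    // Round half up in Q12: ((int)(x * 4096) + 2048) >> 12.
    static int round_half_up_q12(float x) {
      return ((int)(x * 4096) + 2048) >> 12;
    }

    // Round half away from zero, as in the non-roundup branch.
    static int round_half_away(float x) {
      return (int)(x >= 0 ? x + 0.5f : x - 0.5f);
    }

    int main() {
      const float samples[] = {2.5f, -2.5f, 1.3f, -1.7f};
      for (float x : samples) {
        std::printf("%+.1f  half_up=%d  half_away=%d\n",
                    x, round_half_up_q12(x), round_half_away(x));
      }
      return 0;  // +2.5 -> 3/3, -2.5 -> -2/-3, +1.3 -> 1/1, -1.7 -> -2/-2
    }
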
diff --git a/src/caffe/quantized_layer.cu b/src/caffe/quantized_layer.cu
index 3a9f3f70..69c2e974 100644
--- a/src/caffe/quantized_layer.cu
+++ b/src/caffe/quantized_layer.cu
@@ -46,7 +46,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeWeights_gpu(Ftype* data, const int bl
   case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT:
     Trim2FixedPoint_gpu(data, count, param.power2_scale_weights(), qparam_w.bitwidth(),
         param.rounding_scheme(), qparam_w.fracbits(), qparam_w.scale_target(),
-        qparam_w.offset(), qparam_w.unsigned_quant(), clip);
+        qparam_w.offset(), qparam_w.unsigned_quant(), clip, false);
     break;
   case QuantizationParameter_Precision_FLOAT:
     break;
@@ -66,7 +66,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerInputs_gpu(
   case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT:
     Trim2FixedPoint_gpu(data, count, param.power2_scale_activations(), qparam_in.bitwidth(),
         param.rounding_scheme(), qparam_in.fracbits(), qparam_in.scale_target(),
-        qparam_in.offset(), qparam_in.unsigned_quant(), true);
+        qparam_in.offset(), qparam_in.unsigned_quant(), true, true);
     break;
   case QuantizationParameter_Precision_FLOAT:
     break;
@@ -85,7 +85,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerOutputs_gpu(Ftype* data,
   case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT:
     Trim2FixedPoint_gpu(data, count, param.power2_scale_activations(), qparam_out.bitwidth(),
         param.rounding_scheme(), qparam_out.fracbits(), qparam_out.scale_target(),
-        qparam_out.offset(), qparam_out.unsigned_quant(), true);
+        qparam_out.offset(), qparam_out.unsigned_quant(), true, true);
     break;
   case QuantizationParameter_Precision_FLOAT:
     break;
@@ -96,7 +96,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerOutputs_gpu(Ftype* data,
 }
 
 template <typename Dtype>
-__global__ void Trim2FixedPoint_kernel(Dtype* data, const int cnt,
+__global__ void Trim2FixedPoint_roundassymetric_kernel(Dtype* data, const int cnt,
     const int bitwidth, const int rounding, float scale, float inv_scale, float offset, float min_data, float max_data, bool clip) {
   CUDA_KERNEL_LOOP(index, cnt) {
 
@@ -105,7 +105,8 @@ __global__ void Trim2FixedPoint_kernel(Dtype* data, const int cnt,
     // Round data
     switch (rounding) {
     case QuantizationParameter_Rounding_NEAREST:
-      data[index] = rint(data[index]);
+      //data[index] = rint(data[index]);
+      data[index] = int(data[index] >= 0? (data[index]+0.5) : (data[index]-0.5));
       break;
     case QuantizationParameter_Rounding_STOCHASTIC:
       data[index] = __float2int_rd(data[index] + RandUniform_device(index));
@@ -124,20 +125,58 @@ __global__ void Trim2FixedPoint_kernel(Dtype* data, const int cnt,
   }
 }
 
+template <typename Dtype>
+__global__ void Trim2FixedPoint_roundup_kernel(Dtype* data, const int cnt,
+    const int bitwidth, const int rounding, float scale, float inv_scale, float offset, float min_data, float max_data, bool clip) {
+  CUDA_KERNEL_LOOP(index, cnt) {
+
+    data[index] = (data[index] * scale) + offset;
+
+    // Round data
+    switch (rounding) {
+    case QuantizationParameter_Rounding_NEAREST:
+      //data[index] = rint(data[index]);
+      //data[index] = (data[index]+0.5);
+      data[index] = (int(data[index] * 4096) + 2048)>>12;
+      break;
+    case QuantizationParameter_Rounding_STOCHASTIC:
+      //data[index] = __float2int_rd(data[index] + RandUniform_device(index));
+      break;
+    default:
+      break;
+    }
+
+    // Saturate data
+    if(clip) {
+      data[index] = (data[index]>(Dtype)max_data? (Dtype)max_data:
+          (data[index]<(Dtype)min_data?(Dtype)min_data:data[index]));
+    }
+
+    data[index] = (data[index] - offset) * inv_scale;
+  }
+}
+
 
 template<typename Ftype, typename Btype>
 void QuantizedLayer<Ftype, Btype>::Trim2FixedPoint_gpu(Ftype* data, const int cnt, bool power2_range,
-    const int bitwidth, const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip) {
+    const int bitwidth, const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip,
+    bool roundup) {
   float inv_scale = 1.0f/scale;
 
   int qrange = unsigned_quant? bitwidth : (bitwidth - 1);
   float min_data = unsigned_quant? 0 : -(powf(2, qrange));
   float max_data = +(powf(2, qrange) - 1);
 
-  Trim2FixedPoint_kernel<<<CAFFE_GET_BLOCKS(cnt), CAFFE_CUDA_NUM_THREADS>>>(
-      data, cnt, bitwidth, rounding, scale, inv_scale, offset, min_data, max_data, clip);
+  if(roundup) {
+    Trim2FixedPoint_roundup_kernel<<<CAFFE_GET_BLOCKS(cnt), CAFFE_CUDA_NUM_THREADS>>>(
+        data, cnt, bitwidth, rounding, scale, inv_scale, offset, min_data, max_data, clip);
+  } else {
+    Trim2FixedPoint_roundassymetric_kernel<<<CAFFE_GET_BLOCKS(cnt), CAFFE_CUDA_NUM_THREADS>>>(
+        data, cnt, bitwidth, rounding, scale, inv_scale, offset, min_data, max_data, clip);
+  }
 }
 
+
 template void QuantizedLayer<double, double>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top);
 template void QuantizedLayer<double, float>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top);
 template void QuantizedLayer<double, float16>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top);
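
As a quick sanity check on the saturation limits used by both kernels: qrange drops one bit for signed data, so bitwidth 8 gives [0, 255] unsigned and [-128, 127] signed. A standalone sketch (bitwidth fixed at 8 for illustration) reproducing the qrange/min_data/max_data arithmetic from Trim2FixedPoint_gpu:

    #include <cmath>
    #include <cstdio>

    int main() {
      const int bitwidth = 8;
      for (int unsigned_quant = 0; unsigned_quant <= 1; ++unsigned_quant) {
        // Same expressions as in Trim2FixedPoint_gpu above.
        const int qrange = unsigned_quant ? bitwidth : (bitwidth - 1);
        const float min_data = unsigned_quant ? 0.0f : -std::pow(2.0f, qrange);
        const float max_data = +(std::pow(2.0f, qrange) - 1);
        std::printf("unsigned_quant=%d  range=[%g, %g]\n",
                    unsigned_quant, min_data, max_data);
      }
      return 0;  // prints [-128, 127] then [0, 255]
    }
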