diff options
author | Manu Mathew | 2018-07-23 08:48:41 -0500 |
---|---|---|
committer | Manu Mathew | 2018-07-23 08:50:30 -0500 |
commit | 9b18bfa7613139c4a88cd78034e61e8c46fd4a1a (patch) | |
tree | ac8fbae8dba3c3edd736b3f8cec4efbe8062d51d | |
parent | 5603c2c50fcb384f147d5cd1bd780e240409aa7c (diff) | |
download | caffe-jacinto-9b18bfa7613139c4a88cd78034e61e8c46fd4a1a.tar.gz caffe-jacinto-9b18bfa7613139c4a88cd78034e61e8c46fd4a1a.tar.xz caffe-jacinto-9b18bfa7613139c4a88cd78034e61e8c46fd4a1a.zip |
quantization - bugfixes
-rw-r--r-- | include/caffe/quantized_layer.hpp | 4 | ||||
-rw-r--r-- | src/caffe/layers/conv_layer.cu | 2 | ||||
-rw-r--r-- | src/caffe/layers/cudnn_conv_layer.cu | 2 | ||||
-rw-r--r-- | src/caffe/net.cpp | 12 | ||||
-rw-r--r-- | src/caffe/quantized_layer.cpp | 16 | ||||
-rw-r--r-- | src/caffe/quantized_layer.cu | 55 |
6 files changed, 71 insertions, 20 deletions
diff --git a/include/caffe/quantized_layer.hpp b/include/caffe/quantized_layer.hpp index 9ce4c00d..18873e87 100644 --- a/include/caffe/quantized_layer.hpp +++ b/include/caffe/quantized_layer.hpp | |||
@@ -38,9 +38,9 @@ public: | |||
38 | * @param fl The number of bits in the fractional part. | 38 | * @param fl The number of bits in the fractional part. |
39 | */ | 39 | */ |
40 | void Trim2FixedPoint_cpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth, | 40 | void Trim2FixedPoint_cpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth, |
41 | const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip); | 41 | const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip, bool roundup); |
42 | void Trim2FixedPoint_gpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth, | 42 | void Trim2FixedPoint_gpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth, |
43 | const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip); | 43 | const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip, bool roundup); |
44 | 44 | ||
45 | /** | 45 | /** |
46 | * @brief Generate random number in [0,1) range. | 46 | * @brief Generate random number in [0,1) range. |
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu index 50ef113e..791e5d97 100644 --- a/src/caffe/layers/conv_layer.cu +++ b/src/caffe/layers/conv_layer.cu | |||
@@ -7,7 +7,7 @@ namespace caffe { | |||
7 | template <typename Ftype, typename Btype> | 7 | template <typename Ftype, typename Btype> |
8 | void ConvolutionLayer<Ftype, Btype>::Forward_gpu(const vector<Blob*>& bottom, | 8 | void ConvolutionLayer<Ftype, Btype>::Forward_gpu(const vector<Blob*>& bottom, |
9 | const vector<Blob*>& top) { | 9 | const vector<Blob*>& top) { |
10 | //this->Quantize_gpu(bottom, top); | 10 | this->Quantize_gpu(bottom, top); |
11 | const Ftype* weight = this->blobs_[0]->template gpu_data<Ftype>(); | 11 | const Ftype* weight = this->blobs_[0]->template gpu_data<Ftype>(); |
12 | for (int i = 0; i < bottom.size(); ++i) { | 12 | for (int i = 0; i < bottom.size(); ++i) { |
13 | const Ftype* bottom_data = bottom[i]->gpu_data<Ftype>(); | 13 | const Ftype* bottom_data = bottom[i]->gpu_data<Ftype>(); |
diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index f42c210d..0c907b30 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu | |||
@@ -11,7 +11,7 @@ namespace caffe { | |||
11 | template<typename Ftype, typename Btype> | 11 | template<typename Ftype, typename Btype> |
12 | void CuDNNConvolutionLayer<Ftype, Btype>::Forward_gpu(const vector<Blob*>& bottom, | 12 | void CuDNNConvolutionLayer<Ftype, Btype>::Forward_gpu(const vector<Blob*>& bottom, |
13 | const vector<Blob*>& top) { | 13 | const vector<Blob*>& top) { |
14 | //this->Quantize_gpu(bottom, top); | 14 | this->Quantize_gpu(bottom, top); |
15 | const Ftype* weight = this->blobs_[0]->template gpu_data<Ftype>(); | 15 | const Ftype* weight = this->blobs_[0]->template gpu_data<Ftype>(); |
16 | shared_ptr<GPUMemory::Workspace>& ws = GPUMemory::workspace_[Caffe::current_device()]; | 16 | shared_ptr<GPUMemory::Workspace>& ws = GPUMemory::workspace_[Caffe::current_device()]; |
17 | if (use_v7grouping()) { | 17 | if (use_v7grouping()) { |
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index a4bab339..4edba6db 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp | |||
@@ -2161,7 +2161,10 @@ void Net::EnableQuantizationForSelectedLayers() { | |||
2161 | std::transform(layer_type_lower.begin(), layer_type_lower.end(), layer_type_lower.begin(), | 2161 | std::transform(layer_type_lower.begin(), layer_type_lower.end(), layer_type_lower.begin(), |
2162 | [](unsigned char c) {return std::tolower(c);} | 2162 | [](unsigned char c) {return std::tolower(c);} |
2163 | ); | 2163 | ); |
2164 | if(layer_type_lower.find("data") != string::npos) { | 2164 | if(layer_type_lower.find("Data") != string::npos) { |
2165 | max_blobs_to_quantize = 1; | ||
2166 | } | ||
2167 | if(layer_type_lower.find("Input") != string::npos) { | ||
2165 | max_blobs_to_quantize = 1; | 2168 | max_blobs_to_quantize = 1; |
2166 | } | 2169 | } |
2167 | 2170 | ||
@@ -2172,13 +2175,16 @@ void Net::EnableQuantizationForSelectedLayers() { | |||
2172 | layer_type == "Bias" || layer_type == "Pooling") { | 2175 | layer_type == "Bias" || layer_type == "Pooling") { |
2173 | is_quantized_layer_type = true; | 2176 | is_quantized_layer_type = true; |
2174 | } | 2177 | } |
2175 | if(layer_type_lower.find("data") != string::npos) { | 2178 | if(layer_type_lower.find("Data") != string::npos) { |
2179 | is_quantized_layer_type = true; | ||
2180 | } | ||
2181 | if(layer_type_lower.find("Input") != string::npos) { | ||
2176 | is_quantized_layer_type = true; | 2182 | is_quantized_layer_type = true; |
2177 | } | 2183 | } |
2178 | 2184 | ||
2179 | //quantize weights | 2185 | //quantize weights |
2180 | if(net_qparam.quantize_weights()) { | 2186 | if(net_qparam.quantize_weights()) { |
2181 | if(is_quantized_layer_type && (!is_merged_layer) && (!is_ignored_layer_name)) { | 2187 | if(is_quantized_layer_type /*&& (!is_merged_layer)*/ && (!is_ignored_layer_name)) { |
2182 | if(layer_type == "Convolution" || layer_type == "InnerProduct" || layer_type == "Deconvolution") { | 2188 | if(layer_type == "Convolution" || layer_type == "InnerProduct" || layer_type == "Deconvolution") { |
2183 | QuantizationParameter& quantization_param = *layers_[layer_id]->mutable_layer_param().mutable_quantization_param(); | 2189 | QuantizationParameter& quantization_param = *layers_[layer_id]->mutable_layer_param().mutable_quantization_param(); |
2184 | for(int blob_id=0; blob_id<layers_[layer_id]->blobs().size(); blob_id++) { | 2190 | for(int blob_id=0; blob_id<layers_[layer_id]->blobs().size(); blob_id++) { |
diff --git a/src/caffe/quantized_layer.cpp b/src/caffe/quantized_layer.cpp index e686d76c..8407b43e 100644 --- a/src/caffe/quantized_layer.cpp +++ b/src/caffe/quantized_layer.cpp | |||
@@ -51,7 +51,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeWeights_cpu(Ftype* data, const int bl | |||
51 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: | 51 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: |
52 | Trim2FixedPoint_cpu(data, count, param.power2_scale_weights(), qparam_w.bitwidth(), | 52 | Trim2FixedPoint_cpu(data, count, param.power2_scale_weights(), qparam_w.bitwidth(), |
53 | param.rounding_scheme(), qparam_w.fracbits(), qparam_w.scale_target(), | 53 | param.rounding_scheme(), qparam_w.fracbits(), qparam_w.scale_target(), |
54 | qparam_w.offset(), qparam_w.unsigned_quant(), clip); | 54 | qparam_w.offset(), qparam_w.unsigned_quant(), clip, false); |
55 | break; | 55 | break; |
56 | case QuantizationParameter_Precision_FLOAT: | 56 | case QuantizationParameter_Precision_FLOAT: |
57 | break; | 57 | break; |
@@ -72,7 +72,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerInputs_cpu(Ftype* data, const in | |||
72 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: | 72 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: |
73 | Trim2FixedPoint_cpu(data, count, param.power2_scale_activations(), qparam_in.bitwidth(), | 73 | Trim2FixedPoint_cpu(data, count, param.power2_scale_activations(), qparam_in.bitwidth(), |
74 | param.rounding_scheme(), qparam_in.fracbits(), qparam_in.scale_target(), | 74 | param.rounding_scheme(), qparam_in.fracbits(), qparam_in.scale_target(), |
75 | qparam_in.offset(), qparam_in.unsigned_quant(), true); | 75 | qparam_in.offset(), qparam_in.unsigned_quant(), true, true); |
76 | break; | 76 | break; |
77 | case QuantizationParameter_Precision_FLOAT: | 77 | case QuantizationParameter_Precision_FLOAT: |
78 | break; | 78 | break; |
@@ -91,7 +91,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerOutputs_cpu( | |||
91 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: | 91 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: |
92 | Trim2FixedPoint_cpu(data, count, param.power2_scale_activations(), qparam_out.bitwidth(), | 92 | Trim2FixedPoint_cpu(data, count, param.power2_scale_activations(), qparam_out.bitwidth(), |
93 | param.rounding_scheme(), qparam_out.fracbits(), qparam_out.scale_target(), | 93 | param.rounding_scheme(), qparam_out.fracbits(), qparam_out.scale_target(), |
94 | qparam_out.offset(), qparam_out.unsigned_quant(), true); | 94 | qparam_out.offset(), qparam_out.unsigned_quant(), true, true); |
95 | break; | 95 | break; |
96 | case QuantizationParameter_Precision_FLOAT: | 96 | case QuantizationParameter_Precision_FLOAT: |
97 | break; | 97 | break; |
@@ -103,7 +103,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerOutputs_cpu( | |||
103 | 103 | ||
104 | template<typename Ftype, typename Btype> | 104 | template<typename Ftype, typename Btype> |
105 | void QuantizedLayer<Ftype, Btype>::Trim2FixedPoint_cpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth, | 105 | void QuantizedLayer<Ftype, Btype>::Trim2FixedPoint_cpu(Ftype* data, const int cnt, bool power2_range, const int bitwidth, |
106 | const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip) { | 106 | const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip, bool roundup) { |
107 | float inv_scale = 1.0f/scale; | 107 | float inv_scale = 1.0f/scale; |
108 | 108 | ||
109 | int qrange = unsigned_quant? bitwidth : (bitwidth - 1); | 109 | int qrange = unsigned_quant? bitwidth : (bitwidth - 1); |
@@ -121,7 +121,13 @@ void QuantizedLayer<Ftype, Btype>::Trim2FixedPoint_cpu(Ftype* data, const int cn | |||
121 | // Round data | 121 | // Round data |
122 | switch (rounding) { | 122 | switch (rounding) { |
123 | case QuantizationParameter_Rounding_NEAREST: | 123 | case QuantizationParameter_Rounding_NEAREST: |
124 | data[index] = round(data[index]); | 124 | //data[index] = round(data[index]); |
125 | if(roundup) { | ||
126 | //data[index] = int(data[index]+0.5); | ||
127 | data[index] = (int(data[index] * 4096) + 2048)>>12; | ||
128 | } else { | ||
129 | data[index] = int(data[index] >= 0? (data[index]+0.5) : (data[index]-0.5)); | ||
130 | } | ||
125 | break; | 131 | break; |
126 | case QuantizationParameter_Rounding_STOCHASTIC: | 132 | case QuantizationParameter_Rounding_STOCHASTIC: |
127 | data[index] = floor(data[index] + RandUniform_cpu()); | 133 | data[index] = floor(data[index] + RandUniform_cpu()); |
diff --git a/src/caffe/quantized_layer.cu b/src/caffe/quantized_layer.cu index 3a9f3f70..69c2e974 100644 --- a/src/caffe/quantized_layer.cu +++ b/src/caffe/quantized_layer.cu | |||
@@ -46,7 +46,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeWeights_gpu(Ftype* data, const int bl | |||
46 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: | 46 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: |
47 | Trim2FixedPoint_gpu(data, count, param.power2_scale_weights(), qparam_w.bitwidth(), | 47 | Trim2FixedPoint_gpu(data, count, param.power2_scale_weights(), qparam_w.bitwidth(), |
48 | param.rounding_scheme(), qparam_w.fracbits(), qparam_w.scale_target(), | 48 | param.rounding_scheme(), qparam_w.fracbits(), qparam_w.scale_target(), |
49 | qparam_w.offset(), qparam_w.unsigned_quant(), clip); | 49 | qparam_w.offset(), qparam_w.unsigned_quant(), clip, false); |
50 | break; | 50 | break; |
51 | case QuantizationParameter_Precision_FLOAT: | 51 | case QuantizationParameter_Precision_FLOAT: |
52 | break; | 52 | break; |
@@ -66,7 +66,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerInputs_gpu( | |||
66 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: | 66 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: |
67 | Trim2FixedPoint_gpu(data, count, param.power2_scale_activations(), qparam_in.bitwidth(), | 67 | Trim2FixedPoint_gpu(data, count, param.power2_scale_activations(), qparam_in.bitwidth(), |
68 | param.rounding_scheme(), qparam_in.fracbits(), qparam_in.scale_target(), | 68 | param.rounding_scheme(), qparam_in.fracbits(), qparam_in.scale_target(), |
69 | qparam_in.offset(), qparam_in.unsigned_quant(), true); | 69 | qparam_in.offset(), qparam_in.unsigned_quant(), true, true); |
70 | break; | 70 | break; |
71 | case QuantizationParameter_Precision_FLOAT: | 71 | case QuantizationParameter_Precision_FLOAT: |
72 | break; | 72 | break; |
@@ -85,7 +85,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerOutputs_gpu(Ftype* data, | |||
85 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: | 85 | case QuantizationParameter_Precision_DYNAMIC_FIXED_POINT: |
86 | Trim2FixedPoint_gpu(data, count, param.power2_scale_activations(), qparam_out.bitwidth(), | 86 | Trim2FixedPoint_gpu(data, count, param.power2_scale_activations(), qparam_out.bitwidth(), |
87 | param.rounding_scheme(), qparam_out.fracbits(), qparam_out.scale_target(), | 87 | param.rounding_scheme(), qparam_out.fracbits(), qparam_out.scale_target(), |
88 | qparam_out.offset(), qparam_out.unsigned_quant(), true); | 88 | qparam_out.offset(), qparam_out.unsigned_quant(), true, true); |
89 | break; | 89 | break; |
90 | case QuantizationParameter_Precision_FLOAT: | 90 | case QuantizationParameter_Precision_FLOAT: |
91 | break; | 91 | break; |
@@ -96,7 +96,7 @@ void QuantizedLayer<Ftype, Btype>::QuantizeLayerOutputs_gpu(Ftype* data, | |||
96 | } | 96 | } |
97 | 97 | ||
98 | template <typename Dtype> | 98 | template <typename Dtype> |
99 | __global__ void Trim2FixedPoint_kernel(Dtype* data, const int cnt, | 99 | __global__ void Trim2FixedPoint_roundassymetric_kernel(Dtype* data, const int cnt, |
100 | const int bitwidth, const int rounding, float scale, float inv_scale, float offset, float min_data, float max_data, bool clip) { | 100 | const int bitwidth, const int rounding, float scale, float inv_scale, float offset, float min_data, float max_data, bool clip) { |
101 | CUDA_KERNEL_LOOP(index, cnt) { | 101 | CUDA_KERNEL_LOOP(index, cnt) { |
102 | 102 | ||
@@ -105,7 +105,8 @@ __global__ void Trim2FixedPoint_kernel(Dtype* data, const int cnt, | |||
105 | // Round data | 105 | // Round data |
106 | switch (rounding) { | 106 | switch (rounding) { |
107 | case QuantizationParameter_Rounding_NEAREST: | 107 | case QuantizationParameter_Rounding_NEAREST: |
108 | data[index] = rint(data[index]); | 108 | //data[index] = rint(data[index]); |
109 | data[index] = int(data[index] >= 0? (data[index]+0.5) : (data[index]-0.5)); | ||
109 | break; | 110 | break; |
110 | case QuantizationParameter_Rounding_STOCHASTIC: | 111 | case QuantizationParameter_Rounding_STOCHASTIC: |
111 | data[index] = __float2int_rd(data[index] + RandUniform_device(index)); | 112 | data[index] = __float2int_rd(data[index] + RandUniform_device(index)); |
@@ -124,20 +125,58 @@ __global__ void Trim2FixedPoint_kernel(Dtype* data, const int cnt, | |||
124 | } | 125 | } |
125 | } | 126 | } |
126 | 127 | ||
128 | template <typename Dtype> | ||
129 | __global__ void Trim2FixedPoint_roundup_kernel(Dtype* data, const int cnt, | ||
130 | const int bitwidth, const int rounding, float scale, float inv_scale, float offset, float min_data, float max_data, bool clip) { | ||
131 | CUDA_KERNEL_LOOP(index, cnt) { | ||
132 | |||
133 | data[index] = (data[index] * scale) + offset; | ||
134 | |||
135 | // Round data | ||
136 | switch (rounding) { | ||
137 | case QuantizationParameter_Rounding_NEAREST: | ||
138 | //data[index] = rint(data[index]); | ||
139 | //data[index] = (data[index]+0.5); | ||
140 | data[index] = (int(data[index] * 4096) + 2048)>>12; | ||
141 | break; | ||
142 | case QuantizationParameter_Rounding_STOCHASTIC: | ||
143 | //data[index] = __float2int_rd(data[index] + RandUniform_device(index)); | ||
144 | break; | ||
145 | default: | ||
146 | break; | ||
147 | } | ||
148 | |||
149 | // Saturate data | ||
150 | if(clip) { | ||
151 | data[index] = (data[index]>(Dtype)max_data? (Dtype)max_data: | ||
152 | (data[index]<(Dtype)min_data?(Dtype)min_data:data[index])); | ||
153 | } | ||
154 | |||
155 | data[index] = (data[index] - offset) * inv_scale; | ||
156 | } | ||
157 | } | ||
158 | |||
127 | 159 | ||
128 | template<typename Ftype, typename Btype> | 160 | template<typename Ftype, typename Btype> |
129 | void QuantizedLayer<Ftype, Btype>::Trim2FixedPoint_gpu(Ftype* data, const int cnt, bool power2_range, | 161 | void QuantizedLayer<Ftype, Btype>::Trim2FixedPoint_gpu(Ftype* data, const int cnt, bool power2_range, |
130 | const int bitwidth, const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip) { | 162 | const int bitwidth, const int rounding, int fracbits, float scale, float offset, bool unsigned_quant, bool clip, |
163 | bool roundup) { | ||
131 | float inv_scale = 1.0f/scale; | 164 | float inv_scale = 1.0f/scale; |
132 | 165 | ||
133 | int qrange = unsigned_quant? bitwidth : (bitwidth - 1); | 166 | int qrange = unsigned_quant? bitwidth : (bitwidth - 1); |
134 | float min_data = unsigned_quant? 0 : -(powf(2, qrange)); | 167 | float min_data = unsigned_quant? 0 : -(powf(2, qrange)); |
135 | float max_data = +(powf(2, qrange) - 1); | 168 | float max_data = +(powf(2, qrange) - 1); |
136 | 169 | ||
137 | Trim2FixedPoint_kernel<<<CAFFE_GET_BLOCKS(cnt), CAFFE_CUDA_NUM_THREADS>>>( | 170 | if(roundup) { |
138 | data, cnt, bitwidth, rounding, scale, inv_scale, offset, min_data, max_data, clip); | 171 | Trim2FixedPoint_roundup_kernel<<<CAFFE_GET_BLOCKS(cnt), CAFFE_CUDA_NUM_THREADS>>>( |
172 | data, cnt, bitwidth, rounding, scale, inv_scale, offset, min_data, max_data, clip); | ||
173 | } else { | ||
174 | Trim2FixedPoint_roundassymetric_kernel<<<CAFFE_GET_BLOCKS(cnt), CAFFE_CUDA_NUM_THREADS>>>( | ||
175 | data, cnt, bitwidth, rounding, scale, inv_scale, offset, min_data, max_data, clip); | ||
176 | } | ||
139 | } | 177 | } |
140 | 178 | ||
179 | |||
141 | template void QuantizedLayer<double, double>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top); | 180 | template void QuantizedLayer<double, double>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top); |
142 | template void QuantizedLayer<double, float>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top); | 181 | template void QuantizedLayer<double, float>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top); |
143 | template void QuantizedLayer<double, float16>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top); | 182 | template void QuantizedLayer<double, float16>::Quantize_gpu(const vector<Blob*>& bottom,const vector<Blob*>& top); |