• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZE_UTIL_H_
18 #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZE_UTIL_H_
19 
20 #ifndef _MSC_VER
21 #include <dirent.h>
22 #endif
23 #include <sys/stat.h>
24 #include <memory>
25 #include <string>
26 #include <cmath>
27 #include <set>
28 #include <array>
29 #include <vector>
30 #include <algorithm>
31 #include <limits>
32 #include <utility>
33 #include "ops/mat_mul.h"
34 #include "ops/lstm.h"
35 #include "ops/fusion/full_connection.h"
36 #include "tools/converter/quantizer/quantizer.h"
37 #include "include/errorcode.h"
38 #include "ir/func_graph.h"
39 #include "ir/anf.h"
40 #include "include/model.h"
41 #include "base/base.h"
42 #include "ir/primitive.h"
43 #include "abstract/dshape.h"
44 #include "tools/converter/quantizer/huffman_encode.h"
45 #include "tools/converter/quantizer/bitpacking.h"
46 #include "tools/converter/quantizer/mixed_bit_weight_quantizer.h"
47 #include "src/lite_session.h"
48 #include "tools/converter/graphdef_transform.h"
49 #include "src/common/file_utils.h"
50 #include "src/common/quant_utils.h"
51 
52 namespace mindspore::lite::quant {
// How quantization parameters are assigned to a weight tensor.
enum WeightQuantType {
  FIXED_BIT_PER_CHANNEL = 0,  // fixed bit width, one quant param per channel
  FIXED_BIT_PER_LAYER = 1,    // fixed bit width, one quant param for the whole tensor
  MIXED_BIT_PER_LAYER = 2,    // per-layer quant with searched bit width (see mixed_bit_weight_quantizer.h)
};
// Shared constants for the quantization passes.
constexpr size_t kUint8Quantization = 8;   // default bit width (see CalQuantizationParams numBits default)
constexpr size_t kMaxBit = 8;              // presumably the max supported fixed bit width — confirm at usage sites
constexpr size_t kMaxNum1024 = 1024;       // generic cap named after its value
constexpr float kPercentBase = 100.0;      // divisor/base for percentage computations
constexpr size_t kMillisecondsBase = 10;   // NOTE(review): meaning not evident from this header — confirm at usage sites
constexpr size_t kWightIndex = 1;          // presumably the weight input index of a CNode ("Wight" typo kept for compatibility)
constexpr double kScaleThreashold = 1e-38; // scales below this are presumably treated as zero ("Threashold" typo kept)
65 
// A LiteSession paired with the Model it was built from, as returned by
// CreateSessionByFuncGraph(). Raw non-owning-looking pointers: ownership and
// cleanup responsibility are not expressed here — confirm at the call sites.
struct SessionModel {
  session::LiteSession *session{nullptr};
  Model *model{nullptr};
};
70 
/**
 * Decides which ops / tensors are eligible for weight quantization:
 * 1. when an op's weight size > m_weight_size_, it is skipped
 * 2. only conv/deconv/convdepthwise/deconvdepthwise/mul/matmul/batchmatmul are quantized
 * 3. when conv-like ops' weight channel count > m_conv_weight_quant_channel_threshold_, the op is skipped
 */
class QuantStrategy {
 public:
  explicit QuantStrategy(size_t weightSize, size_t covWeightQuantChannelThreshold = 16);

  ~QuantStrategy() = default;

  // True if the conv-like node passes the weight-size / channel-threshold rules above.
  bool CanConvOpQuantized(const CNodePtr &node) const;
  // True if the mul/matmul-like node passes the weight-size rule above.
  bool CanMulOpQuantized(const CNodePtr &node) const;
  // True if the node supports full (activation + weight) quantization.
  static bool CanOpFullQuantized(const AnfNodePtr &node);
  // True if the given input node's tensor may be quantized.
  bool CanTensorQuantized(const AnfNodePtr &inputNode) const;

  // Thresholds; public, so callers may read/adjust them directly.
  size_t m_weight_size_;
  size_t m_conv_weight_quant_channel_threshold_;

 private:
  // Primitive type-name lists backing CanConvOpQuantized / CanMulOpQuantized.
  static const std::vector<std::string> conv_types_;
  static const std::vector<std::string> mul_types_;
};
94 
// Tuning constants; their semantics are defined by usage sites elsewhere in the
// converter — NOTE(review): confirm before reusing them in new code.
constexpr float delta = 0.1;
constexpr float ratio = 10.0;
constexpr int percent = 10;
constexpr int quant_param_size = 32 * 8;  // presumably one quant param's size in bits (32 bytes)
99 
// Returns the QuantParamHolder attached to a primitive (presumably creating it
// on first access — confirm in the implementation).
QuantParamHolderPtr GetCNodeQuantHolder(const PrimitivePtr &primitive);

// Computes scale/zero-point for the float range [mMin, mMax] into quantParam.
STATUS CalQuantizationParams(schema::QuantParamT *quantParam, double mMin, double mMax, bool narrowRange = false,
                             int numBits = kUint8Quantization);

// Returns a (min, max) range derived from the collected per-batch statistics,
// presumably with outliers trimmed — confirm in the implementation.
std::pair<float, float> OutlierMethod(std::vector<float> min_datas, std::vector<float> max_datas);

// K-means clustering over elem_count floats into k clusters for epochs rounds;
// fills quantParam and returns the per-element cluster indices.
std::vector<int8_t> KMeans(float *data, size_t elem_count, size_t k, size_t epochs, schema::QuantParamT *quantParam);

// Replaces the tensor's payload with quant_datas (new_size bytes) and updates its data type.
STATUS UpdateTensorDataAndSize(const tensor::TensorPtr &weight, void *quant_datas, int new_size, TypeId new_data_type);

// Computes the channel count for per-channel quant from dims; may flip
// *channel_at_first depending on layout.
int CalChannels(const ShapeVector &dims, int channel_cnt, bool *channel_at_first);

// Fills channel layout info (whether channel is the first axis, and the channel
// count) for per-channel quantization of the primitive's input at `index`.
void CalQuantAssitInfo(const PrimitivePtr &primitive, const ShapeVector &shapes, int index, bool *channel_at_first,
                       int *channel_cnt);

// Overload for flatbuffer (schema) primitives.
void CalQuantAssitInfo(const schema::PrimitiveT &primitive, const std::vector<int> &shapes, int index,
                       bool *channel_at_first, int *channel_cnt);

// True if the tensor already carries initialized quantization parameters.
bool TensorQuantParamsInited(const schema::TensorT &tensor);
120 
121 template <typename T>
DoBitPack(const tensor::TensorPtr & weight,const size_t & bit_num,const std::vector<T> & quant_datas)122 STATUS DoBitPack(const tensor::TensorPtr &weight, const size_t &bit_num, const std::vector<T> &quant_datas) {
123   if (bit_num != 8 && bit_num != 16) {
124     std::vector<T> data{};
125     for (size_t i = 0; i < quant_datas.size(); ++i) {
126       data.emplace_back((static_cast<T>(quant_datas[i])));
127     }
128     if (bit_num > 0 && bit_num < 8) {
129       std::vector<uint8_t> pack_data{};
130       BitPack::BitPacking<T, uint8_t>(bit_num, data, &pack_data);
131       auto status =
132         UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint8_t), kNumberTypeUInt8);
133       if (status != RET_OK) {
134         MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
135         return RET_ERROR;
136       }
137     } else if (bit_num > 8 && bit_num < 16) {
138       std::vector<uint16_t> pack_data{};
139       BitPack::BitPacking<T, uint16_t>(bit_num, data, &pack_data);
140       auto status =
141         UpdateTensorDataAndSize(weight, pack_data.data(), pack_data.size() * sizeof(uint16_t), kNumberTypeUInt16);
142       if (status != RET_OK) {
143         MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
144         return RET_ERROR;
145       }
146     }
147   }
148   return RET_OK;
149 }
150 
// Mixed-bit counterpart of FixedBitQuantFilter: quantizes the weight tensor
// with a searched bit width starting from init_scale (see
// mixed_bit_weight_quantizer.h) and records quant params at input slot `index`.
STATUS MixedBitQuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitive, QuantType quant_type,
                           WeightQuantType weight_quant_type, TypeId quant_data_type, double init_scale, int index);
153 
154 template <typename T>
155 STATUS FixedBitQuantFilter(const tensor::TensorPtr &weight, const PrimitivePtr &primitive, QuantType quant_type,
156                            int quant_max, int quant_min, size_t bit_num, WeightQuantType weight_quant_type,
157                            TypeId quant_data_type, int index = 1, bool k_means = false) {
158   MS_ASSERT(weight != nullptr);
159   MS_ASSERT(primitive != nullptr);
160   auto dims = weight->shape();
161   if (weight_quant_type == FIXED_BIT_PER_CHANNEL) {
162     if (dims.size() <= 1) {
163       MS_LOG(WARNING) << "dims is " << dims.size() << " can not per_channel";
164       weight_quant_type = FIXED_BIT_PER_LAYER;
165     }
166   }
167 
168   std::vector<schema::QuantParamT> quant_params;
169   size_t elem_count = weight->DataSize();
170   auto *raw_data = static_cast<float *>(weight->data_c());
171   if (raw_data == nullptr) {
172     MS_LOG(ERROR) << "rawDatas is nullptr";
173     return RET_ERROR;
174   }
175 
176   std::vector<T> quant_data(elem_count);
177   int ret = RET_OK;
178   if (weight_quant_type == FIXED_BIT_PER_CHANNEL) {
179     bool channel_at_first = true;
180     int channel_cnt = -1;
181     CalQuantAssitInfo(primitive, dims, index, &channel_at_first, &channel_cnt);
182     auto channels = CalChannels(dims, channel_cnt, &channel_at_first);
183     if (channels == 0) {
184       MS_LOG(ERROR) << "channels is zero";
185       return RET_ERROR;
186     }
187     ret = DoPerChannelQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(),
188                                static_cast<mindspore::schema::QuantType>(quant_type), &quant_params, quant_max,
189                                quant_min, bit_num, k_means, &quant_data, channels, channel_at_first);
190     if (ret == RET_CONTINUE) {
191       return ret;
192     } else if (ret != RET_OK) {
193       MS_LOG(ERROR) << "Do per channel quant failed.";
194       return ret;
195     }
196   } else if (weight_quant_type == FIXED_BIT_PER_LAYER) {
197     ret = DoPerLayerQuant<T>(static_cast<float *>(weight->data_c()), weight->DataSize(), &quant_params, quant_max,
198                              quant_min, bit_num, k_means, &quant_data);
199     if (ret != RET_OK) {
200       MS_LOG(ERROR) << "Do per layer quant failed.";
201       return ret;
202     }
203   } else {
204     MS_LOG(ERROR) << "Unsupported weight quant type:" << weight_quant_type;
205   }
206   auto status = UpdateTensorDataAndSize(weight, quant_data.data(), quant_data.size() * sizeof(T), quant_data_type);
207   if (status != RET_OK) {
208     MS_LOG(ERROR) << "UpdateTensorDataAndSize error";
209     return RET_ERROR;
210   }
211 
212 #ifdef HUFFMAN_ENCODE
213   auto huffman_encode = std::make_unique<lite::HuffmanEncode>();
214   ret = huffman_encode->DoHuffmanEncode(weight, primitive, quant_datas.data(), bit_num);
215   if (ret != RET_OK) {
216     MS_LOG(ERROR) << "Do huffman encode failed.";
217     return ret;
218   }
219 #endif
220 
221   if (quant_params.empty()) {
222     MS_LOG(ERROR) << "quant_params empty";
223     return RET_ERROR;
224   }
225   auto quant_param_holder = GetCNodeQuantHolder(primitive);
226   if (quant_type == QuantType_QUANT_ALL) {
227     quant_param_holder->set_input_quant_param(index, quant_params);
228   } else {
229     quant_param_holder->set_input_quant_param(index, quant_params);
230   }
231   return ret;
232 }
233 
// utils

// Returns the primitive type name of a cnode (e.g. for logging).
std::string NodePrimitiveType(const CNodePtr &cnode);

// Builds a lite Model from func_graph and creates an inference session for it;
// both come back in the returned SessionModel (see its ownership note).
SessionModel CreateSessionByFuncGraph(const FuncGraphPtr &func_graph, const converter::Flags &flags, int thread_num);

// Presumably deep-copies the given FuncGraph — confirm in the implementation.
FuncGraphPtr CopyFuncGraph(const FuncGraphPtr &);

// Extracts the Parameter node and its tensor payload from an AnfNode into the
// two out-params.
void GetLiteParameter(const AnfNodePtr &node, ParameterPtr *param_node, tensor::TensorPtr *tensor_info);

// True if cnode's primitive type is one of support_primitive_types.
bool CheckNodeInSet(const CNodePtr &cnode, const std::set<PrimitivePtr> &support_primitive_types);
245 }  // namespace mindspore::lite::quant
246 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZE_UTIL_H_
247