1 /**
2 * Copyright 2020-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZE_UTIL_H_
18 #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZE_UTIL_H_
19
20 #ifndef _MSC_VER
21 #include <dirent.h>
22 #endif
23
24 #include <sys/stat.h>
25 #include <memory>
26 #include <string>
27 #include <cmath>
28 #include <set>
29 #include <array>
30 #include <vector>
31 #include <algorithm>
32 #include <limits>
33 #include <utility>
34 #include <map>
35 #include <functional>
36 #include "ir/anf.h"
37 #include "src/tensor.h"
38 #include "include/api/model.h"
39 #include "include/errorcode.h"
40 #include "tools/converter/cxx_api/converter_para.h"
41 #include "tools/converter/quantizer/quant_param_holder.h"
42 #include "tools/converter/quantizer/quant_params.h"
43 #include "tools/converter/quantizer/mixed_bit_weight_quantization.h"
44 #include "tools/common/string_util.h"
45 #include "ops/quant_dtype_cast.h"
46
47 namespace mindspore::lite::quant {
48 int UpdateTensorDataAndSize(const AnfNodePtr &node, const tensor::TensorPtr &weight, const void *quant_datas,
49 size_t new_size, TypeId new_data_type);
50
51 int GetPreferredDim(const CNodePtr &cnode, int input_index, const std::vector<int> &dims);
52
53 int GetFollowedNodePreferredDim(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const std::vector<int> &dims);
54
55 std::vector<int> ConvertShapeVectorToInt32(const ShapeVector &dims);
56
57 int DeQuantData(const mindspore::MSTensor *tensor, std::vector<double> *dequant_data);
58
59 int GetQuantType(const CNodePtr &cnode, quant::QuantType *quant_type);
60
61 int GetQuantTypeNew(const CNodePtr &cnode, quant::QuantType *quant_type);
62
63 void GetFuncGraphs(const FuncGraphPtr &func_graph, std::set<FuncGraphPtr> *all_func_graphs);
64
65 int UpdateDataType(const AnfNodePtr &node, TypeId new_data_type);
66
67 bool IsGraphInDTypeCast(const CNodePtr &cnode);
68
69 bool IsGraphOutDTypeCast(const FuncGraphPtr &func_graph, const CNodePtr &cnode);
70
71 int GetCastNodeType(const FuncGraphPtr &func_graph, const CNodePtr &cnode, CastNodeType *cast_node_type);
72
73 std::string NodePrimitiveType(const CNodePtr &cnode);
74
75 Status BuildModelByFuncGraph(const std::shared_ptr<mindspore::Model> &model, const FuncGraphPtr &func_graph,
76 const std::shared_ptr<mindspore::ConverterPara> ¶m, size_t *size);
77
78 mindspore::lite::Tensor *MSTensorToLiteTensor(const mindspore::MSTensor &tensor);
79
80 std::vector<mindspore::lite::Tensor *> MSTensorToLiteTensors(const std::vector<mindspore::MSTensor> &src_tensors);
81
82 void GetParameterAndTensor(const AnfNodePtr &node, ParameterPtr *param_node, tensor::TensorPtr *tensor_info);
83
84 bool CheckNodeInSet(const CNodePtr &cnode, const std::set<PrimitivePtr> &support_primitive_types);
85
86 int GetElementNumFromShape(const std::vector<int> &dims, int *total_size);
87
88 int GetBucketAllIndex(const std::vector<int> &dims, int preferred_dim,
89 std::vector<std::vector<size_t>> *buckets_data_index);
90
91 bool CheckControlFlowType(const AnfNodePtr &node);
92
93 bool CheckFollowedNodeInSet(const FuncGraphPtr &func_graph, const CNodePtr &cnode,
94 const std::set<PrimitivePtr> &support_primitive_types);
95
96 int CloneFuncGraph(const FuncGraphPtr &func_graph, const std::shared_ptr<ConverterPara> ¶m,
97 FuncGraphPtr *func_graph_bak);
98
99 int ConvertFp16ToFp32(const FuncGraphPtr &old_graph);
100
101 int ConvertFp32ToFp16(const FuncGraphPtr &old_graph);
102
103 int ConvertCNodeFp32ToFp16(const CNodePtr &cnode);
104
105 int ConvertCNodeFp16ToFp32(const CNodePtr &cnode);
106
107 int MarkOriginDataType(const FuncGraphPtr &func_graph);
108
109 int DumpGraph(const FuncGraphPtr &func_graph, const std::shared_ptr<ConverterPara> ¶m,
110 const std::string &save_path);
111
112 bool IsPerchannelWeight(const std::vector<schema::QuantParamT> &quant_params, const tensor::TensorPtr &weight,
113 int preferred_dim);
114 QuantizationParamPtr ConvertQuantParamTToQuantizationParam(const std::vector<schema::QuantParamT> &quant_params);
115
116 std::vector<schema::QuantParamT> ConvertQuantizationParamToQuantParamT(const QuantizationParamPtr &quantization_param);
117
118 std::vector<schema::QuantParamT> GetInputNodeQuantParam(const CNodePtr &cnode, size_t index,
119 size_t multi_ouput_index = 0);
120 STATUS SetInputNodeQuantParam(const CNodePtr &cnode, size_t index, const std::vector<schema::QuantParamT> &quant_param);
121
122 tensor::TensorPtr GetNodeTensor(const AnfNodePtr &node);
123
124 int RemoveInputNodeQuantParam(const CNodePtr &cnode, size_t index);
125
126 std::vector<schema::QuantParamT> CloneQuantParam(const std::vector<schema::QuantParamT> &src);
127
128 int CalBiasQuantParams(const std::vector<schema::QuantParamT> &active_params,
129 const std::vector<schema::QuantParamT> &weight_params,
130 std::vector<schema::QuantParamT> *bias_params);
131
132 bool IsAntiQuantModeNodes(const AnfNodePtr &node);
133
134 STATUS GetScaleZpFromAntiQuantModeNodes(const AnfNodePtr &node, ParameterPtr *scale_param_node,
135 ParameterPtr *zp_param_node);
136
137 STATUS RemoveAntiQuantModeNodes(const FuncGraphPtr &func_graph, const AnfNodePtr &node, int index);
138
139 std::vector<std::vector<int64_t>> ExtractStrategy(const ValuePtr &stra);
140
141 std::vector<schema::QuantParamT> CalQuantParamWithMinMax(const tensor::TensorPtr &min_value,
142 const tensor::TensorPtr &max_value, bool symmetric);
143
144 std::vector<schema::QuantParamT> GetQuantParamWithFakeQuantNode(const CNodePtr &fake_quant_node, bool symmetric);
145
146 template <typename T>
DeQuantData(const int8_t * tensor_data,int64_t elements_num,std::vector<mindspore::QuantParam> quant_params,std::vector<T> * dequant_data)147 int DeQuantData(const int8_t *tensor_data, int64_t elements_num, std::vector<mindspore::QuantParam> quant_params,
148 std::vector<T> *dequant_data) {
149 if (quant_params.size() != 1) {
150 MS_LOG(ERROR) << "unexpected quant_params size: " << quant_params.size() << " only support per-layer now.";
151 return RET_ERROR;
152 }
153 auto scale = quant_params[0].scale;
154 auto zp = quant_params[0].zero_point;
155 dequant_data->resize(elements_num);
156 for (int64_t i = 0; i < elements_num; i++) {
157 dequant_data->at(i) = scale * (tensor_data[i] - zp);
158 }
159 return RET_OK;
160 }
161
162 // quant and dequant
163 // quant_data = std::round(origin_data / scale + zero_point)
164 // new_data = scale * (quant_data - zero_point)
165 template <typename T>
QuantDeQuantData(float origin_data,const schema::QuantParamT * quant_param,int quant_max,int quant_min)166 T QuantDeQuantData(float origin_data, const schema::QuantParamT *quant_param, int quant_max, int quant_min) {
167 MS_ASSERT(quant_param != nullptr);
168 MS_ASSERT(quant_param->inited);
169 const auto scale = quant_param->scale;
170 const int zero_point = quant_param->zeroPoint;
171 if (scale <= SCALE_THREASHOLD) {
172 return 0;
173 }
174 return [quant_max, quant_min, zero_point, scale, origin_data] {
175 auto quant_data = std::round(origin_data / scale + zero_point);
176 if (quant_data > quant_max) {
177 quant_data = quant_max;
178 } else if (quant_data < quant_min) {
179 quant_data = quant_min;
180 }
181 return static_cast<T>(scale * (quant_data - zero_point));
182 }();
183 }
184 } // namespace mindspore::lite::quant
185 #endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANTIZE_UTIL_H_
186