/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANT_PARAMS_H_
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANT_PARAMS_H_
#include <map>
#include <set>
#include <string>
#include <vector>
#include "mindspore/core/ops/lite_ops.h"
#include "mindspore/core/ops/math_ops.h"
#include "mindspore/core/ops/nn_ops.h"
#include "schema/inner/model_generated.h"
#include "src/common/quant_utils.h"
namespace mindspore::lite::quant {
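// Granularity of weight quantization: fixed bit width per channel, fixed bit width per layer, or mixed bit width.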
enum WeightQuantType {
  FIXED_BIT_PER_CHANNEL = 0,
  FIXED_BIT_PER_LAYER = 1,
  MIXED_BIT_PER_LAYER = 2,
};
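// Bit widths and miscellaneous numeric constants used throughout the quantizer.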
constexpr size_t k1Bit = 1;
constexpr size_t k2Bit = 2;
constexpr size_t k8Bit = 8;
constexpr size_t k10Bit = 10;
constexpr size_t k16Bit = 16;
constexpr size_t k32Bit = 32;
constexpr size_t kBitNumPerByte = 8;
constexpr size_t kMaxNum1024 = 1024;
constexpr size_t kMillisecondsBase = 10;
constexpr float kDelta = 0.1;
constexpr float kRatio = 10.0;
constexpr int kCpuBindMode = 1;
constexpr int kPrimIndex = 0;
constexpr int kPrimOffset = 1;
constexpr int kU8ZeroPointOffset = 128;
constexpr int kMinIterations = 40;
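// Attribute keys used when (de)serializing quantization parameters.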
constexpr auto kQuantParam = "quant_param";
constexpr auto kGraphInputQuantParam = "graph_input_quant_param";
constexpr auto kGraphOutputQuantParam = "graph_output_quant_param";
constexpr auto kQuantType = "quant_type";
constexpr auto kClusterQuant = "cluster_quant";
constexpr auto kClusterCentroidList = "cluster_centroid_list";
constexpr auto kLinearQuant = "linear_quant";
constexpr auto kScaleList = "scale_list";
constexpr auto kZeroPointList = "zero_point_list";
constexpr auto kMinList = "min_list";
constexpr auto kMaxList = "max_list";
constexpr auto kVarCorrList = "var_corr_list";
constexpr auto kMeanCorrList = "mean_corr_list";
constexpr auto kNumBitList = "num_bit_list";
constexpr auto kNarrowRangeList = "narrow_range_list";
constexpr auto kDstDtypeList = "dst_dtype_list";
constexpr auto kRoundTypeList = "round_type_list";
constexpr auto kMultiplierList = "multiplier_list";
constexpr auto kChannelAxis = "channel_axis";
constexpr float kBinarySearchStep = 2.0;

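// Operators whose primitive may carry a bias input.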
const std::set<PrimitivePtr> kHasBiasOperator = {prim::kPrimConv2DFusion,    prim::kPrimConv2dTransposeFusion,
                                                 prim::kPrimMatMulFusion,    prim::kPrimFullConnection,
                                                 prim::kPrimLayerNormFusion, prim::kPrimMatMul};
const std::set<PrimitivePtr> kUint8toFP32Operator = {prim::kPrimDetectionPostProcess};
const std::set<TypeId> kFullQuantDType = {kNumberTypeInt8, kNumberTypeUInt8, kNumberTypeFloat32};

enum QuantType {
  QUANT_NONE = 0,
  QUANT_WEIGHT = 4,
  QUANT_ALL = 5,
  QUANT_DYNAMIC = 6,
};

enum ActivationQuantizedMethod {
  MAX_MIN = 0,
  KL = 1,
  REMOVAL_OUTLIER = 2,
};

enum TargetDevice {
  CPU,
  KIRIN,
  NVGPU,
  DSP,
  ASCEND,
};

enum DebugMode {
  FAST,
  DETAIL,
};

enum CastNodeType {
  kNone,
  kQuant,
  kDeQuant,
};

enum InsertDirection {
  FORWARD,
  BACKWARD,
};

enum DequantStrategy {
  DEFAULT,  // dequantize in the initial phase
  ON_THE_FLY,
};

enum WeightQuantStrategy {
  MAX_MIN_ALGORITHM,
  GPTQ_ALGORITHM,
};

enum PrecisionMode {
  QUANT,
  FLOAT32,
};

enum DynamicQuantStrategy {
  ACTIVATION_LAYER_WEIGHT_CHANNEL,
  ACTIVATION_CHANNEL_WEIGHT_LAYER,
};

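// Settings shared by all quantization modes.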
struct CommonQuantParam {
  QuantType quant_type = QUANT_NONE;
  int bit_num = 8;
  int min_quant_weight_size = 0;
  int min_quant_weight_channel = 16;
  bool is_debug = false;
  std::string debug_info_save_path;
  DebugMode debug_mode = DETAIL;
  std::set<std::string> skip_quant_node;
  int thread_num = 4;
  bool enable_encode = true;
  std::string workspace;  // support for models larger than 2 GB
};

struct WeightQuantParam {
  DequantStrategy dequant_strategy = DEFAULT;
  WeightQuantStrategy quant_strategy = MAX_MIN_ALGORITHM;
  bool update_mindir = true;
  int max_segments = 1;
  bool per_channel = true;
  bool bias_correction = true;
};

struct MixedBitWeightQuantParam {
  double init_scale = 0.02;
  bool auto_tune = false;
  bool use_cv_data = false;
  int max_iterations = kMinIterations;
};

struct FullQuantParam {
  ActivationQuantizedMethod activation_quant_method = MAX_MIN;
  bool bias_correction = true;
  bool per_channel = true;
  TargetDevice target_device = CPU;
  double smooth_alpha = 0.5;
  bool enable_smooth_shift = false;
};

struct TransformQuantParam {
  PrecisionMode precision_mode = QUANT;
};

struct DynamicQuantParam {
  DynamicQuantStrategy quant_strategy = quant::ACTIVATION_LAYER_WEIGHT_CHANNEL;
};

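// Result of a binary search for a quantization scale: a status code and the selected scale.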
typedef struct {
  int status;
  float scale;
} BinarySearchResult;

typedef struct {
  float inv_norm;
  lite::MinMax mm;
} LayerParam;
}  // namespace mindspore::lite::quant

#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANT_PARAMS_H_