1 /** 2 * Copyright 2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANT_PARAMS_H_ 18 #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANT_PARAMS_H_ 19 #include <map> 20 #include <set> 21 #include <string> 22 #include <vector> 23 #include "mindspore/core/ops/lite_ops.h" 24 #include "mindspore/core/ops/math_ops.h" 25 #include "mindspore/core/ops/nn_ops.h" 26 #include "schema/inner/model_generated.h" 27 #include "src/common/quant_utils.h" 28 namespace mindspore::lite::quant { 29 enum WeightQuantType { 30 FIXED_BIT_PER_CHANNEL = 0, 31 FIXED_BIT_PER_LAYER = 1, 32 MIXED_BIT_PER_LAYER = 2, 33 }; 34 constexpr size_t k1Bit = 1; 35 constexpr size_t k2Bit = 2; 36 constexpr size_t k8Bit = 8; 37 constexpr size_t k10Bit = 10; 38 constexpr size_t k16Bit = 16; 39 constexpr size_t k32Bit = 32; 40 constexpr size_t kBitNumPerByte = 8; 41 constexpr size_t kMaxNum1024 = 1024; 42 constexpr size_t kMillisecondsBase = 10; 43 constexpr float kDelta = 0.1; 44 constexpr float kRatio = 10.0; 45 constexpr int kCpuBindMode = 1; 46 constexpr int kPrimIndex = 0; 47 constexpr int kPrimOffset = 1; 48 constexpr int kU8ZeroPointOffset = 128; 49 constexpr int kMinIterations = 40; 50 constexpr auto kQuantParam = "quant_param"; 51 constexpr auto kGraphInputQuantParam = "graph_input_quant_param"; 52 constexpr auto kGraphOutputQuantParam = "graph_output_quant_param"; 53 constexpr auto kQuantType = "quant_type"; 54 constexpr auto kClusterQuant = "cluster_quant"; 55 constexpr auto kClusterCentroidList = "cluster_centroid_list"; 56 constexpr auto kLinearQuant = "linear_quant"; 57 constexpr auto kScaleList = "scale_list"; 58 constexpr auto kZeroPointList = "zero_point_list"; 59 constexpr auto kMinList = "min_list"; 60 constexpr auto kMaxList = "max_list"; 61 constexpr auto kVarCorrList = "var_corr_list"; 62 constexpr auto kMeanCorrList = "mean_corr_list"; 63 constexpr auto kNumBitList = "num_bit_list"; 64 constexpr auto kNarrowRangeList = "narrow_range_list"; 65 constexpr auto kDstDtypeList = "dst_dtype_list"; 66 constexpr auto kRoundTypeList = "round_type_list"; 67 constexpr auto kMultiplierList = "multiplier_list"; 68 constexpr auto kChannelAxis = "channel_axis"; 69 constexpr float kBinarySearchStep = 2.0; 70 71 const std::set<PrimitivePtr> kHasBiasOperator = {prim::kPrimConv2DFusion, prim::kPrimConv2dTransposeFusion, 72 prim::kPrimMatMulFusion, prim::kPrimFullConnection, 73 prim::kPrimLayerNormFusion, prim::kPrimMatMul}; 74 const std::set<PrimitivePtr> kUint8toFP32Operator = {prim::kPrimDetectionPostProcess}; 75 const std::set<TypeId> kFullQuantDType = {kNumberTypeInt8, kNumberTypeUInt8, kNumberTypeFloat32}; 76 77 enum QuantType { 78 QUANT_NONE = 0, 79 QUANT_WEIGHT = 4, 80 QUANT_ALL = 5, 81 QUANT_DYNAMIC = 6, 82 }; 83 84 enum ActivationQuantizedMethod { 85 MAX_MIN = 0, 86 KL = 1, 87 REMOVAL_OUTLIER = 2, 88 }; 89 90 enum TargetDevice { 91 CPU, 92 KIRIN, 93 NVGPU, 94 DSP, 95 ASCEND, 96 }; 97 98 enum DebugMode { 99 FAST, 100 DETAIL, 101 }; 102 103 enum CastNodeType { 104 kNone, 105 kQuant, 106 kDeQuant, 107 }; 108 109 enum InsertDirection { 110 FORWARD, 111 BACKWARD, 112 }; 113 114 enum DequantStrategy { 115 DEFAULT, // initial phase to dequant 116 ON_THE_FLY, 117 }; 118 119 enum WeightQuantStrategy { 120 MAX_MIN_ALGORITHM, 121 GPTQ_ALGORITHM, 122 }; 123 124 enum PrecisionMode { 125 QUANT, 126 FLOAT32, 127 }; 128 129 enum DynamicQuantStrategy { 130 ACTIVATION_LAYER_WEIGHT_CHANNEL, 131 ACTIVATION_CHANNEL_WEIGHT_LAYER, 132 }; 133 134 struct CommonQuantParam { 135 QuantType quant_type = QUANT_NONE; 136 int bit_num = 8; 137 int min_quant_weight_size = 0; 138 int min_quant_weight_channel = 16; 139 bool is_debug = false; 140 std::string debug_info_save_path; 141 DebugMode debug_mode = DETAIL; 142 std::set<std::string> skip_quant_node; 143 int thread_num = 4; 144 bool enable_encode = true; 145 std::string workspace; // support for model larger than 2G 146 }; 147 148 struct WeightQuantParam { 149 DequantStrategy dequant_strategy = DEFAULT; 150 WeightQuantStrategy quant_strategy = MAX_MIN_ALGORITHM; 151 bool update_mindir = true; 152 int max_segments = 1; 153 bool per_channel = true; 154 bool bias_correction = true; 155 }; 156 157 struct MixedBitWeightQuantParam { 158 double init_scale = 0.02; 159 bool auto_tune = false; 160 bool use_cv_data = false; 161 int max_iterations = kMinIterations; 162 }; 163 164 struct FullQuantParam { 165 ActivationQuantizedMethod activation_quant_method = MAX_MIN; 166 bool bias_correction = true; 167 bool per_channel = true; 168 TargetDevice target_device = CPU; 169 double smooth_alpha = 0.5f; 170 bool enable_smooth_shift = false; 171 }; 172 173 struct TransformQuantParam { 174 PrecisionMode precision_mode = QUANT; 175 }; 176 177 struct DynamicQuantParam { 178 DynamicQuantStrategy quant_strategy = quant::ACTIVATION_LAYER_WEIGHT_CHANNEL; 179 }; 180 181 typedef struct { 182 int status; 183 float scale; 184 } BinarySearchResult; 185 186 typedef struct { 187 float inv_norm; 188 lite::MinMax mm; 189 } LayerParam; 190 } // namespace mindspore::lite::quant 191 192 #endif // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_QUANT_PARAMS_H_ 193