/**
 * Copyright 2019-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_FULL_QUANT_QUANTIZER_H_
#define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_FULL_QUANT_QUANTIZER_H_

#include <string>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include <cfloat>
#include <map>
#include <set>
#include "ops/primitive_c.h"
#include "schema/inner/model_generated.h"
#include "tools/converter/quantizer/quantizer.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/converter/preprocess/preprocess_param.h"
#include "tools/converter/quantizer/calibrator.h"
#include "tools/converter/quantizer/data_distribution.h"
#include "src/common/quant_utils.h"
#include "tools/converter/quantizer/quant_strategy.h"
#include "tools/converter/quantizer/fixed_bit_weight_quantization.h"

namespace mindspore::lite::quant {
// Initial numeric configuration for full (activation + weight) quantization:
// data types, bit width, and the integer clamping ranges / symmetry flags used
// for activations, per-channel weights, and per-layer weights.
struct FullQuantInitParam {
  // Config
  TypeId activation_quant_data_type_{kNumberTypeInt8};
  TypeId activation_target_data_type_{kNumberTypeInt8};
  // quant and export are same data type.
  TypeId weight_data_type_{kNumberTypeInt8};
  size_t bit_num_{k8Bit};
  // Activation quantized-value range: full int8 range [-128, 127].
  int activation_q_min_{INT8_MIN};
  int activation_q_max_{INT8_MAX};
  // Per-channel weight range is [-127, 127] (note -INT8_MAX, not INT8_MIN):
  // a symmetric range so that zero-point can stay at 0 — consistent with
  // weight_channel_symmetric_ defaulting to true below.
  int weight_channel_q_min_{-INT8_MAX};
  int weight_channel_q_max_{INT8_MAX};
  // Per-layer weight range uses the full asymmetric int8 range [-128, 127].
  int weight_layer_q_min_{INT8_MIN};
  int weight_layer_q_max_{INT8_MAX};
  bool activation_symmetric_{false};
  bool weight_channel_symmetric_{true};
  bool weight_layer_symmetric_{false};
};

// Post-training full quantizer: calibrates activation distributions by running
// inference on calibration data, then quantizes both activations and weights
// of the graph. Concrete algorithm details live in the .cc file.
class FullQuantQuantizer : public Quantizer {
 public:
  // Seeds the bit width from the common quantization parameters carried by the
  // converter parameter object; the remaining init_param_ fields keep their
  // in-class defaults until an Init*Config() routine adjusts them.
  explicit FullQuantQuantizer(const std::shared_ptr<ConverterPara> &param) : Quantizer(param) {
    init_param_.bit_num_ = param_->commonQuantParam.bit_num;
  }

  ~FullQuantQuantizer() override;

  // Entry point: performs full quantization of the given graph.
  // Returns a status code (0/RET_OK convention presumed — defined in the .cc).
  int DoQuantize(FuncGraphPtr func_graph) override;

 private:
  // Prepares the device-specific configuration for the given graph
  // (dispatches to one of the Init*Config() helpers — see the .cc).
  int InitDeviceConfig(const FuncGraphPtr &func_graph);

  // Runs inference over the calibration inputs, gathering statistics of the
  // kind selected by collect_type.
  int DoInference(CollectType collect_type);

  // Refines the histogram/divergence intervals after an initial statistics pass.
  int UpdateDivergeInterval();

  // Quantizes a single "simple" operator node.
  int QuantNodeSimpleOp(const CNodePtr &cnode);

  // Walks the graph and quantizes each eligible node.
  int QuantNode(const FuncGraphPtr &func_graph);

  // Attaches quantization parameters derived from `info` to a graph input.
  int QuantNodeGraphInput(const PrimitivePtr &primitive, const AnfNodePtr &input_node,
                          const std::unique_ptr<DataDistribution> &info);

  // Attaches quantization parameters to a CNode input edge.
  int QuantNodeCNode(const CNodePtr &cnode, const AnfNodePtr &input_node,
                     const std::unique_ptr<DataDistribution> &info);

  // Builds the schema quant params for an input from its collected distribution.
  std::vector<schema::QuantParamT> GetQuantParam(const AnfNodePtr &input_node,
                                                 const std::unique_ptr<DataDistribution> &info) const;

  // Quantizes one weight tensor of `cnode`; per_channel selects per-channel
  // (default) versus per-layer weight quantization.
  int QuantWeight(const CNodePtr &cnode, const PrimitivePtr &primitive, const AnfNodePtr &weight, int input_index,
                  const tensor::TensorPtr &tensor_info, bool per_channel = true);

  // Weight quantization for a Parameter-node weight.
  int DoParameterWeightQuant(const CNodePtr &cnode, const ParameterPtr &weight, const PrimitivePtr &primitive,
                             int input_index, bool per_channel = true);

  // Weight quantization for a ValueNode-held weight.
  int DoValueNodeWeightQuant(const CNodePtr &cnode, const ValueNodePtr &weight, const PrimitivePtr &primitive,
                             int input_index, bool per_channel = true);

  // Quantizes a non-weight Parameter input of `cnode`.
  int DoParameterNodeQuant(const CNodePtr &cnode, const ParameterPtr &input_node, size_t input_index);

  // Quantizes a non-weight ValueNode input of `cnode`.
  int DoValueNodeQuant(const CNodePtr &cnode, const ValueNodePtr &input_node, size_t input_index);

  // Checks whether `input_node` is a weight that this quantizer can handle.
  int IsSupportWeightQuant(const AnfNodePtr &input_node);

  // Derives the q_min/q_max fields of init_param_ from bit width and symmetry.
  void InitQMinMax();

  // Target-specific presets: each fills init_param_ and the op-support sets
  // below for one backend.
  void InitNvGpuConfig();

  void InitCpuConfig();

  void InitKirinConfig();

  void InitDSPConfig();

  void InitAscendConfig();

  // Marks which nodes of the graph are to be quantized.
  int MarkQuantNode(const FuncGraphPtr &func_graph);

  // KL-divergence-based calibration pass.
  int QuantWithKL();

  // Quantizes the i-th input of `cnode` when it is a value node.
  int QuantValueNode(const CNodePtr &cnode, const AnfNodePtr &input_node, size_t i);

 private:
  FullQuantInitParam init_param_;

  // Op/activation capability sets populated by the Init*Config() presets.
  std::set<PrimitivePtr> support_int8_ops_;
  std::set<PrimitivePtr> skip_check_dtype_ops_;
  std::set<PrimitivePtr> per_channel_ops_;
  std::set<mindspore::ActivationType> support_activation_;

  std::shared_ptr<Calibrator> calibrator_{nullptr};
  std::shared_ptr<QuantStrategy> quant_strategy_{nullptr};
  // FP32 model used to run calibration inference.
  std::shared_ptr<mindspore::Model> fp32_ms_model_{nullptr};

  // key is tensor_name
  std::map<std::string, std::vector<schema::QuantParamT>> weight_quant_params_bak_;
  FixedBitWeightQuantization fixed_bit_quant_;
};
}  // namespace mindspore::lite::quant
#endif  // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_FULL_QUANT_QUANTIZER_H_