• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright 2019-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_FULL_QUANT_QUANTIZER_H_
18 #define MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_FULL_QUANT_QUANTIZER_H_
19 
#include <cfloat>
#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "ops/primitive_c.h"
#include "schema/inner/model_generated.h"
#include "src/common/quant_utils.h"
#include "tools/converter/preprocess/preprocess_param.h"
#include "tools/converter/quantizer/calibrator.h"
#include "tools/converter/quantizer/data_distribution.h"
#include "tools/converter/quantizer/fixed_bit_weight_quantization.h"
#include "tools/converter/quantizer/quant_strategy.h"
#include "tools/converter/quantizer/quantize_util.h"
#include "tools/converter/quantizer/quantizer.h"
38 
39 namespace mindspore::lite::quant {
40 struct FullQuantInitParam {
41   // Config
42   TypeId activation_quant_data_type_{kNumberTypeInt8};
43   TypeId activation_target_data_type_{kNumberTypeInt8};
44   // quant and export are same data type.
45   TypeId weight_data_type_{kNumberTypeInt8};
46   size_t bit_num_{k8Bit};
47   int activation_q_min_{INT8_MIN};
48   int activation_q_max_{INT8_MAX};
49   int weight_channel_q_min_{-INT8_MAX};
50   int weight_channel_q_max_{INT8_MAX};
51   int weight_layer_q_min_{INT8_MIN};
52   int weight_layer_q_max_{INT8_MAX};
53   bool activation_symmetric_{false};
54   bool weight_channel_symmetric_{true};
55   bool weight_layer_symmetric_{false};
56 };
57 
58 class FullQuantQuantizer : public Quantizer {
59  public:
FullQuantQuantizer(const std::shared_ptr<ConverterPara> & param)60   explicit FullQuantQuantizer(const std::shared_ptr<ConverterPara> &param) : Quantizer(param) {
61     init_param_.bit_num_ = param_->commonQuantParam.bit_num;
62   }
63 
64   ~FullQuantQuantizer() override;
65 
66   int DoQuantize(FuncGraphPtr func_graph) override;
67 
68  private:
69   int InitDeviceConfig(const FuncGraphPtr &func_graph);
70 
71   int DoInference(CollectType collect_type);
72 
73   int UpdateDivergeInterval();
74 
75   int QuantNodeSimpleOp(const CNodePtr &cnode);
76 
77   int QuantNode(const FuncGraphPtr &func_graph);
78 
79   int QuantNodeGraphInput(const PrimitivePtr &primitive, const AnfNodePtr &input_node,
80                           const std::unique_ptr<DataDistribution> &info);
81 
82   int QuantNodeCNode(const CNodePtr &cnode, const AnfNodePtr &input_node,
83                      const std::unique_ptr<DataDistribution> &info);
84 
85   std::vector<schema::QuantParamT> GetQuantParam(const AnfNodePtr &input_node,
86                                                  const std::unique_ptr<DataDistribution> &info) const;
87 
88   int QuantWeight(const CNodePtr &cnode, const PrimitivePtr &primitive, const AnfNodePtr &weight, int input_index,
89                   const tensor::TensorPtr &tensor_info, bool per_channel = true);
90 
91   int DoParameterWeightQuant(const CNodePtr &cnode, const ParameterPtr &weight, const PrimitivePtr &primitive,
92                              int input_index, bool per_channel = true);
93 
94   int DoValueNodeWeightQuant(const CNodePtr &cnode, const ValueNodePtr &weight, const PrimitivePtr &primitive,
95                              int input_index, bool per_channel = true);
96 
97   int DoParameterNodeQuant(const CNodePtr &cnode, const ParameterPtr &input_node, size_t input_index);
98 
99   int DoValueNodeQuant(const CNodePtr &cnode, const ValueNodePtr &input_node, size_t input_index);
100 
101   int IsSupportWeightQuant(const AnfNodePtr &input_node);
102 
103   void InitQMinMax();
104 
105   void InitNvGpuConfig();
106 
107   void InitCpuConfig();
108 
109   void InitKirinConfig();
110 
111   void InitDSPConfig();
112 
113   void InitAscendConfig();
114 
115   int MarkQuantNode(const FuncGraphPtr &func_graph);
116 
117   int QuantWithKL();
118 
119   int QuantValueNode(const CNodePtr &cnode, const AnfNodePtr &input_node, size_t i);
120 
121  private:
122   FullQuantInitParam init_param_;
123 
124   std::set<PrimitivePtr> support_int8_ops_;
125   std::set<PrimitivePtr> skip_check_dtype_ops_;
126   std::set<PrimitivePtr> per_channel_ops_;
127   std::set<mindspore::ActivationType> support_activation_;
128 
129   std::shared_ptr<Calibrator> calibrator_{nullptr};
130   std::shared_ptr<QuantStrategy> quant_strategy_{nullptr};
131   std::shared_ptr<mindspore::Model> fp32_ms_model_{nullptr};
132 
133   // key is tensor_name
134   std::map<std::string, std::vector<schema::QuantParamT>> weight_quant_params_bak_;
135   FixedBitWeightQuantization fixed_bit_quant_;
136 };
137 }  // namespace mindspore::lite::quant
138 #endif  // MINDSPORE_LITE_TOOLS_CONVERTER_QUANTIZER_FULL_QUANT_QUANTIZER_H_
139