• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_NNACL_QUANTIZATION_QUANTIZE_H_
18 #define MINDSPORE_NNACL_QUANTIZATION_QUANTIZE_H_
19 
20 #include <math.h>
21 #include <limits.h>
22 #include "nnacl/op_base.h"
23 
24 #define INPUT_PER_CHANNEL 0b001  // bit 0 (value 1). NOTE(review): presumably OR-ed into ConvQuantArg.per_channel_ — confirm against users.
25 #define FILTER_PER_CHANNEL 0b010  // bit 1 (value 2); distinct from the other two flags so they can be combined.
26 #define OUTPUT_PER_CHANNEL 0b100  // bit 2 (value 4). The 0b literal prefix is a compiler extension in C before C23.
27 
28 typedef struct ConvQuantArg {  // Quantization argument bundle for convolution (per its name); holds pointers only, no storage.
29   RoundingMode round_mode_;  // rounding mode selector; RoundingMode is declared outside this header
30   CalFixedMultiplierMode quant_multiplier_mode_;  // multiplier-calculation mode selector; type declared outside this header
31   QuantArg *input_quant_args_;  // presumably input_arg_num_ entries — TODO confirm length and ownership
32   QuantArg *filter_quant_args_;  // presumably filter_arg_num_ entries — TODO confirm
33   QuantArg *output_quant_args_;  // presumably output_arg_num_ entries — TODO confirm
34   double *real_multiplier_;  // NOTE(review): array length is not visible here; likely one per channel — confirm
35   int32_t *left_shift_;  // NOTE(review): looks parallel to quant_multiplier_ — confirm
36   int32_t *right_shift_;  // NOTE(review): looks parallel to quant_multiplier_ — confirm
37   int32_t *quant_multiplier_;  // presumably the integer form of real_multiplier_ — confirm
38   int32_t *out_act_min_;  // pointer (not a scalar) to the lower output clamp value(s); length not visible here
39   int32_t *out_act_max_;  // pointer (not a scalar) to the upper output clamp value(s); length not visible here
40   size_t input_arg_num_;  // count associated with the input quant args
41   size_t filter_arg_num_;  // count associated with the filter quant args
42   size_t output_arg_num_;  // count associated with the output quant args
43   uint8_t per_channel_;  // NOTE(review): looks like a bitmask of INPUT_/FILTER_/OUTPUT_PER_CHANNEL — confirm
44 } ConvQuantArg;
45 
46 typedef struct ConcatQuantArg {  // Quantization arguments for concat (per its name).
47   QuantArg *in_args_;  // pointer to input quant args; presumably one entry per concat input — TODO confirm count and ownership
48   QuantArg out_args_;  // output quant args, stored by value
49   int8_t output_activation_min_;  // presumably the lower output clamp; int8_t here, whereas sibling structs in this header use int
50   int8_t output_activation_max_;  // presumably the upper output clamp; int8_t here, whereas sibling structs in this header use int
51 } ConcatQuantArg;
52 
53 typedef struct PreluQuantArg {  // Quantization and shape arguments for PReLU (per its name); also embedded in LeakyReluQuantArg.
54   int *input_sizes_;  // presumably input_num_ entries — TODO confirm
55   int output_size_;  // NOTE(review): unit (elements vs. bytes) not visible here
56   int **input_shapes_;  // pointer to per-input shape arrays; dimensions and ownership not visible here
57   int *output_shape_;  // presumably output_dim_ entries — TODO confirm
58   size_t input_num_;  // number of inputs
59   size_t output_dim_;  // number of output dimensions (presumably the length of output_shape_)
60   float alpha_;  // float coefficient; presumably the negative-side slope — confirm
61   QuantArg in_args_;  // input quant args, stored by value
62   QuantArg out_args_;  // output quant args, stored by value
63   int output_activation_min_;  // presumably the lower output clamp
64   int output_activation_max_;  // presumably the upper output clamp
65   QuantArg *in_quant_args_;  // NOTE(review): overlaps in purpose with in_args_ above; which one callers use is not visible here
66   QuantArg out_quant_args_;  // NOTE(review): overlaps in purpose with out_args_ above — confirm which is authoritative
67 } PreluQuantArg;
68 
69 typedef struct CropQuantArg {  // Quantization arguments for crop (per its name); all members stored by value.
70   QuantArg in_args_;  // input quant args
71   QuantArg out_args_;  // output quant args
72   int output_activation_min_;  // presumably the lower output clamp
73   int output_activation_max_;  // presumably the upper output clamp
74 } CropQuantArg;
75 
76 typedef struct ArithSelfQuantArg {  // Quantization arguments for single-input arithmetic ops (per its name).
77   QuantArg in_args_;  // input quant args
78   QuantArg out_args_;  // output quant args
79   int output_activation_min_;  // presumably the lower output clamp
80   int output_activation_max_;  // presumably the upper output clamp
81   int output_multiplier_;  // integer output multiplier; presumably paired with the two shifts below — confirm
82   int shift_left_;  // left-shift amount; when it is applied is not visible here
83   int shift_right_;  // right-shift amount; when it is applied is not visible here
84 } ArithSelfQuantArg;
85 
86 typedef struct GatherQuantArg {  // Quantization arguments for gather (per its name).
87   double alpha_;  // NOTE(review): presumably the input/output scale ratio — confirm against the kernel
88   int zp_in_;  // presumably the input zero point
89   int zp_out_;  // presumably the output zero point
90 } GatherQuantArg;
91 
92 typedef struct SoftmaxQuantArg {  // Quantization arguments for softmax (per its name).
93   QuantArg in_quant_args_;  // input quant args: a single value despite the plural name
94   QuantArg out_quant_arg_;  // output quant args; note the singular name, unlike in_quant_args_
95   int output_activation_min_;  // presumably the lower output clamp
96   int output_activation_max_;  // presumably the upper output clamp
97   int output_multiplier_;  // integer output multiplier; presumably paired with the two shifts below — confirm
98   int shift_left_;  // left-shift amount; when it is applied is not visible here
99   int shift_right_;  // right-shift amount; when it is applied is not visible here
100 } SoftmaxQuantArg;
101 
102 typedef struct SubQuantArg {  // Quantization arguments for subtraction (per its name): two inputs, one output.
103   QuantArg in0_args_;  // quant args of the first input
104   QuantArg in1_args_;  // quant args of the second input
105   QuantArg out_args_;  // quant args of the output
106   int output_activation_min_;  // presumably the lower output clamp
107   int output_activation_max_;  // presumably the upper output clamp
108   int input0_multiplier_;  // integer multiplier for input 0
109   int input1_multiplier_;  // integer multiplier for input 1
110   int output_multiplier_;  // integer multiplier for the output
111   int input0_shift_;  // shift for input 0; sign convention not visible here
112   int input1_shift_;  // shift for input 1; sign convention not visible here
113   int output_shift_;  // shift for the output; sign convention not visible here
114   int left_shift_result0_;  // NOTE(review): presumably a left shift derived for input 0 — confirm how it relates to input0_shift_
115   int left_shift_result1_;  // NOTE(review): presumably a left shift derived for input 1 — confirm how it relates to input1_shift_
116   int right_shift0_;  // right-shift amount for input 0
117   int right_shift1_;  // right-shift amount for input 1
118   int left_shift_out_;  // left-shift amount for the output
119   int right_shift_out_;  // right-shift amount for the output
120 } SubQuantArg;
121 
122 typedef struct ArithmeticQuantArg {  // Quant args for a two-input arithmetic op (per its name); all stored by value.
123   QuantArg in0_args_;  // quant args of the first input
124   QuantArg in1_args_;  // quant args of the second input
125   QuantArg out_args_;  // quant args of the output
126 } ArithmeticQuantArg;
127 
128 typedef struct DivQuantArg {  // Quantization arguments for division (per its name): two inputs, one output.
129   QuantArg in0_args_;  // quant args of the first input
130   QuantArg in1_args_;  // quant args of the second input
131   QuantArg out_args_;  // quant args of the output
132   int output_activation_min_;  // presumably the lower output clamp
133   int output_activation_max_;  // presumably the upper output clamp
134   int output_multiplier_;  // integer output multiplier; presumably paired with output_shift_ — confirm
135   int output_shift_;  // output shift; sign convention not visible here
136 } DivQuantArg;
137 
138 typedef struct ReduceQuantArg {  // Quantization arguments for reduce ops (per its name): scales, zero points, four multiplier/shift triples.
139   double in_scale_;  // input scale
140   int32_t in_zp_;  // input zero point
141   double out_scale_;  // output scale
142   int32_t out_zp_;  // output zero point
143   int32_t in_out_multiplier_;  // presumably the input-to-output rescale multiplier — confirm
144   int in_out_left_shift_;  // left shift paired with in_out_multiplier_
145   int in_out_right_shift_;  // right shift paired with in_out_multiplier_
146   int32_t mean_multiplier_;  // presumably used by the mean reduction — confirm
147   int mean_left_shift_;  // left shift paired with mean_multiplier_
148   int mean_right_shift_;  // right shift paired with mean_multiplier_
149   int32_t prod_multiplier_;  // presumably used by the product reduction — confirm
150   int prod_left_shift_;  // left shift paired with prod_multiplier_
151   int prod_right_shift_;  // right shift paired with prod_multiplier_
152   int32_t sum_square_multiplier_;  // presumably used by the sum-of-squares reduction — confirm
153   int sum_square_left_shift_;  // left shift paired with sum_square_multiplier_
154   int sum_square_right_shift_;  // right shift paired with sum_square_multiplier_
155 } ReduceQuantArg;
156 
157 typedef struct LeakyReluQuantArg {  // Parameter block for quantized leaky ReLU (per its name).
158   OpParameter op_parameter_;  // first member, stored by value. NOTE(review): callers may rely on it being first for casts — confirm
159   PreluQuantArg quant_arg;  // embedded PReLU quant args; no trailing underscore, unlike most fields in this header
160   float slope_;  // float coefficient; presumably the negative-side slope — confirm
161   int64_t axis_;  // axis value; how it is interpreted is not visible here
162   int *in_shape_;  // presumably input_dim_ entries — TODO confirm
163   int *out_shape_;  // output shape array; length not visible here
164   int input_dim_;  // number of input dimensions (presumably the length of in_shape_)
165   int element_num;  // element count (per its name); no trailing underscore, unlike most fields in this header
166 } LeakyReluQuantArg;
167 
168 typedef struct ResizeQuantArg {  // Integer resize arguments (per its name); float counterpart is ResizeFloatScaleQuantArg.
169   int32_t ratio_x_;  // x-axis ratio as an integer. NOTE(review): fixed-point format not visible here
170   int32_t ratio_y_;  // y-axis ratio as an integer. NOTE(review): fixed-point format not visible here
171   int32_t *x_axis_index_;  // per-x index table; length and ownership not visible here
172   int32_t *x_axis_lower_;  // presumably the lower neighbour index along x — confirm
173   int32_t *x_axis_upper_;  // presumably the upper neighbour index along x — confirm
174   int32_t *y_axis_index_;  // per-y index table; length and ownership not visible here
175   int32_t *y_axis_lower_;  // presumably the lower neighbour index along y — confirm
176   int32_t *y_axis_upper_;  // presumably the upper neighbour index along y — confirm
177 } ResizeQuantArg;
178 
179 typedef struct ResizeFloatScaleQuantArg {  // Same field names as ResizeQuantArg, but ratios and axis indices are float.
180   float ratio_x_;  // x-axis ratio
181   float ratio_y_;  // y-axis ratio
182   float *x_axis_index_;  // per-x float index table; length and ownership not visible here
183   int32_t *x_axis_lower_;  // presumably the lower neighbour index along x — confirm
184   int32_t *x_axis_upper_;  // presumably the upper neighbour index along x — confirm
185   float *y_axis_index_;  // per-y float index table; length and ownership not visible here
186   int32_t *y_axis_lower_;  // presumably the lower neighbour index along y — confirm
187   int32_t *y_axis_upper_;  // presumably the upper neighbour index along y — confirm
188 } ResizeFloatScaleQuantArg;
189 
190 #ifdef __cplusplus
191 extern "C" {
192 #endif
193 
194 void QuantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);  // Defined elsewhere. Presumably writes an int32 multiplier and a shift for double_multiplier via the out-pointers — confirm format.
195 
196 void QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier, int *right_shift);  // Defined elsewhere. Name implies double_multiplier < 1 is expected — confirm; outputs presumably via pointers.
197 
198 void QuantizeRoundParameterWithDoublePrecision(double double_multiplier, int32_t *quantized_multiplier, int *left_shift,  // Defined elsewhere; same signature as the SinglePrecision variant.
199                                                int *right_shift);  // Presumably yields a multiplier plus separate left/right shifts via the out-pointers — confirm.
200 
201 void QuantizeRoundParameterWithSinglePrecision(double double_multiplier, int32_t *quantized_multiplier, int *left_shift,  // Defined elsewhere; same signature as the DoublePrecision variant.
202                                                int *right_shift);  // NOTE(review): how the result differs from the DoublePrecision variant is not visible here.
203 
204 uint8_t QuantizeToUint8(float real_value, float scale, int32_t zp);  // Defined elsewhere. Returns a uint8_t for real_value given scale and zero point zp; rounding/saturation not visible here.
205 
206 int32_t QuantizeToInt8(float real_value, float scale, int32_t zp);  // Defined elsewhere. NOTE(review): returns int32_t although the name says Int8 — confirm whether the value is clamped to the int8 range.
207 
208 void CalculateActivationRangeQuantized(bool is_relu, bool is_relu6, int32_t zp, float scale, int *mini, int *maxi);  // Defined elsewhere. Presumably writes the quantized clamp range to *mini/*maxi for the selected activation — confirm.
209 // Quantizes float input_data into int8 output_data using scale and zero_point; length is presumably the element count of both buffers (TODO confirm).
210 void Quantize(const float *input_data, int length, float scale, int zero_point, int8_t *output_data);
211 
212 // Dequantizes int8 input_data into float output_data using scale and zero_point; length is presumably the element count of both buffers (TODO confirm).
213 void Dequantize(const int8_t *input_data, int length, float scale, int zero_point, float *output_data);
214 
215 #ifdef __cplusplus
216 }
217 #endif
218 
219 #endif  // MINDSPORE_NNACL_QUANTIZATION_QUANTIZE_H_
220