1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h"
17
18 #include "mlir/Dialect/Tosa/IR/TosaOps.h" // from @llvm-project
19 #include "mlir/Dialect/Tosa/Utils/QuantUtils.h" // from @llvm-project
20 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
21 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h"
22
23 // Implements legalization and post-legalization optimization helper functions
24
25 namespace mlir {
26 namespace tosa {
27
28 // Create a TOSA rescale op from TFLite scaling, zero points and rounding mode
buildRescale(PatternRewriter & rewriter,Operation * op,RankedTensorType output_type,Value input_val,double scale,int64_t input_zp,int64_t output_zp,bool double_round)29 Value buildRescale(PatternRewriter& rewriter, Operation* op,
30 RankedTensorType output_type, Value input_val, double scale,
31 int64_t input_zp, int64_t output_zp, bool double_round) {
32 int32_t multiplier;
33 int32_t shift;
34
35 // We currently only support 32-bit quantized multiplier.
36 computeMultiplierAndShift(scale, multiplier, shift, 32);
37
38 auto rescale_op = rewriter.create<tosa::RescaleOp>(
39 op->getLoc(), output_type, input_val,
40 rewriter.getI32IntegerAttr(static_cast<int32_t>(input_zp)),
41 rewriter.getI32IntegerAttr(static_cast<int32_t>(output_zp)),
42 rewriter.getI32ArrayAttr({multiplier}), rewriter.getI32ArrayAttr({shift}),
43 rewriter.getBoolAttr(true), rewriter.getBoolAttr(double_round),
44 rewriter.getBoolAttr(false));
45
46 return rescale_op.getResult();
47 }
48
49 // Creates TOSA rescale op with int32 output
buildRescaleToInt32(PatternRewriter & rewriter,Operation * op,Value input_val,double input_scale,int64_t input_zp)50 Value buildRescaleToInt32(PatternRewriter& rewriter, Operation* op,
51 Value input_val, double input_scale,
52 int64_t input_zp) {
53 // Output is always int32 type
54 auto input_type = input_val.getType().dyn_cast<mlir::RankedTensorType>();
55 assert(input_type);
56 auto output_type =
57 RankedTensorType::get(input_type.getShape(), rewriter.getI32Type());
58
59 return buildRescale(rewriter, op, output_type, input_val, input_scale,
60 input_zp, 0, false);
61 }
62
63 // Creates TOSA rescale op with int32 input
buildRescaleFromInt32(PatternRewriter & rewriter,Operation * op,RankedTensorType output_type,Value input_val,double output_scale,int64_t output_zp)64 Value buildRescaleFromInt32(PatternRewriter& rewriter, Operation* op,
65 RankedTensorType output_type, Value input_val,
66 double output_scale, int64_t output_zp) {
67 // Input should be int32 type
68 auto input_type = input_val.getType().dyn_cast<mlir::RankedTensorType>();
69 (void)input_type;
70 assert(input_type && input_type.getElementType().isInteger(32) &&
71 "expected rescale input element type to be i32");
72
73 // Potentially check input_shape == output_shape here
74 return buildRescale(rewriter, op, output_type, input_val, output_scale, 0,
75 output_zp, true);
76 }
77
78 // Creates a TOSA rescale op based on conv2d parameters.
buildRescaleOpConvOutput(PatternRewriter & rewriter,Operation * op,Value conv_val,RankedTensorType input_type,RankedTensorType weight_type,RankedTensorType output_type)79 Value buildRescaleOpConvOutput(PatternRewriter& rewriter, Operation* op,
80 Value conv_val, RankedTensorType input_type,
81 RankedTensorType weight_type,
82 RankedTensorType output_type) {
83 auto input_qtype =
84 input_type.getElementType().dyn_cast<mlir::quant::UniformQuantizedType>();
85 auto output_qtype = output_type.getElementType()
86 .dyn_cast<mlir::quant::UniformQuantizedType>();
87
88 double input_scale = input_qtype.getScale();
89
90 int64_t output_zp = output_qtype.getZeroPoint();
91 double output_scale = output_qtype.getScale();
92
93 if (auto weight_per_tensor_qtype =
94 weight_type.getElementType()
95 .dyn_cast<mlir::quant::UniformQuantizedType>()) {
96 // Per-tensor quantization
97 double weight_scale = weight_per_tensor_qtype.getScale();
98
99 int32_t multiplier;
100 int32_t shift;
101
102 double op_tensor_scale = (input_scale * weight_scale) / output_scale;
103
104 // We currently only support 32-bit quantized multiplier.
105 computeMultiplierAndShift(op_tensor_scale, multiplier, shift, 32);
106
107 auto rescale_op = rewriter.create<tosa::RescaleOp>(
108 op->getLoc(), output_type, conv_val, rewriter.getI32IntegerAttr(0),
109 rewriter.getI32IntegerAttr(output_zp),
110 rewriter.getI32ArrayAttr({multiplier}),
111 rewriter.getI32ArrayAttr({shift}), rewriter.getBoolAttr(true),
112 rewriter.getBoolAttr(true), rewriter.getBoolAttr(false));
113
114 return rescale_op.getResult();
115
116 } else if (auto weight_per_channel_qtype =
117 weight_type.getElementType()
118 .dyn_cast<mlir::quant::UniformQuantizedPerAxisType>()) {
119 // Per-channel quantization
120 auto output_last_axis = output_type.getShape().size() - 1;
121 uint32_t output_channels = output_type.getShape()[output_last_axis];
122
123 llvm::SmallVector<int32_t, 4> multiplier_arr;
124 llvm::SmallVector<int32_t, 4> shift_arr;
125
126 llvm::SmallVector<double, 4> weight_scale_arr(
127 weight_per_channel_qtype.getScales().begin(),
128 weight_per_channel_qtype.getScales().end());
129
130 int64_t output_zp = output_qtype.getZeroPoint();
131 double output_scale = output_qtype.getScale();
132
133 for (uint32_t oc = 0; oc < output_channels; oc++) {
134 double weight_scale = weight_scale_arr[oc];
135
136 int32_t multiplier;
137 int32_t shift;
138
139 double op_channel_scale = (input_scale * weight_scale) / output_scale;
140
141 // We currently only support 32-bit quantized multiplier.
142 computeMultiplierAndShift(op_channel_scale, multiplier, shift, 32);
143
144 multiplier_arr.push_back(multiplier);
145 shift_arr.push_back(shift);
146 }
147
148 auto rescale_op = rewriter.create<tosa::RescaleOp>(
149 op->getLoc(), output_type, conv_val, rewriter.getI32IntegerAttr(0),
150 rewriter.getI32IntegerAttr(output_zp),
151 rewriter.getI32ArrayAttr(multiplier_arr),
152 rewriter.getI32ArrayAttr(shift_arr), rewriter.getBoolAttr(true),
153 rewriter.getBoolAttr(true), rewriter.getBoolAttr(true));
154
155 return rescale_op.getResult();
156
157 } else {
158 op->emitOpError("buildConvRescaleOp: unknown weight quantized type");
159 return nullptr;
160 }
161 }
162
163 // Create a 513 entry TOSA constant tensor suitable for the Table operator based
164 // on the values from an int32_t func(int32_t) lambda function.
getTosa1DConstTensorTable(PatternRewriter & rewriter,Operation * op,std::function<int32_t (int32_t)> func)165 Value getTosa1DConstTensorTable(PatternRewriter& rewriter, Operation* op,
166 std::function<int32_t(int32_t)> func) {
167 llvm::SmallVector<int16_t, 4> table_vec;
168
169 for (int32_t i = -256; i <= 256; i++) {
170 int32_t value = func(i);
171 // Table entry is int16_t; clamp to expressible range.
172 table_vec.push_back(
173 static_cast<int16_t>(std::min(std::max(value, -32768), 32767)));
174 }
175
176 auto element_qtype =
177 UniformQuantizedType::get(true, rewriter.getIntegerType(16),
178 rewriter.getF32Type(), 1.0f, 0, -32768, 32767);
179 auto const_type = RankedTensorType::get({513}, element_qtype);
180 auto storage_type =
181 RankedTensorType::get({513}, element_qtype.getStorageType());
182 auto const_attr = DenseElementsAttr::get(
183 storage_type, llvm::makeArrayRef<int16_t>(table_vec));
184
185 auto const_op =
186 rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
187 return const_op.getResult();
188 }
189
190 // Create a 32-bit float constant operator from a float
getTosaConstTensorSingleF32(PatternRewriter & rewriter,Operation * op,float val)191 Value getTosaConstTensorSingleF32(PatternRewriter& rewriter, Operation* op,
192 float val) {
193 auto const_type = RankedTensorType::get({}, rewriter.getF32Type());
194 auto const_attr = DenseElementsAttr::get(const_type, val);
195
196 auto const_op =
197 rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
198 return const_op.getResult();
199 }
200
201 // Create a 32-bit integer constant operator from an int
getTosaConstTensorSingleI32(PatternRewriter & rewriter,Operation * op,int32_t val)202 Value getTosaConstTensorSingleI32(PatternRewriter& rewriter, Operation* op,
203 int32_t val) {
204 auto const_type = RankedTensorType::get({}, rewriter.getIntegerType(32));
205 auto const_attr = DenseElementsAttr::get(const_type, val);
206
207 auto const_op =
208 rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
209 return const_op.getResult();
210 }
211
212 // Create a vector from a 32-bit value tensor. Returns the size of
213 // the new vector or -1 on error.
getVectorFromValue32(Value val,llvm::SmallVector<int32_t,4> & vec)214 int getVectorFromValue32(Value val, llvm::SmallVector<int32_t, 4>& vec) {
215 int i = 0;
216
217 ElementsAttr elems;
218
219 if (!matchPattern(val, m_Constant(&elems))) return -1;
220
221 for (auto idx : elems.getValues<IntegerAttr>()) {
222 vec.push_back(idx.getInt());
223 i++;
224 }
225
226 return i;
227 }
228
229 // Calculates the TOSA padding values based on TF operators padded with
230 // SAME/VALID.
231 //
232 // This could pass tensorflow::FilterTensorFormat and do
233 // GetFilterTensorSpatialDimIndex but the current TF core libs do not support
234 // FORMAT_OHWI parsing by that function in core/util/tensor_format.h
getPaddingValuesFromPadType(tensorflow::Padding tf_pad,tensorflow::TensorFormat data_format_tf,uint32_t first_filter_spatial_dim,RankedTensorType input_type,RankedTensorType filter_type,ArrayAttr strides,ArrayAttr dilations,PatternRewriter & rewriter,ArrayAttr & explicit_padding)235 bool getPaddingValuesFromPadType(
236 tensorflow::Padding tf_pad, tensorflow::TensorFormat data_format_tf,
237 uint32_t first_filter_spatial_dim, RankedTensorType input_type,
238 RankedTensorType filter_type, ArrayAttr strides, ArrayAttr dilations,
239 PatternRewriter& rewriter, ArrayAttr& explicit_padding) {
240 assert(tf_pad != tensorflow::Padding::EXPLICIT);
241
242 // Storing the numeric padding values is useful for TOSA codegen, as opposed
243 // to holding the padding regime mnemonic, i.e. SAME, VALID, FULL, ...
244 SmallVector<int64_t, 4> computed_paddings;
245
246 int64_t pad_before, pad_after;
247 for (int i = 0; i < 2; i++) { // Two spatial dimensions X&Y
248 int64_t ifm_dim = GetTensorSpatialDimIndex(
249 4, data_format_tf, i); // 4D tensor, NHWC/NCHW format
250 int64_t filter_dim = first_filter_spatial_dim + i;
251
252 int64_t dim_dilation = dilations[i].template cast<IntegerAttr>().getInt();
253 int64_t dim_stride = strides[i].template cast<IntegerAttr>().getInt();
254
255 tensorflow::int64 op_size, pad_before_tf,
256 pad_after_tf; // Complains if using int64_T
257 tensorflow::Status status = tensorflow::GetWindowedOutputSizeVerboseV2(
258 input_type.getDimSize(ifm_dim), filter_type.getDimSize(filter_dim),
259 dim_dilation, dim_stride, tf_pad, &op_size, &pad_before_tf,
260 &pad_after_tf);
261 if (!status.ok()) return false;
262
263 pad_before = pad_before_tf;
264 pad_after = pad_after_tf;
265 computed_paddings.push_back(pad_before);
266 computed_paddings.push_back(pad_after);
267 }
268
269 explicit_padding = rewriter.getI64ArrayAttr(computed_paddings);
270 return true;
271 }
272
273 // Calculates the TOSA padding values for explicit-padded TF operators.
274 //
275 // This function only handles the TF padding array explicit_padding, which is
276 // only present in certain TF ops. All others encode padding using the string
277 // SAME/VALID, which is interpreted using the getPaddingValuesFromPadString
278 // function below.
279
280 // The explicit padding array in TF holds 2 pad values for every
281 // dimension, even those that are not the 2 spatial ones. Just extract the
282 // 2x pad values for the XY dims.
getPaddingValuesFromExplicitPadAttr(ArrayAttr explicit_pad,tensorflow::TensorFormat data_format_tf,PatternRewriter & rewriter)283 ArrayAttr getPaddingValuesFromExplicitPadAttr(
284 ArrayAttr explicit_pad, tensorflow::TensorFormat data_format_tf,
285 PatternRewriter& rewriter) {
286 SmallVector<int64_t, 4> computed_paddings;
287
288 int64_t pad_before, pad_after;
289 for (int i = 0; i < 2; i++) { // Two spatial dimensions X&Y
290 int64_t dim = GetTensorSpatialDimIndex(4, data_format_tf,
291 i); // 4D tensor, NHWC/NCHW format
292
293 pad_before = explicit_pad[dim * 2].template cast<IntegerAttr>().getInt();
294 pad_after = explicit_pad[dim * 2 + 1].template cast<IntegerAttr>().getInt();
295 computed_paddings.push_back(pad_before);
296 computed_paddings.push_back(pad_after);
297 }
298
299 return rewriter.getI64ArrayAttr(computed_paddings);
300 }
301
302 // Calculates the TOSA padding values for transposeConv2d
getTransposeConv2dPaddingValues(tensorflow::Padding tf_pad,tensorflow::TensorFormat data_format_tf,uint32_t first_filter_spatial_dim,RankedTensorType input_type,RankedTensorType filter_type,RankedTensorType output_type,ArrayAttr strides,ArrayAttr dilations,PatternRewriter & rewriter,ArrayAttr & explicit_padding)303 bool getTransposeConv2dPaddingValues(
304 tensorflow::Padding tf_pad, tensorflow::TensorFormat data_format_tf,
305 uint32_t first_filter_spatial_dim, RankedTensorType input_type,
306 RankedTensorType filter_type, RankedTensorType output_type,
307 ArrayAttr strides, ArrayAttr dilations, PatternRewriter& rewriter,
308 ArrayAttr& explicit_padding) {
309 assert(tf_pad != tensorflow::Padding::EXPLICIT);
310
311 // Storing the numeric padding values is useful for TOSA codegen, as opposed
312 // to holding the padding regime mnemonic, i.e. SAME, VALID, FULL, ...
313
314 SmallVector<int64_t, 2> computed_paddings;
315
316 int64_t pad_before, pad_after;
317 for (int i = 0; i < 2; i++) { // Two spatial dimensions X&Y
318 int64_t ifm_dim = GetTensorSpatialDimIndex(
319 4, data_format_tf, i); // 4D tensor, NHWC/NCHW format
320 int64_t ofm_dim = GetTensorSpatialDimIndex(
321 4, data_format_tf, i); // 4D tensor, NHWC/NCHW format
322 int64_t filter_dim = first_filter_spatial_dim + i;
323
324 int64_t ifm_size = input_type.getDimSize(ifm_dim);
325 int64_t filter_size = filter_type.getDimSize(filter_dim);
326 int64_t ofm_size = output_type.getDimSize(ofm_dim);
327 int64_t dim_dilation = dilations[i].template cast<IntegerAttr>().getInt();
328 int64_t dim_stride = strides[i].template cast<IntegerAttr>().getInt();
329
330 int effective_filter_size = (filter_size - 1) * dim_dilation + 1;
331 int total_padding =
332 ((ifm_size - 1) * dim_stride + effective_filter_size - ofm_size);
333 total_padding = total_padding > 0 ? total_padding : 0;
334
335 pad_before = total_padding / 2;
336 pad_after = total_padding - pad_before;
337
338 computed_paddings.push_back(pad_before);
339 }
340
341 explicit_padding = rewriter.getI64ArrayAttr(computed_paddings);
342 return true;
343 }
344
345 // Templated function to create a constant op in a given dialect and with a
346 // given type. Specializations below.
347
348 // T0: target dialect constant op
349 // T1: native c++ integer type
350 template <typename T0, typename T1>
get1DConstTensor(PatternRewriter & rewriter,Operation * op,SmallVector<T1,8> arr)351 Value get1DConstTensor(PatternRewriter& rewriter, Operation* op,
352 SmallVector<T1, 8> arr) {
353 auto const_type =
354 RankedTensorType::get({static_cast<int32_t>(arr.size())},
355 rewriter.getIntegerType(sizeof(T1) * 8));
356 auto const_attr =
357 DenseElementsAttr::get(const_type, llvm::makeArrayRef<T1>(arr));
358
359 auto const_op = rewriter.create<T0>(op->getLoc(), const_type, const_attr);
360 return const_op.getResult();
361 }
362
363 // Specialization for Const ops
364 template <>
get1DConstTensor(PatternRewriter & rewriter,Operation * op,SmallVector<float,8> arr)365 Value get1DConstTensor<tosa::ConstOp, float>(PatternRewriter& rewriter,
366 Operation* op,
367 SmallVector<float, 8> arr) {
368 auto const_type = RankedTensorType::get({static_cast<int32_t>(arr.size())},
369 rewriter.getF32Type());
370 auto const_attr =
371 DenseElementsAttr::get(const_type, llvm::makeArrayRef<float>(arr));
372
373 auto const_op =
374 rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
375 return const_op.getResult();
376 }
377
378 template Value get1DConstTensor<tosa::ConstOp, int32_t>(
379 PatternRewriter&, Operation*, SmallVector<int32_t, 8> arr);
380 template Value get1DConstTensor<tosa::ConstOp, int64_t>(
381 PatternRewriter&, Operation*, SmallVector<int64_t, 8> arr);
382 template Value get1DConstTensor<TFL::ConstOp, int32_t>(
383 PatternRewriter&, Operation*, SmallVector<int32_t, 8> arr);
384 template Value get1DConstTensor<TFL::ConstOp, int64_t>(
385 PatternRewriter&, Operation*, SmallVector<int64_t, 8> arr);
386
387 // Same as get1DConstTensor, but int48 is not native c++ type, needs additional
388 // interface
get1DConstTensorInt48(PatternRewriter & rewriter,Operation * op,SmallVector<int64_t,8> arr)389 Value get1DConstTensorInt48(PatternRewriter& rewriter, Operation* op,
390 SmallVector<int64_t, 8> arr) {
391 auto const_type = RankedTensorType::get({static_cast<int32_t>(arr.size())},
392 rewriter.getIntegerType(48));
393 auto const_attr =
394 DenseElementsAttr::get(const_type, llvm::makeArrayRef<int64_t>(arr));
395
396 auto const_op =
397 rewriter.create<tosa::ConstOp>(op->getLoc(), const_type, const_attr);
398 return const_op.getResult();
399 }
400
401 // Strip off quantization information for bias tensor and return a unquantized
402 // bias
getUnquantizedBias(PatternRewriter & rewriter,Operation * op,Value input)403 Value getUnquantizedBias(PatternRewriter& rewriter, Operation* op,
404 Value input) {
405 auto input_type = input.getType().dyn_cast<mlir::RankedTensorType>();
406 assert(input_type);
407 auto input_element_type = input_type.getElementType();
408 auto input_element_qtype =
409 input_element_type.dyn_cast<mlir::quant::QuantizedType>();
410
411 if (input_element_qtype) {
412 auto output_type = RankedTensorType::get(
413 input_type.getShape(),
414 rewriter.getIntegerType(
415 input_element_qtype.getStorageTypeIntegralWidth()));
416
417 auto input_defining_op = dyn_cast<TFL::QConstOp>(input.getDefiningOp());
418 auto dense_attr = input_defining_op.value().dyn_cast<DenseElementsAttr>();
419
420 if (dense_attr) {
421 auto const_op =
422 rewriter.create<tosa::ConstOp>(op->getLoc(), output_type, dense_attr);
423 return const_op.getResult();
424 } else {
425 return input;
426 }
427
428 } else {
429 return input;
430 }
431 }
432
433 } // namespace tosa
434 } // namespace mlir
435