/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/tools/optimize/operator_property.h"

#include <cmath>

#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/schema/schema_utils.h"

namespace tflite {
namespace optimize {
namespace operator_property {

namespace {
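// Determines the builtin op code of the given operator and, for LSTM-family
// ops, which optional features (projection, peephole, layer normalization)
// are present, since the quantization spec differs per variant.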
const OpVariant GetOperatorVariant(const ModelT* model, int subgraph_index,
                                   int op_index) {
  OpVariant op_variant;
  OperatorT* op =
      model->subgraphs.at(subgraph_index)->operators[op_index].get();
  op_variant.op_code =
      GetBuiltinCode(model->operator_codes[op->opcode_index].get());
  if (op_variant.op_code == BuiltinOperator_LSTM ||
      op_variant.op_code == BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM) {
    if (op->inputs.size() == 5) {
      // The 5 input ("basic") LSTM is not supported in this tooling (yet).
      op_variant.is_quantizable = false;
      return op_variant;
    }
    const int cell_to_output_weight_index = 11;
    const int forget_layer_norm_coefficients_index = 21;
    const int projection_weights_index = 16;
    op_variant.use_projection = op->inputs[projection_weights_index] != -1;
    op_variant.use_peephole = op->inputs[cell_to_output_weight_index] != -1;
    if (op->inputs.size() == 20) {
      op_variant.use_layer_norm = false;
    } else {
      op_variant.use_layer_norm =
          op->inputs[forget_layer_norm_coefficients_index] != -1;
    }
  }
  return op_variant;
}
}  // namespace

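// Returns the quantization spec for the op at `op_index` in `subgraph_index`.
// A usage sketch (assuming `model` points to an already-loaded ModelT):
//
//   OperatorProperty property =
//       GetOperatorProperty(model, /*subgraph_index=*/0, /*op_index=*/0);
//   if (property.quantizable) {
//     // Quantize the tensors listed in property.inputs / property.outputs.
//   }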
OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
                                     int op_index) {
  OpVariant op_variant = GetOperatorVariant(model, subgraph_index, op_index);
  return GetOperatorProperty(op_variant);
}

// Update the operation definitions in the TensorFlow Lite dialect accordingly
// when the kernel support level changes.
// LINT.IfChange
OperatorProperty GetOperatorProperty(OpVariant op_variant) {
  BuiltinOperator op_code = op_variant.op_code;
  OperatorProperty property;
  switch (op_code) {
    case BuiltinOperator_ABS:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      property.restrict_same_input_output_scale = true;
      break;
    case BuiltinOperator_RSQRT:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      break;
    case BuiltinOperator_ADD:
      property.inputs = {{0, {}}, {1, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      property.quantize_input_as_activations = true;
      break;
    case BuiltinOperator_ARG_MAX:
      property.inputs = {{0, {}}};
      // ArgMax has no quantizable output.
      property.version = 2;
      property.quantizable_int16 = false;
      break;
    case BuiltinOperator_AVERAGE_POOL_2D:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_BATCH_MATMUL: {
      property.inputs = {{0, {}}, {1, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      property.quantize_input_as_activations = true;
      break;
    }
    case BuiltinOperator_BATCH_TO_SPACE_ND:
    case BuiltinOperator_SPACE_TO_BATCH_ND:
    case BuiltinOperator_SPACE_TO_DEPTH:
      // We skip inputs 1 and 2 since they aren't real valued (they are
      // shapes).
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      property.quantizable_int16 = false;
      break;
    case BuiltinOperator_BROADCAST_TO:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 3;
      break;
    case BuiltinOperator_DEPTH_TO_SPACE:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      property.quantizable_int16 = false;
      break;
    case BuiltinOperator_SPLIT:
      // We skip input 0 since it is the split dim which is not real valued.
      property.inputs = {{1, {}}};
      property.arbitrary_outputs = true;
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_SPLIT_V:
      property.inputs = {{0, {}}};
      property.arbitrary_outputs = true;
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_CONCATENATION:
      property.arbitrary_inputs = true;
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_CONV_2D: {
      TensorProperty tensor_property;
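      // The weights are quantized per output channel: for CONV_2D's
      // [output_channels, height, width, input_channels] filter layout, the
      // channel dimension is 0.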
      tensor_property.per_axis = true;
      tensor_property.per_axis_index = 0;
      tensor_property.symmetric = true;
      property.inputs = {{0, {}}, {1, tensor_property}};
      property.outputs = {{0, {}}};
      property.biases = {2};
      property.version = 3;
      break;
    }
    case BuiltinOperator_TRANSPOSE_CONV: {
      TensorProperty tensor_property;
      tensor_property.per_axis = true;
      tensor_property.per_axis_index = 0;
      tensor_property.symmetric = true;
      property.inputs = {{2, {}}, {1, tensor_property}};
      property.outputs = {{0, {}}};
      property.biases = {3};
      property.version = 3;
      break;
    }
    case BuiltinOperator_DEPTHWISE_CONV_2D: {
      TensorProperty tensor_property;
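      // For DEPTHWISE_CONV_2D's [1, height, width, channels] filter layout,
      // the channel dimension is 3.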
      tensor_property.per_axis = true;
      tensor_property.per_axis_index = 3;
      tensor_property.symmetric = true;
      property.inputs = {
          {0, {}},
          {1, tensor_property},
      };
      property.outputs = {{0, {}}};
      property.biases = {2};
      property.version = 3;
      break;
    }
    case BuiltinOperator_EQUAL:
    case BuiltinOperator_NOT_EQUAL:
    case BuiltinOperator_GREATER:
    case BuiltinOperator_GREATER_EQUAL:
    case BuiltinOperator_LESS:
    case BuiltinOperator_LESS_EQUAL:
      property.inputs = {{0, {}}, {1, {}}};
      // Comparisons have no quantizable outputs.
      property.version = 2;
      property.quantizable_int16 = false;
      break;
    case BuiltinOperator_EXPAND_DIMS:
      // We skip input 1 as it is not real valued (it's the index of axis)
      // and hence does not need to be quantized.
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 1;
      break;
    case BuiltinOperator_FILL: {
      property.inputs = {{1, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 3;
      break;
    }
    case BuiltinOperator_FULLY_CONNECTED: {
      TensorProperty tensor_property;
      tensor_property.symmetric = true;
      property.inputs = {{0, {}}, {1, tensor_property}};
      property.outputs = {{0, {}}};
      property.biases = {2};
      property.version = 4;
      break;
    }
    case BuiltinOperator_GATHER:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.quantize_input_as_activations = true;
      property.version = 2;
      break;
    case BuiltinOperator_GATHER_ND:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 3;
      break;
    case BuiltinOperator_HARD_SWISH: {
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 1;
      property.quantizable_int16 = false;
      break;
    }
    case BuiltinOperator_LOG_SOFTMAX: {
      property.inputs = {{0, {}}};
      // LogSoftmax requires output with 16/256 as scale and 127 as zero point.
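      // With that scale and zero point, the int8 range [-128, 127] maps to
      // real values [(-128 - 127) * 16 / 256, 0] = [-15.9375, 0], covering
      // log-softmax's non-positive outputs.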
      TensorProperty tensor_property;
      tensor_property.restriction = true;
      tensor_property.restricted_value_int8 = {16.0f / 256.0f, 127};
      property.outputs = {{0, tensor_property}};
      property.version = 2;
      property.quantizable_int16 = false;
      break;
    }
    case BuiltinOperator_LOGISTIC: {
      property.inputs = {{0, {}}};
      // Logistic requires output with 1/256 as scale and -128 as zero point.
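      // With that scale and zero point, int8 [-128, 127] maps to
      // [0, 255/256], matching the sigmoid's (0, 1) output range; the int16
      // scale 1/32768 with zero point 0 likewise covers (-1, 1).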
      TensorProperty tensor_property;
      tensor_property.restriction = true;
      tensor_property.restricted_value_int8 = {1 / 256.0f, -128};
      tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
      property.outputs = {{0, tensor_property}};
      property.version = 2;
      break;
    }
    case BuiltinOperator_LSTM:
    case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: {
      if (!op_variant.is_quantizable) {
        // Early exit for the 5-input LSTM; it is not supported in this
        // tooling yet.
        property.quantizable = false;
        break;
      }
      // TODO(jianlijianli): extend LSTM op spec to include input, bias etc.
      // LSTM needs 5 intermediate tensors. This agrees with the fully
      // quantized kernels in lstm_eval.cc.
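      // A bias tensor marked `use_derived_scale` below is not calibrated;
      // its scale is the product of the scales of the tensors listed in
      // `derived_scale` (input tensors, then intermediate tensors) and the
      // listed constant factors. For example, `derived_scale = {{20}, {},
      // {alpha}}` means scale(bias) = scale(input 20) * alpha, with
      // alpha = 2^-10.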
      if (op_variant.use_layer_norm && op_variant.use_projection &&
          op_variant.use_peephole) {
        static const float alpha = static_cast<float>(std::pow(2, -10));
        TensorProperty tensor_property_9;
        tensor_property_9.number_of_bits = 16;
        tensor_property_9.symmetric = true;
        TensorProperty tensor_property_12;
        tensor_property_12.use_derived_scale = true;
        tensor_property_12.number_of_bits = 32;
        tensor_property_12.derived_scale = {{20}, {}, {alpha}};
        TensorProperty tensor_property_13;
        tensor_property_13.use_derived_scale = true;
        tensor_property_13.number_of_bits = 32;
        tensor_property_13.derived_scale = {{21}, {}, {alpha}};
        TensorProperty tensor_property_14;
        tensor_property_14.use_derived_scale = true;
        tensor_property_14.number_of_bits = 32;
        tensor_property_14.derived_scale = {{22}, {}, {alpha}};
        TensorProperty tensor_property_15;
        tensor_property_15.use_derived_scale = true;
        tensor_property_15.number_of_bits = 32;
        tensor_property_15.derived_scale = {{23}, {}, {alpha}};
        TensorProperty tensor_property_17;
        tensor_property_17.use_derived_scale = true;
        tensor_property_17.number_of_bits = 32;
        tensor_property_17.derived_scale = {{16}, {4}, {}};
        TensorProperty tensor_property_19;
        tensor_property_19.extend_to_power_of_two = true;
        tensor_property_19.number_of_bits = 16;
        tensor_property_19.state_tensor = true;
        tensor_property_19.symmetric = true;
        TensorProperty tensor_property_20;
        tensor_property_20.number_of_bits = 16;
        tensor_property_20.symmetric = true;

        property.inputs = {
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            {4, {}},
            {5, {}},
            {6, {}},
            {7, {}},
            {8, {}},
            {9, tensor_property_9},
            {10, tensor_property_9},
            {11, tensor_property_9},
            {16, {}},
            {19, tensor_property_19},
            {20, tensor_property_20},
            {21, tensor_property_20},
            {22, tensor_property_20},
            {23, tensor_property_20},
            {12, tensor_property_12},
            {13, tensor_property_13},
            {14, tensor_property_14},
            {15, tensor_property_15},
            {17, tensor_property_17},
        };
        property.outputs = {{0, {}}};
        property.intermediates = {
            {0, tensor_property_20},
            {1, tensor_property_20},
            {2, tensor_property_20},
            {3, tensor_property_20},
            {4, {}},
        };
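        // `restrict_scale = {{18, 0}}` ties the quantization parameters of
        // input 18 (the output activation state) to those of output 0, since
        // the output is fed back as the next step's activation state.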
        property.restrict_scale = {{18, 0}};
        property.version = 2;
      }
      if (op_variant.use_layer_norm && op_variant.use_projection &&
          !op_variant.use_peephole) {
        static const float alpha = static_cast<float>(std::pow(2, -10));

        TensorProperty tensor_property_12;
        tensor_property_12.use_derived_scale = true;
        tensor_property_12.number_of_bits = 32;
        tensor_property_12.derived_scale = {{20}, {}, {alpha}};
        TensorProperty tensor_property_13;
        tensor_property_13.use_derived_scale = true;
        tensor_property_13.number_of_bits = 32;
        tensor_property_13.derived_scale = {{21}, {}, {alpha}};
        TensorProperty tensor_property_14;
        tensor_property_14.use_derived_scale = true;
        tensor_property_14.number_of_bits = 32;
        tensor_property_14.derived_scale = {{22}, {}, {alpha}};
        TensorProperty tensor_property_15;
        tensor_property_15.use_derived_scale = true;
        tensor_property_15.number_of_bits = 32;
        tensor_property_15.derived_scale = {{23}, {}, {alpha}};
        TensorProperty tensor_property_17;
        tensor_property_17.use_derived_scale = true;
        tensor_property_17.number_of_bits = 32;
        tensor_property_17.derived_scale = {{16}, {4}, {}};
        TensorProperty tensor_property_19;
        tensor_property_19.extend_to_power_of_two = true;
        tensor_property_19.number_of_bits = 16;
        tensor_property_19.state_tensor = true;
        tensor_property_19.symmetric = true;
        TensorProperty tensor_property_20;
        tensor_property_20.number_of_bits = 16;
        tensor_property_20.symmetric = true;

        property.inputs = {
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            {4, {}},
            {5, {}},
            {6, {}},
            {7, {}},
            {8, {}},
            {16, {}},
            {19, tensor_property_19},
            {20, tensor_property_20},
            {21, tensor_property_20},
            {22, tensor_property_20},
            {23, tensor_property_20},
            {12, tensor_property_12},
            {13, tensor_property_13},
            {14, tensor_property_14},
            {15, tensor_property_15},
            {17, tensor_property_17},
        };
        property.outputs = {{0, {}}};
        property.intermediates = {
            {0, tensor_property_20},
            {1, tensor_property_20},
            {2, tensor_property_20},
            {3, tensor_property_20},
            {4, {}},
        };
        property.restrict_scale = {{18, 0}};
        property.version = 2;
      }
      if (op_variant.use_layer_norm && !op_variant.use_projection &&
          op_variant.use_peephole) {
        static const float alpha = static_cast<float>(std::pow(2, -10));
        TensorProperty tensor_property_9;
        tensor_property_9.number_of_bits = 16;
        tensor_property_9.symmetric = true;
        TensorProperty tensor_property_12;
        tensor_property_12.use_derived_scale = true;
        tensor_property_12.number_of_bits = 32;
        tensor_property_12.derived_scale = {{20}, {}, {alpha}};
        TensorProperty tensor_property_13;
        tensor_property_13.use_derived_scale = true;
        tensor_property_13.number_of_bits = 32;
        tensor_property_13.derived_scale = {{21}, {}, {alpha}};
        TensorProperty tensor_property_14;
        tensor_property_14.use_derived_scale = true;
        tensor_property_14.number_of_bits = 32;
        tensor_property_14.derived_scale = {{22}, {}, {alpha}};
        TensorProperty tensor_property_15;
        tensor_property_15.use_derived_scale = true;
        tensor_property_15.number_of_bits = 32;
        tensor_property_15.derived_scale = {{23}, {}, {alpha}};
        TensorProperty tensor_property_19;
        tensor_property_19.extend_to_power_of_two = true;
        tensor_property_19.number_of_bits = 16;
        tensor_property_19.state_tensor = true;
        tensor_property_19.symmetric = true;
        TensorProperty tensor_property_20;
        tensor_property_20.number_of_bits = 16;
        tensor_property_20.symmetric = true;

        property.inputs = {
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            {4, {}},
            {5, {}},
            {6, {}},
            {7, {}},
            {8, {}},
            {9, tensor_property_9},
            {10, tensor_property_9},
            {11, tensor_property_9},
            {19, tensor_property_19},
            {20, tensor_property_20},
            {21, tensor_property_20},
            {22, tensor_property_20},
            {23, tensor_property_20},
            {12, tensor_property_12},
            {13, tensor_property_13},
            {14, tensor_property_14},
            {15, tensor_property_15},
        };
        property.outputs = {{0, {}}};
        property.intermediates = {
            {0, tensor_property_20},
            {1, tensor_property_20},
            {2, tensor_property_20},
            {3, tensor_property_20},
            // Without projection, hidden state (4), output (0) and input
            // activation state (18) are the same except that the very first
            // inference of input activation is not captured in hidden and
            // output.
            // This is not an issue because this intermediate tensor is not
            // used in the kernel and its quantization parameters are ignored.
            {4, {}},
        };
        property.restrict_scale = {{18, 0}};
        property.version = 2;
      }
      if (op_variant.use_layer_norm && !op_variant.use_projection &&
          !op_variant.use_peephole) {
        static const float alpha = static_cast<float>(std::pow(2, -10));
        TensorProperty tensor_property_12;
        tensor_property_12.use_derived_scale = true;
        tensor_property_12.number_of_bits = 32;
        tensor_property_12.derived_scale = {{20}, {}, {alpha}};
        TensorProperty tensor_property_13;
        tensor_property_13.use_derived_scale = true;
        tensor_property_13.number_of_bits = 32;
        tensor_property_13.derived_scale = {{21}, {}, {alpha}};
        TensorProperty tensor_property_14;
        tensor_property_14.use_derived_scale = true;
        tensor_property_14.number_of_bits = 32;
        tensor_property_14.derived_scale = {{22}, {}, {alpha}};
        TensorProperty tensor_property_15;
        tensor_property_15.use_derived_scale = true;
        tensor_property_15.number_of_bits = 32;
        tensor_property_15.derived_scale = {{23}, {}, {alpha}};
        TensorProperty tensor_property_19;
        tensor_property_19.extend_to_power_of_two = true;
        tensor_property_19.number_of_bits = 16;
        tensor_property_19.state_tensor = true;
        tensor_property_19.symmetric = true;
        TensorProperty tensor_property_20;
        tensor_property_20.number_of_bits = 16;
        tensor_property_20.symmetric = true;

        property.inputs = {
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            {4, {}},
            {5, {}},
            {6, {}},
            {7, {}},
            {8, {}},
            {19, tensor_property_19},
            {20, tensor_property_20},
            {21, tensor_property_20},
            {22, tensor_property_20},
            {23, tensor_property_20},
            {12, tensor_property_12},
            {13, tensor_property_13},
            {14, tensor_property_14},
            {15, tensor_property_15},
        };
        property.outputs = {{0, {}}};
        property.intermediates = {
            {0, tensor_property_20},
            {1, tensor_property_20},
            {2, tensor_property_20},
            {3, tensor_property_20},
            // Without projection, hidden state (4), output (0) and input
            // activation state (18) are the same except that the very first
            // inference of input activation is not captured in hidden and
            // output.
            // This is not an issue because this intermediate tensor is not
            // used in the kernel and its quantization parameters are ignored.
            {4, {}},
        };
        property.restrict_scale = {{18, 0}};
        property.version = 2;
      }
      if (!op_variant.use_layer_norm && op_variant.use_projection &&
          op_variant.use_peephole) {
        TensorProperty tensor_property_9;
        tensor_property_9.number_of_bits = 16;
        tensor_property_9.symmetric = true;
        // Without layer norm, we quantize the bias with the scale of the
        // input and its corresponding weight. The alternative would be to use
        // the scale of the recurrent tensor and its corresponding weight, but
        // we choose the smaller scale, which means higher resolution.
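        // For example, bias 12 (the input gate bias) gets
        // scale = scale(input 0) * scale(input-to-input weight 1).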
        TensorProperty tensor_property_12;
        tensor_property_12.use_derived_scale = true;
        tensor_property_12.number_of_bits = 32;
        tensor_property_12.derived_scale = {{0, 1}, {}, {}};
        TensorProperty tensor_property_13;
        tensor_property_13.use_derived_scale = true;
        tensor_property_13.number_of_bits = 32;
        tensor_property_13.derived_scale = {{0, 2}, {}, {}};
        TensorProperty tensor_property_14;
        tensor_property_14.use_derived_scale = true;
        tensor_property_14.number_of_bits = 32;
        tensor_property_14.derived_scale = {{0, 3}, {}, {}};
        TensorProperty tensor_property_15;
        tensor_property_15.use_derived_scale = true;
        tensor_property_15.number_of_bits = 32;
        tensor_property_15.derived_scale = {{0, 4}, {}, {}};
        TensorProperty tensor_property_17;
        tensor_property_17.use_derived_scale = true;
        tensor_property_17.number_of_bits = 32;
        tensor_property_17.derived_scale = {{16}, {4}, {}};
        TensorProperty tensor_property_19;
        tensor_property_19.extend_to_power_of_two = true;
        tensor_property_19.number_of_bits = 16;
        tensor_property_19.state_tensor = true;
        tensor_property_19.symmetric = true;

        property.inputs = {
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            {4, {}},
            {5, {}},
            {6, {}},
            {7, {}},
            {8, {}},
            {9, tensor_property_9},
            {10, tensor_property_9},
            {11, tensor_property_9},
            {16, {}},
            {19, tensor_property_19},
            {12, tensor_property_12},
            {13, tensor_property_13},
            {14, tensor_property_14},
            {15, tensor_property_15},
            {17, tensor_property_17},
        };
        property.outputs = {{0, {}}};
        property.intermediates = {
            // Without layer normalization, intermediate tensors 0, 1, 2, 3
            // are not used and their quantization parameters are ignored.
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            // Hidden state is quantized as usual.
            {4, {}},
        };
        property.restrict_scale = {{18, 0}};
        property.version = 2;
      }
      if (!op_variant.use_layer_norm && op_variant.use_projection &&
          !op_variant.use_peephole) {
        // Without layer norm, we quantize the bias with the scale of the
        // input and its corresponding weight. The alternative would be to use
        // the scale of the recurrent tensor and its corresponding weight, but
        // we choose the smaller scale, which means higher resolution.
        TensorProperty tensor_property_12;
        tensor_property_12.use_derived_scale = true;
        tensor_property_12.number_of_bits = 32;
        tensor_property_12.derived_scale = {{0, 1}, {}, {}};
        TensorProperty tensor_property_13;
        tensor_property_13.use_derived_scale = true;
        tensor_property_13.number_of_bits = 32;
        tensor_property_13.derived_scale = {{0, 2}, {}, {}};
        TensorProperty tensor_property_14;
        tensor_property_14.use_derived_scale = true;
        tensor_property_14.number_of_bits = 32;
        tensor_property_14.derived_scale = {{0, 3}, {}, {}};
        TensorProperty tensor_property_15;
        tensor_property_15.use_derived_scale = true;
        tensor_property_15.number_of_bits = 32;
        tensor_property_15.derived_scale = {{0, 4}, {}, {}};
        TensorProperty tensor_property_17;
        tensor_property_17.use_derived_scale = true;
        tensor_property_17.number_of_bits = 32;
        tensor_property_17.derived_scale = {{16}, {4}, {}};
        TensorProperty tensor_property_19;
        tensor_property_19.extend_to_power_of_two = true;
        tensor_property_19.number_of_bits = 16;
        tensor_property_19.state_tensor = true;
        tensor_property_19.symmetric = true;

        property.inputs = {
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            {4, {}},
            {5, {}},
            {6, {}},
            {7, {}},
            {8, {}},
            {16, {}},
            {19, tensor_property_19},
            {12, tensor_property_12},
            {13, tensor_property_13},
            {14, tensor_property_14},
            {15, tensor_property_15},
            {17, tensor_property_17},
        };
        property.outputs = {{0, {}}};
        property.intermediates = {
            // Without layer normalization, intermediate tensors 0, 1, 2, 3
            // are not used and their quantization parameters are ignored.
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            // Hidden state is quantized as usual.
            {4, {}},
        };
        property.restrict_scale = {{18, 0}};
        property.version = 2;
      }
      if (!op_variant.use_layer_norm && !op_variant.use_projection &&
          op_variant.use_peephole) {
        TensorProperty tensor_property_9;
        tensor_property_9.number_of_bits = 16;
        tensor_property_9.symmetric = true;
        // Without layer norm, we quantize the bias with the scale of the
        // input and its corresponding weight. The alternative would be to use
        // the scale of the recurrent tensor and its corresponding weight, but
        // we choose the smaller scale, which means higher resolution.
        TensorProperty tensor_property_12;
        tensor_property_12.use_derived_scale = true;
        tensor_property_12.number_of_bits = 32;
        tensor_property_12.derived_scale = {{0, 1}, {}, {}};
        TensorProperty tensor_property_13;
        tensor_property_13.use_derived_scale = true;
        tensor_property_13.number_of_bits = 32;
        tensor_property_13.derived_scale = {{0, 2}, {}, {}};
        TensorProperty tensor_property_14;
        tensor_property_14.use_derived_scale = true;
        tensor_property_14.number_of_bits = 32;
        tensor_property_14.derived_scale = {{0, 3}, {}, {}};
        TensorProperty tensor_property_15;
        tensor_property_15.use_derived_scale = true;
        tensor_property_15.number_of_bits = 32;
        tensor_property_15.derived_scale = {{0, 4}, {}, {}};
        TensorProperty tensor_property_19;
        tensor_property_19.extend_to_power_of_two = true;
        tensor_property_19.number_of_bits = 16;
        tensor_property_19.state_tensor = true;
        tensor_property_19.symmetric = true;

        property.inputs = {
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            {4, {}},
            {5, {}},
            {6, {}},
            {7, {}},
            {8, {}},
            {9, tensor_property_9},
            {10, tensor_property_9},
            {11, tensor_property_9},
            {19, tensor_property_19},
            {12, tensor_property_12},
            {13, tensor_property_13},
            {14, tensor_property_14},
            {15, tensor_property_15},
        };
        property.outputs = {{0, {}}};
        property.intermediates = {
            // Without layer normalization, intermediate tensors 0, 1, 2, 3
            // are not used and their quantization parameters are ignored.
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            // Without projection, hidden state (4), output (0) and input
            // activation state (18) are the same except that the very first
            // inference of input activation is not captured in hidden and
            // output.
            // This is not an issue because this intermediate tensor is not
            // used in the kernel and its quantization parameters are ignored.
            {4, {}},
        };
        property.restrict_scale = {{18, 0}};
        property.version = 2;
      }
      if (!op_variant.use_layer_norm && !op_variant.use_projection &&
          !op_variant.use_peephole) {
        // Without layer norm, we quantize the bias with the scale of the
        // input and its corresponding weight. The alternative would be to use
        // the scale of the recurrent tensor and its corresponding weight, but
        // we choose the smaller scale, which means higher resolution.
        TensorProperty tensor_property_12;
        tensor_property_12.use_derived_scale = true;
        tensor_property_12.number_of_bits = 32;
        tensor_property_12.derived_scale = {{0, 1}, {}, {}};
        TensorProperty tensor_property_13;
        tensor_property_13.use_derived_scale = true;
        tensor_property_13.number_of_bits = 32;
        tensor_property_13.derived_scale = {{0, 2}, {}, {}};
        TensorProperty tensor_property_14;
        tensor_property_14.use_derived_scale = true;
        tensor_property_14.number_of_bits = 32;
        tensor_property_14.derived_scale = {{0, 3}, {}, {}};
        TensorProperty tensor_property_15;
        tensor_property_15.use_derived_scale = true;
        tensor_property_15.number_of_bits = 32;
        tensor_property_15.derived_scale = {{0, 4}, {}, {}};
        TensorProperty tensor_property_19;
        tensor_property_19.extend_to_power_of_two = true;
        tensor_property_19.number_of_bits = 16;
        tensor_property_19.state_tensor = true;
        tensor_property_19.symmetric = true;

        property.inputs = {
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            {4, {}},
            {5, {}},
            {6, {}},
            {7, {}},
            {8, {}},
            {19, tensor_property_19},
            {12, tensor_property_12},
            {13, tensor_property_13},
            {14, tensor_property_14},
            {15, tensor_property_15},
        };
        property.outputs = {{0, {}}};
        property.intermediates = {
            // Without layer normalization, intermediate tensors 0, 1, 2, 3
            // are not used and their quantization parameters are ignored.
            {0, {}},
            {1, {}},
            {2, {}},
            {3, {}},
            // Without projection, hidden state (4), output (0) and input
            // activation state (18) are the same except that the very first
            // inference of input activation is not captured in hidden and
            // output.
            // This is not an issue because this intermediate tensor is not
            // used in the kernel and its quantization parameters are ignored.
            {4, {}},
        };
        property.restrict_scale = {{18, 0}};
        property.version = 2;
      }
      property.quantizable_int16 = false;
      break;
    }
    case BuiltinOperator_L2_NORMALIZATION: {
      property.inputs = {{0, {}}};
      // L2 Norm requires output with 1/128 as scale and 0 as zero point.
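      // With that scale, int8 [-128, 127] maps to [-1, 127/128]; the
      // components of an L2-normalized vector lie in [-1, 1].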
      TensorProperty tensor_property;
      tensor_property.restriction = true;
      tensor_property.restricted_value_int8 = {1 / 128.0f, 0};
      property.outputs = {{0, tensor_property}};
      property.version = 2;
      property.quantizable_int16 = false;
      break;
    }
    case BuiltinOperator_MAX_POOL_2D:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_MAXIMUM:
      property.arbitrary_inputs = true;
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.quantize_input_as_activations = true;
      property.version = 2;
      break;
    case BuiltinOperator_MEAN:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      break;
    case BuiltinOperator_MINIMUM:
      property.arbitrary_inputs = true;
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.quantize_input_as_activations = true;
      property.version = 2;
      break;
    case BuiltinOperator_MUL:
      property.inputs = {{0, {}}, {1, {}}};
      property.outputs = {{0, {}}};
      property.quantize_input_as_activations = true;
      property.version = 2;
      break;
    case BuiltinOperator_PACK:
      property.arbitrary_inputs = true;
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_PAD:
    case BuiltinOperator_PADV2:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_QUANTIZE:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      break;
    case BuiltinOperator_PRELU:
      property.inputs = {{0, {}}, {1, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = false;
      property.version = 1;
      property.quantizable_int16 = false;
      break;
    case BuiltinOperator_LEAKY_RELU:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      break;
    case BuiltinOperator_RELU:
    case BuiltinOperator_RELU6:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      break;
    case BuiltinOperator_RELU_N1_TO_1:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 1;
      property.quantizable_int16 = false;
      break;
    case BuiltinOperator_RESHAPE:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 1;
      break;
    case BuiltinOperator_RESIZE_BILINEAR:
    case BuiltinOperator_RESIZE_NEAREST_NEIGHBOR:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_REVERSE_V2:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 3;
      break;
    case BuiltinOperator_SELECT:
      property.inputs = {{1, {}}, {2, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 1;
      break;
    case BuiltinOperator_SHAPE:
      property.inputs = {{0, {}}};
      // Shape has no quantizable output.
      property.version = 1;
      break;
    case BuiltinOperator_SLICE:
      // We skip inputs 1 and 2 since they aren't real valued (they are the
      // index and size).
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_SQUEEZE:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 1;
      break;
    case BuiltinOperator_SOFTMAX: {
      property.inputs = {{0, {}}};
      // Softmax requires output with 1/256 as scale and -128 as zero point.
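      // With that scale and zero point, int8 [-128, 127] maps to
      // [0, 255/256]; softmax probabilities lie in [0, 1].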
      TensorProperty tensor_property;
      tensor_property.restriction = true;
      tensor_property.restricted_value_int8 = {1 / 256.0f, -128};
      tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
      property.outputs = {{0, tensor_property}};
      property.version = 2;
      break;
    }
    case BuiltinOperator_STRIDED_SLICE:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_SQUARED_DIFFERENCE:
    case BuiltinOperator_SUB:
      property.inputs = {{0, {}}, {1, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      property.quantize_input_as_activations = true;
      break;
    case BuiltinOperator_SUM:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 2;
      break;
    case BuiltinOperator_TANH: {
      property.inputs = {{0, {}}};
      // Tanh requires output with 1/128 as scale and 0 as zero point.
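      // With that scale, int8 [-128, 127] maps to [-1, 127/128], matching
      // tanh's (-1, 1) output range.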
      TensorProperty tensor_property;
      tensor_property.restriction = true;
      tensor_property.restricted_value_int8 = {1 / 128.0f, 0};
      tensor_property.restricted_value_int16 = {1 / 32768.0f, 0};
      property.outputs = {{0, tensor_property}};
      property.version = 2;
      break;
    }
    case BuiltinOperator_SVDF: {
      TensorProperty tensor_property_time;
      // Only 10 bits are needed because 6 bits are reserved for the reduce
      // operation after the element-wise multiplication between state and
      // time weights.
      tensor_property_time.number_of_bits = 10;
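      // The bias (input 3) is not calibrated; its scale is derived as
      // scale(weights_time, input 2) * scale(state, input 4).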
      TensorProperty tensor_property_bias;
      tensor_property_bias.use_derived_scale = true;
      tensor_property_bias.number_of_bits = 32;
      tensor_property_bias.derived_scale = {{2, 4}, {}, {}};
      TensorProperty tensor_property_state;
      tensor_property_state.number_of_bits = 16;
      tensor_property_state.state_tensor = true;

      property.inputs = {{0, {}},
                         {1, {}},
                         {2, tensor_property_time},
                         {4, tensor_property_state},
                         {3, tensor_property_bias}};
      property.outputs = {{0, {}}};
      property.version = 3;
      property.quantizable_int16 = false;
      break;
    }
    case BuiltinOperator_TRANSPOSE:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_UNPACK:
      property.inputs = {{0, {}}};
      property.arbitrary_outputs = true;
      property.restrict_same_input_output_scale = true;
      property.version = 1;
      break;
    case BuiltinOperator_MIRROR_PAD:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      property.quantizable_int16 = false;
      break;
    case BuiltinOperator_REDUCE_MAX:
    case BuiltinOperator_REDUCE_MIN:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.restrict_same_input_output_scale = true;
      property.version = 2;
      break;
    case BuiltinOperator_WHERE:
      property.inputs = {{0, {}}};
      property.outputs = {{0, {}}};
      property.version = 1;
      break;
    default:
      // No quantized implementation exists for this operation.
      property.quantizable = false;
      property.quantizable_int16 = false;
  }
  return property;
}  // NOLINT(readability/fn_size)
// LINT.ThenChange(//tensorflow/compiler/mlir/lite/ir/tfl_ops.td)

}  // namespace operator_property
}  // namespace optimize
}  // namespace tflite