1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 #ifndef TENSORFLOW_LITE_TOOLS_OPTIMIZE_OPERATOR_PROPERTY_H_ 16 #define TENSORFLOW_LITE_TOOLS_OPTIMIZE_OPERATOR_PROPERTY_H_ 17 18 #include "tensorflow/lite/model.h" 19 #include "tensorflow/lite/schema/schema_generated.h" 20 21 namespace tflite { 22 namespace optimize { 23 namespace operator_property { 24 25 // The scales of a certain tensor can be derived from the multiplications of all 26 // the scales. For example, for bias in conv, derived_scale = {{0, 1}, {}, {}} 27 // and for lstm gate bias, the derived scale is {{}, {0}, {2^-10}} 28 struct DerivedScale { 29 // MSVC2015 version 14.0 and below doesn't support struct initialization with 30 // initializer lists so emulate the behavior using a float initializer list. 31 #if _MSC_VER <= 1900 DerivedScaleDerivedScale32 DerivedScale() {} 33 // Construct this object with a list of initializer lists. All list elements 34 // are cast to float values to avoid ambiguous construction of a union-style 35 // object that could take either std::initializer_list<float> or 36 // std::initializer_list<int>. DerivedScaleDerivedScale37 DerivedScale(std::initializer_list<std::initializer_list<float>> values) { 38 assert(values.size() == 3); 39 std::vector<std::initializer_list<float>> items(values); 40 for (auto& it : items[0]) { 41 input_tensors.push_back(static_cast<int>(it)); 42 } 43 for (auto& it : items[1]) { 44 intermediate_tensors.push_back(static_cast<int>(it)); 45 } 46 factors.assign(items[2]); 47 } 48 #endif // _MSC_VER <= 1900 49 50 std::vector<int> input_tensors = {}; 51 std::vector<int> intermediate_tensors = {}; 52 // This is a list of extra factors that are not associated with any other 53 // tensor. 54 std::vector<float> factors = {}; 55 }; 56 57 struct TensorProperty { 58 // per_axis also implies symmetric currently. 59 bool per_axis = false; 60 // TODO(jianlijianli): remove dimension index and read it from tensor instead. 61 int per_axis_index = 0; 62 bool symmetric = false; 63 64 // Constraints. 65 bool restriction = false; 66 // scale/zero_point hardcoded. 67 std::pair<float, int> restricted_value_int8 = {0.0f, 0}; 68 std::pair<float, int> restricted_value_int16 = {0.0f, 0}; 69 70 // Use derived scale. 71 bool use_derived_scale = false; 72 // The derived scale. 73 DerivedScale derived_scale; 74 75 // The number of bits for this tensor. It could be 8, 16, 32 or even not power 76 // of two. 77 int number_of_bits = 8; 78 79 // Extend the range to power of two. 80 bool extend_to_power_of_two = false; 81 82 // State tensor. 83 bool state_tensor = false; 84 }; 85 86 struct OperatorProperty { 87 // Is a quantized operations currently supported. 88 bool quantizable = true; 89 // Is a quantized operations currently supported for 16x8 90 bool quantizable_int16 = true; 91 // Op has arbitrary number of inputs, such as concat. 92 bool arbitrary_inputs = false; 93 // Op has arbitrary number of outputs, such as slice. 94 bool arbitrary_outputs = false; 95 // Input indexes -> input tensor property. 96 // Must be topologically sorted since there are derived scales. 97 std::vector<std::pair<int, TensorProperty>> inputs = {}; 98 // Output indexes -> output tensor property. 99 std::vector<std::pair<int, TensorProperty>> outputs = {}; 100 // Bias indexes. 101 // TODO(jianlijianli): remove this by putting biases into inputs as well since 102 // we now can model "derived scale". 103 std::vector<int> biases = {}; 104 105 // Intermediate indexes -> intermediate tensor property. 106 std::vector<std::pair<int, TensorProperty>> intermediates = {}; 107 108 // Force output to reuse the same scale and zero point of input. 109 bool restrict_same_input_output_scale = false; 110 111 // Use same min of min and max of max for each group. 112 // Incompatible with restrict_same_input_output_scale and restricted_value. 113 // Currently it only supports scale pair of {input_index, output_index}. 114 // TODO(b/174534943): make it compatible with other restrictions when there 115 // is a use case. 116 std::vector<std::vector<int>> restrict_scale = {}; 117 118 // Op version. 119 int version = 1; 120 121 // When we quantize activations into 16 bit and weights into 8 bit, 122 // we want to quantize all inputs, including constant tensors, 123 // for the operators like Add, Mul into 16-bit as well. The constant 124 // inputs are quantized as weights and this variable indicates 125 // that we want to do quantizations of these tensors as activations. 126 bool quantize_input_as_activations = false; 127 }; 128 129 // The op as well as it variants. 130 // TODO(b/174283888): extend it to support ops that has multiple variants. 131 struct OpVariant { 132 BuiltinOperator op_code; 133 bool use_layer_norm = false; 134 bool use_projection = false; 135 bool use_peephole = false; 136 // An attribute to indicate if quantization is supported for this Op. 137 // This attribute is equivalent to the "quantizable" attribute in 138 // "OperatorProperty". It added here since OpVariants peeks inside the Op and 139 // determines its quantization related properties. 140 bool is_quantizable = true; 141 }; 142 143 OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index, 144 int op_index); 145 OperatorProperty GetOperatorProperty(OpVariant op_variant); 146 147 } // namespace operator_property 148 } // namespace optimize 149 } // namespace tflite 150 151 #endif // TENSORFLOW_LITE_TOOLS_OPTIMIZE_OPERATOR_PROPERTY_H_ 152