/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/xnnpack/quantize_tester.h"

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#include <gtest/gtest.h>
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_conversion_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

namespace tflite {
namespace xnnpack {

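// Fills the reference interpreter's input tensor with random values spanning
// the full range of T, then copies the same data into the delegate
// interpreter's input tensor so both run on identical inputs.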
template <class T>
void QuantizeTester::PopulateInput(Interpreter* delegate_interpreter,
                                   Interpreter* default_interpreter) const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  std::uniform_int_distribution<int> input_distribution(
      std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
  auto input_rng = std::bind(input_distribution, std::ref(rng));

  T* default_input_data = default_interpreter->typed_input_tensor<T>(0);
  std::generate(default_input_data, default_input_data + ComputeSize(Shape()),
                std::ref(input_rng));

  T* xnnpack_input_data = delegate_interpreter->typed_input_tensor<T>(0);
  std::copy(default_input_data, default_input_data + ComputeSize(Shape()),
            xnnpack_input_data);
}

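// Specialization for float inputs: draws values uniformly from [-1.0f, 1.0f]
// instead of the full integral range.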
template <>
void QuantizeTester::PopulateInput<float>(
    Interpreter* delegate_interpreter, Interpreter* default_interpreter) const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  std::uniform_real_distribution<float> input_distribution(-1.0f, 1.0f);
  auto input_rng = std::bind(input_distribution, std::ref(rng));

  float* default_input_data = default_interpreter->typed_input_tensor<float>(0);
  std::generate(default_input_data, default_input_data + ComputeSize(Shape()),
                std::ref(input_rng));

  float* xnnpack_input_data =
      delegate_interpreter->typed_input_tensor<float>(0);
  std::copy(default_input_data, default_input_data + ComputeSize(Shape()),
            xnnpack_input_data);
}

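// Invokes both interpreters and checks that every element of the delegate
// output matches the reference output within one quantized unit.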
template <class T>
void QuantizeTester::InvokeAndCheckOutput(
    Interpreter* delegate_interpreter, Interpreter* default_interpreter) const {
  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);

  T* default_output_data = default_interpreter->typed_output_tensor<T>(0);
  T* delegate_output_data = delegate_interpreter->typed_output_tensor<T>(0);

  for (size_t i = 0; i < ComputeSize(Shape()); i++) {
    ASSERT_LE(std::abs(static_cast<int32_t>(default_output_data[i]) -
                       static_cast<int32_t>(delegate_output_data[i])),
              1)
        << "default " << static_cast<int32_t>(default_output_data[i])
        << ", delegate " << static_cast<int32_t>(delegate_output_data[i])
        << " at index " << i << " / " << ComputeSize(Shape());
  }
}

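// Builds a single-operator QUANTIZE model, runs it once with the delegate and
// once with the reference kernels, and compares the outputs for the requested
// input/output tensor types.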
void QuantizeTester::Test(TensorType input_type, TensorType output_type,
                          TfLiteDelegate* delegate) const {
  std::vector<char> buffer = CreateTfLiteModel(input_type, output_type);
  const Model* model = GetModel(buffer.data());

  std::unique_ptr<Interpreter> delegate_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &delegate_interpreter),
      kTfLiteOk);
  std::unique_ptr<Interpreter> default_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &default_interpreter),
      kTfLiteOk);

  ASSERT_TRUE(delegate_interpreter);
  ASSERT_TRUE(default_interpreter);

  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
  ASSERT_EQ(default_interpreter->inputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
  ASSERT_EQ(default_interpreter->outputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);

  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);

  switch (input_type) {
    case TensorType_FLOAT32:
      PopulateInput<float>(delegate_interpreter.get(),
                           default_interpreter.get());
      break;
    case TensorType_INT8:
      PopulateInput<int8_t>(delegate_interpreter.get(),
                            default_interpreter.get());
      break;
    case TensorType_UINT8:
      PopulateInput<uint8_t>(delegate_interpreter.get(),
                             default_interpreter.get());
      break;
    default:
      GTEST_FAIL() << "unsupported input type "
                   << EnumNameTensorType(input_type);
  }

  switch (output_type) {
    case TensorType_INT8:
      InvokeAndCheckOutput<int8_t>(delegate_interpreter.get(),
                                   default_interpreter.get());
      break;
    case TensorType_UINT8:
      InvokeAndCheckOutput<uint8_t>(delegate_interpreter.get(),
                                    default_interpreter.get());
      break;
    default:
      GTEST_FAIL() << "unsupported output type "
                   << EnumNameTensorType(output_type);
  }
}

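// Serializes a flatbuffer model containing one QUANTIZE operator with a
// single input tensor (quantized unless it is FLOAT32) and a quantized
// output tensor.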
std::vector<char> QuantizeTester::CreateTfLiteModel(
    TensorType input_type, TensorType output_type) const {
  flatbuffers::FlatBufferBuilder builder;
  flatbuffers::Offset<OperatorCode> operator_code =
      CreateOperatorCode(builder, BuiltinOperator_QUANTIZE);

  const std::array<flatbuffers::Offset<Buffer>, 1> buffers{{
      CreateBuffer(builder, builder.CreateVector({})),
  }};

  flatbuffers::Offset<QuantizationParameters> input_quantization = 0;
  if (input_type != TensorType_FLOAT32) {
    input_quantization = CreateQuantizationParameters(
        builder, /*min=*/0, /*max=*/0,
        builder.CreateVector<float>({InputScale()}),
        builder.CreateVector<int64_t>({InputZeroPoint()}));
  }

  const std::array<flatbuffers::Offset<Tensor>, 2> tensors{{
      CreateTensor(
          builder,
          builder.CreateVector<int32_t>(Shape().data(), Shape().size()),
          input_type,
          /*buffer=*/0, /*name=*/0, input_quantization),
      CreateTensor(
          builder,
          builder.CreateVector<int32_t>(Shape().data(), Shape().size()),
          output_type,
          /*buffer=*/0, /*name=*/0,
          CreateQuantizationParameters(
              builder, /*min=*/0, /*max=*/0,
              builder.CreateVector<float>({OutputScale()}),
              builder.CreateVector<int64_t>({OutputZeroPoint()}))),
  }};

  const std::array<int32_t, 1> op_inputs{{0}};
  const std::array<int32_t, 1> op_outputs{{1}};
  flatbuffers::Offset<Operator> op = CreateOperator(
      builder, /*opcode_index=*/0,
      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()));

  const std::array<int32_t, 1> subgraph_inputs{{0}};
  const std::array<int32_t, 1> subgraph_outputs{{1}};
  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
      builder, builder.CreateVector(tensors.data(), tensors.size()),
      builder.CreateVector<int32_t>(subgraph_inputs.data(),
                                    subgraph_inputs.size()),
      builder.CreateVector<int32_t>(subgraph_outputs.data(),
                                    subgraph_outputs.size()),
      builder.CreateVector(&op, 1));

  flatbuffers::Offset<flatbuffers::String> description =
      builder.CreateString("Quantize operator model");

  flatbuffers::Offset<Model> model_buffer = CreateModel(
      builder, TFLITE_SCHEMA_VERSION, builder.CreateVector(&operator_code, 1),
      builder.CreateVector(&subgraph, 1), description,
      builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  return std::vector<char>(builder.GetBufferPointer(),
                           builder.GetBufferPointer() + builder.GetSize());
}

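// Returns the number of elements in a tensor of the given shape.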
int32_t QuantizeTester::ComputeSize(const std::vector<int32_t>& shape) {
  return std::accumulate(shape.cbegin(), shape.cend(), 1,
                         std::multiplies<int32_t>());
}

}  // namespace xnnpack
}  // namespace tflite