/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/xnnpack/quantized_transpose_conv_tester.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <memory>
#include <numeric>
#include <random>
#include <type_traits>
#include <vector>
#include <gtest/gtest.h>
#include "fp16.h"  // from @FP16
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_conversion_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

namespace tflite {
namespace xnnpack {

void QuantizedTransposeConvTester::Test(TfLiteDelegate* delegate) const {
  std::vector<char> buffer = CreateTfLiteModel();
  const Model* model = GetModel(buffer.data());

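  // Build two interpreters from the same model: one will run with the XNNPACK
  // delegate, the other serves as the reference implementation.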
  std::unique_ptr<Interpreter> delegate_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &delegate_interpreter),
      kTfLiteOk);
  std::unique_ptr<Interpreter> default_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &default_interpreter),
      kTfLiteOk);

  ASSERT_TRUE(delegate_interpreter);
  ASSERT_TRUE(default_interpreter);

  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
  ASSERT_EQ(default_interpreter->inputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
  ASSERT_EQ(default_interpreter->outputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);

  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);

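  // If a weights cache was configured, finalize it before running inference.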
  if (weights_cache_ != nullptr) {
    TfLiteXNNPackDelegateWeightsCacheFinalizeHard(weights_cache_);
  }

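  // Fill the reference interpreter's input with random uint8 data and copy the
  // same values into the delegated interpreter's input.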
  std::random_device random_device;
  auto rng = std::mt19937(random_device());

  const int input_data_size =
      BatchSize() * InputHeight() * InputWidth() * InputChannels();

  // Instantiating std::uniform_int_distribution<T> is undefined behavior when
  // T is not short, int, long, long long, or one of their unsigned variants:
  // https://en.cppreference.com/w/cpp/numeric/random/uniform_int_distribution.
  auto uint8rng =
      std::bind(std::uniform_int_distribution<int32_t>(0, 255), rng);
  uint8_t* default_input_data = reinterpret_cast<uint8_t*>(
      default_interpreter->input_tensor(0)->data.data);
  std::generate(default_input_data, default_input_data + input_data_size,
                std::ref(uint8rng));

  uint8_t* xnnpack_input_data = reinterpret_cast<uint8_t*>(
      delegate_interpreter->input_tensor(0)->data.data);
  std::copy(default_input_data, default_input_data + input_data_size,
            xnnpack_input_data);

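  // Run both interpreters and verify that their outputs match within the
  // quantization error tolerance.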
  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);

  if (Unsigned()) {
    EnsureOutputsClose<uint8_t>(default_interpreter.get(),
                                delegate_interpreter.get());
  } else {
    EnsureOutputsClose<int8_t>(default_interpreter.get(),
                               delegate_interpreter.get());
  }
}

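// Compares the outputs of the reference and delegated interpreters
// element-wise, allowing a difference of at most one quantization step.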
template <typename WeightType>
void QuantizedTransposeConvTester::EnsureOutputsClose(
    const Interpreter* default_interpreter,
    const Interpreter* delegate_interpreter) const {
  const WeightType* default_output_data =
      default_interpreter->typed_output_tensor<WeightType>(0);
  const WeightType* xnnpack_output_data =
      delegate_interpreter->typed_output_tensor<WeightType>(0);

  const size_t output_data_size =
      BatchSize() * OutputHeight() * OutputWidth() * OutputChannels();

  const int kQuantizationErrorTolerance = 1;

  for (size_t i = 0; i < output_data_size; i++) {
    const int diff = static_cast<int>(default_output_data[i]) -
                     static_cast<int>(xnnpack_output_data[i]);
    ASSERT_LE(std::abs(diff), kQuantizationErrorTolerance);
  }
}

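// Builds a FlatBuffer model containing a single TRANSPOSE_CONV operator whose
// input, filter, and output tensors share the same quantization parameters.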
std::vector<char> QuantizedTransposeConvTester::CreateTfLiteModel() const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());

  const std::vector<int32_t> input_shape = {BatchSize(), InputHeight(),
                                            InputWidth(), InputChannels()};
  const std::vector<int32_t> output_shape = {BatchSize(), OutputHeight(),
                                             OutputWidth(), OutputChannels()};
  const std::vector<int32_t> filter_shape = {OutputChannels(), KernelHeight(),
                                             KernelWidth(), InputChannels()};
  const std::vector<int32_t> bias_shape = {OutputChannels()};

  flatbuffers::FlatBufferBuilder builder;

  std::vector<flatbuffers::Offset<OperatorCode>> operator_codes;

  std::vector<flatbuffers::Offset<tflite::Operator>> operators;
  std::vector<flatbuffers::Offset<Tensor>> tensors;

  // Buffer 0 is a sentinel required by the schema; it means "no buffer".
  std::vector<flatbuffers::Offset<tflite::Buffer>> buffers = {
      CreateBuffer(builder, builder.CreateVector({}))};
  const int kNoBuffer = 0;

  // Create a tensor containing the expected output shape.
  const int buffer_index_output_shape = buffers.size();
  buffers.emplace_back(CreateBuffer(
      builder, builder.CreateVector(
                   reinterpret_cast<const uint8_t*>(output_shape.data()),
                   sizeof(int32_t) * output_shape.size())));

  std::vector<int32_t> output_shape_tensor_shape = {4};
  const int tensor_index_output_shape = tensors.size();
  tensors.emplace_back(
      CreateTensorDirect(builder, &output_shape_tensor_shape, TensorType_INT32,
                         /*buffer=*/buffer_index_output_shape));

  flatbuffers::Offset<::tflite::QuantizationParameters>
      quantization_parameters = 0;

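  // Fill the filter buffer with random uint8 data.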
  std::vector<uint8_t> filter_data(OutputChannels() * KernelHeight() *
                                   KernelWidth() * InputChannels());

  auto uint8rng =
      std::bind(std::uniform_int_distribution<int32_t>(0, 255), rng);
  std::generate(filter_data.begin(), filter_data.end(), uint8rng);

  const int buffer_index_filter = buffers.size();
  buffers.emplace_back(CreateBuffer(
      builder,
      builder.CreateVector(reinterpret_cast<const uint8_t*>(filter_data.data()),
                           sizeof(uint8_t) * filter_data.size())));

  const ::tflite::TensorType input_tensor_type =
      Unsigned() ? ::tflite::TensorType_UINT8 : ::tflite::TensorType_INT8;

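  // Pick a random quantization scale. For unsigned (UINT8) quantization, use
  // the mean of the filter data as the zero point; for signed (INT8), keep the
  // zero point at zero.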
  auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);
  const float quantization_scale = f32rng();
  int64_t zero_point = 0;
  if (Unsigned()) {
    zero_point = std::accumulate(filter_data.begin(), filter_data.end(), 0) /
                 filter_data.size();
  }

  quantization_parameters = CreateQuantizationParameters(
      builder, /*min=*/0, /*max=*/0,
      builder.CreateVector<float>({quantization_scale}),
      builder.CreateVector<int64_t>({zero_point}));
  tensors.emplace_back(CreateTensorDirect(
      builder, &filter_shape, input_tensor_type, buffer_index_filter,
      /*name=*/nullptr, quantization_parameters));

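  // Optionally add a bias tensor with random int32 data.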
  if (UseBias()) {
    const int32_t kMaxAbsBias = 10000;
    auto int32rng = std::bind(
        std::uniform_int_distribution<int32_t>(-kMaxAbsBias, kMaxAbsBias), rng);
    std::vector<int32_t> bias_data(OutputChannels());
    std::generate(bias_data.begin(), bias_data.end(), int32rng);

    const int buffer_index_bias = buffers.size();
    buffers.emplace_back(CreateBuffer(
        builder,
        builder.CreateVector(reinterpret_cast<const uint8_t*>(bias_data.data()),
                             sizeof(int32_t) * bias_data.size())));

    // TFLite checks that the bias quantization scale is close to the product
    // of the input and filter quantization scales.
    const float bias_quantization_scale =
        quantization_scale * quantization_scale;
    auto bias_quantization_parameters = CreateQuantizationParameters(
        builder, /*min=*/0, /*max=*/0,
        /*scale=*/builder.CreateVector<float>({bias_quantization_scale}),
        /*zero_point=*/builder.CreateVector<int64_t>({0}));

    tensors.emplace_back(
        CreateTensorDirect(builder, &bias_shape, TensorType_INT32,
                           /*buffer=*/buffer_index_bias,
                           /*name=*/nullptr, bias_quantization_parameters));
  }

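  // The filter tensor (and the optional bias tensor) were appended last;
  // recover their indices from the current tensor count.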
  const int top_tensor = tensors.size() - 1;
  const int tensor_index_filter = UseBias() ? top_tensor - 1 : top_tensor;

  const int tensor_index_input = tensors.size();
  tensors.emplace_back(
      CreateTensorDirect(builder, &input_shape, input_tensor_type, kNoBuffer,
                         /*name=*/nullptr, quantization_parameters));

  std::vector<int32_t> op_inputs = {tensor_index_output_shape,
                                    tensor_index_filter, tensor_index_input};
  if (UseBias()) {
    const int tensor_index_bias = top_tensor;
    op_inputs.push_back(tensor_index_bias);
  }

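  // The output tensor reuses the quantization parameters of the input and
  // filter tensors.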
  const int tensor_index_output = tensors.size();
  tensors.emplace_back(
      CreateTensorDirect(builder, &output_shape, input_tensor_type, kNoBuffer,
                         /*name=*/nullptr, quantization_parameters));

  const std::vector<int32_t> op_outputs = {tensor_index_output};

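  // Register the TRANSPOSE_CONV operator code and create the operator with the
  // requested padding and strides.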
  const int opcode_index_transpose_conv = operator_codes.size();
  operator_codes.emplace_back(
      CreateOperatorCode(builder, BuiltinOperator_TRANSPOSE_CONV));

  flatbuffers::Offset<TransposeConvOptions> transpose_conv_options =
      CreateTransposeConvOptions(builder, Padding(), StrideWidth(),
                                 StrideHeight());
  operators.emplace_back(CreateOperatorDirect(
      builder, /*opcode_index=*/opcode_index_transpose_conv, &op_inputs,
      &op_outputs, BuiltinOptions_TransposeConvOptions,
      transpose_conv_options.Union()));

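  // Assemble the subgraph, finish the model, and return the serialized buffer.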
  const std::vector<int32_t> subgraph_inputs = {tensor_index_input};
  const std::vector<int32_t> subgraph_outputs = {tensor_index_output};
  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraphDirect(
      builder, &tensors, &subgraph_inputs, &subgraph_outputs, &operators);

  flatbuffers::Offset<flatbuffers::String> description =
      builder.CreateString("Quantized TransposeConv model");

  flatbuffers::Offset<Model> model_buffer = CreateModel(
      builder, TFLITE_SCHEMA_VERSION,
      builder.CreateVector(operator_codes.data(), operator_codes.size()),
      builder.CreateVector(&subgraph, 1), description,
      builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  return std::vector<char>(builder.GetBufferPointer(),
                           builder.GetBufferPointer() + builder.GetSize());
}

}  // namespace xnnpack
}  // namespace tflite