/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/xnnpack/conv_2d_tester.h"

#include <algorithm>
#include <array>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

#include <gtest/gtest.h>
#include "fp16.h"  // from @FP16
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/delegates/xnnpack/test_util.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_conversion_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

namespace tflite {
namespace xnnpack {

void Conv2DTester::Test(TfLiteDelegate* delegate) const {
  std::vector<char> buffer = CreateTfLiteModel();
  const Model* model = GetModel(buffer.data());

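  // Build two interpreters from the same model: one that will run with the
  // delegate under test and one that runs the default built-in kernels as a
  // reference.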
  std::unique_ptr<Interpreter> delegate_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &delegate_interpreter),
      kTfLiteOk);
  std::unique_ptr<Interpreter> default_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &default_interpreter),
      kTfLiteOk);

  ASSERT_TRUE(delegate_interpreter);
  ASSERT_TRUE(default_interpreter);

  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
  ASSERT_EQ(default_interpreter->inputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
  ASSERT_EQ(default_interpreter->outputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);

  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);

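  // A configured weights cache is finalized only after the delegate has been
  // applied, i.e. after its weights have been packed into the cache.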
  if (weights_cache_ != nullptr) {
    TfLiteXNNPackDelegateWeightsCacheFinalizeHard(weights_cache_);
  }

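  // Generate random input data for the reference interpreter and copy the
  // same values into the delegate interpreter's input.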
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto input_rng =
      std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
  float* default_input_data = default_interpreter->typed_input_tensor<float>(0);
  std::generate(default_input_data,
                default_input_data + BatchSize() * InputHeight() *
                                         InputWidth() * InputChannels(),
                input_rng);

  float* delegate_input_data =
      delegate_interpreter->typed_input_tensor<float>(0);
  std::copy(default_input_data,
            default_input_data +
                BatchSize() * InputHeight() * InputWidth() * InputChannels(),
            delegate_input_data);

  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);

  float* default_output_data =
      default_interpreter->typed_output_tensor<float>(0);
  float* delegate_output_data =
      delegate_interpreter->typed_output_tensor<float>(0);

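  // Compare the delegate output against the reference output element-wise
  // with a small relative tolerance.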
  for (int32_t i = 0; i < BatchSize(); i++) {
    for (int32_t y = 0; y < OutputHeight(); y++) {
      for (int32_t x = 0; x < OutputWidth(); x++) {
        for (int32_t c = 0; c < OutputChannels(); c++) {
          const int32_t index = ((i * OutputHeight() + y) * OutputWidth() + x) *
                                    OutputChannels() +
                                c;
          ASSERT_NEAR(default_output_data[index], delegate_output_data[index],
                      std::abs(default_output_data[index]) * 3.0e-6f)
              << "batch " << i << " / " << BatchSize() << ", y position " << y
              << " / " << OutputHeight() << ", x position " << x << " / "
              << OutputWidth() << ", channel " << c << " / "
              << OutputChannels();
        }
      }
    }
  }
}

std::vector<char> Conv2DTester::CreateTfLiteModel() const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto range_rng = std::bind(
      std::uniform_real_distribution<float>(-25.0f, 25.0f), std::ref(rng));

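  // Serialize a single-subgraph model with one CONV_2D operator directly via
  // the FlatBuffers API. Depending on the tester options, DENSIFY and/or
  // DEQUANTIZE operators and additional static weight tensors are prepended.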
  flatbuffers::FlatBufferBuilder builder;
  std::vector<flatbuffers::Offset<OperatorCode>> operator_codes{
      {CreateOperatorCode(builder, BuiltinOperator_CONV_2D)}};
  std::vector<flatbuffers::Offset<tflite::Operator>> operators;
  std::vector<flatbuffers::Offset<tflite::Buffer>> buffers{
      {CreateBuffer(builder, builder.CreateVector({}))}};

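  // With sparse weights, the filter is stored as a sparse tensor and a
  // DENSIFY operator converts it to a dense tensor before it reaches
  // CONV_2D (directly or via DEQUANTIZE).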
  if (SparseWeights()) {
    operator_codes.emplace_back(
        CreateOperatorCode(builder, BuiltinOperator_DENSIFY));
    const std::array<int32_t, 1> densify_filter_inputs{{0}};
    const std::array<int32_t, 1> densify_filter_outputs{
        {(FP16Weights() || INT8Weights() || INT8ChannelWiseWeights()) ? 1 : 2}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(densify_filter_inputs.data(),
                                      densify_filter_inputs.size()),
        builder.CreateVector<int32_t>(densify_filter_outputs.data(),
                                      densify_filter_outputs.size())));
  }

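  // TFLite CONV_2D filters use
  // [output_channels, kernel_height, kernel_width, input_channels] layout.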
  const std::vector<int32_t> filter_shape = {
      OutputChannels(), KernelHeight(), KernelWidth(), KernelInputChannels()};
  const std::vector<int32_t> bias_shape = {OutputChannels()};
  std::vector<float> filter_scales;
  std::vector<int64_t> filter_zero_points;
  int32_t filter_quantized_dimension = 0;
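  // With FP16 weights, the filter and bias are stored as half-precision
  // tensors and converted to FP32 at runtime by DEQUANTIZE operators.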
  if (FP16Weights()) {
    operator_codes.emplace_back(
        CreateOperatorCode(builder, BuiltinOperator_DEQUANTIZE));

    std::vector<uint16_t> filter_data(OutputChannels() * KernelHeight() *
                                      KernelWidth() * KernelInputChannels());
    std::vector<uint16_t> bias_data(OutputChannels());
    for (int32_t oc = 0; oc < OutputChannels(); oc++) {
      // Use the same range of all-positive or all-negative values to generate
      // all weights within the same output channel, but different ranges for
      // different output channels. This ensures that no catastrophic
      // cancellation occurs, while the test still covers both positive and
      // negative inputs.
      const float range = range_rng();
      auto value_rng =
          std::bind(fp16_ieee_from_fp32_value,
                    std::bind(std::uniform_real_distribution<float>(
                                  std::min(range, 0.0f), std::max(range, 0.0f)),
                              std::ref(rng)));
      bias_data[oc] = value_rng();
      for (int32_t ic = 0; ic < KernelInputChannels(); ic++) {
        for (int32_t y = 0; y < KernelHeight(); y++) {
          for (int32_t x = 0; x < KernelWidth(); x++) {
            const int32_t index =
                ((oc * KernelHeight() + y) * KernelWidth() + x) *
                    KernelInputChannels() +
                ic;
            filter_data[index] = value_rng();
          }
        }
      }
    }

    buffers.emplace_back(CreateBuffer(
        builder, builder.CreateVector(
                     reinterpret_cast<const uint8_t*>(filter_data.data()),
                     sizeof(uint16_t) * filter_data.size())));
    buffers.emplace_back(CreateBuffer(
        builder,
        builder.CreateVector(reinterpret_cast<const uint8_t*>(bias_data.data()),
                             sizeof(uint16_t) * bias_data.size())));

    const std::array<int32_t, 1> dequantize_filter_inputs{
        {SparseWeights() ? 1 : 0}};
    const std::array<int32_t, 1> dequantize_filter_outputs{
        {SparseWeights() ? 4 : 3}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(dequantize_filter_inputs.data(),
                                      dequantize_filter_inputs.size()),
        builder.CreateVector<int32_t>(dequantize_filter_outputs.data(),
                                      dequantize_filter_outputs.size())));
    const std::array<int32_t, 1> dequantize_bias_inputs{
        {SparseWeights() ? 2 : 1}};
    const std::array<int32_t, 1> dequantize_bias_outputs{
        {SparseWeights() ? 5 : 4}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(dequantize_bias_inputs.data(),
                                      dequantize_bias_inputs.size()),
        builder.CreateVector<int32_t>(dequantize_bias_outputs.data(),
                                      dequantize_bias_outputs.size())));
  } else {
    std::vector<float> filter_data(OutputChannels() * KernelHeight() *
                                   KernelWidth() * KernelInputChannels());
    std::vector<float> bias_data(OutputChannels());
    for (int32_t oc = 0; oc < OutputChannels(); oc++) {
      // Use the same range of all-positive or all-negative values to generate
      // all weights within the same output channel, but different ranges for
      // different output channels. This ensures that no catastrophic
      // cancellation occurs, while the test still covers both positive and
      // negative inputs.
      const float range = range_rng();
      auto value_rng =
          std::bind(std::uniform_real_distribution<float>(
                        std::min(range, 0.0f), std::max(range, 0.0f)),
                    std::ref(rng));
      bias_data[oc] = value_rng();
      for (int32_t ic = 0; ic < KernelInputChannels(); ic++) {
        for (int32_t y = 0; y < KernelHeight(); y++) {
          for (int32_t x = 0; x < KernelWidth(); x++) {
            const int32_t index =
                ((oc * KernelHeight() + y) * KernelWidth() + x) *
                    KernelInputChannels() +
                ic;
            filter_data[index] = value_rng();
          }
        }
      }
    }

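    // With INT8 weights, the filter is quantized either per-tensor or
    // per-channel (along the last filter dimension) and converted back to
    // FP32 by a DEQUANTIZE operator; the bias stays in FP32.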
    if (INT8Weights() || INT8ChannelWiseWeights()) {
      operator_codes.emplace_back(
          CreateOperatorCode(builder, BuiltinOperator_DEQUANTIZE));

      std::vector<int8_t> quantized_filter_data(filter_data.size());
      if (INT8Weights()) {
        filter_scales.resize(1, GetInt8QuantizationScale(filter_data));
        filter_zero_points.resize(1, 0);
        std::transform(filter_data.begin(), filter_data.end(),
                       quantized_filter_data.begin(),
                       std::bind(QuantizeInt8, std::placeholders::_1, 0,
                                 filter_scales[0]));
      } else {
        filter_quantized_dimension =
            static_cast<int32_t>(filter_shape.size()) - 1;
        const int32_t num_scales = filter_shape[filter_quantized_dimension];
        filter_scales = GetInt8QuantizationScalePerChannel(
            filter_data.data(), filter_quantized_dimension, filter_shape);
        filter_zero_points.resize(num_scales, 0);
        QuantizeInt8PerChannel(filter_scales.data(), filter_zero_points.data(),
                               filter_quantized_dimension, filter_data.data(),
                               quantized_filter_data.data(), filter_shape);
      }
      buffers.emplace_back(CreateBuffer(
          builder,
          builder.CreateVector(
              reinterpret_cast<const uint8_t*>(quantized_filter_data.data()),
              sizeof(int8_t) * quantized_filter_data.size())));

      const std::array<int32_t, 1> dequantize_filter_inputs{
          {SparseWeights() ? 1 : 0}};
      const std::array<int32_t, 1> dequantize_filter_outputs{
          {SparseWeights() ? 3 : 2}};
      operators.emplace_back(CreateOperator(
          builder, /*opcode_index=*/operator_codes.size() - 1,
          builder.CreateVector<int32_t>(dequantize_filter_inputs.data(),
                                        dequantize_filter_inputs.size()),
          builder.CreateVector<int32_t>(dequantize_filter_outputs.data(),
                                        dequantize_filter_outputs.size())));
    } else {
      buffers.emplace_back(CreateBuffer(
          builder, builder.CreateVector(
                       reinterpret_cast<const uint8_t*>(filter_data.data()),
                       sizeof(float) * filter_data.size())));
    }

    // The bias is stored in FP32 even when the filter is quantized to INT8.
    buffers.emplace_back(CreateBuffer(
        builder,
        builder.CreateVector(reinterpret_cast<const uint8_t*>(bias_data.data()),
                             sizeof(float) * bias_data.size())));
  }

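  // Activation (input/output) tensors use NHWC layout.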
  const std::array<int32_t, 4> input_shape{
      {BatchSize(), InputHeight(), InputWidth(), InputChannels()}};
  const std::array<int32_t, 4> output_shape{
      {BatchSize(), OutputHeight(), OutputWidth(), OutputChannels()}};

  std::vector<flatbuffers::Offset<tflite::Tensor>> tensors;
  if (SparseWeights()) {
    // Sparse tensors in TFLite can be stored in several formats. Here we
    // choose the simplest configuration, in which
    // 1. all dimensions are dense,
    // 2. the traversal order is in-order, and
    // 3. there is no block configuration.
    int dims_count = filter_shape.size();
    std::vector<flatbuffers::Offset<DimensionMetadata>> dim_metadata(
        dims_count);
    std::vector<int> traversal_order(dims_count);
    for (int i = 0; i < dims_count; i++) {
      traversal_order[i] = i;
      dim_metadata[i] = CreateDimensionMetadata(builder, DimensionType_DENSE,
                                                filter_shape[i]);
    }
    flatbuffers::Offset<SparsityParameters> sparsity_param =
        CreateSparsityParameters(builder, builder.CreateVector(traversal_order),
                                 0, builder.CreateVector(dim_metadata));
    if (INT8Weights() || INT8ChannelWiseWeights()) {
      tensors.emplace_back(
          CreateTensor(builder,
                       builder.CreateVector<int32_t>(filter_shape.data(),
                                                     filter_shape.size()),
                       /*type=*/TensorType_INT8,
                       /*buffer=*/1, /*name=*/0,
                       CreateQuantizationParameters(
                           builder, /*min=*/0, /*max=*/0,
                           builder.CreateVector<float>(filter_scales),
                           builder.CreateVector<int64_t>(filter_zero_points),
                           /*details_type=*/QuantizationDetails_NONE,
                           /*details=*/0, filter_quantized_dimension),
                       /*is_variable=*/false, /*sparsity=*/sparsity_param));
    } else {
      tensors.emplace_back(CreateTensor(
          builder,
          builder.CreateVector<int32_t>(filter_shape.data(),
                                        filter_shape.size()),
          /*type=*/FP16Weights() ? TensorType_FLOAT16 : TensorType_FLOAT32,
          /*buffer=*/1, /*name=*/0, /*quantization=*/0,
          /*is_variable=*/false, /*sparsity=*/sparsity_param));
    }
  }
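  // Dense FP16/INT8 filter (and FP16 bias) tensors that feed the DEQUANTIZE
  // operators; they hold static data unless they are produced by DENSIFY.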
  if (FP16Weights()) {
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
        TensorType_FLOAT16, /*buffer=*/SparseWeights() ? 0 : 1));
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
        TensorType_FLOAT16, /*buffer=*/2));
  } else if (INT8Weights() || INT8ChannelWiseWeights()) {
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
        TensorType_INT8, /*buffer=*/SparseWeights() ? 0 : 1, /*name=*/0,
        CreateQuantizationParameters(
            builder, /*min=*/0, /*max=*/0,
            builder.CreateVector<float>(filter_scales),
            builder.CreateVector<int64_t>(filter_zero_points),
            /*details_type=*/QuantizationDetails_NONE,
            /*details=*/0, filter_quantized_dimension)));
  }
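  // FP32 tensors consumed and produced by CONV_2D: input, filter (static or
  // produced by DENSIFY/DEQUANTIZE), bias, and output.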
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(input_shape.data(), input_shape.size()),
      TensorType_FLOAT32));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
      TensorType_FLOAT32,
      /*buffer=*/
      (FP16Weights() || INT8Weights() || INT8ChannelWiseWeights() ||
       SparseWeights())
          ? 0
          : 1));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
      TensorType_FLOAT32, /*buffer=*/FP16Weights() ? 0 : 2));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(output_shape.data(), output_shape.size()),
      TensorType_FLOAT32));

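  // Regardless of how many DENSIFY/DEQUANTIZE tensors precede them, the
  // CONV_2D inputs (input, filter, bias) and its output are always the last
  // four tensors created above.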
  const std::array<int32_t, 3> op_inputs{
      {static_cast<int>(tensors.size()) - 4,
       static_cast<int>(tensors.size()) - 3,
       static_cast<int>(tensors.size()) - 2}};
  const std::array<int32_t, 1> op_outputs{
      {static_cast<int>(tensors.size()) - 1}};

  flatbuffers::Offset<Conv2DOptions> conv2d_options =
      CreateConv2DOptions(builder, Padding(), StrideWidth(), StrideHeight(),
                          Activation(), DilationWidth(), DilationHeight());
  operators.emplace_back(CreateOperator(
      builder, /*opcode_index=*/0,
      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
      BuiltinOptions_Conv2DOptions, conv2d_options.Union()));

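  // Only the FP32 input and output tensors are exposed as subgraph I/O; all
  // weight tensors are static or produced inside the subgraph.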
  const std::array<int32_t, 1> subgraph_inputs{
      {static_cast<int>(tensors.size()) - 4}};
  const std::array<int32_t, 1> subgraph_outputs{
      {static_cast<int>(tensors.size()) - 1}};
  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
      builder, builder.CreateVector(tensors.data(), tensors.size()),
      builder.CreateVector<int32_t>(subgraph_inputs.data(),
                                    subgraph_inputs.size()),
      builder.CreateVector<int32_t>(subgraph_outputs.data(),
                                    subgraph_outputs.size()),
      builder.CreateVector(operators.data(), operators.size()));

  flatbuffers::Offset<flatbuffers::String> description =
      builder.CreateString("Conv2D model");

  flatbuffers::Offset<Model> model_buffer = CreateModel(
      builder, TFLITE_SCHEMA_VERSION,
      builder.CreateVector(operator_codes.data(), operator_codes.size()),
      builder.CreateVector(&subgraph, 1), description,
      builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  return std::vector<char>(builder.GetBufferPointer(),
                           builder.GetBufferPointer() + builder.GetSize());
}

}  // namespace xnnpack
}  // namespace tflite