/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/xnnpack/conv_2d_tester.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <vector>

#include <gtest/gtest.h>
#include "fp16.h"  // from @FP16
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/delegates/xnnpack/test_util.h"
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_conversion_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

namespace tflite {
namespace xnnpack {

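// Runs the generated Conv2D model twice, once on the plain TFLite interpreter
// and once with the given delegate applied, and checks that both interpreters
// produce matching outputs for the same random input.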
void Conv2DTester::Test(TfLiteDelegate* delegate) const {
  std::vector<char> buffer = CreateTfLiteModel();
  const Model* model = GetModel(buffer.data());

  std::unique_ptr<Interpreter> delegate_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &delegate_interpreter),
      kTfLiteOk);
  std::unique_ptr<Interpreter> default_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &default_interpreter),
      kTfLiteOk);

  ASSERT_TRUE(delegate_interpreter);
  ASSERT_TRUE(default_interpreter);

  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
  ASSERT_EQ(default_interpreter->inputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
  ASSERT_EQ(default_interpreter->outputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);

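  // Only the delegate interpreter gets the delegate applied; the default
  // interpreter keeps the reference TFLite kernels and serves as the baseline.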
  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);

  if (weights_cache_ != nullptr) {
    TfLiteXNNPackDelegateWeightsCacheFinalizeHard(weights_cache_);
  }

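  // Fill the input of the default interpreter with random data and copy the
  // same values into the delegate interpreter's input.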
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto input_rng =
      std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
  float* default_input_data = default_interpreter->typed_input_tensor<float>(0);
  std::generate(default_input_data,
                default_input_data + BatchSize() * InputHeight() *
                                         InputWidth() * InputChannels(),
                input_rng);

  float* delegate_input_data =
      delegate_interpreter->typed_input_tensor<float>(0);
  std::copy(default_input_data,
            default_input_data +
                BatchSize() * InputHeight() * InputWidth() * InputChannels(),
            delegate_input_data);

  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);

  float* default_output_data =
      default_interpreter->typed_output_tensor<float>(0);
  float* delegate_output_data =
      delegate_interpreter->typed_output_tensor<float>(0);

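  // Compare the delegate output against the reference output element by
  // element, allowing a small relative error (3e-6 of the reference value's
  // magnitude).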
  for (int32_t i = 0; i < BatchSize(); i++) {
    for (int32_t y = 0; y < OutputHeight(); y++) {
      for (int32_t x = 0; x < OutputWidth(); x++) {
        for (int32_t c = 0; c < OutputChannels(); c++) {
          const int32_t index = ((i * OutputHeight() + y) * OutputWidth() + x) *
                                    OutputChannels() +
                                c;
          ASSERT_NEAR(default_output_data[index], delegate_output_data[index],
                      std::abs(default_output_data[index]) * 3.0e-6f)
              << "batch " << i << " / " << BatchSize() << ", y position " << y
              << " / " << OutputHeight() << ", x position " << x << " / "
              << OutputWidth() << ", channel " << c << " / "
              << OutputChannels();
        }
      }
    }
  }
}

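// Serializes a single-subgraph TFLite FlatBuffer model containing one CONV_2D
// operator, preceded by optional DENSIFY and DEQUANTIZE operators when sparse,
// FP16, or INT8 weights are requested.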
std::vector<char> Conv2DTester::CreateTfLiteModel() const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto range_rng = std::bind(
      std::uniform_real_distribution<float>(-25.0f, 25.0f), std::ref(rng));

  flatbuffers::FlatBufferBuilder builder;
  std::vector<flatbuffers::Offset<OperatorCode>> operator_codes{
      {CreateOperatorCode(builder, BuiltinOperator_CONV_2D)}};
  std::vector<flatbuffers::Offset<tflite::Operator>> operators;
  std::vector<flatbuffers::Offset<tflite::Buffer>> buffers{
      {CreateBuffer(builder, builder.CreateVector({}))}};

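  // For sparse weights, add a DENSIFY operator that expands the sparse filter
  // tensor into the dense tensor consumed downstream (by DEQUANTIZE when the
  // filter is also FP16/INT8, otherwise directly by CONV_2D).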
  if (SparseWeights()) {
    operator_codes.emplace_back(
        CreateOperatorCode(builder, BuiltinOperator_DENSIFY));
    const std::array<int32_t, 1> densify_filter_inputs{{0}};
    const std::array<int32_t, 1> densify_filter_outputs{
        {(FP16Weights() || INT8Weights() || INT8ChannelWiseWeights()) ? 1 : 2}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(densify_filter_inputs.data(),
                                      densify_filter_inputs.size()),
        builder.CreateVector<int32_t>(densify_filter_outputs.data(),
                                      densify_filter_outputs.size())));
  }

  const std::vector<int32_t> filter_shape = {
      OutputChannels(), KernelHeight(), KernelWidth(), KernelInputChannels()};
  const std::vector<int32_t> bias_shape = {OutputChannels()};
  std::vector<float> filter_scales;
  std::vector<int64_t> filter_zero_points;
  int32_t filter_quantized_dimension = 0;
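  // Generate random filter and bias data. Depending on the tester
  // configuration, the data is stored as FP16, as INT8 (per-tensor or
  // per-channel), or as plain FP32, with DEQUANTIZE operators added as needed.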
  if (FP16Weights()) {
    operator_codes.emplace_back(
        CreateOperatorCode(builder, BuiltinOperator_DEQUANTIZE));

    std::vector<uint16_t> filter_data(OutputChannels() * KernelHeight() *
                                      KernelWidth() * KernelInputChannels());
    std::vector<uint16_t> bias_data(OutputChannels());
    for (int32_t oc = 0; oc < OutputChannels(); oc++) {
      // Use the same range of all-positive or all-negative values to generate
      // all weights within the same output channel, but different ranges for
      // different output channels. This ensures that no catastrophic
      // cancellation occurs, while the test still covers both positive and
      // negative inputs.
      const float range = range_rng();
      auto value_rng =
          std::bind(fp16_ieee_from_fp32_value,
                    std::bind(std::uniform_real_distribution<float>(
                                  std::min(range, 0.0f), std::max(range, 0.0f)),
                              std::ref(rng)));
      bias_data[oc] = value_rng();
      for (int32_t ic = 0; ic < KernelInputChannels(); ic++) {
        for (int32_t y = 0; y < KernelHeight(); y++) {
          for (int32_t x = 0; x < KernelWidth(); x++) {
            const int32_t index =
                ((oc * KernelHeight() + y) * KernelWidth() + x) *
                    KernelInputChannels() +
                ic;
            filter_data[index] = value_rng();
          }
        }
      }
    }

    buffers.emplace_back(CreateBuffer(
        builder, builder.CreateVector(
                     reinterpret_cast<const uint8_t*>(filter_data.data()),
                     sizeof(uint16_t) * filter_data.size())));
    buffers.emplace_back(CreateBuffer(
        builder,
        builder.CreateVector(reinterpret_cast<const uint8_t*>(bias_data.data()),
                             sizeof(uint16_t) * bias_data.size())));

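    // DEQUANTIZE operators convert the FP16 filter and bias to the FP32
    // tensors consumed by CONV_2D; tensor indices shift by one when a sparse
    // filter tensor precedes them.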
    const std::array<int32_t, 1> dequantize_filter_inputs{
        {SparseWeights() ? 1 : 0}};
    const std::array<int32_t, 1> dequantize_filter_outputs{
        {SparseWeights() ? 4 : 3}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(dequantize_filter_inputs.data(),
                                      dequantize_filter_inputs.size()),
        builder.CreateVector<int32_t>(dequantize_filter_outputs.data(),
                                      dequantize_filter_outputs.size())));
    const std::array<int32_t, 1> dequantize_bias_inputs{
        {SparseWeights() ? 2 : 1}};
    const std::array<int32_t, 1> dequantize_bias_outputs{
        {SparseWeights() ? 5 : 4}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(dequantize_bias_inputs.data(),
                                      dequantize_bias_inputs.size()),
        builder.CreateVector<int32_t>(dequantize_bias_outputs.data(),
                                      dequantize_bias_outputs.size())));
  } else {
    std::vector<float> filter_data(OutputChannels() * KernelHeight() *
                                   KernelWidth() * KernelInputChannels());
    std::vector<float> bias_data(OutputChannels());
    for (int32_t oc = 0; oc < OutputChannels(); oc++) {
      // Use the same range of all-positive or all-negative values to generate
      // all weights within the same output channel, but different ranges for
      // different output channels. This ensures that no catastrophic
      // cancellation occurs, while the test still covers both positive and
      // negative inputs.
      const float range = range_rng();
      auto value_rng =
          std::bind(std::uniform_real_distribution<float>(
                        std::min(range, 0.0f), std::max(range, 0.0f)),
                    std::ref(rng));
      bias_data[oc] = value_rng();
      for (int32_t ic = 0; ic < KernelInputChannels(); ic++) {
        for (int32_t y = 0; y < KernelHeight(); y++) {
          for (int32_t x = 0; x < KernelWidth(); x++) {
            const int32_t index =
                ((oc * KernelHeight() + y) * KernelWidth() + x) *
                    KernelInputChannels() +
                ic;
            filter_data[index] = value_rng();
          }
        }
      }
    }

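    // For INT8 weights, quantize the FP32 filter (per-tensor for INT8Weights,
    // per-channel for INT8ChannelWiseWeights) and add a DEQUANTIZE operator
    // that restores it to FP32; otherwise store the filter directly as FP32.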
    if (INT8Weights() || INT8ChannelWiseWeights()) {
      operator_codes.emplace_back(
          CreateOperatorCode(builder, BuiltinOperator_DEQUANTIZE));

      std::vector<int8_t> quantized_filter_data(filter_data.size());
      if (INT8Weights()) {
        filter_scales.resize(1, GetInt8QuantizationScale(filter_data));
        filter_zero_points.resize(1, 0);
        std::transform(filter_data.begin(), filter_data.end(),
                       quantized_filter_data.begin(),
                       std::bind(QuantizeInt8, std::placeholders::_1, 0,
                                 filter_scales[0]));
      } else {
        filter_quantized_dimension =
            static_cast<int32_t>(filter_shape.size()) - 1;
        const int32_t num_scales = filter_shape[filter_quantized_dimension];
        filter_scales = GetInt8QuantizationScalePerChannel(
            filter_data.data(), filter_quantized_dimension, filter_shape);
        filter_zero_points.resize(num_scales, 0);
        QuantizeInt8PerChannel(filter_scales.data(), filter_zero_points.data(),
                               filter_quantized_dimension, filter_data.data(),
                               quantized_filter_data.data(), filter_shape);
      }
      buffers.emplace_back(CreateBuffer(
          builder,
          builder.CreateVector(
              reinterpret_cast<const uint8_t*>(quantized_filter_data.data()),
              sizeof(int8_t) * quantized_filter_data.size())));

      const std::array<int32_t, 1> dequantize_filter_inputs{
          {SparseWeights() ? 1 : 0}};
      const std::array<int32_t, 1> dequantize_filter_outputs{
          {SparseWeights() ? 3 : 2}};
      operators.emplace_back(CreateOperator(
          builder, /*opcode_index=*/operator_codes.size() - 1,
          builder.CreateVector<int32_t>(dequantize_filter_inputs.data(),
                                        dequantize_filter_inputs.size()),
          builder.CreateVector<int32_t>(dequantize_filter_outputs.data(),
                                        dequantize_filter_outputs.size())));
    } else {
      buffers.emplace_back(CreateBuffer(
          builder, builder.CreateVector(
                       reinterpret_cast<const uint8_t*>(filter_data.data()),
                       sizeof(float) * filter_data.size())));
    }

    // The bias is stored in FP32 even when the filter is quantized to INT8.
    buffers.emplace_back(CreateBuffer(
        builder,
        builder.CreateVector(reinterpret_cast<const uint8_t*>(bias_data.data()),
                             sizeof(float) * bias_data.size())));
  }

  const std::array<int32_t, 4> input_shape{
      {BatchSize(), InputHeight(), InputWidth(), InputChannels()}};
  const std::array<int32_t, 4> output_shape{
      {BatchSize(), OutputHeight(), OutputWidth(), OutputChannels()}};

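  // Create the tensors. Buffer 0 is the empty sentinel buffer; non-zero buffer
  // indices refer to the static filter and bias data created above.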
  std::vector<flatbuffers::Offset<tflite::Tensor>> tensors;
  if (SparseWeights()) {
    // Sparse tensors in TFLite can be stored in several formats. Here we
    // choose the simplest configuration, in which
    //   1. all dimensions are dense,
    //   2. the traversal is in-order, and
    //   3. there is no block configuration.
    int dims_count = filter_shape.size();
    std::vector<flatbuffers::Offset<DimensionMetadata>> dim_metadata(
        dims_count);
    std::vector<int> traversal_order(dims_count);
    for (int i = 0; i < dims_count; i++) {
      traversal_order[i] = i;
      dim_metadata[i] = CreateDimensionMetadata(builder, DimensionType_DENSE,
                                                filter_shape[i]);
    }
    flatbuffers::Offset<SparsityParameters> sparsity_param =
        CreateSparsityParameters(builder, builder.CreateVector(traversal_order),
                                 0, builder.CreateVector(dim_metadata));
    if (INT8Weights() || INT8ChannelWiseWeights()) {
      tensors.emplace_back(
          CreateTensor(builder,
                       builder.CreateVector<int32_t>(filter_shape.data(),
                                                     filter_shape.size()),
                       /*type=*/TensorType_INT8,
                       /*buffer=*/1, /*name=*/0,
                       CreateQuantizationParameters(
                           builder, /*min=*/0, /*max=*/0,
                           builder.CreateVector<float>(filter_scales),
                           builder.CreateVector<int64_t>(filter_zero_points),
                           /*details_type=*/QuantizationDetails_NONE,
                           /*details=*/0, filter_quantized_dimension),
                       /*is_variable=*/false, /*sparsity=*/sparsity_param));
    } else {
      tensors.emplace_back(CreateTensor(
          builder,
          builder.CreateVector<int32_t>(filter_shape.data(),
                                        filter_shape.size()),
          /*type=*/FP16Weights() ? TensorType_FLOAT16 : TensorType_FLOAT32,
          /*buffer=*/1, /*name=*/0, /*quantization=*/0,
          /*is_variable=*/false, /*sparsity=*/sparsity_param));
    }
  }
  if (FP16Weights()) {
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
        TensorType_FLOAT16, /*buffer=*/SparseWeights() ? 0 : 1));
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
        TensorType_FLOAT16, /*buffer=*/2));
  } else if (INT8Weights() || INT8ChannelWiseWeights()) {
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
        TensorType_INT8, /*buffer=*/SparseWeights() ? 0 : 1, /*name=*/0,
        CreateQuantizationParameters(
            builder, /*min=*/0, /*max=*/0,
            builder.CreateVector<float>(filter_scales),
            builder.CreateVector<int64_t>(filter_zero_points),
            /*details_type=*/QuantizationDetails_NONE,
            /*details=*/0, filter_quantized_dimension)));
  }
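  // The graph's main tensors, in order: FP32 input, FP32 filter, FP32 bias,
  // and FP32 output. The filter and bias reference static buffers only when no
  // DENSIFY/DEQUANTIZE operator produces them at runtime.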
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(input_shape.data(), input_shape.size()),
      TensorType_FLOAT32));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
      TensorType_FLOAT32,
      /*buffer=*/
      (FP16Weights() || INT8Weights() || INT8ChannelWiseWeights() ||
       SparseWeights())
          ? 0
          : 1));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
      TensorType_FLOAT32, /*buffer=*/FP16Weights() ? 0 : 2));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(output_shape.data(), output_shape.size()),
      TensorType_FLOAT32));

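  // CONV_2D always consumes the last four tensors created above: its inputs
  // are the FP32 input, filter, and bias, and its output is the last tensor.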
  const std::array<int32_t, 3> op_inputs{
      {static_cast<int>(tensors.size()) - 4,
       static_cast<int>(tensors.size()) - 3,
       static_cast<int>(tensors.size()) - 2}};
  const std::array<int32_t, 1> op_outputs{
      {static_cast<int>(tensors.size()) - 1}};

  flatbuffers::Offset<Conv2DOptions> conv2d_options =
      CreateConv2DOptions(builder, Padding(), StrideWidth(), StrideHeight(),
                          Activation(), DilationWidth(), DilationHeight());
  operators.emplace_back(CreateOperator(
      builder, /*opcode_index=*/0,
      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
      BuiltinOptions_Conv2DOptions, conv2d_options.Union()));

  const std::array<int32_t, 1> subgraph_inputs{
      {static_cast<int>(tensors.size()) - 4}};
  const std::array<int32_t, 1> subgraph_outputs{
      {static_cast<int>(tensors.size()) - 1}};
  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
      builder, builder.CreateVector(tensors.data(), tensors.size()),
      builder.CreateVector<int32_t>(subgraph_inputs.data(),
                                    subgraph_inputs.size()),
      builder.CreateVector<int32_t>(subgraph_outputs.data(),
                                    subgraph_outputs.size()),
      builder.CreateVector(operators.data(), operators.size()));

  flatbuffers::Offset<flatbuffers::String> description =
      builder.CreateString("Conv2D model");

  flatbuffers::Offset<Model> model_buffer = CreateModel(
      builder, TFLITE_SCHEMA_VERSION,
      builder.CreateVector(operator_codes.data(), operator_codes.size()),
      builder.CreateVector(&subgraph, 1), description,
      builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  return std::vector<char>(builder.GetBufferPointer(),
                           builder.GetBufferPointer() + builder.GetSize());
}

}  // namespace xnnpack
}  // namespace tflite