/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/xnnpack/conv_2d_tester.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <vector>

#include <gtest/gtest.h>
#include <fp16.h>
#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_conversion_utils.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"

namespace tflite {
namespace xnnpack {

void Conv2DTester::Test(TfLiteDelegate* delegate) const {
  std::vector<char> buffer = CreateTfLiteModel();
  const Model* model = GetModel(buffer.data());

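  // Build two interpreters from the same model: one that will run with the
  // delegate under test and one that serves as the reference implementation.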
  std::unique_ptr<Interpreter> delegate_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &delegate_interpreter),
      kTfLiteOk);
  std::unique_ptr<Interpreter> default_interpreter;
  ASSERT_EQ(
      InterpreterBuilder(
          model,
          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
          &default_interpreter),
      kTfLiteOk);

  ASSERT_TRUE(delegate_interpreter);
  ASSERT_TRUE(default_interpreter);

  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
  ASSERT_EQ(default_interpreter->inputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
  ASSERT_EQ(default_interpreter->outputs().size(), 1);

  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);

  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);

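  // Fill the reference interpreter's input with random values and copy the
  // same values into the delegated interpreter so both run on identical data.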
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto input_rng =
      std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
  float* default_input_data = default_interpreter->typed_tensor<float>(
      default_interpreter->inputs()[0]);
  std::generate(default_input_data,
                default_input_data + BatchSize() * InputHeight() *
                                         InputWidth() * InputChannels(),
                input_rng);

  float* delegate_input_data = delegate_interpreter->typed_tensor<float>(
      delegate_interpreter->inputs()[0]);
  std::copy(default_input_data,
            default_input_data +
                BatchSize() * InputHeight() * InputWidth() * InputChannels(),
            delegate_input_data);

  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);

  float* default_output_data = default_interpreter->typed_tensor<float>(
      default_interpreter->outputs()[0]);
  float* delegate_output_data = delegate_interpreter->typed_tensor<float>(
      delegate_interpreter->outputs()[0]);

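  // Compare the delegate output against the reference output element-wise,
  // allowing a small relative error to absorb floating-point rounding
  // differences between the two implementations.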
  for (int32_t i = 0; i < BatchSize(); i++) {
    for (int32_t y = 0; y < OutputHeight(); y++) {
      for (int32_t x = 0; x < OutputWidth(); x++) {
        for (int32_t c = 0; c < OutputChannels(); c++) {
          const int32_t index = ((i * OutputHeight() + y) * OutputWidth() + x) *
                                    OutputChannels() +
                                c;
          ASSERT_NEAR(default_output_data[index], delegate_output_data[index],
                      std::abs(default_output_data[index]) * 3.0e-6f)
              << "batch " << i << " / " << BatchSize() << ", y position " << y
              << " / " << OutputHeight() << ", x position " << x << " / "
              << OutputWidth() << ", channel " << c << " / "
              << OutputChannels();
        }
      }
    }
  }
}

std::vector<char> Conv2DTester::CreateTfLiteModel() const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto range_rng = std::bind(
      std::uniform_real_distribution<float>(-25.0f, 25.0f), std::ref(rng));

  flatbuffers::FlatBufferBuilder builder;
  std::vector<flatbuffers::Offset<OperatorCode>> operator_codes{
      {CreateOperatorCode(builder, BuiltinOperator_CONV_2D)}};
  std::vector<flatbuffers::Offset<tflite::Operator>> operators;
  std::vector<flatbuffers::Offset<tflite::Buffer>> buffers{
      {CreateBuffer(builder, builder.CreateVector({}))}};

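  // When the filter is stored in sparse format, a DENSIFY operator is added to
  // convert the sparse filter tensor into a dense one before it reaches the
  // CONV_2D (or the intermediate DEQUANTIZE) operator.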
  if (SparseWeights()) {
    operator_codes.emplace_back(
        CreateOperatorCode(builder, BuiltinOperator_DENSIFY));
    const std::array<int32_t, 1> densify_filter_inputs{{0}};
    const std::array<int32_t, 1> densify_filter_outputs{
        {FP16Weights() ? 1 : 2}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(densify_filter_inputs.data(),
                                      densify_filter_inputs.size()),
        builder.CreateVector<int32_t>(densify_filter_outputs.data(),
                                      densify_filter_outputs.size())));
  }

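  // With FP16 weights, the filter and bias are stored as IEEE half-precision
  // values and DEQUANTIZE operators convert them to FP32 tensors consumed by
  // CONV_2D; otherwise the filter and bias are generated directly in FP32.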
  if (FP16Weights()) {
    operator_codes.emplace_back(
        CreateOperatorCode(builder, BuiltinOperator_DEQUANTIZE));

    std::vector<uint16_t> filter_data(OutputChannels() * KernelHeight() *
                                      KernelWidth() * InputChannels());
    std::vector<uint16_t> bias_data(OutputChannels());
    for (int32_t oc = 0; oc < OutputChannels(); oc++) {
      // Use the same range of all-positive or all-negative values to generate
      // all weights within the same output channel, but different ranges for
      // different output channels. This ensures that no catastrophic
      // cancellation occurs, while the test still covers both positive and
      // negative inputs.
      const float range = range_rng();
      auto value_rng =
          std::bind(fp16_ieee_from_fp32_value,
                    std::bind(std::uniform_real_distribution<float>(
                                  std::min(range, 0.0f), std::max(range, 0.0f)),
                              std::ref(rng)));
      bias_data[oc] = value_rng();
      for (int32_t ic = 0; ic < InputChannels(); ic++) {
        for (int32_t y = 0; y < KernelHeight(); y++) {
          for (int32_t x = 0; x < KernelWidth(); x++) {
            const int32_t index =
                ((oc * KernelHeight() + y) * KernelWidth() + x) *
                    InputChannels() +
                ic;
            filter_data[index] = value_rng();
          }
        }
      }
    }

    buffers.emplace_back(CreateBuffer(
        builder, builder.CreateVector(
                     reinterpret_cast<const uint8_t*>(filter_data.data()),
                     sizeof(uint16_t) * filter_data.size())));
    buffers.emplace_back(CreateBuffer(
        builder,
        builder.CreateVector(reinterpret_cast<const uint8_t*>(bias_data.data()),
                             sizeof(uint16_t) * bias_data.size())));

    const std::array<int32_t, 1> dequantize_filter_inputs{
        {SparseWeights() ? 1 : 0}};
    const std::array<int32_t, 1> dequantize_filter_outputs{
        {SparseWeights() ? 4 : 3}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(dequantize_filter_inputs.data(),
                                      dequantize_filter_inputs.size()),
        builder.CreateVector<int32_t>(dequantize_filter_outputs.data(),
                                      dequantize_filter_outputs.size())));
    const std::array<int32_t, 1> dequantize_bias_inputs{
        {SparseWeights() ? 2 : 1}};
    const std::array<int32_t, 1> dequantize_bias_outputs{
        {SparseWeights() ? 5 : 4}};
    operators.emplace_back(CreateOperator(
        builder, /*opcode_index=*/operator_codes.size() - 1,
        builder.CreateVector<int32_t>(dequantize_bias_inputs.data(),
                                      dequantize_bias_inputs.size()),
        builder.CreateVector<int32_t>(dequantize_bias_outputs.data(),
                                      dequantize_bias_outputs.size())));
  } else {
    std::vector<float> filter_data(OutputChannels() * KernelHeight() *
                                   KernelWidth() * InputChannels());
    std::vector<float> bias_data(OutputChannels());
    for (int32_t oc = 0; oc < OutputChannels(); oc++) {
      // Use the same range of all-positive or all-negative values to generate
      // all weights within the same output channel, but different ranges for
      // different output channels. This ensures that no catastrophic
      // cancellation occurs, while the test still covers both positive and
      // negative inputs.
      const float range = range_rng();
      auto value_rng =
          std::bind(std::uniform_real_distribution<float>(
                        std::min(range, 0.0f), std::max(range, 0.0f)),
                    std::ref(rng));
      bias_data[oc] = value_rng();
      for (int32_t ic = 0; ic < InputChannels(); ic++) {
        for (int32_t y = 0; y < KernelHeight(); y++) {
          for (int32_t x = 0; x < KernelWidth(); x++) {
            const int32_t index =
                ((oc * KernelHeight() + y) * KernelWidth() + x) *
                    InputChannels() +
                ic;
            filter_data[index] = value_rng();
          }
        }
      }
    }

    buffers.emplace_back(CreateBuffer(
        builder, builder.CreateVector(
                     reinterpret_cast<const uint8_t*>(filter_data.data()),
                     sizeof(float) * filter_data.size())));
    buffers.emplace_back(CreateBuffer(
        builder,
        builder.CreateVector(reinterpret_cast<const uint8_t*>(bias_data.data()),
                             sizeof(float) * bias_data.size())));
  }

  const std::array<int32_t, 4> input_shape{
      {BatchSize(), InputHeight(), InputWidth(), InputChannels()}};
  const std::array<int32_t, 4> output_shape{
      {BatchSize(), OutputHeight(), OutputWidth(), OutputChannels()}};
  const std::array<int32_t, 4> filter_shape{
      {OutputChannels(), KernelHeight(), KernelWidth(), InputChannels()}};
  const std::array<int32_t, 1> bias_shape{{OutputChannels()}};

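  // Tensors are ordered so that the optional sparse and/or FP16 filter and
  // bias tensors come first, while the FP32 input, filter, bias, and output
  // tensors are always the last four; the operator and subgraph indices below
  // are computed relative to the end of this list.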
  std::vector<flatbuffers::Offset<tflite::Tensor>> tensors;
  if (SparseWeights()) {
    // A sparse tensor in TFLite can be stored in several formats. Here we
    // choose the simplest configuration, in which:
    // 1. all dimensions are dense,
    // 2. the traversal order is in-order, and
    // 3. there is no block configuration.
    int dims_count = filter_shape.size();
    std::vector<flatbuffers::Offset<DimensionMetadata>> dim_metadata(
        dims_count);
    std::vector<int> traversal_order(dims_count);
    for (int i = 0; i < dims_count; i++) {
      traversal_order[i] = i;
      dim_metadata[i] = CreateDimensionMetadata(builder, DimensionType_DENSE,
                                                filter_shape[i]);
    }
    flatbuffers::Offset<SparsityParameters> sparsity_param =
        CreateSparsityParameters(builder, builder.CreateVector(traversal_order),
                                 0, builder.CreateVector(dim_metadata));
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
        /*type=*/FP16Weights() ? TensorType_FLOAT16 : TensorType_FLOAT32,
        /*buffer=*/1, /*name=*/0, /*quantization=*/0,
        /*is_variable=*/false, /*sparsity=*/sparsity_param));
  }
  if (FP16Weights()) {
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
        TensorType_FLOAT16, /*buffer=*/SparseWeights() ? 0 : 1));
    tensors.emplace_back(CreateTensor(
        builder,
        builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
        TensorType_FLOAT16, /*buffer=*/2));
  }
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(input_shape.data(), input_shape.size()),
      TensorType_FLOAT32));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(filter_shape.data(), filter_shape.size()),
      TensorType_FLOAT32,
      /*buffer=*/FP16Weights() || SparseWeights() ? 0 : 1));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
      TensorType_FLOAT32, /*buffer=*/FP16Weights() ? 0 : 2));
  tensors.emplace_back(CreateTensor(
      builder,
      builder.CreateVector<int32_t>(output_shape.data(), output_shape.size()),
      TensorType_FLOAT32));

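  // CONV_2D takes the FP32 input, filter, and bias tensors as its inputs and
  // produces the last tensor in the list as its output.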
  const std::array<int32_t, 3> op_inputs{
      {static_cast<int>(tensors.size()) - 4,
       static_cast<int>(tensors.size()) - 3,
       static_cast<int>(tensors.size()) - 2}};
  const std::array<int32_t, 1> op_outputs{
      {static_cast<int>(tensors.size()) - 1}};

  flatbuffers::Offset<Conv2DOptions> conv2d_options =
      CreateConv2DOptions(builder, Padding(), StrideWidth(), StrideHeight(),
                          Activation(), DilationWidth(), DilationHeight());
  operators.emplace_back(CreateOperator(
      builder, /*opcode_index=*/0,
      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
      BuiltinOptions_Conv2DOptions, conv2d_options.Union()));

  const std::array<int32_t, 1> subgraph_inputs{
      {static_cast<int>(tensors.size()) - 4}};
  const std::array<int32_t, 1> subgraph_outputs{
      {static_cast<int>(tensors.size()) - 1}};
  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
      builder, builder.CreateVector(tensors.data(), tensors.size()),
      builder.CreateVector<int32_t>(subgraph_inputs.data(),
                                    subgraph_inputs.size()),
      builder.CreateVector<int32_t>(subgraph_outputs.data(),
                                    subgraph_outputs.size()),
      builder.CreateVector(operators.data(), operators.size()));

  flatbuffers::Offset<flatbuffers::String> description =
      builder.CreateString("Conv2D model");

  flatbuffers::Offset<Model> model_buffer = CreateModel(
      builder, TFLITE_SCHEMA_VERSION,
      builder.CreateVector(operator_codes.data(), operator_codes.size()),
      builder.CreateVector(&subgraph, 1), description,
      builder.CreateVector(buffers.data(), buffers.size()));

  builder.Finish(model_buffer);

  return std::vector<char>(builder.GetBufferPointer(),
                           builder.GetBufferPointer() + builder.GetSize());
}

}  // namespace xnnpack
}  // namespace tflite