1 /*
2 * Copyright (c) 2023-2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <cstring>
7 #include <dlfcn.h>
8 #include <filesystem>
9 #include <fstream>
10 #include <stdio.h>
11 #include <vector>
12 #include <gtest/gtest.h>
13 #include <xtensor/xrandom.hpp>
14
15 #include "util/macros.h"
16
17 #include "tensorflow/lite/c/c_api.h"
18 #include "tensorflow/lite/c/common.h"
19
20 #include <fcntl.h>
21 #include "test_executor.h"
22 #include "tflite-schema-v2.15.0_generated.h"
23
/* Uniform pseudo-random float in [min, max], driven by rand(). */
static float
randf(float min, float max)
{
   const float t = (float)rand() / (float)RAND_MAX;
   return min + t * (max - min);
}
29
30 static void
read_model(const char * file_name,tflite::ModelT & model)31 read_model(const char *file_name, tflite::ModelT &model)
32 {
33 std::ostringstream file_path;
34 assert(getenv("TEFLON_TEST_DATA"));
35 file_path << getenv("TEFLON_TEST_DATA") << "/" << file_name;
36
37 FILE *f = fopen(file_path.str().c_str(), "rb");
38 assert(f);
39 fseek(f, 0, SEEK_END);
40 long fsize = ftell(f);
41 fseek(f, 0, SEEK_SET);
42 void *buf = malloc(fsize);
43 fread(buf, fsize, 1, f);
44 fclose(f);
45
46 tflite::GetModel(buf)->UnPackTo(&model);
47 }
48
/* Rewrite operation `operation_index` of a template conv2d model so it
 * becomes a CONV_2D or DEPTHWISE_CONV_2D with the requested geometry.
 *
 * The tensors referenced by the operation (input, weights, bias, output)
 * are resized in place and the weight/bias buffers are refilled with
 * random data, so one small template .tflite file can be turned into an
 * arbitrary test case.
 *
 * NOTE(review): assigning builtin_options.value without releasing the
 * previous union value appears to leak the old options object — confirm
 * against the flatbuffers object-API ownership rules.
 */
static void
patch_conv2d(unsigned operation_index,
             tflite::ModelT *model,
             int input_size,
             int weight_size,
             int input_channels,
             int output_channels,
             int stride,
             bool padding_same,
             bool is_signed,
             bool depthwise)
{
   unsigned output_size = 0;
   unsigned input_index;
   unsigned weights_index;
   unsigned bias_index;
   unsigned output_index;
   unsigned weights_buffer_index;
   unsigned bias_buffer_index;

   auto subgraph = model->subgraphs[0];

   /* Operation: install fresh builtin options; for depthwise we also have
    * to retarget the operator code, since the template model is a plain
    * conv2d. */
   if (depthwise) {
      auto value = new tflite::DepthwiseConv2DOptionsT();
      value->depth_multiplier = 1;
      value->padding = padding_same ? tflite::Padding_SAME : tflite::Padding_VALID;
      value->stride_w = stride;
      value->stride_h = stride;
      value->dilation_w_factor = 1;
      value->dilation_h_factor = 1;
      subgraph->operators[operation_index]->builtin_options.value = value;
      subgraph->operators[operation_index]->builtin_options.type = tflite::BuiltinOptions_DepthwiseConv2DOptions;

      /* 4 is the legacy builtin code for DEPTHWISE_CONV_2D. */
      model->operator_codes[0]->deprecated_builtin_code = 4;
      model->operator_codes[0]->builtin_code = tflite::BuiltinOperator_DEPTHWISE_CONV_2D;
   } else {
      auto value = new tflite::Conv2DOptionsT();
      value->padding = padding_same ? tflite::Padding_SAME : tflite::Padding_VALID;
      value->stride_w = stride;
      value->stride_h = stride;
      subgraph->operators[operation_index]->builtin_options.value = value;
   }

   /* Conv inputs are by convention: 0 = activation, 1 = weights, 2 = bias. */
   input_index = subgraph->operators[operation_index]->inputs.data()[0];
   weights_index = subgraph->operators[operation_index]->inputs.data()[1];
   bias_index = subgraph->operators[operation_index]->inputs.data()[2];
   output_index = subgraph->operators[operation_index]->outputs.data()[0];

   /* Input: NHWC, batch 1, square spatial extent. */
   auto input_tensor = subgraph->tensors[input_index];
   input_tensor->shape.data()[0] = 1;
   input_tensor->shape.data()[1] = input_size;
   input_tensor->shape.data()[2] = input_size;
   input_tensor->shape.data()[3] = input_channels;
   input_tensor->type = is_signed ? tflite::TensorType_INT8 : tflite::TensorType_UINT8;
   if (is_signed)
      /* Shift the uint8 template zero point into the int8 range. */
      input_tensor->quantization->zero_point[0] -= 128;

   /* Bias: one int32 per output channel, random values. */
   auto bias_tensor = subgraph->tensors[bias_index];
   bias_buffer_index = bias_tensor->buffer;
   bias_tensor->shape.data()[0] = output_channels;

   auto bias_data = &model->buffers[bias_buffer_index]->data;
   xt::xarray<int32_t> bias_array = xt::random::randint<int32_t>({output_channels}, -20000, 20000);
   bias_data->resize(bias_array.size() * sizeof(int32_t));
   memcpy(bias_data->data(), bias_array.data(), bias_array.size() * sizeof(int32_t));

   /* Weight: depthwise uses layout [1, H, W, C_out], regular conv uses
    * [C_out, H, W, C_in]. */
   auto weight_tensor = subgraph->tensors[weights_index];
   weights_buffer_index = weight_tensor->buffer;
   if (depthwise) {
      weight_tensor->shape.data()[0] = 1;
      weight_tensor->shape.data()[1] = weight_size;
      weight_tensor->shape.data()[2] = weight_size;
      weight_tensor->shape.data()[3] = output_channels;
   } else {
      weight_tensor->shape.data()[0] = output_channels;
      weight_tensor->shape.data()[1] = weight_size;
      weight_tensor->shape.data()[2] = weight_size;
      weight_tensor->shape.data()[3] = input_channels;
   }
   weight_tensor->type = is_signed ? tflite::TensorType_INT8 : tflite::TensorType_UINT8;
   if (is_signed)
      weight_tensor->quantization->zero_point[0] = 0;

   auto weights_data = &model->buffers[weights_buffer_index]->data;
   std::vector<int> weight_shape;
   if (depthwise)
      weight_shape = {1, weight_size, weight_size, output_channels};
   else
      weight_shape = {output_channels, weight_size, weight_size, input_channels};

   xt::xarray<uint8_t> weights_array = xt::random::randint<uint8_t>(weight_shape, 0, 255);
   weights_data->resize(weights_array.size());
   memcpy(weights_data->data(), weights_array.data(), weights_array.size());

   /* Output: standard SAME/VALID output-extent formulas (ceil division). */
   if (padding_same)
      output_size = (input_size + stride - 1) / stride;
   else
      output_size = (input_size + stride - weight_size) / stride;

   auto output_tensor = subgraph->tensors[output_index];
   output_tensor->shape.data()[0] = 1;
   output_tensor->shape.data()[1] = output_size;
   output_tensor->shape.data()[2] = output_size;
   output_tensor->shape.data()[3] = output_channels;
   output_tensor->type = is_signed ? tflite::TensorType_INT8 : tflite::TensorType_UINT8;
   if (is_signed)
      output_tensor->quantization->zero_point[0] -= 128;
}
162
163 void *
conv2d_generate_model(int input_size,int weight_size,int input_channels,int output_channels,int stride,bool padding_same,bool is_signed,bool depthwise,size_t * buf_size)164 conv2d_generate_model(int input_size,
165 int weight_size,
166 int input_channels,
167 int output_channels,
168 int stride,
169 bool padding_same,
170 bool is_signed,
171 bool depthwise,
172 size_t *buf_size)
173 {
174 void *buf;
175 tflite::ModelT model;
176 read_model("conv2d.tflite", model);
177
178 patch_conv2d(0, &model, input_size, weight_size, input_channels, output_channels, stride, padding_same, is_signed, depthwise);
179
180 flatbuffers::FlatBufferBuilder builder;
181 builder.Finish(tflite::Model::Pack(builder, &model), "TFL3");
182
183 *buf_size = builder.GetSize();
184 buf = malloc(*buf_size);
185 memcpy(buf, builder.GetBufferPointer(), builder.GetSize());
186
187 return buf;
188 }
189
190 static void
patch_quant_for_add(tflite::ModelT * model,bool is_signed)191 patch_quant_for_add(tflite::ModelT *model, bool is_signed)
192 {
193 auto subgraph = model->subgraphs[0];
194 auto add_op = subgraph->operators[2];
195
196 auto input_index = add_op->inputs.data()[0];
197 auto input_tensor = subgraph->tensors[input_index];
198 input_tensor->quantization->scale[0] = randf(0.0078125, 0.4386410117149353);
199 input_tensor->quantization->zero_point[0] = rand() % 255;
200 if (is_signed)
201 input_tensor->quantization->zero_point[0] -= 128;
202
203 input_index = add_op->inputs.data()[1];
204 input_tensor = subgraph->tensors[input_index];
205 input_tensor->quantization->scale[0] = randf(0.0078125, 0.4386410117149353);
206 input_tensor->quantization->zero_point[0] = rand() % 255;
207 if (is_signed)
208 input_tensor->quantization->zero_point[0] -= 128;
209 }
210
211 void *
add_generate_model(int input_size,int weight_size,int input_channels,int output_channels,int stride,bool padding_same,bool is_signed,bool depthwise,size_t * buf_size)212 add_generate_model(int input_size,
213 int weight_size,
214 int input_channels,
215 int output_channels,
216 int stride,
217 bool padding_same,
218 bool is_signed,
219 bool depthwise,
220 size_t *buf_size)
221 {
222 void *buf;
223 tflite::ModelT model;
224 read_model("add.tflite", model);
225
226 patch_conv2d(0, &model, input_size, weight_size, input_channels, output_channels, stride, padding_same, is_signed, depthwise);
227 patch_conv2d(1, &model, input_size, weight_size, input_channels, output_channels, stride, padding_same, is_signed, depthwise);
228 patch_quant_for_add(&model, is_signed);
229
230 /* Output */
231 auto subgraph = model.subgraphs[0];
232 unsigned input_index = subgraph->operators[2]->inputs.data()[0];
233 unsigned output_index = subgraph->operators[2]->outputs.data()[0];
234
235 auto input_tensor = subgraph->tensors[input_index];
236 auto output_tensor = subgraph->tensors[output_index];
237 output_tensor->shape.data()[0] = input_tensor->shape.data()[0];
238 output_tensor->shape.data()[1] = input_tensor->shape.data()[1];
239 output_tensor->shape.data()[2] = input_tensor->shape.data()[2];
240 output_tensor->shape.data()[3] = input_tensor->shape.data()[3];
241 output_tensor->type = is_signed ? tflite::TensorType_INT8 : tflite::TensorType_UINT8;
242
243 flatbuffers::FlatBufferBuilder builder;
244 builder.Finish(tflite::Model::Pack(builder, &model), "TFL3");
245
246 *buf_size = builder.GetSize();
247 buf = malloc(*buf_size);
248 memcpy(buf, builder.GetBufferPointer(), builder.GetSize());
249
250 return buf;
251 }
252
253 static void
patch_fully_connected(unsigned operation_index,tflite::ModelT * model,int input_size,int output_channels,bool is_signed)254 patch_fully_connected(unsigned operation_index,
255 tflite::ModelT *model,
256 int input_size,
257 int output_channels,
258 bool is_signed)
259 {
260 unsigned input_index;
261 unsigned weights_index;
262 unsigned bias_index;
263 unsigned output_index;
264 unsigned weights_buffer_index;
265 unsigned bias_buffer_index;
266
267 auto subgraph = model->subgraphs[0];
268
269 /* Operation */
270 auto value = new tflite::FullyConnectedOptionsT();
271 subgraph->operators[operation_index]->builtin_options.value = value;
272
273 input_index = subgraph->operators[operation_index]->inputs.data()[0];
274 weights_index = subgraph->operators[operation_index]->inputs.data()[1];
275 bias_index = subgraph->operators[operation_index]->inputs.data()[2];
276 output_index = subgraph->operators[operation_index]->outputs.data()[0];
277
278 /* Input */
279 auto input_tensor = subgraph->tensors[input_index];
280 input_tensor->shape.data()[0] = 1;
281 input_tensor->shape.data()[1] = input_size;
282 input_tensor->type = is_signed ? tflite::TensorType_INT8 : tflite::TensorType_UINT8;
283
284 /* Bias */
285 auto bias_tensor = subgraph->tensors[bias_index];
286 bias_buffer_index = bias_tensor->buffer;
287 bias_tensor->shape.data()[0] = output_channels;
288
289 auto bias_data = &model->buffers[bias_buffer_index]->data;
290 xt::xarray<int32_t> bias_array = xt::random::randint<int32_t>({output_channels}, -20000, 20000);
291 bias_data->resize(bias_array.size() * sizeof(int32_t));
292 memcpy(bias_data->data(), bias_array.data(), bias_array.size() * sizeof(int32_t));
293
294 /* Weight */
295 auto weight_tensor = subgraph->tensors[weights_index];
296 weights_buffer_index = weight_tensor->buffer;
297 weight_tensor->shape.data()[0] = output_channels;
298 weight_tensor->shape.data()[1] = input_size;
299 weight_tensor->type = is_signed ? tflite::TensorType_INT8 : tflite::TensorType_UINT8;
300
301 auto weights_data = &model->buffers[weights_buffer_index]->data;
302 std::vector<int> weight_shape;
303 weight_shape = {output_channels, input_size};
304
305 xt::xarray<uint8_t> weights_array = xt::random::randint<uint8_t>(weight_shape, 0, 255);
306 weights_data->resize(weights_array.size());
307 memcpy(weights_data->data(), weights_array.data(), weights_array.size());
308
309 /* Output */
310 auto output_tensor = subgraph->tensors[output_index];
311 output_tensor->shape.data()[0] = 1;
312 output_tensor->shape.data()[1] = output_channels;
313 output_tensor->type = is_signed ? tflite::TensorType_INT8 : tflite::TensorType_UINT8;
314 }
315
316 void *
fully_connected_generate_model(int input_size,int output_channels,bool is_signed,size_t * buf_size)317 fully_connected_generate_model(int input_size,
318 int output_channels,
319 bool is_signed,
320 size_t *buf_size)
321 {
322 void *buf;
323 tflite::ModelT model;
324 read_model("fully_connected.tflite", model);
325
326 patch_fully_connected(0, &model, input_size, output_channels, is_signed);
327
328 flatbuffers::FlatBufferBuilder builder;
329 builder.Finish(tflite::Model::Pack(builder, &model), "TFL3");
330
331 *buf_size = builder.GetSize();
332 buf = malloc(*buf_size);
333 memcpy(buf, builder.GetBufferPointer(), builder.GetSize());
334
335 return buf;
336 }
337
338 static void
tflite_error_cb(void * user_data,const char * format,va_list args)339 tflite_error_cb(void *user_data, const char *format, va_list args)
340 {
341 vfprintf(stderr, format, args);
342 }
343
/* Entry points of the Teflon delegate shared object, resolved at runtime
 * by load_delegate() via dlsym. */
TfLiteDelegate *(*tflite_plugin_create_delegate)(char **options_keys,
                                                 char **options_values,
                                                 size_t num_options,
                                                 void (*report_error)(const char *));

void (*tflite_plugin_destroy_delegate)(TfLiteDelegate *delegate);
350
351 static void
load_delegate()352 load_delegate()
353 {
354 const char *delegate_path = getenv("TEFLON_TEST_DELEGATE");
355 assert(delegate_path);
356
357 void *delegate_lib = dlopen(delegate_path, RTLD_LAZY | RTLD_LOCAL);
358 assert(delegate_lib);
359
360 tflite_plugin_create_delegate = reinterpret_cast<TfLiteDelegate *(*)(char **options_keys,
361 char **options_values,
362 size_t num_options,
363 void (*report_error)(const char *))>(
364 dlsym(delegate_lib, "tflite_plugin_create_delegate"));
365 assert(tflite_plugin_create_delegate);
366
367 tflite_plugin_destroy_delegate = reinterpret_cast<void (*)(TfLiteDelegate *delegate)>(
368 dlsym(delegate_lib, "tflite_plugin_destroy_delegate"));
369 assert(tflite_plugin_destroy_delegate);
370 }
371
372 bool
cache_is_enabled(void)373 cache_is_enabled(void)
374 {
375 return getenv("TEFLON_ENABLE_CACHE");
376 }
377
378 void *
read_buf(const char * path,size_t * buf_size)379 read_buf(const char *path, size_t *buf_size)
380 {
381 FILE *f = fopen(path, "rb");
382 if (f == NULL)
383 return NULL;
384
385 fseek(f, 0, SEEK_END);
386 long fsize = ftell(f);
387 fseek(f, 0, SEEK_SET);
388
389 void *buf = malloc(fsize);
390 fread(buf, fsize, 1, f);
391
392 fclose(f);
393
394 if(buf_size != NULL)
395 *buf_size = fsize;
396
397 return buf;
398 }
399
400 void
run_model(TfLiteModel * model,enum executor executor,void *** input,size_t * num_inputs,void *** output,size_t ** output_sizes,TfLiteType ** output_types,size_t * num_outputs,std::string cache_dir)401 run_model(TfLiteModel *model, enum executor executor, void ***input, size_t *num_inputs,
402 void ***output, size_t **output_sizes, TfLiteType **output_types,
403 size_t *num_outputs, std::string cache_dir)
404 {
405 TfLiteDelegate *delegate = NULL;
406 TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
407
408 if (executor == EXECUTOR_NPU) {
409 load_delegate();
410 delegate = tflite_plugin_create_delegate(NULL, NULL, 0, NULL);
411 TfLiteInterpreterOptionsAddDelegate(options, delegate);
412 }
413
414 TfLiteInterpreterOptionsSetErrorReporter(options, tflite_error_cb, NULL);
415
416 TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
417 assert(interpreter);
418
419 TfLiteInterpreterAllocateTensors(interpreter);
420
421 *num_inputs = TfLiteInterpreterGetInputTensorCount(interpreter);
422 if (*input == NULL)
423 *input = (void**)calloc(*num_inputs, sizeof(*input));
424 for (unsigned i = 0; i < *num_inputs; i++) {
425 TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, i);
426 std::ostringstream input_cache;
427 input_cache << cache_dir << "/" << "input-" << i << ".data";
428
429 if ((*input)[i] == NULL) {
430 if (cache_is_enabled())
431 (*input)[i] = read_buf(input_cache.str().c_str(), NULL);
432 if ((*input)[i] == NULL) {
433 (*input)[i] = malloc(input_tensor->bytes);
434
435 std::vector<size_t> shape;
436
437 shape.resize(input_tensor->dims->size);
438 for (int j = 0; j < input_tensor->dims->size; j++)
439 shape[j] = input_tensor->dims->data[j];
440
441 switch (input_tensor->type) {
442 case kTfLiteFloat32: {
443 xt::xarray<float_t> a = xt::random::rand<float_t>(shape);
444 memcpy((*input)[i], a.data(), input_tensor->bytes);
445 break;
446 }
447 default: {
448 xt::xarray<uint8_t> a = xt::random::randint<uint8_t>(shape, 0, 255);
449 memcpy((*input)[i], a.data(), input_tensor->bytes);
450 break;
451 }
452 }
453
454 if (cache_is_enabled()) {
455 if (!cache_dir.empty() && !std::filesystem::exists(cache_dir))
456 std::filesystem::create_directory(cache_dir);
457
458 std::ofstream file(input_cache.str().c_str(), std::ios::out | std::ios::binary);
459 file.write(reinterpret_cast<const char *>((*input)[i]), input_tensor->bytes);
460 file.close();
461 }
462 }
463 }
464
465 TfLiteTensorCopyFromBuffer(input_tensor, (*input)[i], input_tensor->bytes);
466 }
467
468 std::ostringstream output_cache;
469 output_cache << cache_dir << "/" << "output-" << 0 << ".data";
470
471 if (executor == EXECUTOR_NPU || !cache_is_enabled() || !std::filesystem::exists(output_cache.str())) {
472 EXPECT_EQ(TfLiteInterpreterInvoke(interpreter), kTfLiteOk);
473 }
474
475 *num_outputs = TfLiteInterpreterGetOutputTensorCount(interpreter);
476 *output = (void**)malloc(sizeof(*output) * *num_outputs);
477 *output_sizes = (size_t*)malloc(sizeof(*output_sizes) * *num_outputs);
478 *output_types = (TfLiteType*)malloc(sizeof(*output_types) * *num_outputs);
479 for (unsigned i = 0; i < *num_outputs; i++) {
480 const TfLiteTensor *output_tensor = TfLiteInterpreterGetOutputTensor(interpreter, i);
481 output_cache.str("");
482 output_cache << cache_dir << "/" << "output-" << i << ".data";
483 (*output_types)[i] = output_tensor->type;
484
485 if (executor == EXECUTOR_CPU && cache_is_enabled() && std::filesystem::exists(output_cache.str())) {
486 (*output)[i] = read_buf(output_cache.str().c_str(), NULL);
487 } else {
488 (*output)[i] = malloc(output_tensor->bytes);
489 EXPECT_EQ(TfLiteTensorCopyToBuffer(output_tensor, (*output)[i], output_tensor->bytes), kTfLiteOk);
490
491 if (cache_is_enabled() && executor == EXECUTOR_CPU) {
492 std::ofstream file = std::ofstream(output_cache.str().c_str(), std::ios::out | std::ios::binary);
493 file.write(reinterpret_cast<const char *>((*output)[i]), output_tensor->bytes);
494 file.close();
495 }
496 }
497
498 switch (output_tensor->type) {
499 case kTfLiteFloat32: {
500 (*output_sizes)[i] = output_tensor->bytes / 4;
501 break;
502 }
503 default: {
504 (*output_sizes)[i] = output_tensor->bytes;
505 break;
506 }
507 }
508 }
509
510 TfLiteInterpreterDelete(interpreter);
511 if (executor == EXECUTOR_NPU)
512 tflite_plugin_destroy_delegate(delegate);
513 TfLiteInterpreterOptionsDelete(options);
514 }
515