/*
 * Copyright (c) 2023-2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
 * SPDX-License-Identifier: MIT
 */

#include "pipe-loader/pipe_loader.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/core/c/builtin_op_data.h"

/* TODO: Move to TfLiteAsyncKernel for zero-copy of buffers */

enum teflon_debug_flags {
   TEFLON_DEBUG_VERBOSE = 1 << 1,
};

static const struct debug_named_value teflon_debug_flags[] = {
   { "verbose", TEFLON_DEBUG_VERBOSE, "Verbose logging." },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_teflon, "TEFLON_DEBUG", teflon_debug_flags, 0)

static inline void
teflon_debug(const char *format, ...)
{
   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      va_list ap;
      va_start(ap, format);
      _debug_vprintf(format, ap);
      va_end(ap);
   }
}

struct teflon_delegate
{
   TfLiteDelegate base;
   struct pipe_loader_device *dev;
   struct pipe_context *context;
};

struct teflon_subgraph
{
   struct pipe_ml_subgraph *base;

   unsigned *input_tensors;
   unsigned input_count;

   unsigned *output_tensors;
   unsigned output_count;
};

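/* Creates a pipe buffer sized for the tensor's elements and copies the
 * tensor's backing data (weights, biases, padding values, etc.) into it. */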
static struct pipe_resource *
create_resource(struct pipe_context *context, TfLiteTensor tensor)
{
   unsigned bytes;
   unsigned size = 1;

   for (int i = 0; i < tensor.dims->size; i++)
      size *= tensor.dims->data[i];

   switch(tensor.type) {
   case kTfLiteInt8:
   case kTfLiteUInt8:
      bytes = 1;
      break;
   case kTfLiteInt16:
   case kTfLiteUInt16:
   case kTfLiteFloat16:
      bytes = 2;
      break;
   case kTfLiteInt32:
   case kTfLiteUInt32:
   case kTfLiteFloat32:
      bytes = 4;
      break;
   case kTfLiteInt64:
   case kTfLiteUInt64:
   case kTfLiteFloat64:
   case kTfLiteComplex64:
      bytes = 8;
      break;
   default:
      unreachable("Unsupported TF type");
   }

   return pipe_buffer_create_with_data(context, 0, PIPE_USAGE_DEFAULT, size * bytes, tensor.data.data);
}

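/* Translates a delegated TfLiteNode and its registration into a
 * pipe_ml_operation: wires up the input/output pipe_tensors and fills in
 * the operation-specific parameters (strides, padding, fused ReLU, etc.). */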
static void
fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *node_registration, struct pipe_ml_operation *operation, struct pipe_tensor *tensors)
{
   operation->input_count = node->inputs->size;
   operation->input_tensors = calloc(operation->input_count, sizeof(void*));
   for (unsigned i = 0; i < node->inputs->size; i++)
      operation->input_tensors[i] = &tensors[node->inputs->data[i]];

   operation->output_count = node->outputs->size;
   operation->output_tensors = calloc(operation->output_count, sizeof(void*));
   for (unsigned i = 0; i < node->outputs->size; i++)
      operation->output_tensors[i] = &tensors[node->outputs->data[i]];

   switch(node_registration->builtin_code) {
   case kTfLiteBuiltinConv2d:
   case kTfLiteBuiltinDepthwiseConv2d: {
      operation->type = PIPE_ML_OPERATION_TYPE_CONVOLUTION;
      operation->conv.weight_tensor = &tensors[node->inputs->data[1]];
      operation->conv.bias_tensor = &tensors[node->inputs->data[2]];
      if (node_registration->builtin_code == kTfLiteBuiltinConv2d) {
         TfLiteConvParams* params = (TfLiteConvParams*)node->builtin_data;

         assert(params->activation == kTfLiteActNone ||
                params->activation == kTfLiteActRelu ||
                params->activation == kTfLiteActRelu6);
         if (node_registration->version >= 2) {
            assert(params->dilation_width_factor == 1);
            assert(params->dilation_height_factor == 1);
         }
         operation->conv.stride_x = params->stride_width;
         operation->conv.stride_y = params->stride_height;
         operation->conv.padding_same = params->padding == kTfLitePaddingSame;
         operation->conv.depthwise = false;
         operation->conv.relu = params->activation == kTfLiteActRelu ||
                                params->activation == kTfLiteActRelu6;
      } else {
         TfLiteDepthwiseConvParams* params = (TfLiteDepthwiseConvParams*)node->builtin_data;

         assert(params->activation == kTfLiteActNone ||
                params->activation == kTfLiteActRelu ||
                params->activation == kTfLiteActRelu6);
         if (node_registration->version >= 2) {
            assert(params->dilation_width_factor == 1);
            assert(params->dilation_height_factor == 1);
         }
         operation->conv.stride_x = params->stride_width;
         operation->conv.stride_y = params->stride_height;
         operation->conv.padding_same = params->padding == kTfLitePaddingSame;
         operation->conv.depthwise = true;
         operation->conv.relu = params->activation == kTfLiteActRelu ||
                                params->activation == kTfLiteActRelu6;
      }
      operation->conv.pointwise = operation->conv.weight_tensor->dims[1] == 1 &&
                                  operation->conv.weight_tensor->dims[2] == 1;
      break;
   }
   case kTfLiteBuiltinAveragePool2d:
      operation->type = PIPE_ML_OPERATION_TYPE_POOLING;
      break;
   case kTfLiteBuiltinAdd:
      operation->type = PIPE_ML_OPERATION_TYPE_ADD;
      break;
   case kTfLiteBuiltinConcatenation:
      operation->type = PIPE_ML_OPERATION_TYPE_CONCATENATION;
      break;
   case kTfLiteBuiltinSplit:
      operation->type = PIPE_ML_OPERATION_TYPE_SPLIT;
      break;
   case kTfLiteBuiltinPad: {
      int32_t *paddings = tf_context->tensors[node->inputs->data[1]].data.data;

      operation->type = PIPE_ML_OPERATION_TYPE_PAD;
      operation->pad.before_x = paddings[2];
      operation->pad.after_x = paddings[3];
      operation->pad.before_y = paddings[4];
      operation->pad.after_y = paddings[5];
      break;
   }
   case kTfLiteBuiltinFullyConnected: {
      operation->type = PIPE_ML_OPERATION_TYPE_FULLY_CONNECTED;
      operation->fcon.weight_tensor = &tensors[node->inputs->data[1]];
      operation->fcon.bias_tensor = &tensors[node->inputs->data[2]];
      break;
   }
   default:
      unreachable("Unsupported ML operation type");
   }
}

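/* Fills a pipe_tensor from the TfLiteTensor at the given index: dimensions,
 * quantization parameters, signedness, and a pipe_resource for tensors that
 * carry constant data. */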
static void
fill_tensor(struct teflon_delegate *delegate, TfLiteContext *tf_context, struct pipe_tensor *tensor, unsigned index)
{
   struct pipe_context *context = delegate->context;
   TfLiteTensor tf_tensor = tf_context->tensors[index];

   if (tf_tensor.type == kTfLiteNoType)
      return; /* Placeholder tensor */

   if (tf_tensor.data.data)
      tensor->resource = create_resource(context, tf_tensor);

   tensor->index = index;
   memcpy(tensor->dims, tf_tensor.dims->data, tf_tensor.dims->size * sizeof(*tensor->dims));

   if (tf_tensor.quantization.type == kTfLiteAffineQuantization) {
      const TfLiteAffineQuantization *quant = (const TfLiteAffineQuantization *)tf_tensor.quantization.params;
      tensor->scale = quant->scale->data[0];
      tensor->zero_point = quant->zero_point->data[0];
   }

   switch(tf_tensor.type) {
   case kTfLiteUInt8:
   case kTfLiteUInt16:
   case kTfLiteUInt32:
   case kTfLiteUInt64:
      tensor->is_signed = false;
      break;
   default:
      tensor->is_signed = true;
   }
}

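/* Prints a human-readable summary of the graph about to be compiled, one
 * line per tensor and one per operation. Only emitted with
 * TEFLON_DEBUG=verbose. */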
static void
dump_graph(struct pipe_tensor *tensors, unsigned tensor_count, struct pipe_ml_operation *operations, unsigned operation_count)
{
   teflon_debug("\n");
   teflon_debug("teflon: compiling graph: %d tensors %d operations\n",
                tensor_count, operation_count);

   teflon_debug("%3s %-8s %3s %s %-12s\n", "idx", "scale", "zp", "has_data", "size");
   teflon_debug("=======================================\n");
   for (int i = 0; i < tensor_count; i++) {
      teflon_debug("%3d %6f %3x %-8s %dx%dx%dx%d\n",
                   tensors[i].index,
                   tensors[i].scale,
                   tensors[i].zero_point,
                   tensors[i].resource == NULL ? "no" : "yes",
                   tensors[i].dims[0], tensors[i].dims[1], tensors[i].dims[2], tensors[i].dims[3]);
   }

   teflon_debug("\n");
   teflon_debug("%3s %-6s %25s %25s %s\n", "idx", "type", "inputs", "outputs", "operation type-specific");
   teflon_debug("================================================================================================\n");
   for (int i = 0; i < operation_count; i++) {
      teflon_debug("%3d ", i);

      switch(operations[i].type) {
      case PIPE_ML_OPERATION_TYPE_ADD:
         teflon_debug("%-6s ", "ADD");
         break;
      case PIPE_ML_OPERATION_TYPE_CONVOLUTION:
         teflon_debug("%-6s ", operations[i].conv.depthwise ? "DWCONV" : "CONV");
         break;
      case PIPE_ML_OPERATION_TYPE_CONCATENATION:
         teflon_debug("%-6s ", "CONCAT");
         break;
      case PIPE_ML_OPERATION_TYPE_POOLING:
         teflon_debug("%-6s ", "POOL");
         break;
      case PIPE_ML_OPERATION_TYPE_SPLIT:
         teflon_debug("%-6s ", "SPLIT");
         break;
      case PIPE_ML_OPERATION_TYPE_PAD:
         teflon_debug("%-6s ", "PAD");
         break;
      case PIPE_ML_OPERATION_TYPE_FULLY_CONNECTED:
         teflon_debug("%-6s ", "FCON");
         break;
      }

      for (unsigned j = 0; j < operations[i].input_count; j++) {
         teflon_debug("%d", operations[i].input_tensors[j]->index);
         if (j < operations[i].input_count - 1)
            teflon_debug(",");
      }

      teflon_debug(" ");

      for (unsigned j = 0; j < operations[i].output_count; j++) {
         teflon_debug("%d", operations[i].output_tensors[j]->index);
         if (j < operations[i].output_count - 1)
            teflon_debug(",");
      }

      teflon_debug("\n");
   }
   teflon_debug("\n");
}

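/* Called by TfLite when a delegated partition is created: converts every
 * tensor and each node to be replaced into pipe_ml structures, hands them to
 * the driver through ml_subgraph_create(), and records which tensors are the
 * subgraph's runtime inputs and outputs. */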
static void *
partition_init(TfLiteContext *tf_context, const char *buffer, size_t length)
{
   const TfLiteDelegateParams *params = (const TfLiteDelegateParams *)buffer;
   struct teflon_delegate *delegate = (struct teflon_delegate *)params->delegate;
   struct pipe_context *context = delegate->context;
   struct pipe_ml_operation operations[params->nodes_to_replace->size];
   struct pipe_tensor tensors[tf_context->tensors_size];
   long start = 0, end = 0;

   memset(operations, 0, sizeof(operations));
   memset(tensors, 0, sizeof(tensors));

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      start = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
   }

   for (int i = 0; i < tf_context->tensors_size; i++)
      fill_tensor(delegate, tf_context, &tensors[i], i);

   for (int i = 0; i < params->nodes_to_replace->size; i++)
   {
      const int node_index = params->nodes_to_replace->data[i];
      TfLiteNode *delegated_node = NULL;
      TfLiteRegistration *delegated_node_registration = NULL;
      tf_context->GetNodeAndRegistration(tf_context, node_index, &delegated_node,
                                         &delegated_node_registration);

      fill_operation(delegate, tf_context, delegated_node, delegated_node_registration, &operations[i], tensors);
   }

   if (debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)
      dump_graph(tensors, tf_context->tensors_size, operations, params->nodes_to_replace->size);

   struct pipe_ml_subgraph *subgraph;
   subgraph = context->ml_subgraph_create(context,
                                          operations,
                                          params->nodes_to_replace->size);

   for (int i = 0; i < tf_context->tensors_size; i++)
      pipe_resource_reference(&tensors[i].resource, NULL);

   struct teflon_subgraph *tsubgraph = calloc(1, sizeof(*tsubgraph));
   tsubgraph->base = subgraph;

   tsubgraph->input_tensors = malloc(params->input_tensors->size * sizeof(*tsubgraph->input_tensors));
   for (int i = 0; i < params->input_tensors->size; i++) {
      unsigned tensor_idx = params->input_tensors->data[i];
      TfLiteTensor *tensor = &tf_context->tensors[tensor_idx];
      if (tensor->allocation_type == kTfLiteMmapRo)
         continue;
      tsubgraph->input_tensors[tsubgraph->input_count] = tensor_idx;
      tsubgraph->input_count++;
   }

   tsubgraph->output_count = params->output_tensors->size;
   tsubgraph->output_tensors = malloc(params->output_tensors->size * sizeof(*tsubgraph->output_tensors));
   memcpy(tsubgraph->output_tensors, params->output_tensors->data,
          params->output_tensors->size * sizeof(*tsubgraph->output_tensors));

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      end = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
      teflon_debug("teflon: compiled graph, took %ld ms\n", (end - start));
   }

   return tsubgraph;
}

static TfLiteStatus
partition_prepare(TfLiteContext *context, TfLiteNode *node)
{
   // TODO: If input size has changed, resize input, intermediate and output buffers

   return kTfLiteOk;
}

// De-allocates the per-node-and-Interpreter custom data.
static void
partition_free(TfLiteContext *tf_context, void *buffer)
{
   struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)buffer;
   struct pipe_ml_subgraph *subgraph = tsubgraph->base;
   struct pipe_context *context = subgraph->context;

   context->ml_subgraph_destroy(context, subgraph);
   free(tsubgraph->input_tensors);
   free(tsubgraph->output_tensors);
   free(tsubgraph);
}

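/* Called for each inference: feeds the input buffers to the compiled
 * subgraph and reads the results back into the TfLite output tensors. */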
static TfLiteStatus
partition_invoke(TfLiteContext *tf_context, TfLiteNode *node)
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)node->delegate;
   struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)node->user_data;
   struct pipe_ml_subgraph *subgraph = tsubgraph->base;
   struct pipe_context *context = delegate->context;
   long start = 0, end = 0;

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      start = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
   }

   void **buffers = malloc(tsubgraph->input_count * sizeof(*buffers));
   bool *is_signed = malloc(tsubgraph->input_count * sizeof(*is_signed));
   for (unsigned i = 0; i < tsubgraph->input_count; i++) {
      TfLiteTensor tf_tensor = tf_context->tensors[tsubgraph->input_tensors[i]];

      buffers[i] = tf_tensor.data.data;
      is_signed[i] = !(tf_tensor.type == kTfLiteUInt8 ||
                       tf_tensor.type == kTfLiteUInt16 ||
                       tf_tensor.type == kTfLiteUInt32 ||
                       tf_tensor.type == kTfLiteUInt64);
   }
   context->ml_subgraph_invoke(context, subgraph, tsubgraph->input_count, tsubgraph->input_tensors, buffers, is_signed);
   free(buffers);
   free(is_signed);

   buffers = malloc(tsubgraph->output_count * sizeof(*buffers));
   is_signed = malloc(tsubgraph->output_count * sizeof(*is_signed));
   for (unsigned i = 0; i < tsubgraph->output_count; i++) {
      TfLiteTensor tf_tensor = tf_context->tensors[tsubgraph->output_tensors[i]];

      buffers[i] = tf_tensor.data.data;
      is_signed[i] = !(tf_tensor.type == kTfLiteUInt8 ||
                       tf_tensor.type == kTfLiteUInt16 ||
                       tf_tensor.type == kTfLiteUInt32 ||
                       tf_tensor.type == kTfLiteUInt64);
   }
   context->ml_subgraph_read_output(context, subgraph, tsubgraph->output_count, tsubgraph->output_tensors, buffers, is_signed);
   free(buffers);
   free(is_signed);

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      end = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
      teflon_debug("teflon: invoked graph, took %ld ms\n", (end - start));
   }

   return kTfLiteOk;
}

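/* Only per-tensor affine quantization (a single scale/zero-point pair) is
 * supported by the delegate. */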
static bool
tensor_quantization_supported(TfLiteTensor *tensor)
{
   if (tensor->quantization.type == kTfLiteAffineQuantization) {
      TfLiteAffineQuantization *affine = (TfLiteAffineQuantization *)tensor->quantization.params;

      /*
       * Per-axis quantization not supported, for details see:
       * https://ai.google.dev/edge/litert/models/quantization_spec#per-axis_vs_per-tensor
       */
      return affine->scale->size == 1 && affine->zero_point->size == 1;
   }
   return false;
}

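/* A fused ReLU6 can only be folded into the quantized output range when the
 * largest representable real value, (quantized_max - zero_point) * scale,
 * does not exceed 6.0, so clamping to the quantized maximum already clamps
 * to 6. */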
static bool
fused_relu6_supported(TfLiteTensor *tensor)
{
   TfLiteAffineQuantization *affine;
   int quantized_max;

   switch (tensor->type) {
   case kTfLiteInt8:
      quantized_max = INT8_MAX;
      break;
   case kTfLiteUInt8:
      quantized_max = UINT8_MAX;
      break;
   default:
      return false;
   }

   assert(tensor->quantization.type == kTfLiteAffineQuantization);
   affine = (TfLiteAffineQuantization *)tensor->quantization.params;

   assert(affine->scale->size == affine->zero_point->size);
   for (int i = 0; i < affine->zero_point->size; i++) {
      if ((quantized_max - affine->zero_point->data[i]) * affine->scale->data[i] > 6.0f)
         return false;
   }
   return true;
}

static bool
fused_activation_supported(TfLiteFusedActivation activation, TfLiteTensor *tensor)
{
   switch (activation) {
   case kTfLiteActNone:
   case kTfLiteActRelu:
      return true;
   case kTfLiteActRelu6:
      return fused_relu6_supported(tensor);
   default:
      return false;
   }
}

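/* Delegate Prepare() callback: walks the execution plan, collects the nodes
 * this delegate can handle and asks TfLite to replace them with delegate
 * kernels backed by the partition_* callbacks above. */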
static TfLiteStatus
PrepareDelegate(TfLiteContext *context, TfLiteDelegate *delegate)
{
   TfLiteIntArray *plan;
   TfLiteNode *node;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));

   // Get a list of supported nodes.
   TfLiteIntArray *supported_nodes = malloc(plan->size * sizeof(int) + sizeof(*supported_nodes));
   supported_nodes->size = plan->size;
   unsigned node_count = 0;
   for (int i = 0; i < plan->size; i++) {
      int node_index = plan->data[i];
      bool supported = false;
      TfLiteRegistration *registration;
      TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, node_index, &node, &registration));

      switch(registration->builtin_code) {
      case kTfLiteBuiltinConv2d: {
         TfLiteTensor *input_tensor = &context->tensors[node->inputs->data[0]];
         TfLiteTensor *weight_tensor = &context->tensors[node->inputs->data[1]];
         TfLiteTensor *bias_tensor = &context->tensors[node->inputs->data[2]];
         TfLiteTensor *output_tensor = &context->tensors[node->outputs->data[0]];
         TfLiteConvParams* params = (TfLiteConvParams*)node->builtin_data;

         // Dilation and per-axis quantization not yet implemented
         if (tensor_quantization_supported(input_tensor) &&
             tensor_quantization_supported(weight_tensor) &&
             tensor_quantization_supported(bias_tensor) &&
             tensor_quantization_supported(output_tensor) &&
             fused_activation_supported(params->activation, output_tensor) &&
             (registration->version < 2 ||
              (params->dilation_width_factor == 1 &&
               params->dilation_height_factor == 1))) {
            supported = true;
         }
         break;
      }
      case kTfLiteBuiltinDepthwiseConv2d: {
         TfLiteTensor *input_tensor = &context->tensors[node->inputs->data[0]];
         TfLiteTensor *weight_tensor = &context->tensors[node->inputs->data[1]];
         TfLiteTensor *bias_tensor = &context->tensors[node->inputs->data[2]];
         TfLiteTensor *output_tensor = &context->tensors[node->outputs->data[0]];
         TfLiteDepthwiseConvParams* params = (TfLiteDepthwiseConvParams*)node->builtin_data;

         // Dilation and per-axis quantization not yet implemented
         if (tensor_quantization_supported(input_tensor) &&
             tensor_quantization_supported(weight_tensor) &&
             tensor_quantization_supported(bias_tensor) &&
             tensor_quantization_supported(output_tensor) &&
             fused_activation_supported(params->activation, output_tensor) &&
             (registration->version < 2 ||
              (params->dilation_width_factor == 1 &&
               params->dilation_height_factor == 1))) {
            supported = true;
         }
         break;
      }
      case kTfLiteBuiltinAdd: {
         supported = context->tensors[node->inputs->data[0]].data.data == NULL &&
                     context->tensors[node->inputs->data[1]].data.data == NULL;
         break;
      }
      case kTfLiteBuiltinConcatenation: {
         TfLiteConcatenationParams *params = node->builtin_data;
         supported = true;

         if (params->axis != 3 &&
             params->axis != -1)
            supported = false;

         unsigned input_channels = context->tensors[node->inputs->data[0]].dims->data[3];
         for (unsigned i = 1; i < node->inputs->size; i++)
            if (input_channels != context->tensors[node->inputs->data[i]].dims->data[3])
               supported = false;

         break;
      }
      case kTfLiteBuiltinSplit: {
         int32_t axis = context->tensors[node->inputs->data[0]].data.i32[0];
         supported = true;

         if (axis != 3 &&
             axis != -1)
            supported = false;

         unsigned output_channels = context->tensors[node->outputs->data[0]].dims->data[3];
         for (unsigned i = 1; i < node->outputs->size; i++)
            if (output_channels != context->tensors[node->outputs->data[i]].dims->data[3])
               supported = false;

         break;
      }
      case kTfLiteBuiltinPad: {
         uint32_t *padding = context->tensors[node->inputs->data[1]].data.data;
         supported = padding[0] == 0 &&
                     padding[1] == 0 &&
                     padding[2] == 1 &&
                     padding[3] == 1 &&
                     padding[4] == 1 &&
                     padding[5] == 1 &&
                     padding[6] == 0 &&
                     padding[7] == 0;
         break;
      }
      case kTfLiteBuiltinFullyConnected:
         supported = true;
         break;
      }

      if (supported)
         supported_nodes->data[node_count++] = node_index;
   }
   supported_nodes->size = node_count;

   TfLiteRegistration registration;

   registration.init = partition_init;
   registration.free = partition_free;
   registration.prepare = partition_prepare;
   registration.invoke = partition_invoke;

   registration.profiling_string = NULL;
   registration.builtin_code = kTfLiteBuiltinDelegate;
   registration.version = 1;
   registration.registration_external = NULL;
   registration.custom_name = "Teflon Delegate";

   // Replace supported subgraphs.
   TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
      context,
      registration,
      supported_nodes,
      delegate);

   free(supported_nodes);

   return status;
}

static TfLiteStatus
CopyFromBufferHandle(TfLiteContext *context,
                     TfLiteDelegate *delegate,
                     TfLiteBufferHandle buffer_handle,
                     TfLiteTensor *tensor)
{
   return kTfLiteOk;
}

static void
FreeBufferHandle(TfLiteContext *context,
                 TfLiteDelegate *delegate,
                 TfLiteBufferHandle *handle)
{
}

TfLiteDelegate *tflite_plugin_create_delegate(char **options_keys,
                                              char **options_values,
                                              size_t num_options,
                                              void (*report_error)(const char *));

void tflite_plugin_destroy_delegate(TfLiteDelegate *delegate);

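/* Entry point loaded by TfLite's external delegate mechanism: probes for a
 * supported Gallium device and creates a compute-only context on it. */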
__attribute__((visibility("default"))) TfLiteDelegate *tflite_plugin_create_delegate(char **options_keys,
                                                                                     char **options_values,
                                                                                     size_t num_options,
                                                                                     void (*report_error)(const char *))
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)calloc(1, sizeof(*delegate));
   struct pipe_screen *screen;
   struct pipe_loader_device **devs;

   delegate->base.flags = kTfLiteDelegateFlagsAllowDynamicTensors | kTfLiteDelegateFlagsRequirePropagatedShapes;
   delegate->base.Prepare = &PrepareDelegate;
   delegate->base.CopyFromBufferHandle = &CopyFromBufferHandle;
   delegate->base.FreeBufferHandle = &FreeBufferHandle;

   int n = pipe_loader_probe(NULL, 0, false);
   devs = (struct pipe_loader_device **)malloc(sizeof(*devs) * n);
   pipe_loader_probe(devs, n, false);

   for (int i = 0; i < n; i++) {
      if (strstr("etnaviv", devs[i]->driver_name))
         delegate->dev = devs[i];
      else
         pipe_loader_release(&devs[i], 1);
   }
   free(devs);

   if (delegate->dev == NULL) {
      fprintf(stderr, "Couldn't open kernel device\n");
      return NULL;
   }

   teflon_debug("Teflon delegate: loaded %s driver\n", delegate->dev->driver_name);

   screen = pipe_loader_create_screen(delegate->dev, false);
   delegate->context = screen->context_create(screen, NULL, PIPE_CONTEXT_COMPUTE_ONLY);

   return &delegate->base;
}

__attribute__((visibility("default"))) void tflite_plugin_destroy_delegate(TfLiteDelegate *tflite_delegate)
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)tflite_delegate;
   struct pipe_screen *screen;

   if (tflite_delegate == NULL) {
      fprintf(stderr, "tflite_plugin_destroy_delegate: NULL delegate!\n");
      return;
   }

   screen = delegate->context->screen;
   delegate->context->destroy(delegate->context);
   screen->destroy(screen);
   pipe_loader_release(&delegate->dev, 1);
   free(delegate);
}
