/*
 * Copyright (c) 2023-2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
 * SPDX-License-Identifier: MIT
 */

#include "pipe-loader/pipe_loader.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/core/c/builtin_op_data.h"

/* TODO: Move to TfLiteAsyncKernel for zero-copy of buffers */

enum teflon_debug_flags {
   TEFLON_DEBUG_VERBOSE = 1 << 1,
};

static const struct debug_named_value teflon_debug_flags[] = {
    { "verbose", TEFLON_DEBUG_VERBOSE, "Verbose logging." },
    DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_teflon, "TEFLON_DEBUG", teflon_debug_flags, 0)

static inline void
teflon_debug(const char *format, ...)
{
   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      va_list ap;
      va_start(ap, format);
      _debug_vprintf(format, ap);
      va_end(ap);
   }
}

struct teflon_delegate
{
   TfLiteDelegate base;
   struct pipe_loader_device *dev;
   struct pipe_context *context;
};

struct teflon_subgraph
{
   struct pipe_ml_subgraph *base;

   unsigned *input_tensors;
   unsigned input_count;

   unsigned *output_tensors;
   unsigned output_count;
};

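/* Creates a pipe_buffer sized to the tensor's dimensions and element type and
 * uploads the tensor's data into it.
 */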
static struct pipe_resource *
create_resource(struct pipe_context *context, TfLiteTensor tensor)
{
   unsigned bytes;
   unsigned size = 1;

   for (int i = 0; i < tensor.dims->size; i++)
      size *= tensor.dims->data[i];

   switch(tensor.type) {
      case kTfLiteInt8:
      case kTfLiteUInt8:
         bytes = 1;
         break;
      case kTfLiteInt16:
      case kTfLiteUInt16:
      case kTfLiteFloat16:
         bytes = 2;
         break;
      case kTfLiteInt32:
      case kTfLiteUInt32:
      case kTfLiteFloat32:
         bytes = 4;
         break;
      case kTfLiteInt64:
      case kTfLiteUInt64:
      case kTfLiteFloat64:
      case kTfLiteComplex64:
         bytes = 8;
         break;
      default:
         unreachable("Unsupported TF type");
   }

   return pipe_buffer_create_with_data(context, 0, PIPE_USAGE_DEFAULT, size * bytes, tensor.data.data);
}

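/* Translates a TFLite node into a struct pipe_ml_operation, wiring up its
 * input/output tensors and the operation-specific parameters (convolution
 * strides, padding amounts, weights and biases, etc.).
 */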
static void
fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *node_registration, struct pipe_ml_operation *operation, struct pipe_tensor *tensors)
{
   operation->input_count = node->inputs->size;
   operation->input_tensors = calloc(operation->input_count, sizeof(void*));
   for (unsigned i = 0; i < node->inputs->size; i++)
      operation->input_tensors[i] = &tensors[node->inputs->data[i]];

   operation->output_count = node->outputs->size;
   operation->output_tensors = calloc(operation->output_count, sizeof(void*));
   for (unsigned i = 0; i < node->outputs->size; i++)
      operation->output_tensors[i] = &tensors[node->outputs->data[i]];

   switch(node_registration->builtin_code) {
      case kTfLiteBuiltinConv2d:
      case kTfLiteBuiltinDepthwiseConv2d: {
         operation->type = PIPE_ML_OPERATION_TYPE_CONVOLUTION;
         operation->conv.weight_tensor = &tensors[node->inputs->data[1]];
         operation->conv.bias_tensor = &tensors[node->inputs->data[2]];
         if (node_registration->builtin_code == kTfLiteBuiltinConv2d) {
            TfLiteConvParams* params = (TfLiteConvParams*)node->builtin_data;

            assert(params->activation == kTfLiteActNone ||
                   params->activation == kTfLiteActRelu ||
                   params->activation == kTfLiteActRelu6);
            if (node_registration->version >= 2) {
               assert(params->dilation_width_factor == 1);
               assert(params->dilation_height_factor == 1);
            }
            operation->conv.stride_x = params->stride_width;
            operation->conv.stride_y = params->stride_height;
            operation->conv.padding_same = params->padding == kTfLitePaddingSame;
            operation->conv.depthwise = false;
            operation->conv.relu = params->activation == kTfLiteActRelu ||
                                   params->activation == kTfLiteActRelu6;
         } else {
            TfLiteDepthwiseConvParams* params = (TfLiteDepthwiseConvParams*)node->builtin_data;

            assert(params->activation == kTfLiteActNone ||
                   params->activation == kTfLiteActRelu ||
                   params->activation == kTfLiteActRelu6);
            if (node_registration->version >= 2) {
               assert(params->dilation_width_factor == 1);
               assert(params->dilation_height_factor == 1);
            }
            operation->conv.stride_x = params->stride_width;
            operation->conv.stride_y = params->stride_height;
            operation->conv.padding_same = params->padding == kTfLitePaddingSame;
            operation->conv.depthwise = true;
            operation->conv.relu = params->activation == kTfLiteActRelu ||
                                   params->activation == kTfLiteActRelu6;
         }
         operation->conv.pointwise = operation->conv.weight_tensor->dims[1] == 1 && \
                                     operation->conv.weight_tensor->dims[2] == 1;
         break;
      }
      case kTfLiteBuiltinAveragePool2d:
         operation->type = PIPE_ML_OPERATION_TYPE_POOLING;
         break;
      case kTfLiteBuiltinAdd:
         operation->type = PIPE_ML_OPERATION_TYPE_ADD;
         break;
      case kTfLiteBuiltinConcatenation:
         operation->type = PIPE_ML_OPERATION_TYPE_CONCATENATION;
         break;
      case kTfLiteBuiltinSplit:
         operation->type = PIPE_ML_OPERATION_TYPE_SPLIT;
         break;
      case kTfLiteBuiltinPad: {
         int32_t *paddings = tf_context->tensors[node->inputs->data[1]].data.data;

         operation->type = PIPE_ML_OPERATION_TYPE_PAD;
         operation->pad.before_x = paddings[2];
         operation->pad.after_x = paddings[3];
         operation->pad.before_y = paddings[4];
         operation->pad.after_y = paddings[5];
         break;
      }
      case kTfLiteBuiltinFullyConnected: {
         operation->type = PIPE_ML_OPERATION_TYPE_FULLY_CONNECTED;
         operation->fcon.weight_tensor = &tensors[node->inputs->data[1]];
         operation->fcon.bias_tensor = &tensors[node->inputs->data[2]];
         break;
      }
      default:
         unreachable("Unsupported ML operation type");
   }
}

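/* Fills a struct pipe_tensor from the TFLite tensor at the given index,
 * copying dimensions, quantization parameters and signedness, and creating a
 * resource for tensors that carry constant data.
 */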
static void
fill_tensor(struct teflon_delegate *delegate, TfLiteContext *tf_context, struct pipe_tensor *tensor, unsigned index)
{
   struct pipe_context *context = delegate->context;
   TfLiteTensor tf_tensor = tf_context->tensors[index];

   if (tf_tensor.type == kTfLiteNoType)
      return; /* Placeholder tensor */

   if (tf_tensor.data.data)
      tensor->resource = create_resource(context, tf_tensor);

   tensor->index = index;
   memcpy(tensor->dims, tf_tensor.dims->data, tf_tensor.dims->size * sizeof(*tensor->dims));

   if (tf_tensor.quantization.type == kTfLiteAffineQuantization) {
      const TfLiteAffineQuantization *quant = (const TfLiteAffineQuantization *)tf_tensor.quantization.params;
      tensor->scale = quant->scale->data[0];
      tensor->zero_point = quant->zero_point->data[0];
   }

   switch(tf_tensor.type) {
      case kTfLiteUInt8:
      case kTfLiteUInt16:
      case kTfLiteUInt32:
      case kTfLiteUInt64:
         tensor->is_signed = false;
         break;
      default:
         tensor->is_signed = true;
   }
}

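/* Prints a summary of the tensors and operations about to be compiled, for
 * TEFLON_DEBUG=verbose.
 */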
static void
dump_graph(struct pipe_tensor *tensors, unsigned tensor_count, struct pipe_ml_operation *operations, unsigned operation_count)
{
   teflon_debug("\n");
   teflon_debug("teflon: compiling graph: %d tensors %d operations\n",
                tensor_count, operation_count);

   teflon_debug("%3s %-8s %3s %s %-12s\n", "idx", "scale", "zp", "has_data", "size");
   teflon_debug("=======================================\n");
   for (int i = 0; i < tensor_count; i++) {
      teflon_debug("%3d %6f %3x %-8s %dx%dx%dx%d\n",
                  tensors[i].index,
                  tensors[i].scale,
                  tensors[i].zero_point,
                  tensors[i].resource == NULL ? "no" : "yes",
                  tensors[i].dims[0], tensors[i].dims[1], tensors[i].dims[2], tensors[i].dims[3]);
   }

   teflon_debug("\n");
   teflon_debug("%3s %-6s %25s %25s  %s\n", "idx", "type", "inputs", "outputs", "operation type-specific");
   teflon_debug("================================================================================================\n");
   for (int i = 0; i < operation_count; i++) {
      teflon_debug("%3d ", i);

      switch(operations[i].type) {
         case PIPE_ML_OPERATION_TYPE_ADD:
            teflon_debug("%-6s ", "ADD");
            break;
         case PIPE_ML_OPERATION_TYPE_CONVOLUTION:
            teflon_debug("%-6s ", operations[i].conv.depthwise ? "DWCONV" : "CONV");
            break;
         case PIPE_ML_OPERATION_TYPE_CONCATENATION:
            teflon_debug("%-6s ", "CONCAT");
            break;
         case PIPE_ML_OPERATION_TYPE_POOLING:
            teflon_debug("%-6s ", "POOL");
            break;
         case PIPE_ML_OPERATION_TYPE_SPLIT:
            teflon_debug("%-6s ", "SPLIT");
            break;
         case PIPE_ML_OPERATION_TYPE_PAD:
            teflon_debug("%-6s ", "PAD");
            break;
         case PIPE_ML_OPERATION_TYPE_FULLY_CONNECTED:
            teflon_debug("%-6s ", "FCON");
            break;
      }

      for (unsigned j = 0; j < operations[i].input_count; j++) {
         teflon_debug("%d", operations[i].input_tensors[j]->index);
         if (j < operations[i].input_count - 1)
            teflon_debug(",");
      }

      teflon_debug(" ");

      for (unsigned j = 0; j < operations[i].output_count; j++) {
         teflon_debug("%d", operations[i].output_tensors[j]->index);
         if (j < operations[i].output_count - 1)
            teflon_debug(",");
      }

      teflon_debug("\n");
   }
   teflon_debug("\n");
}

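/* Called by TFLite when a delegated partition is created: gathers the tensors
 * and operations in the partition, compiles them into a pipe_ml_subgraph and
 * records which tensors are the partition's inputs and outputs.
 */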
static void *
partition_init(TfLiteContext *tf_context, const char *buffer, size_t length)
{
   const TfLiteDelegateParams *params = (const TfLiteDelegateParams *)buffer;
   struct teflon_delegate *delegate = (struct teflon_delegate *)params->delegate;
   struct pipe_context *context = delegate->context;
   struct pipe_ml_operation operations[params->nodes_to_replace->size];
   struct pipe_tensor tensors[tf_context->tensors_size];
   long start = 0, end = 0;

   memset(operations, 0, sizeof(operations));
   memset(tensors, 0, sizeof(tensors));

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      start = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
   }

   for (int i = 0; i < tf_context->tensors_size; i++)
      fill_tensor(delegate, tf_context, &tensors[i], i);

   for (int i = 0; i < params->nodes_to_replace->size; i++)
   {
      const int node_index = params->nodes_to_replace->data[i];
      TfLiteNode *delegated_node = NULL;
      TfLiteRegistration *delegated_node_registration = NULL;
      tf_context->GetNodeAndRegistration(tf_context, node_index, &delegated_node,
                                         &delegated_node_registration);

      fill_operation(delegate, tf_context, delegated_node, delegated_node_registration, &operations[i], tensors);
   }

   if (debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)
      dump_graph(tensors, tf_context->tensors_size, operations, params->nodes_to_replace->size);

   struct pipe_ml_subgraph *subgraph;
   subgraph = context->ml_subgraph_create(context,
                                          operations,
                                          params->nodes_to_replace->size);

   for (int i = 0; i < tf_context->tensors_size; i++)
      pipe_resource_reference(&tensors[i].resource, NULL);

   struct teflon_subgraph *tsubgraph = calloc(1, sizeof(*tsubgraph));
   tsubgraph->base = subgraph;

   tsubgraph->input_tensors = malloc(params->input_tensors->size * sizeof(*tsubgraph->input_tensors));
   for (int i = 0; i < params->input_tensors->size; i++) {
      unsigned tensor_idx = params->input_tensors->data[i];
      TfLiteTensor *tensor = &tf_context->tensors[tensor_idx];
      if (tensor->allocation_type == kTfLiteMmapRo)
         continue;
      tsubgraph->input_tensors[tsubgraph->input_count] = tensor_idx;
      tsubgraph->input_count++;
   }

   tsubgraph->output_count = params->output_tensors->size;
   tsubgraph->output_tensors = malloc(params->output_tensors->size * sizeof(*tsubgraph->output_tensors));
   memcpy(tsubgraph->output_tensors, params->output_tensors->data,
          params->output_tensors->size * sizeof(*tsubgraph->output_tensors));

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      end = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
      teflon_debug("teflon: compiled graph, took %ld ms\n", (end - start));
   }

   return tsubgraph;
}

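// Prepares the delegate kernel for invocation; currently a no-op (see TODO below).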
static TfLiteStatus
partition_prepare(TfLiteContext *context, TfLiteNode *node)
{
   // TODO: If input size has changed, resize input, intermediate and output buffers

   return kTfLiteOk;
}

// De-allocates the per-node-and-Interpreter custom data.
static void
partition_free(TfLiteContext *tf_context, void *buffer)
{
   struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)buffer;
   struct pipe_ml_subgraph *subgraph = tsubgraph->base;
   struct pipe_context *context = subgraph->context;

   context->ml_subgraph_destroy(context, subgraph);
   free(tsubgraph->input_tensors);
   free(tsubgraph->output_tensors);
   free(tsubgraph);
}

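/* Runs the compiled subgraph: hands the input buffers to the driver, then
 * reads the results back into the TFLite output tensors.
 */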
static TfLiteStatus
partition_invoke(TfLiteContext *tf_context, TfLiteNode *node)
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)node->delegate;
   struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)node->user_data;
   struct pipe_ml_subgraph *subgraph = tsubgraph->base;
   struct pipe_context *context = delegate->context;
   long start = 0, end = 0;

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      start = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
   }

   void **buffers = malloc(tsubgraph->input_count * sizeof(*buffers));
   bool *is_signed = malloc(tsubgraph->input_count * sizeof(*is_signed));
   for (unsigned i = 0; i < tsubgraph->input_count; i++) {
      TfLiteTensor tf_tensor = tf_context->tensors[tsubgraph->input_tensors[i]];

      buffers[i] = tf_tensor.data.data;
      is_signed[i] = !(tf_tensor.type == kTfLiteUInt8 ||
                       tf_tensor.type == kTfLiteUInt16 ||
                       tf_tensor.type == kTfLiteUInt32 ||
                       tf_tensor.type == kTfLiteUInt64);
   }
   context->ml_subgraph_invoke(context, subgraph, tsubgraph->input_count, tsubgraph->input_tensors, buffers, is_signed);
   free(buffers);
   free(is_signed);

   buffers = malloc(tsubgraph->output_count * sizeof(*buffers));
   is_signed = malloc(tsubgraph->output_count * sizeof(*is_signed));
   for (unsigned i = 0; i < tsubgraph->output_count; i++) {
      TfLiteTensor tf_tensor = tf_context->tensors[tsubgraph->output_tensors[i]];

      buffers[i] = tf_tensor.data.data;
      is_signed[i] = !(tf_tensor.type == kTfLiteUInt8 ||
                       tf_tensor.type == kTfLiteUInt16 ||
                       tf_tensor.type == kTfLiteUInt32 ||
                       tf_tensor.type == kTfLiteUInt64);
   }
   context->ml_subgraph_read_output(context, subgraph, tsubgraph->output_count, tsubgraph->output_tensors, buffers, is_signed);
   free(buffers);
   free(is_signed);

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      end = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
      teflon_debug("teflon: invoked graph, took %ld ms\n", (end - start));
   }

   return kTfLiteOk;
}

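/* Returns whether the tensor uses a quantization scheme the delegate can handle. */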
static bool
tensor_quantization_supported(TfLiteTensor *tensor)
{
   if (tensor->quantization.type == kTfLiteAffineQuantization) {
      TfLiteAffineQuantization *affine = (TfLiteAffineQuantization *)tensor->quantization.params;

      /*
       * Per-axis quantization not supported, for details see:
       * https://ai.google.dev/edge/litert/models/quantization_spec#per-axis_vs_per-tensor
       */
      return affine->scale->size == 1 && affine->zero_point->size == 1;
   }
   return false;
}

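/* A fused ReLU6 can only be folded into the quantized output range when no
 * representable value in the output tensor exceeds 6.0.
 */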
static bool
fused_relu6_supported(TfLiteTensor *tensor)
{
   TfLiteAffineQuantization *affine;
   int quantized_max;

   switch (tensor->type) {
      case kTfLiteInt8:
         quantized_max = INT8_MAX;
         break;
      case kTfLiteUInt8:
         quantized_max = UINT8_MAX;
         break;
      default:
         return false;
   }

   assert(tensor->quantization.type == kTfLiteAffineQuantization);
   affine = (TfLiteAffineQuantization *)tensor->quantization.params;

   assert(affine->scale->size == affine->zero_point->size);
   for (int i = 0; i < affine->zero_point->size; i++) {
      if ((quantized_max - affine->zero_point->data[i]) * affine->scale->data[i] > 6.0f)
         return false;
   }
   return true;
}

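/* Returns whether a fused activation on the given output tensor can be handled. */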
static bool
fused_activation_supported(TfLiteFusedActivation activation, TfLiteTensor *tensor)
{
   switch (activation) {
      case kTfLiteActNone:
      case kTfLiteActRelu:
         return true;
      case kTfLiteActRelu6:
         return fused_relu6_supported(tensor);
      default:
         return false;
   }
}

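/* Delegate Prepare() callback: walks the execution plan, collects the nodes
 * this delegate can handle and asks TFLite to replace them with delegate
 * kernels backed by the partition_* callbacks above.
 */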
static TfLiteStatus
PrepareDelegate(TfLiteContext *context, TfLiteDelegate *delegate)
{
   TfLiteIntArray *plan;
   TfLiteNode *node;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));

   // Get a list of supported nodes.
   TfLiteIntArray *supported_nodes = malloc(plan->size * sizeof(int) + sizeof(*supported_nodes));
   supported_nodes->size = plan->size;
   unsigned node_count = 0;
   for (int i = 0; i < plan->size; i++) {
      int node_index = plan->data[i];
      bool supported = false;
      TfLiteRegistration *registration;
      TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
          context, node_index, &node, &registration));

      switch(registration->builtin_code) {
         case kTfLiteBuiltinConv2d: {
            TfLiteTensor *input_tensor = &context->tensors[node->inputs->data[0]];
            TfLiteTensor *weight_tensor = &context->tensors[node->inputs->data[1]];
            TfLiteTensor *bias_tensor = &context->tensors[node->inputs->data[2]];
            TfLiteTensor *output_tensor = &context->tensors[node->outputs->data[0]];
            TfLiteConvParams* params = (TfLiteConvParams*)node->builtin_data;

            // Dilation and per-axis quantization not yet implemented
            if (tensor_quantization_supported(input_tensor) &&
                tensor_quantization_supported(weight_tensor) &&
                tensor_quantization_supported(bias_tensor) &&
                tensor_quantization_supported(output_tensor) &&
                fused_activation_supported(params->activation, output_tensor) &&
                (registration->version < 2 ||
                 (params->dilation_width_factor == 1 &&
                  params->dilation_height_factor == 1))) {
               supported = true;
            }
            break;
         }
         case kTfLiteBuiltinDepthwiseConv2d: {
            TfLiteTensor *input_tensor = &context->tensors[node->inputs->data[0]];
            TfLiteTensor *weight_tensor = &context->tensors[node->inputs->data[1]];
            TfLiteTensor *bias_tensor = &context->tensors[node->inputs->data[2]];
            TfLiteTensor *output_tensor = &context->tensors[node->outputs->data[0]];
            TfLiteDepthwiseConvParams* params = (TfLiteDepthwiseConvParams*)node->builtin_data;

            // Dilation and per-axis quantization not yet implemented
            if (tensor_quantization_supported(input_tensor) &&
                tensor_quantization_supported(weight_tensor) &&
                tensor_quantization_supported(bias_tensor) &&
                tensor_quantization_supported(output_tensor) &&
                fused_activation_supported(params->activation, output_tensor) &&
                (registration->version < 2 ||
                 (params->dilation_width_factor == 1 &&
                  params->dilation_height_factor == 1))) {
               supported = true;
            }
            break;
         }
         case kTfLiteBuiltinAdd: {
            supported = context->tensors[node->inputs->data[0]].data.data == NULL &&
                        context->tensors[node->inputs->data[1]].data.data == NULL;
            break;
         }
         case kTfLiteBuiltinConcatenation: {
            TfLiteConcatenationParams *params = node->builtin_data;
            supported = true;

            if (params->axis != 3 &&
                params->axis != -1)
               supported = false;

            unsigned input_channels = context->tensors[node->inputs->data[0]].dims->data[3];
            for (unsigned i = 1; i < node->inputs->size; i++)
               if (input_channels != context->tensors[node->inputs->data[i]].dims->data[3])
                  supported = false;

            break;
         }
         case kTfLiteBuiltinSplit: {
            int32_t axis = context->tensors[node->inputs->data[0]].data.i32[0];
            supported = true;

            if (axis != 3 &&
                axis != -1)
               supported = false;

            unsigned output_channels = context->tensors[node->outputs->data[0]].dims->data[3];
            for (unsigned i = 1; i < node->outputs->size; i++)
               if (output_channels != context->tensors[node->outputs->data[i]].dims->data[3])
                  supported = false;

            break;
         }
         case kTfLiteBuiltinPad: {
            uint32_t *padding = context->tensors[node->inputs->data[1]].data.data;
            supported = padding[0] == 0 &&
                        padding[1] == 0 &&
                        padding[2] == 1 &&
                        padding[3] == 1 &&
                        padding[4] == 1 &&
                        padding[5] == 1 &&
                        padding[6] == 0 &&
                        padding[7] == 0;
            break;
         }
         case kTfLiteBuiltinFullyConnected:
            supported = true;
            break;
      }

      if (supported)
         supported_nodes->data[node_count++] = node_index;
   }
   supported_nodes->size = node_count;

   TfLiteRegistration registration;

   registration.init = partition_init;
   registration.free = partition_free;
   registration.prepare = partition_prepare;
   registration.invoke = partition_invoke;

   registration.profiling_string = NULL;
   registration.builtin_code = kTfLiteBuiltinDelegate;
   registration.version = 1;
   registration.registration_external = NULL;
   registration.custom_name = "Teflon Delegate";

   // Replace supported subgraphs.
   TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
       context,
       registration,
       supported_nodes,
       delegate);

   free(supported_nodes);

   return status;
}

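/* Buffer-handle hooks installed on the delegate; they do nothing here because
 * outputs are read back synchronously in partition_invoke().
 */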
static TfLiteStatus
CopyFromBufferHandle(TfLiteContext *context,
                     TfLiteDelegate *delegate,
                     TfLiteBufferHandle buffer_handle,
                     TfLiteTensor *tensor)
{
   return kTfLiteOk;
}

static void
FreeBufferHandle(TfLiteContext *context,
                 TfLiteDelegate *delegate,
                 TfLiteBufferHandle *handle)
{
}

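/* Exported entry points for loading Teflon as a TFLite external delegate. */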
TfLiteDelegate *tflite_plugin_create_delegate(char **options_keys,
                                              char **options_values,
                                              size_t num_options,
                                              void (*report_error)(const char *));

void tflite_plugin_destroy_delegate(TfLiteDelegate *delegate);

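/* Creates the delegate: probes the Gallium pipe loader for a suitable device
 * (currently only etnaviv is matched), creates a compute-only context on it
 * and installs the delegate callbacks.
 */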
__attribute__((visibility("default"))) TfLiteDelegate *tflite_plugin_create_delegate(char **options_keys,
                                                                                      char **options_values,
                                                                                      size_t num_options,
                                                                                      void (*report_error)(const char *))
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)calloc(1, sizeof(*delegate));
   struct pipe_screen *screen;
   struct pipe_loader_device **devs;

   delegate->base.flags = kTfLiteDelegateFlagsAllowDynamicTensors | kTfLiteDelegateFlagsRequirePropagatedShapes;
   delegate->base.Prepare = &PrepareDelegate;
   delegate->base.CopyFromBufferHandle = &CopyFromBufferHandle;
   delegate->base.FreeBufferHandle = &FreeBufferHandle;

   int n = pipe_loader_probe(NULL, 0, false);
   devs = (struct pipe_loader_device **)malloc(sizeof(*devs) * n);
   pipe_loader_probe(devs, n, false);

   for (int i = 0; i < n; i++) {
      if (strstr("etnaviv", devs[i]->driver_name))
         delegate->dev = devs[i];
      else
         pipe_loader_release(&devs[i], 1);
   }
   free(devs);

   if (delegate->dev == NULL) {
      fprintf(stderr, "Couldn't open kernel device\n");
      return NULL;
   }

   teflon_debug("Teflon delegate: loaded %s driver\n", delegate->dev->driver_name);

   screen = pipe_loader_create_screen(delegate->dev, false);
   delegate->context = screen->context_create(screen, NULL, PIPE_CONTEXT_COMPUTE_ONLY);

   return &delegate->base;
}

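/* Tears down the context, screen and loader device created in
 * tflite_plugin_create_delegate() and frees the delegate.
 */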
__attribute__((visibility("default"))) void tflite_plugin_destroy_delegate(TfLiteDelegate *tflite_delegate)
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)tflite_delegate;
   struct pipe_screen *screen;

   if (tflite_delegate == NULL) {
      fprintf(stderr, "tflite_plugin_destroy_delegate: NULL delegate!\n");
      return;
   }

   screen = delegate->context->screen;
   delegate->context->destroy(delegate->context);
   screen->destroy(screen);
   pipe_loader_release(&delegate->dev, 1);
   free(delegate);
}