/*
 * Copyright (c) 2018 Sergey Lavrushkin
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DNN native backend implementation.
 */

#include "dnn_backend_native.h"
#include "libavutil/avassert.h"
#include "dnn_backend_native_layer_conv2d.h"
#include "dnn_backend_native_layers.h"
#include "dnn_io_proc.h"
#include "dnn_backend_common.h"

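/*
 * Options exposed to the calling filter through the AVClass below:
 * "conv2d_threads" sets the thread count used by the conv2d layer, and
 * "async" requests asynchronous inference, which this backend does not
 * support and therefore rolls back to synchronous mode with a warning
 * at load time.
 */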
#define OFFSET(x) offsetof(NativeContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_native_options[] = {
    { "conv2d_threads", "threads num for conv2d layer", OFFSET(options.conv2d_threads), AV_OPT_TYPE_INT,  { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS },
    { "async",          "use DNN async inference",      OFFSET(options.async),          AV_OPT_TYPE_BOOL, { .i64 = 0 },       0,       1, FLAGS },
    { NULL },
};

static const AVClass dnn_native_class = {
    .class_name = "dnn_native",
    .item_name  = av_default_item_name,
    .option     = dnn_native_options,
    .version    = LIBAVUTIL_VERSION_INT,
    .category   = AV_CLASS_CATEGORY_FILTER,
};

static int execute_model_native(Queue *lltask_queue);

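/**
 * Wrap a TaskItem into a LastLevelTaskItem and push it onto the queue.
 * The native backend maps each task to exactly one last-level task, so
 * inference_todo is set to 1 and inference_done to 0 here.
 */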
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
    NativeModel *native_model = task->model;
    NativeContext *ctx = &native_model->ctx;
    LastLevelTaskItem *lltask = av_malloc(sizeof(*lltask));

    if (!lltask) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate space for LastLevelTaskItem\n");
        return AVERROR(ENOMEM);
    }
    task->inference_todo = 1;
    task->inference_done = 0;
    lltask->task = task;

    if (ff_queue_push_back(lltask_queue, lltask) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to push back lltask_queue.\n");
        av_freep(&lltask);
        return AVERROR(ENOMEM);
    }
    return 0;
}

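/**
 * Look up the named input operand and report its data type and NHWC
 * dimensions (height, width, channels) to the caller. Only batch size 1
 * is supported.
 */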
static int get_input_native(void *model, DNNData *input, const char *input_name)
{
    NativeModel *native_model = model;
    NativeContext *ctx = &native_model->ctx;

    for (int i = 0; i < native_model->operands_num; ++i) {
        DnnOperand *oprd = &native_model->operands[i];
        if (strcmp(oprd->name, input_name) == 0) {
            if (oprd->type != DOT_INPUT) {
                av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
                return AVERROR(EINVAL);
            }
            input->dt = oprd->data_type;
            av_assert0(oprd->dims[0] == 1);
            input->height = oprd->dims[1];
            input->width = oprd->dims[2];
            input->channels = oprd->dims[3];
            return 0;
        }
    }

    // the input operand was not found in the model
    av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
    return AVERROR(EINVAL);
}

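/**
 * Infer the output width/height for the given input size: a temporary task
 * is filled via ff_dnn_fill_gettingoutput_task(), the model is executed once,
 * and the dimensions are read back from the task's output frame.
 */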
static int get_output_native(void *model, const char *input_name, int input_width, int input_height,
                             const char *output_name, int *output_width, int *output_height)
{
    int ret = 0;
    NativeModel *native_model = model;
    NativeContext *ctx = &native_model->ctx;
    TaskItem task;
    DNNExecBaseParams exec_params = {
        .input_name     = input_name,
        .output_names   = &output_name,
        .nb_output      = 1,
        .in_frame       = NULL,
        .out_frame      = NULL,
    };

    ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, native_model, input_height, input_width, ctx);
    if (ret != 0) {
        goto err;
    }

    ret = extract_lltask_from_task(&task, native_model->lltask_queue);
    if (ret != 0) {
        av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from task.\n");
        goto err;
    }

    ret = execute_model_native(native_model->lltask_queue);
    *output_width = task.out_frame->width;
    *output_height = task.out_frame->height;

err:
    av_frame_free(&task.out_frame);
    av_frame_free(&task.in_frame);
    return ret;
}

// Loads a model and its parameters, which are stored in a binary file with the following structure:
// layers_num,layer_type,layer_parameters,layer_type,layer_parameters...
// For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
// For DEPTH_TO_SPACE layer: block_size
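// Note: as read by the loader below, layers_num and operands_num are stored as
// two 32-bit little-endian values in the last 8 bytes of the file, and the
// operand table (index, name, type, data type, 4 dims) follows the layer list.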
DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
{
#define DNN_NATIVE_MAGIC "FFMPEGDNNNATIVE"
    DNNModel *model = NULL;
    // sizeof - 1 to skip the terminating '\0' which is not written in the file
    char buf[sizeof(DNN_NATIVE_MAGIC) - 1];
    int version, header_size, major_version_expected = 1;
    NativeModel *native_model = NULL;
    AVIOContext *model_file_context;
    int file_size, dnn_size, parsed_size;
    int32_t layer;
    DNNLayerType layer_type;

    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
        return NULL;
    }
    file_size = avio_size(model_file_context);

    model = av_mallocz(sizeof(DNNModel));
    if (!model){
        goto fail;
    }

    /**
     * check file header with string and version
     */
    if (avio_read(model_file_context, buf, sizeof(buf)) != sizeof(buf) ||
        memcmp(buf, DNN_NATIVE_MAGIC, sizeof(buf)))
        goto fail;
    dnn_size = sizeof(buf);

    version = (int32_t)avio_rl32(model_file_context);
    dnn_size += 4;
    if (version != major_version_expected) {
        goto fail;
    }

    // currently no need to check minor version
    version = (int32_t)avio_rl32(model_file_context);
    dnn_size += 4;
    header_size = dnn_size;

    native_model = av_mallocz(sizeof(NativeModel));
    if (!native_model){
        goto fail;
    }
    model->model = native_model;

    native_model->ctx.class = &dnn_native_class;
    model->options = options;
    if (av_opt_set_from_string(&native_model->ctx, model->options, NULL, "=", "&") < 0)
        goto fail;
    native_model->model = model;

    if (native_model->ctx.options.async) {
        av_log(&native_model->ctx, AV_LOG_WARNING, "Async not supported. Rolling back to sync\n");
        native_model->ctx.options.async = 0;
    }

#if !HAVE_PTHREAD_CANCEL
    if (native_model->ctx.options.conv2d_threads > 1){
        av_log(&native_model->ctx, AV_LOG_WARNING, "'conv2d_threads' option was set but it is not supported "
                       "on this build (pthread support is required)\n");
    }
#endif

    avio_seek(model_file_context, file_size - 8, SEEK_SET);
    native_model->layers_num = (int32_t)avio_rl32(model_file_context);
    native_model->operands_num = (int32_t)avio_rl32(model_file_context);
    dnn_size += 8;
    avio_seek(model_file_context, header_size, SEEK_SET);

    native_model->layers = av_mallocz(native_model->layers_num * sizeof(Layer));
    if (!native_model->layers){
        goto fail;
    }

    native_model->operands = av_mallocz(native_model->operands_num * sizeof(DnnOperand));
    if (!native_model->operands){
        goto fail;
    }

    native_model->task_queue = ff_queue_create();
    if (!native_model->task_queue) {
        goto fail;
    }

    native_model->lltask_queue = ff_queue_create();
    if (!native_model->lltask_queue) {
        goto fail;
    }

    for (layer = 0; layer < native_model->layers_num; ++layer){
        layer_type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        if (layer_type >= DLT_COUNT) {
            goto fail;
        }

        native_model->layers[layer].type = layer_type;
        parsed_size = ff_layer_funcs[layer_type].pf_load(&native_model->layers[layer], model_file_context, file_size, native_model->operands_num);
        if (!parsed_size) {
            goto fail;
        }
        dnn_size += parsed_size;
    }

    for (int32_t i = 0; i < native_model->operands_num; ++i){
        DnnOperand *oprd;
        int32_t name_len;
        int32_t operand_index = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        if (operand_index >= native_model->operands_num) {
            goto fail;
        }

        oprd = &native_model->operands[operand_index];
        name_len = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
        dnn_size += name_len;

        oprd->type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        oprd->data_type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        for (int32_t dim = 0; dim < 4; ++dim) {
            oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
            dnn_size += 4;
        }
        if (oprd->type == DOT_INPUT && oprd->dims[0] != 1)
            goto fail;

        oprd->isNHWC = 1;
    }

    avio_closep(&model_file_context);

    if (dnn_size != file_size){
        ff_dnn_free_model_native(&model);
        return NULL;
    }

    model->get_input = &get_input_native;
    model->get_output = &get_output_native;
    model->filter_ctx = filter_ctx;
    model->func_type = func_type;

    return model;

fail:
    ff_dnn_free_model_native(&model);
    avio_closep(&model_file_context);
    return NULL;
}

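/**
 * Pop one LastLevelTaskItem from the queue and run the whole network
 * synchronously: copy the input frame into the input operand, execute every
 * layer in load order, then convert the single output operand back into the
 * output frame (or only report its dimensions when do_ioproc is not set).
 */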
static int execute_model_native(Queue *lltask_queue)
{
    NativeModel *native_model = NULL;
    NativeContext *ctx = NULL;
    int32_t layer;
    DNNData input, output;
    DnnOperand *oprd = NULL;
    LastLevelTaskItem *lltask = NULL;
    TaskItem *task = NULL;
    int ret = 0;

    lltask = ff_queue_pop_front(lltask_queue);
    if (!lltask) {
        av_log(NULL, AV_LOG_ERROR, "Failed to get LastLevelTaskItem\n");
        ret = AVERROR(EINVAL);
        goto err;
    }
    task = lltask->task;
    native_model = task->model;
    ctx = &native_model->ctx;

    if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
        av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
        ret = AVERROR(EINVAL);
        goto err;
    }

    for (int i = 0; i < native_model->operands_num; ++i) {
        oprd = &native_model->operands[i];
        if (strcmp(oprd->name, task->input_name) == 0) {
            if (oprd->type != DOT_INPUT) {
                av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", task->input_name);
                ret = AVERROR(EINVAL);
                goto err;
            }
            break;
        }
        oprd = NULL;
    }
    if (!oprd) {
        av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", task->input_name);
        ret = AVERROR(EINVAL);
        goto err;
    }

    oprd->dims[1] = task->in_frame->height;
    oprd->dims[2] = task->in_frame->width;

    av_freep(&oprd->data);
    oprd->length = ff_calculate_operand_data_length(oprd);
    if (oprd->length <= 0) {
        av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n");
        ret = AVERROR(EINVAL);
        goto err;
    }
    oprd->data = av_malloc(oprd->length);
    if (!oprd->data) {
        av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n");
        ret = AVERROR(ENOMEM);
        goto err;
    }

    input.height = oprd->dims[1];
    input.width = oprd->dims[2];
    input.channels = oprd->dims[3];
    input.data = oprd->data;
    input.dt = oprd->data_type;
    if (task->do_ioproc) {
        if (native_model->model->frame_pre_proc != NULL) {
            native_model->model->frame_pre_proc(task->in_frame, &input, native_model->model->filter_ctx);
        } else {
            ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx);
        }
    }

    if (task->nb_output != 1) {
        // currently, the filters do not need multiple outputs,
        // so support for them is postponed until it is really needed.
        avpriv_report_missing_feature(ctx, "multiple outputs");
        ret = AVERROR(ENOSYS);
        goto err;
    }

    for (layer = 0; layer < native_model->layers_num; ++layer){
        DNNLayerType layer_type = native_model->layers[layer].type;
        ret = ff_layer_funcs[layer_type].pf_exec(native_model->operands,
                                                 native_model->layers[layer].input_operand_indexes,
                                                 native_model->layers[layer].output_operand_index,
                                                 native_model->layers[layer].params,
                                                 &native_model->ctx);
        if (ret != 0) {
            av_log(ctx, AV_LOG_ERROR, "Failed to execute model\n");
            goto err;
        }
    }

    for (uint32_t i = 0; i < task->nb_output; ++i) {
        DnnOperand *oprd = NULL;
        const char *output_name = task->output_names[i];
        for (int j = 0; j < native_model->operands_num; ++j) {
            if (strcmp(native_model->operands[j].name, output_name) == 0) {
                oprd = &native_model->operands[j];
                break;
            }
        }

        if (oprd == NULL) {
            av_log(ctx, AV_LOG_ERROR, "Could not find output in model\n");
            ret = AVERROR(EINVAL);
            goto err;
        }

        output.data = oprd->data;
        output.height = oprd->dims[1];
        output.width = oprd->dims[2];
        output.channels = oprd->dims[3];
        output.dt = oprd->data_type;

        if (task->do_ioproc) {
            if (native_model->model->frame_post_proc != NULL) {
                native_model->model->frame_post_proc(task->out_frame, &output, native_model->model->filter_ctx);
            } else {
                ff_proc_from_dnn_to_frame(task->out_frame, &output, ctx);
            }
        } else {
            task->out_frame->width = output.width;
            task->out_frame->height = output.height;
        }
    }
    task->inference_done++;
err:
    av_freep(&lltask);
    return ret;
}

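/**
 * Public entry point: validate the request, queue a TaskItem plus its
 * last-level task, and run it synchronously.
 *
 * A minimal usage sketch for driving this backend directly; the model path,
 * tensor names, option string and caller-side variables (filter_ctx,
 * in_frame, out_frame) below are hypothetical, and real filters reach these
 * functions through the generic DNN interface instead:
 *
 *     DNNModel *model = ff_dnn_load_model_native("srcnn.model", DFT_PROCESS_FRAME,
 *                                                "conv2d_threads=2", filter_ctx);
 *     const char *output_name = "y";
 *     DNNExecBaseParams params = {
 *         .input_name   = "x",
 *         .output_names = &output_name,
 *         .nb_output    = 1,
 *         .in_frame     = in_frame,
 *         .out_frame    = out_frame,
 *     };
 *     if (model && ff_dnn_execute_model_native(model, &params) == 0)
 *         ff_dnn_get_result_native(model, &in_frame, &out_frame);
 *     ff_dnn_free_model_native(&model);
 */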
int ff_dnn_execute_model_native(const DNNModel *model, DNNExecBaseParams *exec_params)
{
    NativeModel *native_model = model->model;
    NativeContext *ctx = &native_model->ctx;
    TaskItem *task;
    int ret = 0;

    ret = ff_check_exec_params(ctx, DNN_NATIVE, model->func_type, exec_params);
    if (ret != 0) {
        return ret;
    }

    task = av_malloc(sizeof(*task));
    if (!task) {
        av_log(ctx, AV_LOG_ERROR, "unable to alloc memory for task item.\n");
        return AVERROR(ENOMEM);
    }

    ret = ff_dnn_fill_task(task, exec_params, native_model, ctx->options.async, 1);
    if (ret != 0) {
        av_freep(&task);
        return ret;
    }

    if (ff_queue_push_back(native_model->task_queue, task) < 0) {
        av_freep(&task);
        av_log(ctx, AV_LOG_ERROR, "unable to push back task_queue.\n");
        return AVERROR(ENOMEM);
    }

    ret = extract_lltask_from_task(task, native_model->lltask_queue);
    if (ret != 0) {
        av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from task.\n");
        return ret;
    }

    return execute_model_native(native_model->lltask_queue);
}

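/**
 * Flush any last-level task still sitting in the queue; since this backend
 * is synchronous, this simply runs the pending task (if any) immediately.
 */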
int ff_dnn_flush_native(const DNNModel *model)
{
    NativeModel *native_model = model->model;

    if (ff_queue_size(native_model->lltask_queue) == 0) {
        // no pending task to flush
        return 0;
    }

    // for now, flushing uses the synchronous path;
    // switch to async when it is supported
    return execute_model_native(native_model->lltask_queue);
}

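/**
 * Hand back the in/out frame pair of the next completed task from the task
 * queue via the common helper.
 */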
DNNAsyncStatusType ff_dnn_get_result_native(const DNNModel *model, AVFrame **in, AVFrame **out)
{
    NativeModel *native_model = model->model;
    return ff_dnn_get_result_common(native_model->task_queue, in, out);
}

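/** Total number of elements in an operand: the product of its four dimensions. */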
int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd)
{
    int32_t result = 1;
    for (int i = 0; i < 4; ++i)
        result *= oprd->dims[i];

    return result;
}

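/**
 * Size in bytes of an operand's data (element count * sizeof(float));
 * returns 0 if the result would exceed INT32_MAX.
 */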
int32_t ff_calculate_operand_data_length(const DnnOperand *oprd)
{
    // currently, only DNN_FLOAT is supported
    uint64_t len = sizeof(float);
    for (int i = 0; i < 4; i++) {
        len *= oprd->dims[i];
        if (len > INT32_MAX)
            return 0;
    }
    return len;
}

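/**
 * Free the model and everything it owns: per-layer parameters (including
 * conv2d kernels and biases), operand data buffers, and both task queues
 * together with any items still queued.
 */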
void ff_dnn_free_model_native(DNNModel **model)
{
    NativeModel *native_model;
    ConvolutionalParams *conv_params;
    int32_t layer;

    if (*model)
    {
        if ((*model)->model) {
            native_model = (*model)->model;
            if (native_model->layers) {
                for (layer = 0; layer < native_model->layers_num; ++layer){
                    if (native_model->layers[layer].type == DLT_CONV2D){
                        conv_params = (ConvolutionalParams *)native_model->layers[layer].params;
                        av_freep(&conv_params->kernel);
                        av_freep(&conv_params->biases);
                    }
                    av_freep(&native_model->layers[layer].params);
                }
                av_freep(&native_model->layers);
            }

            if (native_model->operands) {
                for (uint32_t operand = 0; operand < native_model->operands_num; ++operand)
                    av_freep(&native_model->operands[operand].data);
                av_freep(&native_model->operands);
            }

            while (ff_queue_size(native_model->lltask_queue) != 0) {
                LastLevelTaskItem *item = ff_queue_pop_front(native_model->lltask_queue);
                av_freep(&item);
            }
            ff_queue_destroy(native_model->lltask_queue);

            while (ff_queue_size(native_model->task_queue) != 0) {
                TaskItem *item = ff_queue_pop_front(native_model->task_queue);
                av_frame_free(&item->in_frame);
                av_frame_free(&item->out_frame);
                av_freep(&item);
            }
            ff_queue_destroy(native_model->task_queue);

            av_freep(&native_model);
        }
        av_freep(model);
    }
}