1 /*
2 * Copyright (c) 2018 Sergey Lavrushkin
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 /**
22 * @file
23 * DNN native backend implementation.
24 */
25
26 #include "dnn_backend_native.h"
27 #include "libavutil/avassert.h"
28 #include "dnn_backend_native_layer_conv2d.h"
29 #include "dnn_backend_native_layers.h"
30 #include "dnn_io_proc.h"
31 #include "dnn_backend_common.h"
32
#define OFFSET(x) offsetof(NativeContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
// User-settable options of the native backend; parsed from the filter's
// option string by av_opt_set_from_string() in ff_dnn_load_model_native().
static const AVOption dnn_native_options[] = {
    { "conv2d_threads", "threads num for conv2d layer", OFFSET(options.conv2d_threads), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, FLAGS },
    { "async", "use DNN async inference", OFFSET(options.async), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
    { NULL },
};

// AVClass bound to NativeContext so av_log() and the AVOption API work on it.
static const AVClass dnn_native_class = {
    .class_name = "dnn_native",
    .item_name = av_default_item_name,
    .option = dnn_native_options,
    .version = LIBAVUTIL_VERSION_INT,
    .category = AV_CLASS_CATEGORY_FILTER,
};

// forward declaration: needed by get_output_native() below
static int execute_model_native(Queue *lltask_queue);
50
/**
 * Wrap @p task in a freshly allocated LastLevelTaskItem and append it to
 * @p lltask_queue. The native backend maps one task to exactly one
 * inference, so the task's todo/done counters are reset here.
 *
 * @return 0 on success, AVERROR(ENOMEM) on allocation or queue failure.
 */
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
    NativeModel *native_model = task->model;
    NativeContext *ctx = &native_model->ctx;
    LastLevelTaskItem *item;

    item = av_malloc(sizeof(*item));
    if (!item) {
        av_log(ctx, AV_LOG_ERROR, "Unable to allocate space for LastLevelTaskItem\n");
        return AVERROR(ENOMEM);
    }

    item->task = task;
    task->inference_todo = 1;
    task->inference_done = 0;

    if (ff_queue_push_back(lltask_queue, item) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to push back lltask_queue.\n");
        av_freep(&item);
        return AVERROR(ENOMEM);
    }
    return 0;
}
72
/**
 * Fill @p input with the data type and NHWC dimensions of the model
 * operand named @p input_name. The operand must be of type DOT_INPUT
 * and have batch size 1.
 *
 * @return 0 on success, AVERROR(EINVAL) if the name is missing or the
 *         operand is not an input node.
 */
static int get_input_native(void *model, DNNData *input, const char *input_name)
{
    NativeModel *native_model = model;
    NativeContext *ctx = &native_model->ctx;

    for (int i = 0; i < native_model->operands_num; ++i) {
        DnnOperand *oprd = &native_model->operands[i];

        if (strcmp(oprd->name, input_name))
            continue;

        if (oprd->type != DOT_INPUT) {
            av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name);
            return AVERROR(EINVAL);
        }

        input->dt = oprd->data_type;
        av_assert0(oprd->dims[0] == 1); // only batch size 1 is supported
        input->height = oprd->dims[1];
        input->width = oprd->dims[2];
        input->channels = oprd->dims[3];
        return 0;
    }

    // no operand with the requested name exists in the model
    av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name);
    return AVERROR(EINVAL);
}
98
get_output_native(void * model,const char * input_name,int input_width,int input_height,const char * output_name,int * output_width,int * output_height)99 static int get_output_native(void *model, const char *input_name, int input_width, int input_height,
100 const char *output_name, int *output_width, int *output_height)
101 {
102 int ret = 0;
103 NativeModel *native_model = model;
104 NativeContext *ctx = &native_model->ctx;
105 TaskItem task;
106 DNNExecBaseParams exec_params = {
107 .input_name = input_name,
108 .output_names = &output_name,
109 .nb_output = 1,
110 .in_frame = NULL,
111 .out_frame = NULL,
112 };
113
114 ret = ff_dnn_fill_gettingoutput_task(&task, &exec_params, native_model, input_height, input_width, ctx);
115 if (ret != 0) {
116 goto err;
117 }
118
119 ret = extract_lltask_from_task(&task, native_model->lltask_queue);
120 if (ret != 0) {
121 av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from task.\n");
122 goto err;
123 }
124
125 ret = execute_model_native(native_model->lltask_queue);
126 *output_width = task.out_frame->width;
127 *output_height = task.out_frame->height;
128
129 err:
130 av_frame_free(&task.out_frame);
131 av_frame_free(&task.in_frame);
132 return ret;
133 }
134
// Loads model and its parameters that are stored in a binary file with following structure:
// layers_num,layer_type,layer_parameters,layer_type,layer_parameters...
// For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
// For DEPTH_TO_SPACE layer: block_size
DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
{
#define DNN_NATIVE_MAGIC "FFMPEGDNNNATIVE"
    DNNModel *model = NULL;
    // sizeof - 1 to skip the terminating '\0' which is not written in the file
    char buf[sizeof(DNN_NATIVE_MAGIC) - 1];
    int version, header_size, major_version_expected = 1;
    NativeModel *native_model = NULL;
    AVIOContext *model_file_context;
    // dnn_size counts every byte consumed; it must equal file_size at the end
    int file_size, dnn_size, parsed_size;
    int32_t layer;
    DNNLayerType layer_type;

    if (avio_open(&model_file_context, model_filename, AVIO_FLAG_READ) < 0){
        return NULL;
    }
    file_size = avio_size(model_file_context);

    model = av_mallocz(sizeof(DNNModel));
    if (!model){
        goto fail;
    }

    /**
     * check file header with string and version
     */
    if (avio_read(model_file_context, buf, sizeof(buf)) != sizeof(buf) ||
        memcmp(buf, DNN_NATIVE_MAGIC, sizeof(buf)))
        goto fail;
    dnn_size = sizeof(buf);

    version = (int32_t)avio_rl32(model_file_context);
    dnn_size += 4;
    if (version != major_version_expected) {
        goto fail;
    }

    // currently no need to check minor version
    version = (int32_t)avio_rl32(model_file_context);
    dnn_size += 4;
    header_size = dnn_size;

    native_model = av_mallocz(sizeof(NativeModel));
    if (!native_model){
        goto fail;
    }
    model->model = native_model;

    // bind the AVClass so av_log()/AVOption calls work on the context, then
    // parse the user's "key=value&key=value" option string into ctx.options
    native_model->ctx.class = &dnn_native_class;
    model->options = options;
    if (av_opt_set_from_string(&native_model->ctx, model->options, NULL, "=", "&") < 0)
        goto fail;
    native_model->model = model;

    // async is not implemented for the native backend; force sync mode
    if (native_model->ctx.options.async) {
        av_log(&native_model->ctx, AV_LOG_WARNING, "Async not supported. Rolling back to sync\n");
        native_model->ctx.options.async = 0;
    }

#if !HAVE_PTHREAD_CANCEL
    if (native_model->ctx.options.conv2d_threads > 1){
        av_log(&native_model->ctx, AV_LOG_WARNING, "'conv2d_threads' option was set but it is not supported "
                       "on this build (pthread support is required)\n");
    }
#endif

    // layers_num and operands_num are stored as the trailing 8 bytes of the
    // file; read them, then seek back to just past the header to parse layers
    avio_seek(model_file_context, file_size - 8, SEEK_SET);
    native_model->layers_num = (int32_t)avio_rl32(model_file_context);
    native_model->operands_num = (int32_t)avio_rl32(model_file_context);
    dnn_size += 8;
    avio_seek(model_file_context, header_size, SEEK_SET);

    native_model->layers = av_mallocz(native_model->layers_num * sizeof(Layer));
    if (!native_model->layers){
        goto fail;
    }

    native_model->operands = av_mallocz(native_model->operands_num * sizeof(DnnOperand));
    if (!native_model->operands){
        goto fail;
    }

    native_model->task_queue = ff_queue_create();
    if (!native_model->task_queue) {
        goto fail;
    }

    native_model->lltask_queue = ff_queue_create();
    if (!native_model->lltask_queue) {
        goto fail;
    }

    // each layer record: 4-byte type tag followed by type-specific
    // parameters, parsed by the per-type loader in ff_layer_funcs
    for (layer = 0; layer < native_model->layers_num; ++layer){
        layer_type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        if (layer_type >= DLT_COUNT) {
            goto fail;
        }

        native_model->layers[layer].type = layer_type;
        parsed_size = ff_layer_funcs[layer_type].pf_load(&native_model->layers[layer], model_file_context, file_size, native_model->operands_num);
        if (!parsed_size) {
            // pf_load returns 0 on parse failure
            goto fail;
        }
        dnn_size += parsed_size;
    }

    // operand table: for each operand its index, name, type, data type and
    // four NHWC dimensions
    for (int32_t i = 0; i < native_model->operands_num; ++i){
        DnnOperand *oprd;
        int32_t name_len;
        int32_t operand_index = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        if (operand_index >= native_model->operands_num) {
            goto fail;
        }

        oprd = &native_model->operands[operand_index];
        name_len = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
        dnn_size += name_len;

        oprd->type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        oprd->data_type = (int32_t)avio_rl32(model_file_context);
        dnn_size += 4;

        for (int32_t dim = 0; dim < 4; ++dim) {
            oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
            dnn_size += 4;
        }
        // input operands must have batch size 1 (dims[0] == N in NHWC)
        if (oprd->type == DOT_INPUT && oprd->dims[0] != 1)
            goto fail;

        oprd->isNHWC = 1;
    }

    avio_closep(&model_file_context);

    // every byte of the file must have been consumed, otherwise it is corrupt
    if (dnn_size != file_size){
        ff_dnn_free_model_native(&model);
        return NULL;
    }

    model->get_input = &get_input_native;
    model->get_output = &get_output_native;
    model->filter_ctx = filter_ctx;
    model->func_type = func_type;

    return model;

fail:
    // ff_dnn_free_model_native() handles partially-constructed models
    ff_dnn_free_model_native(&model);
    avio_closep(&model_file_context);
    return NULL;
}
299
/**
 * Pop one LastLevelTaskItem from @p lltask_queue and run its task
 * synchronously through all model layers. When task->do_ioproc is unset,
 * only the output frame's width/height are written (used by
 * get_output_native() to probe geometry).
 *
 * @return 0 on success, a negative AVERROR on failure.
 */
static int execute_model_native(Queue *lltask_queue)
{
    NativeModel *native_model = NULL;
    NativeContext *ctx = NULL;
    int32_t layer;
    DNNData input, output;
    DnnOperand *oprd = NULL;
    LastLevelTaskItem *lltask = NULL;
    TaskItem *task = NULL;
    int ret = 0;

    lltask = ff_queue_pop_front(lltask_queue);
    if (!lltask) {
        // no ctx available yet, so log against NULL
        av_log(NULL, AV_LOG_ERROR, "Failed to get LastLevelTaskItem\n");
        ret = AVERROR(EINVAL);
        goto err;
    }
    task = lltask->task;
    native_model = task->model;
    ctx = &native_model->ctx;

    if (native_model->layers_num <= 0 || native_model->operands_num <= 0) {
        av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n");
        ret = AVERROR(EINVAL);
        goto err;
    }

    // locate the input operand by name; leaves oprd == NULL if not found
    for (int i = 0; i < native_model->operands_num; ++i) {
        oprd = &native_model->operands[i];
        if (strcmp(oprd->name, task->input_name) == 0) {
            if (oprd->type != DOT_INPUT) {
                av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", task->input_name);
                ret = AVERROR(EINVAL);
                goto err;
            }
            break;
        }
        oprd = NULL;
    }
    if (!oprd) {
        av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", task->input_name);
        ret = AVERROR(EINVAL);
        goto err;
    }

    // the input operand takes its height/width from the incoming frame
    oprd->dims[1] = task->in_frame->height;
    oprd->dims[2] = task->in_frame->width;

    // (re)allocate the input buffer for the possibly-new dimensions
    av_freep(&oprd->data);
    oprd->length = ff_calculate_operand_data_length(oprd);
    if (oprd->length <= 0) {
        // 0 signals 32-bit overflow of the computed byte length
        av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n");
        ret = AVERROR(EINVAL);
        goto err;
    }
    oprd->data = av_malloc(oprd->length);
    if (!oprd->data) {
        av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n");
        ret = AVERROR(ENOMEM);
        goto err;
    }

    input.height = oprd->dims[1];
    input.width = oprd->dims[2];
    input.channels = oprd->dims[3];
    input.data = oprd->data;
    input.dt = oprd->data_type;
    if (task->do_ioproc) {
        // copy/convert the frame into the input operand, preferring a
        // filter-supplied pre-proc callback over the generic conversion
        if (native_model->model->frame_pre_proc != NULL) {
            native_model->model->frame_pre_proc(task->in_frame, &input, native_model->model->filter_ctx);
        } else {
            ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx);
        }
    }

    if (task->nb_output != 1) {
        // currently, the filter does not need multiple outputs,
        // so we just pending the support until we really need it.
        avpriv_report_missing_feature(ctx, "multiple outputs");
        ret = AVERROR(ENOSYS);
        goto err;
    }

    // run every layer in file order; each reads and writes operands in place
    for (layer = 0; layer < native_model->layers_num; ++layer){
        DNNLayerType layer_type = native_model->layers[layer].type;
        ret = ff_layer_funcs[layer_type].pf_exec(native_model->operands,
                                                 native_model->layers[layer].input_operand_indexes,
                                                 native_model->layers[layer].output_operand_index,
                                                 native_model->layers[layer].params,
                                                 &native_model->ctx);
        if (ret != 0) {
            av_log(ctx, AV_LOG_ERROR, "Failed to execute model\n");
            goto err;
        }
    }

    // hand the requested output operand(s) back to the caller
    for (uint32_t i = 0; i < task->nb_output; ++i) {
        DnnOperand *oprd = NULL;
        const char *output_name = task->output_names[i];
        for (int j = 0; j < native_model->operands_num; ++j) {
            if (strcmp(native_model->operands[j].name, output_name) == 0) {
                oprd = &native_model->operands[j];
                break;
            }
        }

        if (oprd == NULL) {
            av_log(ctx, AV_LOG_ERROR, "Could not find output in model\n");
            ret = AVERROR(EINVAL);
            goto err;
        }

        output.data = oprd->data;
        output.height = oprd->dims[1];
        output.width = oprd->dims[2];
        output.channels = oprd->dims[3];
        output.dt = oprd->data_type;

        if (task->do_ioproc) {
            // convert the output operand back into the caller's frame
            if (native_model->model->frame_post_proc != NULL) {
                native_model->model->frame_post_proc(task->out_frame, &output, native_model->model->filter_ctx);
            } else {
                ff_proc_from_dnn_to_frame(task->out_frame, &output, ctx);
            }
        } else {
            // geometry probe only (see get_output_native())
            task->out_frame->width = output.width;
            task->out_frame->height = output.height;
        }
    }
    task->inference_done++;
err:
    av_freep(&lltask);
    return ret;
}
434
/**
 * Public entry point: validate @p exec_params, queue a new task for the
 * model and run it synchronously. The task is kept on task_queue so that
 * ff_dnn_get_result_native() can later hand back its frames.
 *
 * @return 0 on success, a negative AVERROR on failure.
 */
int ff_dnn_execute_model_native(const DNNModel *model, DNNExecBaseParams *exec_params)
{
    NativeModel *native_model = model->model;
    NativeContext *ctx = &native_model->ctx;
    TaskItem *task_item;
    int status;

    status = ff_check_exec_params(ctx, DNN_NATIVE, model->func_type, exec_params);
    if (status != 0)
        return status;

    task_item = av_malloc(sizeof(*task_item));
    if (!task_item) {
        av_log(ctx, AV_LOG_ERROR, "unable to alloc memory for task item.\n");
        return AVERROR(ENOMEM);
    }

    status = ff_dnn_fill_task(task_item, exec_params, native_model, ctx->options.async, 1);
    if (status != 0) {
        av_freep(&task_item);
        return status;
    }

    if (ff_queue_push_back(native_model->task_queue, task_item) < 0) {
        av_log(ctx, AV_LOG_ERROR, "unable to push back task_queue.\n");
        av_freep(&task_item);
        return AVERROR(ENOMEM);
    }

    // the task is now owned by task_queue; on failure here it will be
    // reclaimed by ff_dnn_free_model_native()
    status = extract_lltask_from_task(task_item, native_model->lltask_queue);
    if (status != 0) {
        av_log(ctx, AV_LOG_ERROR, "unable to extract last level task from task.\n");
        return status;
    }

    return execute_model_native(native_model->lltask_queue);
}
473
ff_dnn_flush_native(const DNNModel * model)474 int ff_dnn_flush_native(const DNNModel *model)
475 {
476 NativeModel *native_model = model->model;
477
478 if (ff_queue_size(native_model->lltask_queue) == 0) {
479 // no pending task need to flush
480 return 0;
481 }
482
483 // for now, use sync node with flush operation
484 // Switch to async when it is supported
485 return execute_model_native(native_model->lltask_queue);
486 }
487
ff_dnn_get_result_native(const DNNModel * model,AVFrame ** in,AVFrame ** out)488 DNNAsyncStatusType ff_dnn_get_result_native(const DNNModel *model, AVFrame **in, AVFrame **out)
489 {
490 NativeModel *native_model = model->model;
491 return ff_dnn_get_result_common(native_model->task_queue, in, out);
492 }
493
ff_calculate_operand_dims_count(const DnnOperand * oprd)494 int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd)
495 {
496 int32_t result = 1;
497 for (int i = 0; i < 4; ++i)
498 result *= oprd->dims[i];
499
500 return result;
501 }
502
ff_calculate_operand_data_length(const DnnOperand * oprd)503 int32_t ff_calculate_operand_data_length(const DnnOperand* oprd)
504 {
505 // currently, we just support DNN_FLOAT
506 uint64_t len = sizeof(float);
507 for (int i = 0; i < 4; i++) {
508 len *= oprd->dims[i];
509 if (len > INT32_MAX)
510 return 0;
511 }
512 return len;
513 }
514
/**
 * Release a native model and everything it owns: per-layer parameters,
 * operand data buffers, both task queues with their queued items, and the
 * DNNModel wrapper itself. Handles partially-constructed models (NULL
 * layers/operands/queues). *model is reset to NULL when it was non-NULL.
 */
void ff_dnn_free_model_native(DNNModel **model)
{
    NativeModel *native_model;
    ConvolutionalParams *conv_params;
    int32_t layer;

    if (*model)
    {
        if ((*model)->model) {
            native_model = (*model)->model;
            if (native_model->layers) {
                for (layer = 0; layer < native_model->layers_num; ++layer){
                    // conv2d params own nested kernel/bias arrays; free those
                    // before the params struct itself
                    if (native_model->layers[layer].type == DLT_CONV2D){
                        conv_params = (ConvolutionalParams *)native_model->layers[layer].params;
                        av_freep(&conv_params->kernel);
                        av_freep(&conv_params->biases);
                    }
                    av_freep(&native_model->layers[layer].params);
                }
                av_freep(&native_model->layers);
            }

            if (native_model->operands) {
                for (uint32_t operand = 0; operand < native_model->operands_num; ++operand)
                    av_freep(&native_model->operands[operand].data);
                av_freep(&native_model->operands);
            }

            // drain pending last-level items (plain allocations, no frames)
            while (ff_queue_size(native_model->lltask_queue) != 0) {
                LastLevelTaskItem *item = ff_queue_pop_front(native_model->lltask_queue);
                av_freep(&item);
            }
            ff_queue_destroy(native_model->lltask_queue);

            // task items own their frames; free the frames before the item
            while (ff_queue_size(native_model->task_queue) != 0) {
                TaskItem *item = ff_queue_pop_front(native_model->task_queue);
                av_frame_free(&item->in_frame);
                av_frame_free(&item->out_frame);
                av_freep(&item);
            }
            ff_queue_destroy(native_model->task_queue);

            av_freep(&native_model);
        }
        av_freep(model);
    }
}
562