• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2018 Sergey Lavrushkin
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * DNN inference functions interface for native backend.
24  */
25 
26 
27 #ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_H
28 #define AVFILTER_DNN_DNN_BACKEND_NATIVE_H
29 
30 #include "../dnn_interface.h"
31 #include "libavformat/avio.h"
32 #include "libavutil/opt.h"
33 #include "queue.h"
34 
/**
 * Kinds of layers understood by the native backend.
 *
 * The numeric values must stay fixed: convert_from_tensorflow.py uses the
 * same values, and they also serve as indexes into the layer
 * execution/load function pointers.
 */
typedef enum {
    DLT_INPUT          = 0,
    DLT_CONV2D         = 1,
    DLT_DEPTH_TO_SPACE = 2,
    DLT_MIRROR_PAD     = 3,
    DLT_MAXIMUM        = 4,
    DLT_MATH_BINARY    = 5,
    DLT_MATH_UNARY     = 6,
    DLT_AVG_POOL       = 7,
    DLT_DENSE          = 8,
    DLT_COUNT /* one past the last layer type; keep it as the final entry */
} DNNLayerType;
52 
/**
 * Role of an operand. The values are bit flags; DOT_INTERMEDIATE is the
 * combination of the input and output bits.
 */
typedef enum {
    DOT_INPUT        = 1,
    DOT_OUTPUT       = 2,
    DOT_INTERMEDIATE = (DOT_INPUT | DOT_OUTPUT)
} DNNOperandType;
/** Padding modes; values are consecutive starting at 0. */
typedef enum {
    VALID              = 0,
    SAME               = 1,
    SAME_CLAMP_TO_EDGE = 2
} DNNPaddingParam;
/** Activation functions; values are consecutive starting at 0. */
typedef enum {
    RELU       = 0,
    TANH       = 1,
    SIGMOID    = 2,
    NONE       = 3,
    LEAKY_RELU = 4
} DNNActivationFunc;
56 
57 typedef struct Layer{
58     DNNLayerType type;
59     /**
60      * a layer can have multiple inputs and one output.
61      * 4 is just a big enough number for input operands (increase it if necessary),
62      * do not use 'int32_t *input_operand_indexes', so we don't worry about mem leaks.
63      */
64     int32_t input_operand_indexes[4];
65     int32_t output_operand_index;
66     void *params;
67 } Layer;
68 
69 typedef struct DnnOperand{
70     /**
71      * there are two memory layouts, NHWC or NCHW, so we use dims,
72      * dims[0] is Number.
73      */
74     int32_t dims[4];
75 
76     /**
77      * input/output/intermediate operand of the network
78      */
79     DNNOperandType type;
80 
81     /**
82      * support different kinds of data type such as float, half float, int8 etc,
83      * first support float now.
84      */
85     DNNDataType data_type;
86 
87     /**
88      * NHWC if 1, otherwise NCHW.
89      * let's first support NHWC only, this flag is for extensive usage.
90      */
91     int8_t isNHWC;
92 
93     /**
94      * to avoid possible memory leak, do not use char *name
95      */
96     char name[128];
97 
98     /**
99      * data pointer with data length in bytes.
100      * usedNumbersLeft is only valid for intermediate operand,
101      * it means how many layers still depend on this operand,
102      * todo: the memory can be reused when usedNumbersLeft is zero.
103      */
104     void *data;
105     int32_t length;
106     int32_t usedNumbersLeft;
107 }DnnOperand;
108 
/** Height, width and channel count of an input. */
typedef struct InputParams {
    int height;
    int width;
    int channels;
} InputParams;
112 
/**
 * User-configurable settings of the native backend.
 *
 * NOTE(review): if these fields are filled in through the AVOption API,
 * confirm that each field is as wide as the storage its option type writes
 * (bool/int option types store an int) -- the option table is not visible
 * from this header.
 */
typedef struct NativeOptions {
    /* presumably a flag selecting asynchronous execution -- confirm against the implementation */
    uint8_t async;
    /* presumably the thread count used by conv2d layers -- confirm against the implementation */
    uint32_t conv2d_threads;
} NativeOptions;
117 
118 typedef struct NativeContext {
119     const AVClass *class;
120     NativeOptions options;
121 } NativeContext;
122 
123 // Represents simple feed-forward convolutional network.
124 typedef struct NativeModel{
125     NativeContext ctx;
126     DNNModel *model;
127     Layer *layers;
128     int32_t layers_num;
129     DnnOperand *operands;
130     int32_t operands_num;
131     Queue *task_queue;
132     Queue *lltask_queue;
133 } NativeModel;
134 
135 DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
136 
137 int ff_dnn_execute_model_native(const DNNModel *model, DNNExecBaseParams *exec_params);
138 
139 DNNAsyncStatusType ff_dnn_get_result_native(const DNNModel *model, AVFrame **in, AVFrame **out);
140 
141 int ff_dnn_flush_native(const DNNModel *model);
142 
143 void ff_dnn_free_model_native(DNNModel **model);
144 
145 // NOTE: User must check for error (return value <= 0) to handle
146 // case like integer overflow.
147 int32_t ff_calculate_operand_data_length(const DnnOperand *oprd);
148 int32_t ff_calculate_operand_dims_count(const DnnOperand *oprd);
149 #endif
150