/*
 * Copyright (c) 2019 Guo Yejun
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stdio.h>
#include <string.h>
#include <math.h>
#include "libavfilter/dnn/dnn_backend_native_layer_conv2d.h"

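/* absolute tolerance used when comparing the layer output with the TensorFlow reference values */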
#define EPSON 0.00001

static int test_with_same_dilate(void)
{
    // the input data and expected data are generated with the Python code below.
    /*
    x = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    y = tf.layers.conv2d(x, 2, 3, activation=tf.nn.tanh, padding='same', dilation_rate=(2, 2), bias_initializer=tf.keras.initializers.he_normal())
    data = np.random.rand(1, 5, 6, 3);

    sess=tf.Session()
    sess.run(tf.global_variables_initializer())

    weights = dict([(var.name, sess.run(var)) for var in tf.trainable_variables()])
    kernel = weights['conv2d/kernel:0']
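    # TF stores conv2d kernels as [h, w, in_ch, out_ch]; reorder to [out_ch, h, w, in_ch], the layout consumed via params.kernel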
    kernel = np.transpose(kernel, [3, 0, 1, 2])
    print("kernel:")
    print(kernel.shape)
    print(list(kernel.flatten()))

    bias = weights['conv2d/bias:0']
    print("bias:")
    print(bias.shape)
    print(list(bias.flatten()))

    output = sess.run(y, feed_dict={x: data})

    print("input:")
    print(data.shape)
    print(list(data.flatten()))

    print("output:")
    print(output.shape)
    print(list(output.flatten()))
    */

    ConvolutionalParams params;
    DnnOperand operands[2];
    int32_t input_indexes[1];
    float input[1*5*6*3] = {
        0.7012556460308194, 0.4233847954643357, 0.19515900664313612, 0.16343083004926495, 0.5758261611052848, 0.9510767434014871, 0.11014085055947687,
        0.906327053637727, 0.8136794715542507, 0.45371764543639526, 0.5768443343523952, 0.19543668786046986, 0.15648326047898609, 0.2099500241141279,
        0.17658777090552413, 0.059335724777169196, 0.1729991838469117, 0.8150514704819208, 0.4435535466703049, 0.3752188477566878, 0.749936650421431,
        0.6823494635284907, 0.10776389679424747, 0.34247481674596836, 0.5147867256244629, 0.9063709728129032, 0.12423605800856818, 0.6064872945412728,
        0.5891681538551459, 0.9865836236466314, 0.9002163879294677, 0.003968273184274618, 0.8628374809643967, 0.1327176268279583, 0.8449799925703798,
        0.1937671869354366, 0.41524410152707425, 0.02038786604756837, 0.49792466069597496, 0.8881874553848784, 0.9683921035597336, 0.4122972568010813,
        0.843553550993252, 0.9588482762501964, 0.5190350762645546, 0.4283584264145317, 0.09781496073714646, 0.9501058833776156, 0.8665541760152776,
        0.31669272550095806, 0.07133074675453632, 0.606438007334886, 0.7007157020538224, 0.4827996264130444, 0.5167615606392761, 0.6385043039312651,
        0.23069664707810555, 0.058233497329354456, 0.06323892961591071, 0.24816458893245974, 0.8646369065257812, 0.24742185893094837, 0.09991225948167437,
        0.625700606979606, 0.7678541502111257, 0.6215834594679912, 0.5623003956582483, 0.07389123942681242, 0.7659100715711249, 0.486061471642225,
        0.9947455699829012, 0.9094911797643259, 0.7644355876253265, 0.05384315321492239, 0.13565394382783613, 0.9810628204953316, 0.007386389078887889,
        0.226182754156241, 0.2609021390764772, 0.24182802076928933, 0.13264782451941648, 0.2035816485767682, 0.005504188177612557, 0.7014619934040155,
        0.956215988391991, 0.5670398541013633, 0.9809764721750784, 0.6886338100487461, 0.5758152317218274, 0.7137823176776179
    };
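    /* 'same' padding with stride 1 preserves the 5x6 spatial size, so the layer should produce 5*6*2 values */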
    float expected_output[1*5*6*2] = {
        -0.9480655, -0.7169147, -0.9404794, -0.5567385, -0.8991124, -0.8306558, -0.94487447, -0.8932543, -0.88238764, -0.7301602,
        -0.8974813, -0.7026703, -0.8858988, -0.53203243, -0.92881465, -0.5648504, -0.8871471, -0.7000097, -0.91754407, -0.79684794,
        -0.760465, -0.117928326, -0.88302773, -0.8975289, -0.70615053, 0.19231977, -0.8318776, -0.386184, -0.80698484, -0.8556624,
        -0.7336671, -0.6168619, -0.7658234, -0.63449603, -0.73314047, -0.87502456, -0.58158904, -0.4184259, -0.52618927, -0.13613208,
        -0.5093187, -0.21027721, -0.39455596, -0.44507834, -0.22269244, -0.73400885, -0.77655095, -0.74408925, -0.57313335, -0.15333457,
        -0.74620694, -0.34858236, -0.42586932, -0.5240488, 0.1634339, -0.2447881, -0.57927346, -0.62732303, -0.82287043, -0.8474058
    };
    float *output;
    float kernel[2*3*3*3] = {
        0.26025516, 0.16536498, -0.24351254, 0.33892477, -0.34005195, 0.35202783, 0.34056443, 0.01422739, 0.13799345, 0.29489166,
        0.2781723, 0.178585, 0.22122234, 0.044115514, 0.13134438, 0.31705368, 0.22527462, -0.021323413, 0.115134746, -0.18216397,
        -0.21197563, -0.027848959, -0.01704529, -0.12401503, -0.23415318, -0.12661739, -0.35338148, 0.20049328, -0.076153606,
        -0.23642601, -0.3125769, -0.025851756, -0.30006272, 0.050762743, 0.32003498, 0.3052225, -0.0017385483, 0.25337684, -0.25664508,
        0.27846587, -0.3112659, 0.2066065, 0.31499845, 0.113178134, 0.09449363, -0.11828774, -0.12671001, -0.36259216, 0.2710235,
        -0.19676702, 0.023612618, -0.2596915, -0.34949252, -0.108270735
    };
    float bias[2] = { -1.6574852, -0.72915393 };

    NativeContext ctx;
    ctx.class = NULL;
    ctx.options.conv2d_threads = 1;

    params.activation = TANH;
    params.has_bias = 1;
    params.biases = bias;
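    /* dilation 2 spreads the 3x3 kernel taps over a 5x5 window, matching dilation_rate=(2, 2) in the TF code above */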
    params.dilation = 2;
    params.input_num = 3;
    params.kernel = kernel;
    params.kernel_size = 3;
    params.output_num = 2;
    params.padding_method = SAME;

    operands[0].data = input;
    operands[0].dims[0] = 1;
    operands[0].dims[1] = 5;
    operands[0].dims[2] = 6;
    operands[0].dims[3] = 3;
    operands[1].data = NULL;

    input_indexes[0] = 0;
    ff_dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, &ctx);

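    /* the conv2d layer allocates the output buffer and stores it in operands[1].data */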
    output = operands[1].data;
    for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) {
        if (fabs(output[i] - expected_output[i]) > EPSON) {
            printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output[i]);
            av_freep(&output);
            return 1;
        }
    }

    av_freep(&output);
    return 0;
}

static int test_with_valid(void)
{
    // the input data and expected data are generated with the Python code below.
    /*
    x = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    y = tf.layers.conv2d(x, 2, 3, activation=tf.nn.tanh, padding='valid', bias_initializer=tf.keras.initializers.he_normal())
    data = np.random.rand(1, 5, 6, 3);

    sess=tf.Session()
    sess.run(tf.global_variables_initializer())

    weights = dict([(var.name, sess.run(var)) for var in tf.trainable_variables()])
    kernel = weights['conv2d/kernel:0']
    kernel = np.transpose(kernel, [3, 0, 1, 2])
    print("kernel:")
    print(kernel.shape)
    print(list(kernel.flatten()))

    bias = weights['conv2d/bias:0']
    print("bias:")
    print(bias.shape)
    print(list(bias.flatten()))

    output = sess.run(y, feed_dict={x: data})

    print("input:")
    print(data.shape)
    print(list(data.flatten()))

    print("output:")
    print(output.shape)
    print(list(output.flatten()))
    */

    ConvolutionalParams params;
    DnnOperand operands[2];
    int32_t input_indexes[1];
    float input[1*5*6*3] = {
        0.26126657468269665, 0.42762216215337556, 0.7466274030131497, 0.802550266787863, 0.3709323443076644, 0.5919817068197668, 0.49274512279324967,
        0.7170132295090351, 0.0911793215410649, 0.5134213878288361, 0.670132600785118, 0.49417034512633484, 0.03887389460089885, 0.436785102836845,
        0.1490231658611978, 0.6413606121498127, 0.8595987991375995, 0.9132593077586231, 0.7075959004873255, 0.17754995944845464, 0.5212507214937141,
        0.35379732738215475, 0.25205107358505296, 0.3928792840544273, 0.09485294189485782, 0.8685115437448666, 0.6489046799288605, 0.509253797582924,
        0.8993255536791972, 0.18740056466602373, 0.34237617336313986, 0.3871438962989183, 0.1488532571774911, 0.5187002331293636, 0.8137098818752955,
        0.521761863717401, 0.4622312310118274, 0.29038411334638825, 0.16194915718170566, 0.5175999923925211, 0.8852230040101133, 0.0218263385047206,
        0.08482355352852367, 0.3463638568376264, 0.28627127120619733, 0.9553293378948409, 0.4803391055970835, 0.841635695030805, 0.3556828280031952,
        0.06778527221541808, 0.28193560357091596, 0.8399957619031576, 0.03305536359456385, 0.6625039162109645, 0.9300552020023897, 0.8551529138204146,
        0.6133216915522418, 0.222427800857393, 0.1315422686800336, 0.6189144989185527, 0.5346184916866876, 0.8348888624532548, 0.6544834567840291,
        0.2844062293389934, 0.28780026600883324, 0.5372272015684924, 0.6250226011503823, 0.28119106062279453, 0.49655812908420094, 0.6451488959145951,
        0.7362580606834843, 0.44815578616664087, 0.6454760235835586, 0.6794062414265861, 0.045378883014935756, 0.9008388543865096, 0.7949752851269782,
        0.4179928876222264, 0.28733419007048644, 0.996902319501908, 0.5690851338677467, 0.9511814013279738, 0.025323788678181636, 0.5594359732604794,
        0.1213732595086251, 0.7172624313368294, 0.6759328959074691, 0.07252138454885071, 0.17557735158403442, 0.5988895455048769
    };
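    /* 'valid' padding with a 3x3 kernel shrinks the 5x6 input to 3x4, so the layer should produce 3*4*2 values */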
    float expected_output[1*3*4*2] = {
        -0.556947, -0.42143887, -0.092070885, 0.27404794, -0.41886684, 0.0862887, -0.25001016, -0.342721, 0.020730592, 0.04016919, -0.69839877,
        -0.06136704, 0.14186388, -0.11655602, -0.23489095, -0.3845829, -0.19017771, 0.1595885, -0.18308741, -0.3071209, -0.5848686, -0.22509028,
        -0.6023201, -0.14448485
    };
    float *output;
    float kernel[2*3*3*3] = {
        -0.25291282, 0.22402048, 0.028642118, -0.14615723, -0.27362752, -0.34801802, -0.2759148, 0.19594926, -0.25029412, 0.34606284, 0.10376671,
        -0.1015394, 0.23616093, 0.2134214, 0.35285157, 0.05893758, 0.0024731457, -0.17143056, 0.35758412, 0.2186206, -0.28384736, -0.21206513,
        -0.20871592, 0.27070445, 0.25878823, 0.11136332, -0.33737376, 0.08353335, -0.34290665, 0.041805506, -0.09738535, 0.3284936, -0.16838405,
        -0.032494456, -0.29193437, 0.033259362, -0.09272635, -0.2802651, -0.28648436, 0.3542878, 0.2432127, -0.24551713, 0.27813476, 0.21024024,
        -0.013690501, -0.1350077, -0.07826337, -0.34563828, 0.3220685, -0.07571727, 0.19420576, 0.20783454, 0.18738335, 0.16672492
    };
    float bias[2] = { -0.4773722, -0.19620377 };

    NativeContext ctx;
    ctx.class = NULL;
    ctx.options.conv2d_threads = 1;

    params.activation = TANH;
    params.has_bias = 1;
    params.biases = bias;
    params.dilation = 1;
    params.input_num = 3;
    params.kernel = kernel;
    params.kernel_size = 3;
    params.output_num = 2;
    params.padding_method = VALID;

    operands[0].data = input;
    operands[0].dims[0] = 1;
    operands[0].dims[1] = 5;
    operands[0].dims[2] = 6;
    operands[0].dims[3] = 3;
    operands[1].data = NULL;

    input_indexes[0] = 0;
    ff_dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, &ctx);

    output = operands[1].data;
    for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) {
        if (fabs(output[i] - expected_output[i]) > EPSON) {
            printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output[i]);
            av_freep(&output);
            return 1;
        }
    }

    av_freep(&output);
    return 0;
}

int main(int argc, char **argv)
{
    if (test_with_valid())
        return 1;
    if (test_with_same_dilate())
        return 1;

    return 0;
}