• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010-2011 Kevin Stone
3  * Copyright (C) 2016 Paul B Mahol
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
19  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20  */
21 
22 #include <float.h>
23 
24 #include "libavutil/common.h"
25 #include "libavutil/float_dsp.h"
26 #include "libavutil/imgutils.h"
27 #include "libavutil/mem_internal.h"
28 #include "libavutil/opt.h"
29 #include "libavutil/pixdesc.h"
30 #include "avfilter.h"
31 #include "formats.h"
32 #include "internal.h"
33 #include "video.h"
34 
/* Size constant for the weights data.
 * NOTE(review): presumably the expected byte size of the weights file; its
 * use is not visible in this part of the file -- confirm. */
static const size_t NNEDI_WEIGHTS_SIZE = 13574928;
/* Predictor window widths; the entries line up with the "nsize" option
 * constants (s8x6, s16x6, s32x6, s48x6, s8x4, s16x4, s32x4). */
static const uint8_t NNEDI_XDIM[] = { 8, 16, 32, 48, 8, 16, 32 };
/* Predictor window heights, same ordering as NNEDI_XDIM. */
static const uint8_t NNEDI_YDIM[] = { 6, 6, 6, 6, 4, 4, 4 };
/* Neuron counts; the entries line up with the "nns" option constants
 * (n16 ... n256). */
static const uint16_t NNEDI_NNS[] = { 16, 32, 64, 128, 256 };
39 
/*
 * Weights of one prescreener network.
 *
 * process_old() evaluates all three layers and feeds 48 inputs to each
 * layer-0 neuron; process_new() evaluates only layers 0 and 1 and feeds
 * 64 inputs to each layer-0 neuron.
 *
 * NOTE(review): read_weights() fills prescreener[0].kernel_l0 with 4 * 48
 * contiguous floats, while the rows declared here are 64 floats apart and
 * process_old() indexes kernel_l0[n] -- confirm the intended row layout.
 */
typedef struct PrescreenerCoefficients {
    // Layer 0: 4 neurons, room for up to 64 weights each.
    DECLARE_ALIGNED(32, float, kernel_l0)[4][16 * 4];
    DECLARE_ALIGNED(32, float, bias_l0)[4];

    // Layer 1: 4 neurons fed by the 4 layer-0 outputs.
    DECLARE_ALIGNED(32, float, kernel_l1)[4][4];
    DECLARE_ALIGNED(32, float, bias_l1)[4];

    // Layer 2 (process_old() only): 4 neurons fed by the first 8 state values.
    DECLARE_ALIGNED(32, float, kernel_l2)[4][8];
    DECLARE_ALIGNED(32, float, bias_l2)[4];
} PrescreenerCoefficients;
50 
/*
 * Weights of one predictor network.
 *
 * allocate_model() makes a single allocation (data) and points every other
 * pointer into it. The *_q1 set is always evaluated by predictor(); the
 * *_q2 set is evaluated in addition when predictor() is called with use_q2.
 */
typedef struct PredictorCoefficients {
    // Window width/height, neuron count, and nsize = xdim * ydim.
    int xdim, ydim, nns, nsize;
    float *data;            // base of the allocation backing all pointers below
    float *softmax_q1;      // nns filters of nsize weights each
    float *elliott_q1;      // nns filters of nsize weights each
    float *softmax_bias_q1; // nns biases
    float *elliott_bias_q1; // nns biases
    float *softmax_q2;      // second weight set, same layout as q1
    float *elliott_q2;
    float *softmax_bias_q2;
    float *elliott_bias_q2;
} PredictorCoefficients;
63 
/* Private state of the nnedi filter instance. */
typedef struct NNEDIContext {
    const AVClass *class;

    char *weights_file;     // "weights" option

    AVFrame *prev;          // input frame held back until the next one arrives
    int eof;                // set by request_frame() once the flush frame was queued
    int64_t pts;            // pts assigned to the next output frame by get_frame()

    AVFloatDSPContext *fdsp; // provides scalarproduct_float() for dot_dsp()
    int depth;              // passed to write() as the clipping bit depth
    int nb_planes;
    int nb_threads;
    int linesize[4];        // bytes copied per row when a plane row is copied verbatim
    int planewidth[4];
    int planeheight[4];
    int field_n;            // toggled after each output frame in the both-fields modes

    PrescreenerCoefficients prescreener[4];   // indexed by pscrn - 1
    PredictorCoefficients coeffs[2][5][7];    // indexed [etype][nnsparam][nsize]

    float half;             // not used in the visible part of the file
    float in_scale;         // passed to read()
    float out_scale;        // passed to write()

    // Parameters
    int deint;
    int field;
    int process_plane;      // bit p set: plane p is processed, otherwise copied
    int nsize;
    int nnsparam;
    int qual;
    int etype;
    int pscrn;

    int input_size;
    // Per-job scratch buffers, indexed by jobnr in filter_slice().
    uint8_t **prescreen_buf;
    float **input_buf;
    float **output_buf;

    // Converts plane rows to float and adds 32 samples of padding per side.
    void (*read)(const uint8_t *src, float *dst,
                 int src_stride, int dst_stride,
                 int width, int height, float scale);
    // Converts float rows back to the plane's sample type with clipping.
    void (*write)(const float *src, uint8_t *dst,
                  int src_stride, int dst_stride,
                  int width, int height, int depth, float scale);
    // Prescreeners; filter_slice() picks index 0 for pscrn == 1, 1 for pscrn > 1.
    void (*prescreen[2])(AVFilterContext *ctx,
                         const void *src, ptrdiff_t src_stride,
                         uint8_t *prescreen, int N,
                         const PrescreenerCoefficients *const coeffs);
} NNEDIContext;
115 
// Byte offset of an option's backing field inside NNEDIContext.
#define OFFSET(x) offsetof(NNEDIContext, x)
// Option flags: RFLAGS additionally carries AV_OPT_FLAG_RUNTIME_PARAM.
#define RFLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM

/*
 * User options. Indented AV_OPT_TYPE_CONST entries are the named values of
 * the option above them (matched through the trailing unit string).
 */
static const AVOption nnedi_options[] = {
    {"weights",  "set weights file", OFFSET(weights_file),  AV_OPT_TYPE_STRING, {.str="nnedi3_weights.bin"}, 0, 0, FLAGS },
    {"deint",         "set which frames to deinterlace", OFFSET(deint),         AV_OPT_TYPE_INT, {.i64=0}, 0, 1, RFLAGS, "deint" },
        {"all",        "deinterlace all frames",                       0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "deint" },
        {"interlaced", "only deinterlace frames marked as interlaced", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "deint" },
    // Negative values take the field order from frame flags; -2, 2 and 3 emit both fields.
    {"field",  "set mode of operation", OFFSET(field),         AV_OPT_TYPE_INT, {.i64=-1}, -2, 3, RFLAGS, "field" },
        {"af", "use frame flags, both fields",  0, AV_OPT_TYPE_CONST, {.i64=-2}, 0, 0, RFLAGS, "field" },
        {"a",  "use frame flags, single field", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, RFLAGS, "field" },
        {"t",  "use top field only",            0, AV_OPT_TYPE_CONST, {.i64=0},  0, 0, RFLAGS, "field" },
        {"b",  "use bottom field only",         0, AV_OPT_TYPE_CONST, {.i64=1},  0, 0, RFLAGS, "field" },
        {"tf", "use both fields, top first",    0, AV_OPT_TYPE_CONST, {.i64=2},  0, 0, RFLAGS, "field" },
        {"bf", "use both fields, bottom first", 0, AV_OPT_TYPE_CONST, {.i64=3},  0, 0, RFLAGS, "field" },
    // Bitmask: bit p selects plane p for processing.
    {"planes", "set which planes to process", OFFSET(process_plane), AV_OPT_TYPE_INT, {.i64=7}, 0, 15, RFLAGS },
    // Value is an index into NNEDI_XDIM / NNEDI_YDIM.
    {"nsize",  "set size of local neighborhood around each pixel, used by the predictor neural network", OFFSET(nsize), AV_OPT_TYPE_INT, {.i64=6}, 0, 6, RFLAGS, "nsize" },
        {"s8x6",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "nsize" },
        {"s16x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "nsize" },
        {"s32x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "nsize" },
        {"s48x6",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "nsize" },
        {"s8x4",     NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "nsize" },
        {"s16x4",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=5}, 0, 0, RFLAGS, "nsize" },
        {"s32x4",    NULL, 0, AV_OPT_TYPE_CONST, {.i64=6}, 0, 0, RFLAGS, "nsize" },
    // Value is an index into NNEDI_NNS.
    {"nns",    "set number of neurons in predictor neural network", OFFSET(nnsparam), AV_OPT_TYPE_INT, {.i64=1}, 0, 4, RFLAGS, "nns" },
        {"n16",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "nns" },
        {"n32",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "nns" },
        {"n64",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "nns" },
        {"n128",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "nns" },
        {"n256",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "nns" },
    // qual == 2 makes predictor() evaluate the second weight set as well.
    {"qual",  "set quality", OFFSET(qual), AV_OPT_TYPE_INT, {.i64=1}, 1, 2, RFLAGS, "qual" },
        {"fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "qual" },
        {"slow", NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "qual" },
    {"etype", "set which set of weights to use in the predictor", OFFSET(etype), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, RFLAGS, "etype" },
        {"a",  "weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "etype" },
        {"abs","weights trained to minimize absolute error", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "etype" },
        {"s",  "weights trained to minimize squared error",  0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "etype" },
        {"mse","weights trained to minimize squared error",  0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "etype" },
    // 0 disables prescreening; 1 selects the old prescreener, 2..4 the new one.
    {"pscrn", "set prescreening", OFFSET(pscrn), AV_OPT_TYPE_INT, {.i64=2}, 0, 4, RFLAGS, "pscrn" },
        {"none",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, RFLAGS, "pscrn" },
        {"original",  NULL, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, RFLAGS, "pscrn" },
        {"new",       NULL, 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, RFLAGS, "pscrn" },
        {"new2",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, RFLAGS, "pscrn" },
        {"new3",      NULL, 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, RFLAGS, "pscrn" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(nnedi);
165 
/**
 * Configure the output link from the filter's first input link.
 *
 * The output keeps the input dimensions and uses half the input time base.
 * The frame rate is doubled only in the modes that emit both fields
 * (field == -2 or field > 1).
 *
 * @return 0 always
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    const NNEDIContext *const s = ctx->priv;
    const AVFilterLink *const inlink = ctx->inputs[0];
    const int both_fields = s->field == -2 || s->field > 1;

    outlink->time_base = av_mul_q(inlink->time_base, (AVRational){1, 2});
    outlink->w         = inlink->w;
    outlink->h         = inlink->h;

    if (both_fields)
        outlink->frame_rate = av_mul_q(inlink->frame_rate, (AVRational){2, 1});

    return 0;
}
181 
/*
 * Pixel format list, terminated by AV_PIX_FMT_NONE. Every entry is a planar
 * gray, YUV(A) or GBR(A) format with 8 to 16 bits per sample.
 * NOTE(review): presumably the set of formats the filter advertises; the
 * place where this table is consumed is not visible here -- confirm.
 */
static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_GRAY8,
    AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
    AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
    AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
    AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P,
    AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P,
    AV_PIX_FMT_YUVJ411P,
    AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
    AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP,
    AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
    AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
    AV_PIX_FMT_YUV440P10,
    AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12,
    AV_PIX_FMT_YUV440P12,
    AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14,
    AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
    AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
    AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16,
    AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA422P12, AV_PIX_FMT_YUVA422P16,
    AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA420P16,
    AV_PIX_FMT_GBRAP10,   AV_PIX_FMT_GBRAP12,    AV_PIX_FMT_GBRAP16,
    AV_PIX_FMT_NONE
};
207 
dot_dsp(const NNEDIContext * const s,const float * kernel,const float * input,int n,float scale,float bias)208 static float dot_dsp(const NNEDIContext *const s, const float *kernel, const float *input,
209                      int n, float scale, float bias)
210 {
211     float sum, y;
212 
213     sum = s->fdsp->scalarproduct_float(kernel, input, n);
214 
215     y = sum * scale + bias + 1e-20f;
216 
217     return y;
218 }
219 
/**
 * Elliott activation: x / (1 + |x|), which maps any finite x into (-1, 1).
 */
static float elliott(float x)
{
    const float denom = 1.0f + fabsf(x);

    return x / denom;
}
224 
/**
 * Apply elliott() in place to the first size elements of input.
 */
static void transform_elliott(float *input, int size)
{
    int idx = 0;

    while (idx < size) {
        input[idx] = elliott(input[idx]);
        idx++;
    }
}
230 
/**
 * Prescreener using the three-layer network (selected when pscrn == 1).
 *
 * For each of the N output samples a 4-row by 12-column window around the
 * sample is gathered and run through layers 0..2 of m_data.
 *
 * @param ctx        filter context (its private data supplies the float DSP)
 * @param src        float samples; points at the sample below the first
 *                   missing one (the window starts 2 rows above, 5 columns left)
 * @param src_stride distance between rows of src, in floats
 * @param prescreen  receives N flags: 255 when the pixel is left to
 *                   interpolation(), 0 when predictor() must handle it
 * @param N          number of samples to classify
 * @param m_data     prescreener weights
 */
static void process_old(AVFilterContext *ctx,
                        const void *src, ptrdiff_t src_stride,
                        uint8_t *prescreen, int N,
                        const PrescreenerCoefficients *const m_data)
{
    NNEDIContext *s = ctx->priv;
    const float *src_p = src;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = src_p - 2 * src_stride - 5;

    for (int j = 0; j < N; j++) {
        LOCAL_ALIGNED_32(float, input, [48]);
        // state is fed to scalarproduct_float() as a vector in layers 1 and 2,
        // so it needs the same alignment guarantee as input.
        LOCAL_ALIGNED_32(float, state, [12]);

        for (int i = 0; i < 4; i++)
            memcpy(input + i * 12, window + i * src_stride + j, 12 * sizeof(float));

        // Layer 0. The first output is deliberately left without activation.
        for (int n = 0; n < 4; n++)
            state[n] = dot_dsp(s, m_data->kernel_l0[n], input, 48, 1.0f, m_data->bias_l0[n]);
        transform_elliott(state + 1, 3);

        // Layer 1. Only the first three outputs get the activation.
        for (int n = 0; n < 4; n++)
            state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);
        transform_elliott(state + 4, 3);

        // Layer 2, fed by the 8 values produced so far.
        for (int n = 0; n < 4; n++)
            state[n + 8] = dot_dsp(s, m_data->kernel_l2[n], state, 8, 1.0f, m_data->bias_l2[n]);

        prescreen[j] = FFMAX(state[10], state[11]) <= FFMAX(state[8], state[9]) ? 255 : 0;
    }
}
266 
/**
 * Prescreener using the two-layer network (selected when pscrn > 1).
 *
 * Each iteration gathers a 4-row by 16-column window and classifies four
 * consecutive output samples at once.
 *
 * @param ctx        filter context (its private data supplies the float DSP)
 * @param src        float samples; points at the sample below the first
 *                   missing one (the window starts 2 rows above, 6 columns left)
 * @param src_stride distance between rows of src, in floats
 * @param prescreen  receives N flags: non-zero when the pixel is left to
 *                   interpolation(), 0 when predictor() must handle it
 * @param N          number of samples to classify; need not be a multiple of 4
 * @param m_data     prescreener weights
 */
static void process_new(AVFilterContext *ctx,
                        const void *src, ptrdiff_t src_stride,
                        uint8_t *prescreen, int N,
                        const PrescreenerCoefficients *const m_data)
{
    NNEDIContext *s = ctx->priv;
    const float *src_p = src;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = src_p - 2 * src_stride - 6;

    for (int j = 0; j < N; j += 4) {
        LOCAL_ALIGNED_32(float, input, [64]);
        // state is fed to scalarproduct_float() as a vector in layer 1,
        // so it needs the same alignment guarantee as input.
        LOCAL_ALIGNED_32(float, state, [8]);

        for (int i = 0; i < 4; i++)
            memcpy(input + i * 16, window + i * src_stride + j, 16 * sizeof(float));

        for (int n = 0; n < 4; n++)
            state[n] = dot_dsp(s, m_data->kernel_l0[n], input, 64, 1.0f, m_data->bias_l0[n]);
        transform_elliott(state, 4);

        for (int n = 0; n < 4; n++)
            state[n + 4] = dot_dsp(s, m_data->kernel_l1[n], state, 4, 1.0f, m_data->bias_l1[n]);

        // The last group may be partial: never store past prescreen[N - 1].
        for (int n = 0; n < 4 && j + n < N; n++)
            prescreen[j + n] = state[n + 4] > 0.f;
    }
}
296 
filter_offset(int nn,const PredictorCoefficients * const model)297 static int filter_offset(int nn, const PredictorCoefficients *const model)
298 {
299     return nn * model->nsize;
300 }
301 
softmax_q1_filter(int nn,const PredictorCoefficients * const model)302 static const float *softmax_q1_filter(int nn,
303                                       const PredictorCoefficients *const model)
304 {
305     return model->softmax_q1 + filter_offset(nn, model);
306 }
307 
elliott_q1_filter(int nn,const PredictorCoefficients * const model)308 static const float *elliott_q1_filter(int nn,
309                                       const PredictorCoefficients *const model)
310 {
311     return model->elliott_q1 + filter_offset(nn, model);
312 }
313 
softmax_q2_filter(int nn,const PredictorCoefficients * const model)314 static const float *softmax_q2_filter(int nn,
315                                       const PredictorCoefficients *const model)
316 {
317     return model->softmax_q2 + filter_offset(nn, model);
318 }
319 
elliott_q2_filter(int nn,const PredictorCoefficients * const model)320 static const float *elliott_q2_filter(int nn,
321                                       const PredictorCoefficients *const model)
322 {
323     return model->elliott_q2 + filter_offset(nn, model);
324 }
325 
gather_input(const float * src,ptrdiff_t src_stride,float * buf,float mstd[4],const PredictorCoefficients * const model)326 static void gather_input(const float *src, ptrdiff_t src_stride,
327                          float *buf, float mstd[4],
328                          const PredictorCoefficients *const model)
329 {
330     const float scale = 1.f / model->nsize;
331     float sum = 0.f;
332     float sum_sq = 0.f;
333     float tmp;
334 
335     for (int i = 0; i < model->ydim; i++) {
336         memcpy(buf, src, model->xdim * sizeof(float));
337 
338         for (int j = 0; j < model->xdim; j++) {
339             const float val = src[j];
340 
341             sum += val;
342             sum_sq += val * val;
343         }
344 
345         src += src_stride;
346         buf += model->xdim;
347     }
348 
349     mstd[0] = sum * scale;
350     mstd[3] = 0.f;
351 
352     tmp = sum_sq * scale - mstd[0] * mstd[0];
353     if (tmp < FLT_EPSILON) {
354         mstd[1] = 0.0f;
355         mstd[2] = 0.0f;
356     } else {
357         mstd[1] = sqrtf(tmp);
358         mstd[2] = 1.0f / mstd[1];
359     }
360 }
361 
/**
 * exp(x) with x clamped to [-80, 80] before exponentiation.
 */
static float softmax_exp(float x)
{
    const float clamped = av_clipf(x, -80.f, 80.f);

    return expf(clamped);
}
366 
/**
 * Apply softmax_exp() in place to the first size elements of input.
 */
static void transform_softmax_exp(float *input, int size)
{
    int idx = 0;

    while (idx < size) {
        input[idx] = softmax_exp(input[idx]);
        idx++;
    }
}
372 
/**
 * Combine the n neuron outputs into one prediction and add it to mstd[3].
 *
 * The prediction is the softmax-weighted average of elliott(el[i]), scaled
 * by 5 and by the window deviation mstd[1], plus the window mean mstd[0].
 * When the weight sum is not above 1e-10 only the mean is added.
 *
 * @param softmax already-exponentiated softmax weights
 * @param el      raw outputs of the elliott neurons
 * @param n       number of neurons
 * @param mstd    window statistics; [3] is the accumulator updated here
 */
static void wae5(const float *softmax, const float *el,
                 int n, float mstd[4])
{
    float weighted = 0.0f;
    float weights = 0.0f;

    for (int k = 0; k < n; k++) {
        weighted += softmax[k] * elliott(el[k]);
        weights  += softmax[k];
    }

    if (!(weights > 1e-10f)) {
        mstd[3] += mstd[0];
        return;
    }

    mstd[3] += (5.0f * weighted) / weights * mstd[1] + mstd[0];
}
388 
/**
 * Predict the samples of one missing line with the predictor network.
 *
 * Samples whose prescreen flag is non-zero are skipped and their dst entry
 * is left untouched. For the others the model window is gathered, the
 * first weight set is evaluated and, if use_q2 is set, the second one too;
 * with two sets the result is the average of both predictions.
 *
 * @param ctx        filter context (its private data supplies the float DSP)
 * @param src        float samples; the window starts ydim / 2 rows above and
 *                   xdim / 2 - 1 columns to the left of each sample
 * @param src_stride distance between rows of src, in floats
 * @param dst        receives N predicted floats
 * @param prescreen  N flags; non-zero means "do not predict this sample"
 * @param N          number of samples in the line
 * @param model      predictor weights
 * @param use_q2     non-zero to evaluate the second weight set as well
 */
static void predictor(AVFilterContext *ctx,
                      const void *src, ptrdiff_t src_stride, void *dst,
                      const uint8_t *prescreen, int N,
                      const PredictorCoefficients *const model, int use_q2)
{
    const NNEDIContext *const s = ctx->priv;
    const float *in = src;
    float *out = dst;

    // Adjust source pointer to point to top-left of filter window.
    const float *window = in - (model->ydim / 2) * src_stride - (model->xdim / 2 - 1);
    const int taps = model->nsize;
    const int neurons = model->nns;
    // With two weight sets mstd[3] holds the sum of two predictions.
    const float gain = use_q2 ? 0.5f : 1.f;

    for (int x = 0; x < N; x++) {
        LOCAL_ALIGNED_32(float, input, [48 * 6]);
        float activation[256 * 2];
        float *const sm_out = activation;
        float *const el_out = activation + neurons;
        float mstd[4];
        float inv_std;

        if (prescreen[x])
            continue;

        gather_input(window + x, src_stride, input, mstd, model);
        inv_std = mstd[2];

        for (int nn = 0; nn < neurons; nn++) {
            sm_out[nn] = dot_dsp(s, softmax_q1_filter(nn, model), input, taps,
                                 inv_std, model->softmax_bias_q1[nn]);
            el_out[nn] = dot_dsp(s, elliott_q1_filter(nn, model), input, taps,
                                 inv_std, model->elliott_bias_q1[nn]);
        }

        transform_softmax_exp(sm_out, neurons);
        wae5(sm_out, el_out, neurons, mstd);

        if (use_q2) {
            for (int nn = 0; nn < neurons; nn++) {
                sm_out[nn] = dot_dsp(s, softmax_q2_filter(nn, model), input, taps,
                                     inv_std, model->softmax_bias_q2[nn]);
                el_out[nn] = dot_dsp(s, elliott_q2_filter(nn, model), input, taps,
                                     inv_std, model->elliott_bias_q2[nn]);
            }

            transform_softmax_exp(sm_out, neurons);
            wae5(sm_out, el_out, neurons, mstd);
        }

        out[x] = mstd[3] * gain;
    }
}
438 
/**
 * Convert rows of 8-bit samples to float and pad them horizontally.
 *
 * Each destination row gets 32 mirrored samples before dst[0] and 32 after
 * dst[width - 1]. When the row is narrower than the padding, the mirror
 * index is clamped to the row so that no sample outside [0, width) is read.
 *
 * @param src        first source row
 * @param dst        first destination sample (must have 32 floats of room
 *                   on both sides of every row)
 * @param src_stride distance between source rows, in bytes
 * @param dst_stride distance between destination rows, in floats
 * @param width      samples per row; nothing is done when it is <= 0
 * @param height     number of rows
 * @param scale      unused for 8-bit input
 */
static void read_bytes(const uint8_t *src, float *dst,
                       int src_stride, int dst_stride,
                       int width, int height, float scale)
{
    const int pad = 32;

    if (width <= 0)
        return;

    for (int y = 0; y < height; y++) {
        // Left padding: mirror around the first sample.
        for (int x = 0; x < pad; x++) {
            const int from = x < width ? x : width - 1;

            dst[-x - 1] = src[from];
        }

        for (int x = 0; x < width; x++)
            dst[x] = src[x];

        // Right padding: mirror around the last sample.
        for (int x = 0; x < pad; x++) {
            const int from = x < width ? width - x - 1 : 0;

            dst[width + x] = src[from];
        }

        dst += dst_stride;
        src += src_stride;
    }
}
457 
/**
 * Convert rows of 16-bit samples to scaled floats and pad them horizontally.
 *
 * Each destination row gets 32 mirrored samples before dst[0] and 32 after
 * dst[width - 1]. When the row is narrower than the padding, the mirror
 * index is clamped to the row so that no sample outside [0, width) is read.
 *
 * @param srcp       first source row, holding uint16_t samples
 * @param dst        first destination sample (must have 32 floats of room
 *                   on both sides of every row)
 * @param src_stride distance between source rows, in bytes
 * @param dst_stride distance between destination rows, in floats
 * @param width      samples per row; nothing is done when it is <= 0
 * @param height     number of rows
 * @param scale      factor applied to every sample
 */
static void read_words(const uint8_t *srcp, float *dst,
                       int src_stride, int dst_stride,
                       int width, int height, float scale)
{
    const uint16_t *src = (const uint16_t *)srcp;
    const int pad = 32;

    if (width <= 0)
        return;

    // The stride is given in bytes; convert it to 16-bit samples.
    src_stride /= 2;

    for (int y = 0; y < height; y++) {
        // Left padding: mirror around the first sample.
        for (int x = 0; x < pad; x++) {
            const int from = x < width ? x : width - 1;

            dst[-x - 1] = src[from] * scale;
        }

        for (int x = 0; x < width; x++)
            dst[x] = src[x] * scale;

        // Right padding: mirror around the last sample.
        for (int x = 0; x < pad; x++) {
            const int from = x < width ? width - x - 1 : 0;

            dst[width + x] = src[from] * scale;
        }

        dst += dst_stride;
        src += src_stride;
    }
}
480 
/**
 * Store rows of floats as 8-bit samples, clipping each value with
 * av_clip_uint8().
 *
 * @param src        first source row
 * @param dst        first destination row
 * @param src_stride distance between source rows, in floats
 * @param dst_stride distance between destination rows, in bytes
 * @param width      samples per row
 * @param height     number of rows
 * @param depth      unused for 8-bit output
 * @param scale      unused for 8-bit output
 */
static void write_bytes(const float *src, uint8_t *dst,
                        int src_stride, int dst_stride,
                        int width, int height, int depth,
                        float scale)
{
    const float *row_in = src;
    uint8_t *row_out = dst;

    for (int rows_left = height; rows_left > 0; rows_left--) {
        for (int x = 0; x < width; x++)
            row_out[x] = av_clip_uint8(row_in[x]);

        row_in  += src_stride;
        row_out += dst_stride;
    }
}
494 
/**
 * Store rows of floats as 16-bit samples: every value is multiplied by
 * scale and clipped to depth bits with av_clip_uintp2_c().
 *
 * @param src        first source row
 * @param dstp       first destination row, holding uint16_t samples
 * @param src_stride distance between source rows, in floats
 * @param dst_stride distance between destination rows, in bytes
 * @param width      samples per row
 * @param height     number of rows
 * @param depth      bit depth used for clipping
 * @param scale      factor applied before clipping
 */
static void write_words(const float *src, uint8_t *dstp,
                        int src_stride, int dst_stride,
                        int width, int height, int depth,
                        float scale)
{
    const float *row_in = src;
    uint16_t *row_out = (uint16_t *)dstp;

    // The stride is given in bytes; convert it to 16-bit samples.
    dst_stride /= 2;

    for (int rows_left = height; rows_left > 0; rows_left--) {
        for (int x = 0; x < width; x++)
            row_out[x] = av_clip_uintp2_c(row_in[x] * scale, depth);

        row_in  += src_stride;
        row_out += dst_stride;
    }
}
512 
/**
 * Fill the prescreened samples of a missing line by vertical interpolation.
 *
 * For every i with a non-zero prescreen flag, dst[i] is the 4-tap
 * combination (-3, 19, 19, -3) / 32 of the samples in the column, taken
 * from the rows at -2, -1, 0 and +1 relative to src. Other entries of dst
 * are left untouched.
 *
 * @param src        float samples of the row at offset 0
 * @param src_stride distance between rows of src, in floats
 * @param dst        receives the interpolated floats
 * @param prescreen  n flags selecting the samples to interpolate
 * @param n          number of samples in the line
 */
static void interpolation(const void *src, ptrdiff_t src_stride,
                          void *dst, const uint8_t *prescreen, int n)
{
    const float k_outer = -3.0f / 32.0f;
    const float k_inner = 19.0f / 32.0f;
    const float *row0 = (const float *)src - 2 * src_stride;
    const float *row1 = row0 + src_stride;
    const float *row2 = row1 + src_stride;
    const float *row3 = row2 + src_stride;
    float *out = dst;

    for (int x = 0; x < n; x++) {
        if (prescreen[x]) {
            float acc = 0.0f;

            acc += k_outer * row0[x];
            acc += k_inner * row1[x];
            acc += k_inner * row2[x];
            acc += k_outer * row3[x];

            out[x] = acc;
        }
    }
}
534 
/**
 * Slice worker: produce job jobnr's rows of the output frame for every plane.
 *
 * For each plane the rows of the kept field are copied from s->prev, and the
 * rows of the other field are generated by prescreen / predictor /
 * interpolation from the kept rows.
 *
 * @param ctx     filter context
 * @param arg     output AVFrame
 * @param jobnr   index of this job; also selects the per-job scratch buffers
 * @param nb_jobs total number of jobs
 * @return 0 always
 */
static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    const NNEDIContext *const s = ctx->priv;
    AVFrame *out = arg;
    AVFrame *in = s->prev;
    const float in_scale = s->in_scale;
    const float out_scale = s->out_scale;
    const int depth = s->depth;
    const int interlaced = in->interlaced_frame;
    // With field < 0 the order comes from the frame flags (top first when the
    // frame is not marked interlaced); otherwise from the low bit of field.
    // The result is compared with field_n, which toggles between outputs.
    const int tff = s->field_n == (s->field < 0 ? interlaced ? in->top_field_first : 1 :
                                  (s->field & 1) ^ 1);


    for (int p = 0; p < s->nb_planes; p++) {
        const int height = s->planeheight[p];
        const int width = s->planewidth[p];
        // Slice bounds are always even, so a slice holds whole row pairs.
        const int slice_start = 2 * ((height / 2 * jobnr) / nb_jobs);
        const int slice_end = 2 * ((height / 2 * (jobnr+1)) / nb_jobs);
        const uint8_t *src_data = in->data[p];
        uint8_t *dst_data = out->data[p];
        uint8_t *dst = out->data[p] + slice_start * out->linesize[p];
        const int src_linesize = in->linesize[p];
        const int dst_linesize = out->linesize[p];
        uint8_t *prescreen_buf = s->prescreen_buf[jobnr];
        float *srcbuf = s->input_buf[jobnr];
        // Each float row carries 32 padding samples on both sides.
        const int srcbuf_stride = width + 64;
        float *dstbuf = s->output_buf[jobnr];
        const int dstbuf_stride = width;
        const int slice_height = (slice_end - slice_start) / 2;
        // NOTE(review): slice_end is even, so this is only ever true for even
        // plane heights -- confirm the handling of odd heights.
        const int last_slice = slice_end == height;
        const uint8_t *in_line;
        uint8_t *out_line;
        int y_out;

        // Planes not selected by the "planes" bitmask are copied unchanged.
        if (!(s->process_plane & (1 << p))) {
            av_image_copy_plane(dst, out->linesize[p],
                                in->data[p] + slice_start * in->linesize[p],
                                in->linesize[p],
                                s->linesize[p], slice_end - slice_start);
            continue;
        }

        // Step 1: copy every second row, starting at slice_start + tff
        // (slice_start is even), straight from input to output.
        y_out    = slice_start + (tff ^ (slice_start & 1));
        in_line  = src_data + (y_out * src_linesize);
        out_line = dst_data + (y_out * dst_linesize);

        while (y_out < slice_end) {
            memcpy(out_line, in_line, s->linesize[p]);
            y_out += 2;
            in_line  += src_linesize * 2;
            out_line += dst_linesize * 2;
        }

        // Step 2: y_out is now the first row to generate. Load the copied
        // rows around it into the float buffer: three context rows above
        // (row index clamped from below with FFMAX) ...
        y_out = slice_start + ((!tff) ^ (slice_start & 1));

        s->read(src_data + FFMAX(y_out - 5, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        s->read(src_data + FFMAX(y_out - 3, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        s->read(src_data + FFMAX(y_out - 1, tff) * src_linesize,
                srcbuf + 32,
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);
        srcbuf += srcbuf_stride;

        // ... then the rows below each generated row. srcbuf now points at
        // the row directly below the first generated row.
        in_line  = src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize;
        out_line = dst_data + (y_out * dst_linesize);

        s->read(in_line, srcbuf + 32, src_linesize * 2, srcbuf_stride,
                width, slice_height - last_slice, in_scale);

        y_out += (slice_height - last_slice) * 2;

        // ... and three more rows with the row index clamped from above
        // with FFMIN.
        s->read(src_data + FFMIN(y_out + 1, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        s->read(src_data + FFMIN(y_out + 3, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height + 1 - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        s->read(src_data + FFMIN(y_out + 5, height - 1 - !tff) * src_linesize,
                srcbuf + 32 + srcbuf_stride * (slice_height + 2 - last_slice),
                src_linesize * 2, srcbuf_stride,
                width, 1, in_scale);

        // Step 3: generate one float row per missing line.
        for (int y = 0; y < slice_end - slice_start; y += 2) {
            // Flag the samples that simple interpolation can handle.
            if (s->pscrn > 0)
                s->prescreen[s->pscrn > 1](ctx, srcbuf + (y / 2) * srcbuf_stride + 32,
                             srcbuf_stride, prescreen_buf, width,
                             &s->prescreener[s->pscrn - 1]);

            // The predictor handles every sample whose flag is zero.
            predictor(ctx,
                      srcbuf + (y / 2) * srcbuf_stride + 32,
                      srcbuf_stride,
                      dstbuf + (y / 2) * dstbuf_stride,
                      prescreen_buf, width,
                      &s->coeffs[s->etype][s->nnsparam][s->nsize], s->qual == 2);

            // Interpolation handles the flagged ones.
            if (s->pscrn > 0)
                interpolation(srcbuf + (y / 2) * srcbuf_stride + 32,
                              srcbuf_stride,
                              dstbuf + (y / 2) * dstbuf_stride,
                              prescreen_buf, width);
        }

        // Step 4: store the generated rows into every second output row.
        s->write(dstbuf, out_line, dstbuf_stride, dst_linesize * 2,
                 width, slice_height, depth, out_scale);
    }

    return 0;
}
657 
/**
 * Build one output frame from s->prev and send it downstream.
 *
 * The frame inherits the properties of s->prev, is marked progressive and
 * gets s->pts as its timestamp. In the both-fields modes (field == -2 or
 * field > 1) field_n is toggled afterwards so that the next call generates
 * the other field.
 *
 * @param ctx       filter context
 * @param is_second unused
 * @return the result of ff_filter_frame(), AVERROR(ENOMEM) when no output
 *         buffer could be obtained, or the error from av_frame_copy_props()
 */
static int get_frame(AVFilterContext *ctx, int is_second)
{
    NNEDIContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    AVFrame *dst;
    int ret;

    dst = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!dst)
        return AVERROR(ENOMEM);

    // Copying properties can fail; do not send a half-initialized frame.
    ret = av_frame_copy_props(dst, s->prev);
    if (ret < 0) {
        av_frame_free(&dst);
        return ret;
    }
    dst->interlaced_frame = 0;
    dst->pts = s->pts;

    ff_filter_execute(ctx, filter_slice, dst, NULL,
                      FFMIN(s->planeheight[1] / 2, s->nb_threads));

    if (s->field == -2 || s->field > 1)
        s->field_n = !s->field_n;

    return ff_filter_frame(outlink, dst);
}
679 
/**
 * Input callback. Processing runs one frame behind: the frame stored in
 * s->prev is the one that gets deinterlaced, and `in` replaces it.
 *
 * - With no stored frame, `in` is stored and nothing is output.
 * - If the filter is disabled, or deint is set and the stored frame is not
 *   marked interlaced, the stored frame is forwarded with its pts doubled.
 * - Otherwise one frame is generated, plus a second one in the both-fields
 *   modes; the second frame's pts is the sum of the stored and new pts.
 *
 * @return 0 or the first negative error code encountered
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    NNEDIContext *s = ctx->priv;
    int ret;

    if (!s->prev) {
        s->prev = in;
        return 0;
    }

    if ((s->deint && !s->prev->interlaced_frame) || ctx->is_disabled) {
        // Pass-through: ownership of the stored frame moves downstream.
        s->prev->pts *= 2;
        ret = ff_filter_frame(ctx->outputs[0], s->prev);
        s->prev = in;
        return ret;
    }

    s->pts = s->prev->pts * 2;
    ret = get_frame(ctx, 0);

    // A second frame is only generated after a successful first one and
    // only in the both-fields modes.
    if (ret >= 0 && (s->field <= -2 || s->field >= 2)) {
        s->pts = s->prev->pts + in->pts;
        ret = get_frame(ctx, 1);
    }

    av_frame_free(&s->prev);
    s->prev = in;
    return ret;
}
712 
/**
 * Output request callback. Forwards the request upstream; when upstream
 * reports EOF while a frame is still stored, a clone of that frame with an
 * advanced pts is pushed through filter_frame() once to flush it.
 *
 * @return AVERROR_EOF after the flush has happened, AVERROR(ENOMEM) if the
 *         clone fails, otherwise the result of ff_request_frame() or of the
 *         flushing filter_frame() call
 */
static int request_frame(AVFilterLink *link)
{
    AVFilterContext *ctx = link->src;
    NNEDIContext *s = ctx->priv;
    AVFrame *next;
    int ret;

    if (s->eof)
        return AVERROR_EOF;

    ret = ff_request_frame(ctx->inputs[0]);
    if (ret != AVERROR_EOF || !s->prev)
        return ret;

    // Upstream is done but one frame is still held back: flush it.
    next = av_frame_clone(s->prev);
    if (!next)
        return AVERROR(ENOMEM);

    next->pts = s->prev->pts + av_rescale_q(1, av_inv_q(ctx->outputs[0]->frame_rate),
                                            ctx->outputs[0]->time_base);
    s->eof = 1;

    return filter_frame(ctx->inputs[0], next);
}
741 
/**
 * Copy n floats from the read cursor *data into dst and advance the cursor
 * by n.
 */
static void copy_weights(float *dst, int n, const float **data)
{
    const float *from = *data;

    memcpy(dst, from, n * sizeof(float));
    *data = from + n;
}
747 
/**
 * Carve size floats off the front of the region at *ptr.
 *
 * @return the previous value of *ptr; *ptr itself is advanced by size
 */
static float *allocate(float **ptr, int size)
{
    float *const start = *ptr;

    *ptr = start + size;

    return start;
}
756 
/**
 * Allocate zeroed storage for one predictor model and set up its pointers.
 *
 * A single buffer holds, in order: softmax and elliott filters of the first
 * set, their biases, then the same four arrays for the second set. Each
 * filter array holds nns * xdim * ydim floats and each bias array nns floats.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the allocation fails
 */
static int allocate_model(PredictorCoefficients *coeffs, int xdim, int ydim, int nns)
{
    const int filter_size = nns * xdim * ydim;
    const int bias_size = nns;
    float *cursor = av_calloc(filter_size + bias_size, 4 * sizeof(float));

    if (!cursor)
        return AVERROR(ENOMEM);

    coeffs->data  = cursor;
    coeffs->xdim  = xdim;
    coeffs->ydim  = ydim;
    coeffs->nsize = xdim * ydim;
    coeffs->nns   = nns;

    // First weight set.
    coeffs->softmax_q1      = allocate(&cursor, filter_size);
    coeffs->elliott_q1      = allocate(&cursor, filter_size);
    coeffs->softmax_bias_q1 = allocate(&cursor, bias_size);
    coeffs->elliott_bias_q1 = allocate(&cursor, bias_size);

    // Second weight set.
    coeffs->softmax_q2      = allocate(&cursor, filter_size);
    coeffs->elliott_q2      = allocate(&cursor, filter_size);
    coeffs->softmax_bias_q2 = allocate(&cursor, bias_size);
    coeffs->elliott_bias_q2 = allocate(&cursor, bias_size);

    return 0;
}
785 
/**
 * Distribute the raw weights blob into the prescreener and predictor tables.
 *
 * The blob is consumed sequentially: the old prescreener, then the three new
 * prescreeners, then every predictor model ordered by the outer coeffs index,
 * neuron count and window size.
 *
 * @param ctx   filter context whose private data receives the weights
 * @param bdata start of the weights blob
 * @return 0 on success, a negative error code if a model allocation fails
 */
static int read_weights(AVFilterContext *ctx, const float *bdata)
{
    NNEDIContext *s = ctx->priv;
    int ret;

    /*
     * Old prescreener, layer 0: 4 kernels of 48 taps each. The rows of
     * kernel_l0 are wider than 48 floats, so every kernel has to be copied
     * into its own row. A single contiguous copy of 4 * 48 floats would
     * spill kernels across rows, while subtract_mean_old() reads each row
     * as one 48-tap kernel.
     */
    for (int n = 0; n < 4; n++)
        copy_weights(s->prescreener[0].kernel_l0[n], 48, &bdata);
    copy_weights(s->prescreener[0].bias_l0, 4, &bdata);

    copy_weights(&s->prescreener[0].kernel_l1[0][0], 4 * 4, &bdata);
    copy_weights(s->prescreener[0].bias_l1, 4, &bdata);

    copy_weights(&s->prescreener[0].kernel_l2[0][0], 4 * 8, &bdata);
    copy_weights(s->prescreener[0].bias_l2, 4, &bdata);

    /* New prescreeners: stored shuffled in the blob, reordered per neuron here. */
    for (int i = 0; i < 3; i++) {
        PrescreenerCoefficients *data = &s->prescreener[i + 1];
        float kernel_l0_shuffled[4 * 64];
        float kernel_l1_shuffled[4 * 4];

        copy_weights(kernel_l0_shuffled, 4 * 64, &bdata);
        copy_weights(data->bias_l0, 4, &bdata);

        copy_weights(kernel_l1_shuffled, 4 * 4, &bdata);
        copy_weights(data->bias_l1, 4, &bdata);

        for (int n = 0; n < 4; n++) {
            /* Layer 0 is interleaved in groups of 8 taps per neuron. */
            for (int k = 0; k < 64; k++)
                data->kernel_l0[n][k] = kernel_l0_shuffled[(k / 8) * 32 + n * 8 + k % 8];
            /* Layer 1 is stored transposed. */
            for (int k = 0; k < 4; k++)
                data->kernel_l1[n][k] = kernel_l1_shuffled[k * 4 + n];
        }
    }

    for (int m = 0; m < 2; m++) {
        // Grouping by neuron count.
        for (int i = 0; i < 5; i++) {
            const int nns = NNEDI_NNS[i];

            // Grouping by window size.
            for (int j = 0; j < 7; j++) {
                PredictorCoefficients *model = &s->coeffs[m][i][j];
                const int xdim = NNEDI_XDIM[j];
                const int ydim = NNEDI_YDIM[j];
                const int filter_size = xdim * ydim;

                ret = allocate_model(model, xdim, ydim, nns);
                if (ret < 0)
                    return ret;

                // Quality 1 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
                copy_weights(model->softmax_q1, nns * filter_size, &bdata);
                copy_weights(model->elliott_q1, nns * filter_size, &bdata);

                // Quality 1 model bias. NNS[i] * 2 coefficients.
                copy_weights(model->softmax_bias_q1, nns, &bdata);
                copy_weights(model->elliott_bias_q1, nns, &bdata);

                // Quality 2 model. NNS[i] * (XDIM[j] * YDIM[j]) * 2 coefficients.
                copy_weights(model->softmax_q2, nns * filter_size, &bdata);
                copy_weights(model->elliott_q2, nns * filter_size, &bdata);

                // Quality 2 model bias. NNS[i] * 2 coefficients.
                copy_weights(model->softmax_bias_q2, nns, &bdata);
                copy_weights(model->elliott_bias_q2, nns, &bdata);
            }
        }
    }

    return 0;
}
856 
/**
 * Arithmetic mean of size floats, accumulated in single precision in
 * array order.
 *
 * @param input values to average
 * @param size  number of values
 * @return sum of the values divided by size
 */
static float mean(const float *input, int size)
{
    const float *cur = input;
    float total = 0.f;

    for (int left = size; left > 0; left--)
        total += *cur++;

    return total / size;
}
866 
/**
 * Shift and scale values in place: each element becomes
 * (element - mean) / half.
 *
 * @param input values to rewrite
 * @param size  number of values
 * @param mean  offset subtracted from every element
 * @param half  divisor applied after the subtraction
 */
static void transform(float *input, int size, float mean, float half)
{
    for (float *cur = input; size > 0; size--, cur++)
        *cur = (*cur - mean) / half;
}
872 
/**
 * Normalise the layer-0 kernels of the old prescreener: for each of the 4
 * rows, subtract the mean of its first 48 taps and divide by half.
 *
 * @param coeffs prescreener coefficients, modified in place
 * @param half   divisor passed on to transform()
 */
static void subtract_mean_old(PrescreenerCoefficients *coeffs, float half)
{
    enum { NEURONS = 4, TAPS = 48 };

    for (int neuron = 0; neuron < NEURONS; neuron++) {
        float *kernel = coeffs->kernel_l0[neuron];
        const float avg = mean(kernel, TAPS);

        transform(kernel, TAPS, avg, half);
    }
}
881 
/**
 * Normalise the layer-0 kernels of a new prescreener: for each of the 4
 * rows, subtract the mean of its 64 taps and divide by half.
 *
 * @param coeffs prescreener coefficients, modified in place
 * @param half   divisor passed on to transform()
 */
static void subtract_mean_new(PrescreenerCoefficients *coeffs, float half)
{
    enum { NEURONS = 4, TAPS = 64 };

    for (int neuron = 0; neuron < NEURONS; neuron++) {
        float *kernel = coeffs->kernel_l0[neuron];
        const float avg = mean(kernel, TAPS);

        transform(kernel, TAPS, avg, half);
    }
}
890 
/**
 * Remove the means from one quality level of a predictor model, in place.
 *
 * Every softmax filter has its own mean and the pointwise mean filter
 * subtracted, every elliott filter has its own mean subtracted, and the
 * softmax biases have their mean subtracted.
 *
 * @param softmax      nns softmax filters of filter_size taps each
 * @param elliott      nns elliott filters of filter_size taps each
 * @param softmax_bias nns softmax biases
 * @param nns          neuron count
 * @param filter_size  taps per filter
 */
static void subtract_mean_quality(float *softmax, float *elliott, float *softmax_bias,
                                  int nns, int filter_size)
{
    const float scale = 1.f / nns;

    double softmax_means[256]; // Average of individual softmax filters.
    double elliott_means[256]; // Average of individual elliott filters.
    double mean_filter[48 * 6] = { 0 }; // Pointwise average of all softmax filters.
    double mean_bias;

    /* Per-filter means, plus the running sum for the pointwise mean filter. */
    for (int nn = 0; nn < nns; nn++) {
        const float *softmax_row = softmax + nn * filter_size;

        softmax_means[nn] = mean(softmax_row, filter_size);
        elliott_means[nn] = mean(elliott + nn * filter_size, filter_size);

        for (int k = 0; k < filter_size; k++)
            mean_filter[k] += softmax_row[k] - softmax_means[nn];
    }

    for (int k = 0; k < filter_size; k++)
        mean_filter[k] *= scale;

    mean_bias = mean(softmax_bias, nns);

    for (int nn = 0; nn < nns; nn++) {
        float *softmax_row = softmax + nn * filter_size;
        float *elliott_row = elliott + nn * filter_size;

        for (int k = 0; k < filter_size; k++) {
            softmax_row[k] -= softmax_means[nn] + mean_filter[k];
            elliott_row[k] -= elliott_means[nn];
        }

        softmax_bias[nn] -= mean_bias;
    }
}

/**
 * Remove the means from both quality levels of a predictor model.
 *
 * @param model predictor coefficients, modified in place
 */
static void subtract_mean_predictor(PredictorCoefficients *model)
{
    const int filter_size = model->nsize;
    const int nns = model->nns;

    // Quality 1.
    subtract_mean_quality(model->softmax_q1, model->elliott_q1,
                          model->softmax_bias_q1, nns, filter_size);

    // Quality 2.
    subtract_mean_quality(model->softmax_q2, model->elliott_q2,
                          model->softmax_bias_q2, nns, filter_size);
}
950 
init(AVFilterContext * ctx)951 static av_cold int init(AVFilterContext *ctx)
952 {
953     NNEDIContext *s = ctx->priv;
954     FILE *weights_file = NULL;
955     int64_t weights_size;
956     float *bdata;
957     size_t bytes_read;
958     int ret = 0;
959 
960     weights_file = avpriv_fopen_utf8(s->weights_file, "rb");
961     if (!weights_file) {
962         av_log(ctx, AV_LOG_ERROR, "No weights file provided, aborting!\n");
963         return AVERROR(EINVAL);
964     }
965 
966     if (fseek(weights_file, 0, SEEK_END)) {
967         av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the end of weights file.\n");
968         fclose(weights_file);
969         return AVERROR(EINVAL);
970     }
971 
972     weights_size = ftell(weights_file);
973 
974     if (weights_size == -1) {
975         fclose(weights_file);
976         av_log(ctx, AV_LOG_ERROR, "Couldn't get size of weights file.\n");
977         return AVERROR(EINVAL);
978     } else if (weights_size != NNEDI_WEIGHTS_SIZE) {
979         fclose(weights_file);
980         av_log(ctx, AV_LOG_ERROR, "Unexpected weights file size.\n");
981         return AVERROR(EINVAL);
982     }
983 
984     if (fseek(weights_file, 0, SEEK_SET)) {
985         fclose(weights_file);
986         av_log(ctx, AV_LOG_ERROR, "Couldn't seek to the start of weights file.\n");
987         return AVERROR(EINVAL);
988     }
989 
990     bdata = av_malloc(NNEDI_WEIGHTS_SIZE);
991     if (!bdata) {
992         fclose(weights_file);
993         return AVERROR(ENOMEM);
994     }
995 
996     bytes_read = fread(bdata, 1, NNEDI_WEIGHTS_SIZE, weights_file);
997     if (bytes_read != NNEDI_WEIGHTS_SIZE) {
998         fclose(weights_file);
999         ret = AVERROR_INVALIDDATA;
1000         av_log(ctx, AV_LOG_ERROR, "Couldn't read weights file.\n");
1001         goto fail;
1002     }
1003 
1004     fclose(weights_file);
1005 
1006     s->fdsp = avpriv_float_dsp_alloc(0);
1007     if (!s->fdsp) {
1008         ret = AVERROR(ENOMEM);
1009         goto fail;
1010     }
1011 
1012     ret = read_weights(ctx, bdata);
1013     if (ret < 0)
1014         goto fail;
1015 
1016 fail:
1017     av_free(bdata);
1018     return ret;
1019 }
1020 
/**
 * Input link configuration callback.
 *
 * Records the pixel depth, thread count, plane count, line sizes and plane
 * dimensions, selects the read/write routines by depth, applies mean
 * subtraction to the prescreener and predictor coefficients, and allocates
 * the per-thread input, output and prescreen buffers.
 *
 * @param inlink input link being configured
 * @return 0 on success, a negative AVERROR code on failure
 */
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    NNEDIContext *s = ctx->priv;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
    const int chroma_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w);
    const int chroma_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h);
    int ret;

    s->depth      = desc->comp[0].depth;
    s->nb_threads = ff_filter_get_nb_threads(ctx);
    s->nb_planes  = av_pix_fmt_count_planes(inlink->format);

    ret = av_image_fill_linesizes(s->linesize, inlink->format, inlink->w);
    if (ret < 0)
        return ret;

    /* Planes 1 and 2 use the subsampled size; planes 0 and 3 the full size. */
    s->planewidth[2]  = chroma_w;
    s->planewidth[1]  = chroma_w;
    s->planewidth[3]  = inlink->w;
    s->planewidth[0]  = inlink->w;
    s->planeheight[2] = chroma_h;
    s->planeheight[1] = chroma_h;
    s->planeheight[3] = inlink->h;
    s->planeheight[0] = inlink->h;

    s->half      = ((1 << 8) - 1) / 2.f;
    s->out_scale = 1 << (s->depth - 8);
    s->in_scale  = 1.f / s->out_scale;

    /* 8-bit input uses the byte routines, every other depth the word ones. */
    if (s->depth == 8) {
        s->read  = read_bytes;
        s->write = write_bytes;
    } else {
        s->read  = read_words;
        s->write = write_words;
    }

    /* Prescreener 0 is the old layout, prescreeners 1..3 the new one. */
    subtract_mean_old(&s->prescreener[0], s->half);
    for (int p = 1; p <= 3; p++)
        subtract_mean_new(&s->prescreener[p], s->half);

    s->prescreen[0] = process_old;
    s->prescreen[1] = process_new;

    for (int m = 0; m < 2; m++)
        for (int n = 0; n < 5; n++)
            for (int w = 0; w < 7; w++)
                subtract_mean_predictor(&s->coeffs[m][n][w]);

    s->input_size = (s->planewidth[0] + 64) * (s->planeheight[0] + 6);

    /* One input buffer per thread. */
    s->input_buf = av_calloc(s->nb_threads, sizeof(*s->input_buf));
    if (!s->input_buf)
        return AVERROR(ENOMEM);
    for (int t = 0; t < s->nb_threads; t++) {
        s->input_buf[t] = av_calloc(s->input_size, sizeof(**s->input_buf));
        if (!s->input_buf[t])
            return AVERROR(ENOMEM);
    }

    /* One output buffer per thread, same element count as the input buffer. */
    s->output_buf = av_calloc(s->nb_threads, sizeof(*s->output_buf));
    if (!s->output_buf)
        return AVERROR(ENOMEM);
    for (int t = 0; t < s->nb_threads; t++) {
        s->output_buf[t] = av_calloc(s->input_size, sizeof(**s->output_buf));
        if (!s->output_buf[t])
            return AVERROR(ENOMEM);
    }

    /* One prescreen row per thread, as wide as plane 0. */
    s->prescreen_buf = av_calloc(s->nb_threads, sizeof(*s->prescreen_buf));
    if (!s->prescreen_buf)
        return AVERROR(ENOMEM);
    for (int t = 0; t < s->nb_threads; t++) {
        s->prescreen_buf[t] = av_calloc(s->planewidth[0], sizeof(**s->prescreen_buf));
        if (!s->prescreen_buf[t])
            return AVERROR(ENOMEM);
    }

    return 0;
}
1102 
uninit(AVFilterContext * ctx)1103 static av_cold void uninit(AVFilterContext *ctx)
1104 {
1105     NNEDIContext *s = ctx->priv;
1106 
1107     for (int i = 0; i < s->nb_threads && s->prescreen_buf; i++)
1108         av_freep(&s->prescreen_buf[i]);
1109 
1110     av_freep(&s->prescreen_buf);
1111 
1112     for (int i = 0; i < s->nb_threads && s->input_buf; i++)
1113         av_freep(&s->input_buf[i]);
1114 
1115     av_freep(&s->input_buf);
1116 
1117     for (int i = 0; i < s->nb_threads && s->output_buf; i++)
1118         av_freep(&s->output_buf[i]);
1119 
1120     av_freep(&s->output_buf);
1121     av_freep(&s->fdsp);
1122 
1123     for (int i = 0; i < 2; i++) {
1124         for (int j = 0; j < 5; j++) {
1125             for (int k = 0; k < 7; k++) {
1126                 av_freep(&s->coeffs[i][j][k].data);
1127             }
1128         }
1129     }
1130 
1131     av_frame_free(&s->prev);
1132 }
1133 
1134 static const AVFilterPad inputs[] = {
1135     {
1136         .name          = "default",
1137         .type          = AVMEDIA_TYPE_VIDEO,
1138         .filter_frame  = filter_frame,
1139         .config_props  = config_input,
1140     },
1141 };
1142 
1143 static const AVFilterPad outputs[] = {
1144     {
1145         .name          = "default",
1146         .type          = AVMEDIA_TYPE_VIDEO,
1147         .config_props  = config_output,
1148         .request_frame = request_frame,
1149     },
1150 };
1151 
1152 const AVFilter ff_vf_nnedi = {
1153     .name          = "nnedi",
1154     .description   = NULL_IF_CONFIG_SMALL("Apply neural network edge directed interpolation intra-only deinterlacer."),
1155     .priv_size     = sizeof(NNEDIContext),
1156     .priv_class    = &nnedi_class,
1157     .init          = init,
1158     .uninit        = uninit,
1159     FILTER_INPUTS(inputs),
1160     FILTER_OUTPUTS(outputs),
1161     FILTER_PIXFMTS_ARRAY(pix_fmts),
1162     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
1163     .process_command = ff_filter_process_command,
1164 };
1165