1 /*
2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 /**
24 * @file
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much
32 * higher speed.
33 *
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
36 */
37
38 #include "libavutil/avassert.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem_internal.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/pixdesc.h"
43 #include "internal.h"
44 #include "qp_table.h"
45 #include "vf_fspp.h"
46
47 #define OFFSET(x) offsetof(FSPPContext, x)
48 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
49 static const AVOption fspp_options[] = {
50 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
51 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
52 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
53 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
54 { NULL }
55 };
56
57 AVFILTER_DEFINE_CLASS(fspp);
58
59 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
60 { 0, 48, 12, 60, 3, 51, 15, 63, },
61 { 32, 16, 44, 28, 35, 19, 47, 31, },
62 { 8, 56, 4, 52, 11, 59, 7, 55, },
63 { 40, 24, 36, 20, 43, 27, 39, 23, },
64 { 2, 50, 14, 62, 1, 49, 13, 61, },
65 { 34, 18, 46, 30, 33, 17, 45, 29, },
66 { 10, 58, 6, 54, 9, 57, 5, 53, },
67 { 42, 26, 38, 22, 41, 25, 37, 21, },
68 };
69
/*
 * Base threshold for each of the 64 coefficient positions. filter_frame()
 * rescales these by the strength-dependent bias before packing them into the
 * context's threshold matrix.
 *
 * The largest values (296) can't be too high: that causes too strong a
 * dependence on the quantizer, or maybe overflow (to be checked), which
 * results in some flashing.
 */
static const short custom_threshold[64] = {
     71, 296, 295, 237,  71,  40,  38,  19,
    245, 193, 185, 121, 102,  73,  53,  27,
    158, 129, 141, 107,  97,  73,  50,  26,
    102, 116, 109,  98,  82,  66,  45,  23,
     71,  94,  95,  81,  70,  56,  38,  20,
     56,  77,  74,  66,  56,  44,  30,  15,
     38,  53,  50,  45,  38,  30,  21,  11,
     20,  27,  26,  23,  20,  15,  11,   5
};
83
84 //This func reads from 1 slice, 1 and clears 0 & 1
store_slice_c(uint8_t * dst,int16_t * src,ptrdiff_t dst_stride,ptrdiff_t src_stride,ptrdiff_t width,ptrdiff_t height,ptrdiff_t log2_scale)85 static void store_slice_c(uint8_t *dst, int16_t *src,
86 ptrdiff_t dst_stride, ptrdiff_t src_stride,
87 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
88 {
89 int y, x;
90 #define STORE(pos) \
91 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
92 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
93 if (temp & 0x100) temp = ~(temp >> 31); \
94 dst[x + pos] = temp;
95
96 for (y = 0; y < height; y++) {
97 const uint8_t *d = dither[y];
98 for (x = 0; x < width; x += 8) {
99 int temp;
100 STORE(0);
101 STORE(1);
102 STORE(2);
103 STORE(3);
104 STORE(4);
105 STORE(5);
106 STORE(6);
107 STORE(7);
108 }
109 src += src_stride;
110 dst += dst_stride;
111 }
112 }
113
114 //This func reads from 2 slices, 0 & 2 and clears 2-nd
store_slice2_c(uint8_t * dst,int16_t * src,ptrdiff_t dst_stride,ptrdiff_t src_stride,ptrdiff_t width,ptrdiff_t height,ptrdiff_t log2_scale)115 static void store_slice2_c(uint8_t *dst, int16_t *src,
116 ptrdiff_t dst_stride, ptrdiff_t src_stride,
117 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
118 {
119 int y, x;
120 #define STORE2(pos) \
121 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
122 src[x + pos + 16 * src_stride] = 0; \
123 if (temp & 0x100) temp = ~(temp >> 31); \
124 dst[x + pos] = temp;
125
126 for (y = 0; y < height; y++) {
127 const uint8_t *d = dither[y];
128 for (x = 0; x < width; x += 8) {
129 int temp;
130 STORE2(0);
131 STORE2(1);
132 STORE2(2);
133 STORE2(3);
134 STORE2(4);
135 STORE2(5);
136 STORE2(6);
137 STORE2(7);
138 }
139 src += src_stride;
140 dst += dst_stride;
141 }
142 }
143
/*
 * Multiply each of the 64 entries of thr_adr_noq by q and store the products
 * in thr_adr.
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    const int16_t *in  = thr_adr_noq;
    int16_t *out       = thr_adr;
    int16_t *const end = thr_adr + 64;

    while (out < end)
        *out++ = q * *in++;
}
150
/*
 * Filter one plane.
 *
 * p           filter context: scratch buffers (p->src, p->temp), threshold
 *             matrices and the transform/store callbacks
 * dst, src    output and input plane; nothing is filtered if either is NULL
 * dst_stride, src_stride
 *             line sizes of dst and src
 * width, height
 *             plane dimensions
 * qp_store, qp_stride
 *             quantizer table and its line size; only read when p->qp is 0
 * is_luma     non-zero for the luma plane; selects the scratch stride and
 *             disables the chroma shifts in the quantizer lookup
 *
 * The plane is copied into p->src with an 8-sample mirrored border on every
 * side. For every step-th line the transform callbacks are run across the
 * line and their output is accumulated in p->temp; each time 8 more output
 * lines are complete they are written to dst through store_slice or
 * store_slice2, alternating between the two.
 */
static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
                   int dst_stride, int src_stride,
                   int width, int height,
                   uint8_t *qp_store, int qp_stride, int is_luma)
{
    int x, x0, y, es, qy, t;

    const int chunk     = 8 * (BLOCKSZ - 1);   // entries handled per pass of the x0 loop
    const int stride    = is_luma ? p->temp_stride : (width + 16);
    const int step      = 6 - p->log2_count;
    const int qp_xshift = 4 - p->hsub * !is_luma;
    const int qp_yshift = 4 - p->vsub * !is_luma;
    const int out_scale = 5 - p->log2_count;
    int16_t *const thr     = (int16_t *)(&p->threshold_mtx[0]);
    int16_t *const thr_noq = (int16_t *)(&p->threshold_mtx_noq[0]);

    DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
    int16_t *block  = (int16_t *)block_align;
    int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);

    memset(block3, 0, 4 * 8 * BLOCKSZ);

    if (!src || !dst)
        return;

    /* copy the plane into the scratch buffer and mirror 8 samples left and right */
    for (y = 0; y < height; y++) {
        const int first = 8 + 8 * stride + y * stride;

        memcpy(p->src + first, src + y * src_stride, width);
        for (x = 0; x < 8; x++) {
            p->src[first - x - 1]     = p->src[first + x];
            p->src[first + width + x] = p->src[first + width - x - 1];
        }
    }

    /* mirror 8 lines above and below */
    for (y = 0; y < 8; y++) {
        memcpy(p->src + (7 - y) * stride,
               p->src + (y + 8) * stride, stride);
        memcpy(p->src + (height + 8 + y) * stride,
               p->src + (height - y + 7) * stride, stride);
    }
    //FIXME (try edge emu)

    /* clear lines 8..23 of the accumulation buffer */
    for (y = 8; y < 24; y++)
        memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));

    for (y = step; y < height + 8; y += step) { //step= 1,2
        const int y1   = y - 8 + step;          //l5-7 l4-6;
        const int xoff = 2 - (y & 1);

        /* line of the quantizer table to use, clamped to the plane */
        qy = y - 4;
        if (qy >= height)
            qy = height - 1;
        if (qy < 0)
            qy = 0;
        qy = (qy >> qp_yshift) * qp_stride;

        p->row_fdct(block, p->src + y * stride + xoff, stride, 2);

        for (x0 = 0; x0 < width + 8 - chunk; x0 += chunk) {
            p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + xoff,
                        stride, 2 * (BLOCKSZ - 1));

            if (p->qp) {
                /* constant quantizer: the threshold matrix is already set up */
                p->column_fidct(thr, block + 0 * 8, block3 + 0 * 8, chunk); //yes, this is a HOTSPOT
            } else {
                for (x = 0; x < chunk; x += 8) {
                    t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
                    if (t < 0)      //t always < width-2
                        t = 0;

                    t = qp_store[qy + (t >> qp_xshift)];
                    t = ff_norm_qscale(t, p->qscale_type);

                    /* rebuild the scaled threshold matrix only when the quantizer changes */
                    if (t != p->prev_q) {
                        p->prev_q = t;
                        p->mul_thrmat(thr_noq, thr, t);
                    }
                    p->column_fidct(thr, block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
                }
            }

            p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + xoff,
                        stride, 2 * (BLOCKSZ - 1));
            memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
            memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
        }

        /* remainder of the line */
        es = width + 8 - x0; // 8, ...
        if (es > 8)
            p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + xoff,
                        stride, (es - 4) >> 2);

        p->column_fidct(thr, block, block3, es & (~1));
        if (es > 3)
            p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + xoff,
                        stride, es >> 2);

        /* 8 more output lines are complete: write them out */
        if (y1 && !(y1 & 7)) {
            if (y1 & 8)
                p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
                               dst_stride, stride, width, 8, out_scale);
            else
                p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
                                dst_stride, stride, width, 8, out_scale);
        }
    }

    if (y & 7) { // height % 8 != 0
        uint8_t *last = dst + ((y - 8) & ~7) * dst_stride;
        const int remaining = y & 7;

        if (y & 8)
            p->store_slice(last, p->temp + 8 + 8 * stride,
                           dst_stride, stride, width, remaining, out_scale);
        else
            p->store_slice2(last, p->temp + 8 + 0 * stride,
                            dst_stride, stride, width, remaining, out_scale);
    }
}
248
column_fidct_c(int16_t * thr_adr,int16_t * data,int16_t * output,int cnt)249 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
250 {
251 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
252 int_simd16_t tmp10, tmp11, tmp12, tmp13;
253 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
254 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
255
256 int16_t *dataptr;
257 int16_t *wsptr;
258 int16_t *threshold;
259 int ctr;
260
261 dataptr = data;
262 wsptr = output;
263
264 for (; cnt > 0; cnt -= 2) { //start positions
265 threshold = (int16_t *)thr_adr;//threshold_mtx
266 for (ctr = DCTSIZE; ctr > 0; ctr--) {
267 // Process columns from input, add to output.
268 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
269 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
270
271 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
272 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
273
274 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
275 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
276
277 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
278 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
279
280 // Even part of FDCT
281
282 tmp10 = tmp0 + tmp3;
283 tmp13 = tmp0 - tmp3;
284 tmp11 = tmp1 + tmp2;
285 tmp12 = tmp1 - tmp2;
286
287 d0 = tmp10 + tmp11;
288 d4 = tmp10 - tmp11;
289
290 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
291 d2 = tmp13 + z1;
292 d6 = tmp13 - z1;
293
294 // Even part of IDCT
295
296 THRESHOLD(tmp0, d0, threshold[0 * 8]);
297 THRESHOLD(tmp1, d2, threshold[2 * 8]);
298 THRESHOLD(tmp2, d4, threshold[4 * 8]);
299 THRESHOLD(tmp3, d6, threshold[6 * 8]);
300 tmp0 += 2;
301 tmp10 = (tmp0 + tmp2) >> 2;
302 tmp11 = (tmp0 - tmp2) >> 2;
303
304 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
305 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
306
307 tmp0 = tmp10 + tmp13; //->temps
308 tmp3 = tmp10 - tmp13; //->temps
309 tmp1 = tmp11 + tmp12; //->temps
310 tmp2 = tmp11 - tmp12; //->temps
311
312 // Odd part of FDCT
313
314 tmp10 = tmp4 + tmp5;
315 tmp11 = tmp5 + tmp6;
316 tmp12 = tmp6 + tmp7;
317
318 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
319 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
320 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
321 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
322
323 z11 = tmp7 + z3;
324 z13 = tmp7 - z3;
325
326 d5 = z13 + z2;
327 d3 = z13 - z2;
328 d1 = z11 + z4;
329 d7 = z11 - z4;
330
331 // Odd part of IDCT
332
333 THRESHOLD(tmp4, d1, threshold[1 * 8]);
334 THRESHOLD(tmp5, d3, threshold[3 * 8]);
335 THRESHOLD(tmp6, d5, threshold[5 * 8]);
336 THRESHOLD(tmp7, d7, threshold[7 * 8]);
337
338 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
339 z13 = tmp6 + tmp5;
340 z10 = (tmp6 - tmp5) << 1;
341 z11 = tmp4 + tmp7;
342 z12 = (tmp4 - tmp7) << 1;
343
344 tmp7 = (z11 + z13) >> 2; //+2 !
345 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
346 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
347 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
348 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
349
350 tmp6 = tmp12 - tmp7;
351 tmp5 = tmp11 - tmp6;
352 tmp4 = tmp10 + tmp5;
353
354 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
355 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
356 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
357 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
358 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
359 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
360 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
361 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
362 //
363 dataptr++; //next column
364 wsptr++;
365 threshold++;
366 }
367 dataptr += 8; //skip each second start pos
368 wsptr += 8;
369 }
370 }
371
row_idct_c(int16_t * workspace,int16_t * output_adr,ptrdiff_t output_stride,int cnt)372 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
373 {
374 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
375 int_simd16_t tmp10, tmp11, tmp12, tmp13;
376 int_simd16_t z5, z10, z11, z12, z13;
377 int16_t *outptr;
378 int16_t *wsptr;
379
380 cnt *= 4;
381 wsptr = workspace;
382 outptr = output_adr;
383 for (; cnt > 0; cnt--) {
384 // Even part
385 //Simd version reads 4x4 block and transposes it
386 tmp10 = wsptr[2] + wsptr[3];
387 tmp11 = wsptr[2] - wsptr[3];
388
389 tmp13 = wsptr[0] + wsptr[1];
390 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
391
392 tmp0 = tmp10 + tmp13; //->temps
393 tmp3 = tmp10 - tmp13; //->temps
394 tmp1 = tmp11 + tmp12;
395 tmp2 = tmp11 - tmp12;
396
397 // Odd part
398 //Also transpose, with previous:
399 // ---- ---- ||||
400 // ---- ---- idct ||||
401 // ---- ---- ---> ||||
402 // ---- ---- ||||
403 z13 = wsptr[4] + wsptr[5];
404 z10 = wsptr[4] - wsptr[5];
405 z11 = wsptr[6] + wsptr[7];
406 z12 = wsptr[6] - wsptr[7];
407
408 tmp7 = z11 + z13;
409 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
410
411 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
412 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
413 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
414
415 tmp6 = (tmp12 << 3) - tmp7;
416 tmp5 = (tmp11 << 3) - tmp6;
417 tmp4 = (tmp10 << 3) + tmp5;
418
419 // Final output stage: descale and write column
420 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
421 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
422 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
423 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
424 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
425 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
426 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
427 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
428 outptr++;
429
430 wsptr += DCTSIZE; // advance pointer to next row
431 }
432 }
433
row_fdct_c(int16_t * data,const uint8_t * pixels,ptrdiff_t line_size,int cnt)434 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
435 {
436 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
437 int_simd16_t tmp10, tmp11, tmp12, tmp13;
438 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
439 int16_t *dataptr;
440
441 cnt *= 4;
442 // Pass 1: process rows.
443
444 dataptr = data;
445 for (; cnt > 0; cnt--) {
446 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
447 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
448 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
449 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
450 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
451 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
452 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
453 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
454
455 // Even part
456
457 tmp10 = tmp0 + tmp3;
458 tmp13 = tmp0 - tmp3;
459 tmp11 = tmp1 + tmp2;
460 tmp12 = tmp1 - tmp2;
461 //Even columns are written first, this leads to different order of columns
462 //in column_fidct(), but they are processed independently, so all ok.
463 //Later in the row_idct() columns readed at the same order.
464 dataptr[2] = tmp10 + tmp11;
465 dataptr[3] = tmp10 - tmp11;
466
467 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
468 dataptr[0] = tmp13 + z1;
469 dataptr[1] = tmp13 - z1;
470
471 // Odd part
472
473 tmp10 = (tmp4 + tmp5) << 2;
474 tmp11 = (tmp5 + tmp6) << 2;
475 tmp12 = (tmp6 + tmp7) << 2;
476
477 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
478 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
479 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
480 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
481
482 z11 = tmp7 + z3;
483 z13 = tmp7 - z3;
484
485 dataptr[4] = z13 + z2;
486 dataptr[5] = z13 - z2;
487 dataptr[6] = z11 + z4;
488 dataptr[7] = z11 - z4;
489
490 pixels++; // advance pointer to next column
491 dataptr += DCTSIZE;
492 }
493 }
494
/*
 * Advertise the pixel formats this filter accepts.
 * Returns AVERROR(ENOMEM) if the format list cannot be built, otherwise the
 * result of ff_set_common_formats().
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_YUV444P,
        AV_PIX_FMT_YUV422P,
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_YUV411P,
        AV_PIX_FMT_YUV410P,
        AV_PIX_FMT_YUV440P,
        AV_PIX_FMT_YUVJ444P,
        AV_PIX_FMT_YUVJ422P,
        AV_PIX_FMT_YUVJ420P,
        AV_PIX_FMT_YUVJ440P,
        AV_PIX_FMT_GBRP,
        AV_PIX_FMT_GRAY8,
        AV_PIX_FMT_NONE
    };
    AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);

    return fmts_list ? ff_set_common_formats(ctx, fmts_list) : AVERROR(ENOMEM);
}
512
/*
 * Configure the filter for the input link: record the chroma subsampling
 * shifts, allocate the two scratch buffers and install the C implementations
 * of the processing callbacks (followed by ff_fspp_init_x86() when ARCH_X86
 * is set).
 *
 * Both buffers hold temp_stride x h elements, where temp_stride is the width
 * plus 16 and h the height plus 16, each rounded up to a multiple of 16.
 *
 * Returns 0 on success or AVERROR(ENOMEM) if an allocation fails.
 */
static int config_input(AVFilterLink *inlink)
{
    FSPPContext *s = inlink->dst->priv;
    const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(inlink->format);
    const int padded_h = FFALIGN(inlink->h + 16, 16);

    s->hsub = pixdesc->log2_chroma_w;
    s->vsub = pixdesc->log2_chroma_h;

    s->temp_stride = FFALIGN(inlink->w + 16, 16);
    s->temp        = av_malloc_array(s->temp_stride, padded_h * sizeof(*s->temp));
    s->src         = av_malloc_array(s->temp_stride, padded_h * sizeof(*s->src));
    if (!(s->temp && s->src))
        return AVERROR(ENOMEM);

    s->row_fdct     = row_fdct_c;
    s->row_idct     = row_idct_c;
    s->column_fidct = column_fidct_c;
    s->mul_thrmat   = mul_thrmat_c;
    s->store_slice  = store_slice_c;
    s->store_slice2 = store_slice2_c;

    if (ARCH_X86)
        ff_fspp_init_x86(s);

    return 0;
}
542
/*
 * Process one input frame and send the result to the output link.
 *
 * The threshold matrix is rebuilt from custom_threshold[] and the strength
 * option on every call. With a forced quantizer (qp option) the scaled matrix
 * is prepared here as well; otherwise the quantizer table is extracted from
 * the frame and, unless use_bframe_qp is set, the table of the last non-B
 * frame is kept in the context and used for B-frames too.
 *
 * The planes are filtered in place when the frame is writable and both
 * dimensions are multiples of 8; otherwise a new output frame is allocated.
 * If filtering is disabled or no quantizer information is available, the
 * input frame is passed through unchanged.
 *
 * Takes ownership of in. Returns the result of ff_filter_frame(), or a
 * negative error code if the quantizer table cannot be extracted or the
 * output frame cannot be allocated.
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx  = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    FSPPContext *fspp     = ctx->priv;
    AVFrame *out          = in;
    const int bias        = (1 << 4) + fspp->strength;
    int8_t *qp_table      = NULL;
    int qp_stride         = 0;
    int ret               = 0;
    int scaled_thr[64];
    int i;

    for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
        scaled_thr[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);

    /* pack each row of 8 thresholds into two 64-bit words, 16 bits per entry,
     * in the order 2, 6, 0, 4 and 5, 3, 1, 7 */
    for (i = 0; i < 8; i++) {
        const int *row = scaled_thr + i * 8;

        fspp->threshold_mtx_noq[2 * i] = (uint64_t)row[2]
                                       | (((uint64_t)row[6]) << 16)
                                       | (((uint64_t)row[0]) << 32)
                                       | (((uint64_t)row[4]) << 48);

        fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)row[5]
                                           | (((uint64_t)row[3]) << 16)
                                           | (((uint64_t)row[1]) << 32)
                                           | (((uint64_t)row[7]) << 48);
    }

    if (fspp->qp) {
        fspp->prev_q = fspp->qp;
        fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]),
                         (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
    }

    /* if we are not in a constant user quantizer mode and we don't want to use
     * the quantizers from the B-frames (B-frames often have a higher QP), we
     * need to save the qp table from the last non B-frame; this is what the
     * following code block does */
    if (!fspp->qp) {
        const int is_b_frame = in->pict_type == AV_PICTURE_TYPE_B;

        if (fspp->use_bframe_qp || !is_b_frame) {
            ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
            if (ret < 0) {
                av_frame_free(&in);
                return ret;
            }

            if (!fspp->use_bframe_qp && !is_b_frame) {
                av_freep(&fspp->non_b_qp_table);
                fspp->non_b_qp_table  = qp_table;
                fspp->non_b_qp_stride = qp_stride;
            }
        }
    }

    if (fspp->log2_count && !ctx->is_disabled) {
        if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
            qp_table  = fspp->non_b_qp_table;
            qp_stride = fspp->non_b_qp_stride;
        }

        if (qp_table || fspp->qp) {
            const int chroma_w = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
            const int chroma_h = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);

            /* get a new frame if in-place is not possible or if the dimensions
             * are not multiple of 8 */
            if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
                const int aligned_w = FFALIGN(inlink->w, 8);
                const int aligned_h = FFALIGN(inlink->h, 8);

                out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
                if (!out) {
                    av_frame_free(&in);
                    ret = AVERROR(ENOMEM);
                    goto finish;
                }
                av_frame_copy_props(out, in);
                out->width  = in->width;
                out->height = in->height;
            }

            filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
                   inlink->w, inlink->h, qp_table, qp_stride, 1);
            filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
                   chroma_w, chroma_h, qp_table, qp_stride, 0);
            filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
                   chroma_w, chroma_h, qp_table, qp_stride, 0);
            emms_c();
        }
    }

    if (in != out) {
        /* a fourth plane, if present, is copied unfiltered */
        if (in->data[3])
            av_image_copy_plane(out->data[3], out->linesize[3],
                                in->data[3], in->linesize[3],
                                inlink->w, inlink->h);
        av_frame_free(&in);
    }
    ret = ff_filter_frame(outlink, out);

finish:
    /* the table is ours to free unless the context keeps it */
    if (qp_table != fspp->non_b_qp_table)
        av_freep(&qp_table);
    return ret;
}
644
uninit(AVFilterContext * ctx)645 static av_cold void uninit(AVFilterContext *ctx)
646 {
647 FSPPContext *fspp = ctx->priv;
648 av_freep(&fspp->temp);
649 av_freep(&fspp->src);
650 av_freep(&fspp->non_b_qp_table);
651 }
652
653 static const AVFilterPad fspp_inputs[] = {
654 {
655 .name = "default",
656 .type = AVMEDIA_TYPE_VIDEO,
657 .config_props = config_input,
658 .filter_frame = filter_frame,
659 },
660 { NULL }
661 };
662
663 static const AVFilterPad fspp_outputs[] = {
664 {
665 .name = "default",
666 .type = AVMEDIA_TYPE_VIDEO,
667 },
668 { NULL }
669 };
670
671 AVFilter ff_vf_fspp = {
672 .name = "fspp",
673 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
674 .priv_size = sizeof(FSPPContext),
675 .uninit = uninit,
676 .query_formats = query_formats,
677 .inputs = fspp_inputs,
678 .outputs = fspp_outputs,
679 .priv_class = &fspp_class,
680 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
681 };
682