1 /*
2 * This file is part of FFmpeg.
3 *
4 * Copyright (c) 2011, 2012 Hyllian/Jararaca <sergiogdb@gmail.com>
5 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
 * The xBR filter is used for depixelization of images.
 * It is based on Hyllian's xBR shader.
26 *
27 * @see https://forums.libretro.com/t/xbr-algorithm-tutorial/123
28 * @see https://github.com/yoyofr/iFBA/blob/master/fba_src/src/intf/video/scalers/xbr.cpp
29 */
30
31 #include "libavutil/opt.h"
32 #include "libavutil/avassert.h"
33 #include "libavutil/pixdesc.h"
34 #include "internal.h"
35
/* Clears the lowest bit of each of the three low bytes of a packed pixel, so
 * the value can be shifted right by one without a bit crossing into the
 * neighbouring byte (used by ALPHA_BLEND_128_W). */
#define LB_MASK       0x00FEFEFE
/* Selects bits 16-23 and bits 0-7 of a packed pixel; ALPHA_BLEND_BASE blends
 * these two channels in a single operation. */
#define RED_BLUE_MASK 0x00FF00FF
/* Selects bits 8-15 of a packed pixel. */
#define GREEN_MASK    0x0000FF00

/* PI is used below as a macro parameter name and as a local variable name;
 * drop any PI macro that may already be defined so it cannot clash. */
#ifdef PI
#undef PI
#endif
43
/* Signature of the per-scale worker functions (xbr2x, xbr3x, xbr4x) that
 * filter_frame() hands to ctx->internal->execute(). */
typedef int (*xbrfunc_t)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
45
/* Private state of one xbr filter instance. */
typedef struct XBRContext {
    const AVClass *class;
    int n;                      /* scale factor, set through the "n" option */
    xbrfunc_t func;             /* worker matching n, chosen in init() */
    /* Lookup table filled in init(): indexed by the low 24 bits of a pixel,
     * each entry packs Y in bits 16-23, U in bits 8-15 and V in bits 0-7. */
    uint32_t rgbtoyuv[1<<24];
} XBRContext;
52
/* Per-frame arguments that filter_frame() passes to the slice workers. */
typedef struct ThreadData {
    AVFrame *in, *out;          /* source frame and the upscaled destination frame */
    const uint32_t *rgbtoyuv;   /* points at XBRContext.rgbtoyuv */
} ThreadData;
57
/* Byte offset of an option's backing field inside XBRContext. */
#define OFFSET(x) offsetof(XBRContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
/* User-settable options: "n" is an integer scale factor stored in
 * XBRContext.n, default 3, accepted range 2..4. */
static const AVOption xbr_options[] = {
    { "n", "set scale factor", OFFSET(n), AV_OPT_TYPE_INT, {.i64 = 3}, 2, 4, .flags = FLAGS },
    { NULL }
};

/* Defines xbr_class, which ff_vf_xbr references as its priv_class. */
AVFILTER_DEFINE_CLASS(xbr);
66
/**
 * Distance between two packed pixels, measured in the YUV domain.
 *
 * Both pixels are converted through the lookup table (only their low 24 bits
 * are used as the index) and the absolute differences of the Y, U and V
 * components are added up.
 *
 * @param x   first packed pixel
 * @param y   second packed pixel
 * @param r2y lookup table whose entries pack Y in bits 16-23, U in bits 8-15
 *            and V in bits 0-7
 * @return |Y1 - Y2| + |U1 - U2| + |V1 - V2|
 */
static uint32_t pixel_diff(uint32_t x, uint32_t y, const uint32_t *r2y)
{
#define YMASK 0xff0000
#define UMASK 0x00ff00
#define VMASK 0x0000ff
#define ABSDIFF(a,b) (abs((int)(a)-(int)(b)))

    const uint32_t lhs = r2y[x & 0xffffff];
    const uint32_t rhs = r2y[y & 0xffffff];

    /* The masked values still sit at their packed bit positions, so each
     * difference is shifted down to a plain 0..255 magnitude. */
    const int y_delta = ABSDIFF(lhs & YMASK, rhs & YMASK) >> 16;
    const int u_delta = ABSDIFF(lhs & UMASK, rhs & UMASK) >> 8;
    const int v_delta = ABSDIFF(lhs & VMASK, rhs & VMASK);

    return y_delta + u_delta + v_delta;
}
81
/* 50/50 mix of two packed pixels: the low bit of each byte is cleared first so
 * the halved values can simply be added. */
#define ALPHA_BLEND_128_W(a, b) ((((a) & LB_MASK) >> 1) + (((b) & LB_MASK) >> 1))
/* Moves packed pixel a toward packed pixel b by a weight of m / 2^s. The two
 * RED_BLUE_MASK channels are processed together and the GREEN_MASK channel
 * separately; each partial result is masked back to its own bits before the
 * two halves are OR-ed together. */
#define ALPHA_BLEND_BASE(a, b, m, s) (  (RED_BLUE_MASK & (((a) & RED_BLUE_MASK) + (((((b) & RED_BLUE_MASK) - ((a) & RED_BLUE_MASK)) * (m)) >> (s)))) \
                                      | (GREEN_MASK    & (((a) & GREEN_MASK)    + (((((b) & GREEN_MASK)    - ((a) & GREEN_MASK))    * (m)) >> (s)))))
/* Fixed-weight variants; the weight of b is 1/8, 1/4, 3/4 and 7/8 respectively. */
#define ALPHA_BLEND_32_W(a, b)  ALPHA_BLEND_BASE(a, b, 1, 3)
#define ALPHA_BLEND_64_W(a, b)  ALPHA_BLEND_BASE(a, b, 1, 2)
#define ALPHA_BLEND_192_W(a, b) ALPHA_BLEND_BASE(a, b, 3, 2)
#define ALPHA_BLEND_224_W(a, b) ALPHA_BLEND_BASE(a, b, 7, 3)

/* df(): YUV distance of two pixels; requires a table pointer named r2y to be
 * in scope where it is expanded. */
#define df(A, B) pixel_diff(A, B, r2y)
/* eq(): two pixels count as similar when their distance is below 155. */
#define eq(A, B) (df(A, B) < 155)
92
/*
 * Corner rule for 2x scaling.
 *
 * The pixel arguments name source pixels around PE; N0..N3 are offsets into E,
 * the output block being written. The macro expects E and r2y (through
 * df()/eq()) to be in scope at the expansion site. xbr_filter() expands it
 * four times with permuted arguments.
 *
 * Nothing is changed when PE equals PH or PF, or when the difference sum e is
 * greater than i. Otherwise px (PF or PH, whichever is at least as close to
 * PE) is blended into E[N3] and, depending on the left/up tests, into E[N2]
 * and E[N1].
 *
 * PA, G5, C4, G0, D0, C1, B1, A0, A1 and N0 are accepted but not referenced
 * in the body.
 */
#define FILT2(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1, \
              N0, N1, N2, N3) do { \
    if (PE != PH && PE != PF) { \
        /* each sum has four plain terms plus one term weighted by four */ \
        const unsigned e = df(PE,PC) + df(PE,PG) + df(PI,H5) + df(PI,F4) + (df(PH,PF)<<2); \
        const unsigned i = df(PH,PD) + df(PH,I5) + df(PF,I4) + df(PF,PB) + (df(PE,PI)<<2); \
        if (e <= i) { \
            const unsigned px = df(PE,PF) <= df(PE,PH) ? PF : PH; \
            if (e < i && (!eq(PF,PB) && !eq(PH,PD) || eq(PE,PI) \
                          && (!eq(PF,I4) && !eq(PH,I5)) \
                          || eq(PE,PG) || eq(PE,PC))) { \
                const unsigned ke = df(PF,PG); \
                const unsigned ki = df(PH,PC); \
                const int left = ke<<1 <= ki && PE != PG && PD != PG; \
                const int up = ke >= ki<<1 && PE != PC && PB != PC; \
                if (left && up) { \
                    E[N3] = ALPHA_BLEND_224_W(E[N3], px); \
                    E[N2] = ALPHA_BLEND_64_W( E[N2], px); \
                    E[N1] = E[N2]; /* copies the value blended just above */ \
                } else if (left) { \
                    E[N3] = ALPHA_BLEND_192_W(E[N3], px); \
                    E[N2] = ALPHA_BLEND_64_W( E[N2], px); \
                } else if (up) { \
                    E[N3] = ALPHA_BLEND_192_W(E[N3], px); \
                    E[N1] = ALPHA_BLEND_64_W( E[N1], px); \
                } else { /* diagonal */ \
                    E[N3] = ALPHA_BLEND_128_W(E[N3], px); \
                } \
            } else { \
                E[N3] = ALPHA_BLEND_128_W(E[N3], px); \
            } \
        } \
    } \
} while (0)
126
/*
 * Corner rule for 3x scaling.
 *
 * The pixel arguments name source pixels around PE; N0..N8 are offsets into E,
 * the output block being written. The macro expects E and r2y (through
 * df()/eq()) to be in scope at the expansion site. xbr_filter() expands it
 * four times with permuted arguments.
 *
 * Nothing is changed when PE equals PH or PF, or when the difference sum e is
 * greater than i. Otherwise px (PF or PH, whichever is at least as close to
 * PE) is written or blended into E[N8] and, depending on the left/up tests,
 * into E[N7], E[N6], E[N5] and E[N2].
 *
 * PA, G5, C4, G0, D0, C1, B1, A0, A1 and N0, N1, N3, N4 are accepted but not
 * referenced in the body.
 */
#define FILT3(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1, \
              N0, N1, N2, N3, N4, N5, N6, N7, N8) do { \
    if (PE != PH && PE != PF) { \
        /* each sum has four plain terms plus one term weighted by four */ \
        const unsigned e = df(PE,PC) + df(PE,PG) + df(PI,H5) + df(PI,F4) + (df(PH,PF)<<2); \
        const unsigned i = df(PH,PD) + df(PH,I5) + df(PF,I4) + df(PF,PB) + (df(PE,PI)<<2); \
        if (e <= i) { \
            const unsigned px = df(PE,PF) <= df(PE,PH) ? PF : PH; \
            if (e < i && (!eq(PF,PB) && !eq(PF,PC) || !eq(PH,PD) && !eq(PH,PG) || eq(PE,PI) \
                          && (!eq(PF,F4) && !eq(PF,I4) || !eq(PH,H5) && !eq(PH,I5)) \
                          || eq(PE,PG) || eq(PE,PC))) { \
                const unsigned ke = df(PF,PG); \
                const unsigned ki = df(PH,PC); \
                const int left = ke<<1 <= ki && PE != PG && PD != PG; \
                const int up = ke >= ki<<1 && PE != PC && PB != PC; \
                if (left && up) { \
                    E[N7] = ALPHA_BLEND_192_W(E[N7], px); \
                    E[N6] = ALPHA_BLEND_64_W( E[N6], px); \
                    E[N5] = E[N7]; /* copies the values blended just above */ \
                    E[N2] = E[N6]; \
                    E[N8] = px; \
                } else if (left) { \
                    E[N7] = ALPHA_BLEND_192_W(E[N7], px); \
                    E[N5] = ALPHA_BLEND_64_W( E[N5], px); \
                    E[N6] = ALPHA_BLEND_64_W( E[N6], px); \
                    E[N8] = px; \
                } else if (up) { \
                    E[N5] = ALPHA_BLEND_192_W(E[N5], px); \
                    E[N7] = ALPHA_BLEND_64_W( E[N7], px); \
                    E[N2] = ALPHA_BLEND_64_W( E[N2], px); \
                    E[N8] = px; \
                } else { /* diagonal */ \
                    E[N8] = ALPHA_BLEND_224_W(E[N8], px); \
                    E[N5] = ALPHA_BLEND_32_W( E[N5], px); \
                    E[N7] = ALPHA_BLEND_32_W( E[N7], px); \
                } \
            } else { \
                E[N8] = ALPHA_BLEND_128_W(E[N8], px); \
            } \
        } \
    } \
} while (0)
168
/*
 * Corner rule for 4x scaling.
 *
 * The pixel arguments name source pixels around PE; the N* arguments are
 * offsets into E, the output block being written. Note that they are listed
 * in the order N15, N14, N11, N3, N7, N10, N13, N12, ... rather than in
 * numeric order. The macro expects E and r2y (through df()/eq()) to be in
 * scope at the expansion site. xbr_filter() expands it four times with
 * permuted arguments.
 *
 * Nothing is changed when PE equals PH or PF, or when the difference sum e is
 * greater than i. Otherwise px (PF or PH, whichever is at least as close to
 * PE) is written or blended into E[N15] and, depending on the left/up tests,
 * into E[N14], E[N13], E[N12], E[N11], E[N10], E[N7] and E[N3].
 *
 * PA, G5, C4, G0, D0, C1, B1, A0, A1 and N9, N6, N2, N1, N5, N8, N4, N0 are
 * accepted but not referenced in the body.
 */
#define FILT4(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1, \
              N15, N14, N11, N3, N7, N10, N13, N12, N9, N6, N2, N1, N5, N8, N4, N0) do { \
    if (PE != PH && PE != PF) { \
        /* each sum has four plain terms plus one term weighted by four */ \
        const unsigned e = df(PE,PC) + df(PE,PG) + df(PI,H5) + df(PI,F4) + (df(PH,PF)<<2); \
        const unsigned i = df(PH,PD) + df(PH,I5) + df(PF,I4) + df(PF,PB) + (df(PE,PI)<<2); \
        if (e <= i) { \
            const unsigned px = df(PE,PF) <= df(PE,PH) ? PF : PH; \
            if (e < i && (!eq(PF,PB) && !eq(PH,PD) || eq(PE,PI) \
                          && (!eq(PF,I4) && !eq(PH,I5)) \
                          || eq(PE,PG) || eq(PE,PC))) { \
                const unsigned ke = df(PF,PG); \
                const unsigned ki = df(PH,PC); \
                const int left = ke<<1 <= ki && PE != PG && PD != PG; \
                const int up = ke >= ki<<1 && PE != PC && PB != PC; \
                if (left && up) { \
                    E[N13] = ALPHA_BLEND_192_W(E[N13], px); \
                    E[N12] = ALPHA_BLEND_64_W( E[N12], px); \
                    E[N15] = E[N14] = E[N11] = px; \
                    E[N10] = E[N3] = E[N12]; /* copies the values blended just above */ \
                    E[N7] = E[N13]; \
                } else if (left) { \
                    E[N11] = ALPHA_BLEND_192_W(E[N11], px); \
                    E[N13] = ALPHA_BLEND_192_W(E[N13], px); \
                    E[N10] = ALPHA_BLEND_64_W( E[N10], px); \
                    E[N12] = ALPHA_BLEND_64_W( E[N12], px); \
                    E[N14] = px; \
                    E[N15] = px; \
                } else if (up) { \
                    E[N14] = ALPHA_BLEND_192_W(E[N14], px); \
                    E[N7 ] = ALPHA_BLEND_192_W(E[N7 ], px); \
                    E[N10] = ALPHA_BLEND_64_W( E[N10], px); \
                    E[N3 ] = ALPHA_BLEND_64_W( E[N3 ], px); \
                    E[N11] = px; \
                    E[N15] = px; \
                } else { /* diagonal */ \
                    E[N11] = ALPHA_BLEND_128_W(E[N11], px); \
                    E[N14] = ALPHA_BLEND_128_W(E[N14], px); \
                    E[N15] = px; \
                } \
            } else { \
                E[N15] = ALPHA_BLEND_128_W(E[N15], px); \
            } \
        } \
    } \
} while (0)
214
xbr_filter(const ThreadData * td,int jobnr,int nb_jobs,int n)215 static av_always_inline void xbr_filter(const ThreadData *td, int jobnr, int nb_jobs, int n)
216 {
217 int x, y;
218 const AVFrame *input = td->in;
219 AVFrame *output = td->out;
220 const uint32_t *r2y = td->rgbtoyuv;
221 const int slice_start = (input->height * jobnr ) / nb_jobs;
222 const int slice_end = (input->height * (jobnr+1)) / nb_jobs;
223 const int nl = output->linesize[0] >> 2;
224 const int nl1 = nl + nl;
225 const int nl2 = nl1 + nl;
226
227 for (y = slice_start; y < slice_end; y++) {
228
229 uint32_t *E = (uint32_t *)(output->data[0] + y * output->linesize[0] * n);
230 const uint32_t *sa2 = (uint32_t *)(input->data[0] + y * input->linesize[0] - 8); /* center */
231 const uint32_t *sa1 = sa2 - (input->linesize[0]>>2); /* up x1 */
232 const uint32_t *sa0 = sa1 - (input->linesize[0]>>2); /* up x2 */
233 const uint32_t *sa3 = sa2 + (input->linesize[0]>>2); /* down x1 */
234 const uint32_t *sa4 = sa3 + (input->linesize[0]>>2); /* down x2 */
235
236 if (y <= 1) {
237 sa0 = sa1;
238 if (y == 0) {
239 sa0 = sa1 = sa2;
240 }
241 }
242
243 if (y >= input->height - 2) {
244 sa4 = sa3;
245 if (y == input->height - 1) {
246 sa4 = sa3 = sa2;
247 }
248 }
249
250 for (x = 0; x < input->width; x++) {
251 const uint32_t B1 = sa0[2];
252 const uint32_t PB = sa1[2];
253 const uint32_t PE = sa2[2];
254 const uint32_t PH = sa3[2];
255 const uint32_t H5 = sa4[2];
256
257 const int pprev = 2 - (x > 0);
258 const uint32_t A1 = sa0[pprev];
259 const uint32_t PA = sa1[pprev];
260 const uint32_t PD = sa2[pprev];
261 const uint32_t PG = sa3[pprev];
262 const uint32_t G5 = sa4[pprev];
263
264 const int pprev2 = pprev - (x > 1);
265 const uint32_t A0 = sa1[pprev2];
266 const uint32_t D0 = sa2[pprev2];
267 const uint32_t G0 = sa3[pprev2];
268
269 const int pnext = 3 - (x == input->width - 1);
270 const uint32_t C1 = sa0[pnext];
271 const uint32_t PC = sa1[pnext];
272 const uint32_t PF = sa2[pnext];
273 const uint32_t PI = sa3[pnext];
274 const uint32_t I5 = sa4[pnext];
275
276 const int pnext2 = pnext + 1 - (x >= input->width - 2);
277 const uint32_t C4 = sa1[pnext2];
278 const uint32_t F4 = sa2[pnext2];
279 const uint32_t I4 = sa3[pnext2];
280
281 if (n == 2) {
282 E[0] = E[1] = // 0, 1
283 E[nl] = E[nl + 1] = PE; // 2, 3
284
285 FILT2(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1, 0, 1, nl, nl+1);
286 FILT2(PE, PC, PF, PB, PI, PA, PH, PD, PG, I4, A1, I5, H5, A0, D0, B1, C1, F4, C4, G5, G0, nl, 0, nl+1, 1);
287 FILT2(PE, PA, PB, PD, PC, PG, PF, PH, PI, C1, G0, C4, F4, G5, H5, D0, A0, B1, A1, I4, I5, nl+1, nl, 1, 0);
288 FILT2(PE, PG, PD, PH, PA, PI, PB, PF, PC, A0, I5, A1, B1, I4, F4, H5, G5, D0, G0, C1, C4, 1, nl+1, 0, nl);
289 } else if (n == 3) {
290 E[0] = E[1] = E[2] = // 0, 1, 2
291 E[nl] = E[nl+1] = E[nl+2] = // 3, 4, 5
292 E[nl1] = E[nl1+1] = E[nl1+2] = PE; // 6, 7, 8
293
294 FILT3(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1, 0, 1, 2, nl, nl+1, nl+2, nl1, nl1+1, nl1+2);
295 FILT3(PE, PC, PF, PB, PI, PA, PH, PD, PG, I4, A1, I5, H5, A0, D0, B1, C1, F4, C4, G5, G0, nl1, nl, 0, nl1+1, nl+1, 1, nl1+2, nl+2, 2);
296 FILT3(PE, PA, PB, PD, PC, PG, PF, PH, PI, C1, G0, C4, F4, G5, H5, D0, A0, B1, A1, I4, I5, nl1+2, nl1+1, nl1, nl+2, nl+1, nl, 2, 1, 0);
297 FILT3(PE, PG, PD, PH, PA, PI, PB, PF, PC, A0, I5, A1, B1, I4, F4, H5, G5, D0, G0, C1, C4, 2, nl+2, nl1+2, 1, nl+1, nl1+1, 0, nl, nl1);
298 } else if (n == 4) {
299 E[0] = E[1] = E[2] = E[3] = // 0, 1, 2, 3
300 E[nl] = E[nl+1] = E[nl+2] = E[nl+3] = // 4, 5, 6, 7
301 E[nl1] = E[nl1+1] = E[nl1+2] = E[nl1+3] = // 8, 9, 10, 11
302 E[nl2] = E[nl2+1] = E[nl2+2] = E[nl2+3] = PE; // 12, 13, 14, 15
303
304 FILT4(PE, PI, PH, PF, PG, PC, PD, PB, PA, G5, C4, G0, D0, C1, B1, F4, I4, H5, I5, A0, A1, nl2+3, nl2+2, nl1+3, 3, nl+3, nl1+2, nl2+1, nl2, nl1+1, nl+2, 2, 1, nl+1, nl1, nl, 0);
305 FILT4(PE, PC, PF, PB, PI, PA, PH, PD, PG, I4, A1, I5, H5, A0, D0, B1, C1, F4, C4, G5, G0, 3, nl+3, 2, 0, 1, nl+2, nl1+3, nl2+3, nl1+2, nl+1, nl, nl1, nl1+1, nl2+2, nl2+1, nl2);
306 FILT4(PE, PA, PB, PD, PC, PG, PF, PH, PI, C1, G0, C4, F4, G5, H5, D0, A0, B1, A1, I4, I5, 0, 1, nl, nl2, nl1, nl+1, 2, 3, nl+2, nl1+1, nl2+1, nl2+2, nl1+2, nl+3, nl1+3, nl2+3);
307 FILT4(PE, PG, PD, PH, PA, PI, PB, PF, PC, A0, I5, A1, B1, I4, F4, H5, G5, D0, G0, C1, C4, nl2, nl1, nl2+1, nl2+3, nl2+2, nl1+1, nl, 0, nl+1, nl1+2, nl1+3, nl+3, nl+2, 1, 2, 3);
308 }
309
310 sa0 += 1;
311 sa1 += 1;
312 sa2 += 1;
313 sa3 += 1;
314 sa4 += 1;
315
316 E += n;
317 }
318 }
319 }
320
/* Generates the worker xbr<size>x(), which forwards to xbr_filter() with the
 * scale factor passed as a literal, and always returns 0. */
#define XBR_FUNC(size) \
static int xbr##size##x(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
{ \
    xbr_filter(arg, jobnr, nb_jobs, size); \
    return 0; \
}

/* Instantiate xbr2x, xbr3x and xbr4x; init() selects one of them. */
XBR_FUNC(2)
XBR_FUNC(3)
XBR_FUNC(4)
331
332
/**
 * Output pad configuration: the output is the input size multiplied by the
 * scale factor in both dimensions.
 *
 * @param outlink output link whose w and h are set
 * @return always 0
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *filter = outlink->src;
    const AVFilterLink *source = filter->inputs[0];
    const XBRContext *xbr = filter->priv;
    const int factor = xbr->n;

    outlink->w = source->w * factor;
    outlink->h = source->h * factor;

    return 0;
}
343
/**
 * Advertise the pixel formats the filter accepts: AV_PIX_FMT_0RGB32 only.
 *
 * @param ctx filter context
 * @return the result of ff_set_common_formats(), or AVERROR(ENOMEM) if the
 *         format list could not be created
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat supported[] = {
        AV_PIX_FMT_0RGB32,
        AV_PIX_FMT_NONE,
    };
    AVFilterFormats *formats = ff_make_format_list(supported);

    return formats ? ff_set_common_formats(ctx, formats)
                   : AVERROR(ENOMEM);
}
355
/**
 * Input pad callback: upscale one frame and pass the result downstream.
 *
 * Allocates an output frame with the output link's dimensions, copies the
 * frame properties from the input, runs the selected worker over at most
 * min(input height, available threads) slices and forwards the result with
 * ff_filter_frame().
 *
 * @param inlink input link the frame arrived on
 * @param in     input frame; freed by this function on every path
 * @return the result of ff_filter_frame(), or a negative AVERROR code if the
 *         output frame could not be allocated or the frame properties could
 *         not be copied
 */
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    XBRContext *s = ctx->priv;
    ThreadData td;
    int ret;

    AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!out) {
        av_frame_free(&in);
        return AVERROR(ENOMEM);
    }

    /* Do not forward a frame with partially copied metadata: report the
     * failure and release both frames instead. */
    ret = av_frame_copy_props(out, in);
    if (ret < 0) {
        av_frame_free(&out);
        av_frame_free(&in);
        return ret;
    }

    td.in = in;
    td.out = out;
    td.rgbtoyuv = s->rgbtoyuv;
    /* Never request more slices than there are input rows. */
    ctx->internal->execute(ctx, s->func, &td, NULL, FFMIN(inlink->h, ff_filter_get_nb_threads(ctx)));

    out->width  = outlink->w;
    out->height = outlink->h;

    av_frame_free(&in);
    return ff_filter_frame(outlink, out);
}
382
init(AVFilterContext * ctx)383 static av_cold int init(AVFilterContext *ctx)
384 {
385 XBRContext *s = ctx->priv;
386 static const xbrfunc_t xbrfuncs[] = {xbr2x, xbr3x, xbr4x};
387
388 uint32_t c;
389 int bg, rg, g;
390
391 for (bg = -255; bg < 256; bg++) {
392 for (rg = -255; rg < 256; rg++) {
393 const uint32_t u = (uint32_t)((-169*rg + 500*bg)/1000) + 128;
394 const uint32_t v = (uint32_t)(( 500*rg - 81*bg)/1000) + 128;
395 int startg = FFMAX3(-bg, -rg, 0);
396 int endg = FFMIN3(255-bg, 255-rg, 255);
397 uint32_t y = (uint32_t)(( 299*rg + 1000*startg + 114*bg)/1000);
398 c = bg + rg * (1 << 16) + 0x010101 * startg;
399 for (g = startg; g <= endg; g++) {
400 s->rgbtoyuv[c] = ((y++) << 16) + (u << 8) + v;
401 c+= 0x010101;
402 }
403 }
404 }
405
406 s->func = xbrfuncs[s->n - 2];
407 return 0;
408 }
409
/* Single video input pad; incoming frames are handled by filter_frame(). */
static const AVFilterPad xbr_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
    },
    { NULL }
};
418
/* Single video output pad; its dimensions are set by config_output(). */
static const AVFilterPad xbr_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_output,
    },
    { NULL }
};
427
/* Filter definition for "xbr": ties together the pads, the option class, the
 * callbacks defined in this file and the slice-threading flag. */
AVFilter ff_vf_xbr = {
    .name          = "xbr",
    .description   = NULL_IF_CONFIG_SMALL("Scale the input using xBR algorithm."),
    .inputs        = xbr_inputs,
    .outputs       = xbr_outputs,
    .query_formats = query_formats,
    .priv_size     = sizeof(XBRContext),
    .priv_class    = &xbr_class,
    .init          = init,
    .flags         = AVFILTER_FLAG_SLICE_THREADS,
};
439