• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <math.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <string.h>
25 
26 #include "libavutil/attributes.h"
27 #include "libavutil/avutil.h"
28 #include "libavutil/avassert.h"
29 #include "libavutil/bswap.h"
30 #include "libavutil/intreadwrite.h"
31 #include "libavutil/mathematics.h"
32 #include "libavutil/mem_internal.h"
33 #include "libavutil/pixdesc.h"
34 #include "config.h"
35 #include "rgb2rgb.h"
36 #include "swscale.h"
37 #include "swscale_internal.h"
38 
39 DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_4)[][8] = {
40 {  1,   3,   1,   3,   1,   3,   1,   3, },
41 {  2,   0,   2,   0,   2,   0,   2,   0, },
42 {  1,   3,   1,   3,   1,   3,   1,   3, },
43 };
44 
45 DECLARE_ALIGNED(8, const uint8_t, ff_dither_2x2_8)[][8] = {
46 {  6,   2,   6,   2,   6,   2,   6,   2, },
47 {  0,   4,   0,   4,   0,   4,   0,   4, },
48 {  6,   2,   6,   2,   6,   2,   6,   2, },
49 };
50 
51 DECLARE_ALIGNED(8, const uint8_t, ff_dither_4x4_16)[][8] = {
52 {  8,   4,  11,   7,   8,   4,  11,   7, },
53 {  2,  14,   1,  13,   2,  14,   1,  13, },
54 { 10,   6,   9,   5,  10,   6,   9,   5, },
55 {  0,  12,   3,  15,   0,  12,   3,  15, },
56 {  8,   4,  11,   7,   8,   4,  11,   7, },
57 };
58 
59 DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_32)[][8] = {
60 { 17,   9,  23,  15,  16,   8,  22,  14, },
61 {  5,  29,   3,  27,   4,  28,   2,  26, },
62 { 21,  13,  19,  11,  20,  12,  18,  10, },
63 {  0,  24,   6,  30,   1,  25,   7,  31, },
64 { 16,   8,  22,  14,  17,   9,  23,  15, },
65 {  4,  28,   2,  26,   5,  29,   3,  27, },
66 { 20,  12,  18,  10,  21,  13,  19,  11, },
67 {  1,  25,   7,  31,   0,  24,   6,  30, },
68 { 17,   9,  23,  15,  16,   8,  22,  14, },
69 };
70 
71 DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_73)[][8] = {
72 {  0,  55,  14,  68,   3,  58,  17,  72, },
73 { 37,  18,  50,  32,  40,  22,  54,  35, },
74 {  9,  64,   5,  59,  13,  67,   8,  63, },
75 { 46,  27,  41,  23,  49,  31,  44,  26, },
76 {  2,  57,  16,  71,   1,  56,  15,  70, },
77 { 39,  21,  52,  34,  38,  19,  51,  33, },
78 { 11,  66,   7,  62,  10,  65,   6,  60, },
79 { 48,  30,  43,  25,  47,  29,  42,  24, },
80 {  0,  55,  14,  68,   3,  58,  17,  72, },
81 };
82 
83 #if 1
84 DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
85 {117,  62, 158, 103, 113,  58, 155, 100, },
86 { 34, 199,  21, 186,  31, 196,  17, 182, },
87 {144,  89, 131,  76, 141,  86, 127,  72, },
88 {  0, 165,  41, 206,  10, 175,  52, 217, },
89 {110,  55, 151,  96, 120,  65, 162, 107, },
90 { 28, 193,  14, 179,  38, 203,  24, 189, },
91 {138,  83, 124,  69, 148,  93, 134,  79, },
92 {  7, 172,  48, 213,   3, 168,  45, 210, },
93 {117,  62, 158, 103, 113,  58, 155, 100, },
94 };
95 #elif 1
96 // tries to correct a gamma of 1.5
97 DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
98 {  0, 143,  18, 200,   2, 156,  25, 215, },
99 { 78,  28, 125,  64,  89,  36, 138,  74, },
100 { 10, 180,   3, 161,  16, 195,   8, 175, },
101 {109,  51,  93,  38, 121,  60, 105,  47, },
102 {  1, 152,  23, 210,   0, 147,  20, 205, },
103 { 85,  33, 134,  71,  81,  30, 130,  67, },
104 { 14, 190,   6, 171,  12, 185,   5, 166, },
105 {117,  57, 101,  44, 113,  54,  97,  41, },
106 {  0, 143,  18, 200,   2, 156,  25, 215, },
107 };
108 #elif 1
109 // tries to correct a gamma of 2.0
110 DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
111 {  0, 124,   8, 193,   0, 140,  12, 213, },
112 { 55,  14, 104,  42,  66,  19, 119,  52, },
113 {  3, 168,   1, 145,   6, 187,   3, 162, },
114 { 86,  31,  70,  21,  99,  39,  82,  28, },
115 {  0, 134,  11, 206,   0, 129,   9, 200, },
116 { 62,  17, 114,  48,  58,  16, 109,  45, },
117 {  5, 181,   2, 157,   4, 175,   1, 151, },
118 { 95,  36,  78,  26,  90,  34,  74,  24, },
119 {  0, 124,   8, 193,   0, 140,  12, 213, },
120 };
121 #else
122 // tries to correct a gamma of 2.5
123 DECLARE_ALIGNED(8, const uint8_t, ff_dither_8x8_220)[][8] = {
124 {  0, 107,   3, 187,   0, 125,   6, 212, },
125 { 39,   7,  86,  28,  49,  11, 102,  36, },
126 {  1, 158,   0, 131,   3, 180,   1, 151, },
127 { 68,  19,  52,  12,  81,  25,  64,  17, },
128 {  0, 119,   5, 203,   0, 113,   4, 195, },
129 { 45,   9,  96,  33,  42,   8,  91,  30, },
130 {  2, 172,   1, 144,   2, 165,   0, 137, },
131 { 77,  23,  60,  15,  72,  21,  56,  14, },
132 {  0, 107,   3, 187,   0, 125,   6, 212, },
133 };
134 #endif
135 
/*
 * Store one 16-bit sample at pos.
 *
 * val is shifted right by the caller's local "shift", clipped with
 * av_clip_int16() or av_clip_uint16() (chosen by the "signedness" token,
 * int or uint), and bias is added to the clipped value. The caller's local
 * "big_endian" selects AV_WB16 (non-zero) or AV_WL16 (zero).
 *
 * Both "shift" and "big_endian" are taken from the enclosing scope by name.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else), and val/bias are
 * parenthesized so compound expressions expand as intended.
 */
#define output_pixel(pos, val, bias, signedness) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } else { \
            AV_WL16(pos, (bias) + av_clip_ ## signedness ## 16((val) >> shift)); \
        } \
    } while (0)
142 
143 static av_always_inline void
yuv2plane1_16_c_template(const int32_t * src,uint16_t * dest,int dstW,int big_endian,int output_bits)144 yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
145                          int big_endian, int output_bits)
146 {
147     int i;
148     int shift = 3;
149     av_assert0(output_bits == 16);
150 
151     for (i = 0; i < dstW; i++) {
152         int val = src[i] + (1 << (shift - 1));
153         output_pixel(&dest[i], val, 0, uint);
154     }
155 }
156 
157 static av_always_inline void
yuv2planeX_16_c_template(const int16_t * filter,int filterSize,const int32_t ** src,uint16_t * dest,int dstW,int big_endian,int output_bits)158 yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
159                          const int32_t **src, uint16_t *dest, int dstW,
160                          int big_endian, int output_bits)
161 {
162     int i;
163     int shift = 15;
164     av_assert0(output_bits == 16);
165 
166     for (i = 0; i < dstW; i++) {
167         int val = 1 << (shift - 1);
168         int j;
169 
170         /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
171          * filters (or anything with negative coeffs, the range can be slightly
172          * wider in both directions. To account for this overflow, we subtract
173          * a constant so it always fits in the signed range (assuming a
174          * reasonable filterSize), and re-add that at the end. */
175         val -= 0x40000000;
176         for (j = 0; j < filterSize; j++)
177             val += src[j][i] * (unsigned)filter[j];
178 
179         output_pixel(&dest[i], val, 0x8000, int);
180     }
181 }
182 
183 static av_always_inline void
yuv2nv12cX_16_c_template(int big_endian,const uint8_t * chrDither,const int16_t * chrFilter,int chrFilterSize,const int16_t ** chrUSrc,const int16_t ** chrVSrc,uint8_t * dest8,int chrDstW,int output_bits)184 yuv2nv12cX_16_c_template(int big_endian, const uint8_t *chrDither,
185                          const int16_t *chrFilter, int chrFilterSize,
186                          const int16_t **chrUSrc, const int16_t **chrVSrc,
187                          uint8_t *dest8, int chrDstW, int output_bits)
188 {
189     uint16_t *dest = (uint16_t*)dest8;
190     const int32_t **uSrc = (const int32_t **)chrUSrc;
191     const int32_t **vSrc = (const int32_t **)chrVSrc;
192     int shift = 15;
193     int i, j;
194     av_assert0(output_bits == 16);
195 
196     for (i = 0; i < chrDstW; i++) {
197         int u = 1 << (shift - 1);
198         int v = 1 << (shift - 1);
199 
200         /* See yuv2planeX_16_c_template for details. */
201         u -= 0x40000000;
202         v -= 0x40000000;
203         for (j = 0; j < chrFilterSize; j++) {
204             u += uSrc[j][i] * (unsigned)chrFilter[j];
205             v += vSrc[j][i] * (unsigned)chrFilter[j];
206         }
207 
208         output_pixel(&dest[2*i]  , u, 0x8000, int);
209         output_pixel(&dest[2*i+1], v, 0x8000, int);
210     }
211 }
212 
213 static av_always_inline void
yuv2plane1_float_c_template(const int32_t * src,float * dest,int dstW)214 yuv2plane1_float_c_template(const int32_t *src, float *dest, int dstW)
215 {
216     static const int big_endian = HAVE_BIGENDIAN;
217     static const int shift = 3;
218     static const float float_mult = 1.0f / 65535.0f;
219     int i, val;
220     uint16_t val_uint;
221 
222     for (i = 0; i < dstW; ++i){
223         val = src[i] + (1 << (shift - 1));
224         output_pixel(&val_uint, val, 0, uint);
225         dest[i] = float_mult * (float)val_uint;
226     }
227 }
228 
229 static av_always_inline void
yuv2plane1_float_bswap_c_template(const int32_t * src,uint32_t * dest,int dstW)230 yuv2plane1_float_bswap_c_template(const int32_t *src, uint32_t *dest, int dstW)
231 {
232     static const int big_endian = HAVE_BIGENDIAN;
233     static const int shift = 3;
234     static const float float_mult = 1.0f / 65535.0f;
235     int i, val;
236     uint16_t val_uint;
237 
238     for (i = 0; i < dstW; ++i){
239         val = src[i] + (1 << (shift - 1));
240         output_pixel(&val_uint, val, 0, uint);
241         dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
242     }
243 }
244 
245 static av_always_inline void
yuv2planeX_float_c_template(const int16_t * filter,int filterSize,const int32_t ** src,float * dest,int dstW)246 yuv2planeX_float_c_template(const int16_t *filter, int filterSize, const int32_t **src,
247                             float *dest, int dstW)
248 {
249     static const int big_endian = HAVE_BIGENDIAN;
250     static const int shift = 15;
251     static const float float_mult = 1.0f / 65535.0f;
252     int i, j, val;
253     uint16_t val_uint;
254 
255     for (i = 0; i < dstW; ++i){
256         val = (1 << (shift - 1)) - 0x40000000;
257         for (j = 0; j < filterSize; ++j){
258             val += src[j][i] * (unsigned)filter[j];
259         }
260         output_pixel(&val_uint, val, 0x8000, int);
261         dest[i] = float_mult * (float)val_uint;
262     }
263 }
264 
265 static av_always_inline void
yuv2planeX_float_bswap_c_template(const int16_t * filter,int filterSize,const int32_t ** src,uint32_t * dest,int dstW)266 yuv2planeX_float_bswap_c_template(const int16_t *filter, int filterSize, const int32_t **src,
267                             uint32_t *dest, int dstW)
268 {
269     static const int big_endian = HAVE_BIGENDIAN;
270     static const int shift = 15;
271     static const float float_mult = 1.0f / 65535.0f;
272     int i, j, val;
273     uint16_t val_uint;
274 
275     for (i = 0; i < dstW; ++i){
276         val = (1 << (shift - 1)) - 0x40000000;
277         for (j = 0; j < filterSize; ++j){
278             val += src[j][i] * (unsigned)filter[j];
279         }
280         output_pixel(&val_uint, val, 0x8000, int);
281         dest[i] = av_bswap32(av_float2int(float_mult * (float)val_uint));
282     }
283 }
284 
/*
 * Define yuv2plane1_float<endian_tag>_c(): a wrapper that casts src to
 * const int32_t * and dest to out_t *, then forwards to tmpl_fn. The
 * wrapper's dither and offset parameters are not used.
 */
#define yuv2plane1_float(tmpl_fn, out_t, endian_tag) \
static void yuv2plane1_float ## endian_tag ## _c(const int16_t *src, \
                                                 uint8_t *dest, int dstW, \
                                                 const uint8_t *dither, \
                                                 int offset) \
{ \
    tmpl_fn((const int32_t *)src, (out_t *)dest, dstW); \
}
291 
/*
 * Define yuv2planeX_float<endian_tag>_c(): a wrapper that casts src to
 * const int32_t ** and dest to out_t *, then forwards to tmpl_fn together
 * with the filter. The wrapper's dither and offset parameters are not used.
 */
#define yuv2planeX_float(tmpl_fn, out_t, endian_tag) \
static void yuv2planeX_float ## endian_tag ## _c(const int16_t *filter, \
                                                 int filterSize, \
                                                 const int16_t **src, \
                                                 uint8_t *dest, int dstW, \
                                                 const uint8_t *dither, \
                                                 int offset) \
{ \
    tmpl_fn(filter, filterSize, (const int32_t **)src, (out_t *)dest, dstW); \
}
299 
300 #if HAVE_BIGENDIAN
yuv2plane1_float(yuv2plane1_float_c_template,float,BE)301 yuv2plane1_float(yuv2plane1_float_c_template,       float,    BE)
302 yuv2plane1_float(yuv2plane1_float_bswap_c_template, uint32_t, LE)
303 yuv2planeX_float(yuv2planeX_float_c_template,       float,    BE)
304 yuv2planeX_float(yuv2planeX_float_bswap_c_template, uint32_t, LE)
305 #else
306 yuv2plane1_float(yuv2plane1_float_c_template,       float,    LE)
307 yuv2plane1_float(yuv2plane1_float_bswap_c_template, uint32_t, BE)
308 yuv2planeX_float(yuv2planeX_float_c_template,       float,    LE)
309 yuv2planeX_float(yuv2planeX_float_bswap_c_template, uint32_t, BE)
310 #endif
311 
312 #undef output_pixel
313 
/*
 * Store one 16-bit sample at pos.
 *
 * val is shifted right by the caller's local "shift" and clipped to
 * "output_bits" unsigned bits with av_clip_uintp2(). The caller's local
 * "big_endian" selects AV_WB16 (non-zero) or AV_WL16 (zero).
 *
 * "shift", "output_bits" and "big_endian" are taken from the enclosing
 * scope by name. The body is wrapped in do { } while (0) so the macro
 * behaves as a single statement (safe inside an unbraced if/else), and val
 * is parenthesized so compound expressions expand as intended.
 */
#define output_pixel(pos, val) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
        } else { \
            AV_WL16(pos, av_clip_uintp2((val) >> shift, output_bits)); \
        } \
    } while (0)
320 
321 static av_always_inline void
322 yuv2plane1_10_c_template(const int16_t *src, uint16_t *dest, int dstW,
323                          int big_endian, int output_bits)
324 {
325     int i;
326     int shift = 15 - output_bits;
327 
328     for (i = 0; i < dstW; i++) {
329         int val = src[i] + (1 << (shift - 1));
330         output_pixel(&dest[i], val);
331     }
332 }
333 
334 static av_always_inline void
yuv2planeX_10_c_template(const int16_t * filter,int filterSize,const int16_t ** src,uint16_t * dest,int dstW,int big_endian,int output_bits)335 yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
336                          const int16_t **src, uint16_t *dest, int dstW,
337                          int big_endian, int output_bits)
338 {
339     int i;
340     int shift = 11 + 16 - output_bits;
341 
342     for (i = 0; i < dstW; i++) {
343         int val = 1 << (shift - 1);
344         int j;
345 
346         for (j = 0; j < filterSize; j++)
347             val += src[j][i] * filter[j];
348 
349         output_pixel(&dest[i], val);
350     }
351 }
352 
353 #undef output_pixel
354 
/*
 * Define the pair yuv2plane1_<depth><endian_tag>_c() and
 * yuv2planeX_<depth><endian_tag>_c().
 *
 * Both wrappers cast dest to uint16_t * and the source pointer(s) to
 * in_t, then call yuv2plane1_<tmpl>_c_template / yuv2planeX_<tmpl>_c_template
 * with be_flag as the big_endian argument and depth as output_bits. The
 * wrappers' dither and offset parameters are not used.
 */
#define yuv2NBPS(depth, endian_tag, be_flag, tmpl, in_t) \
static void yuv2plane1_ ## depth ## endian_tag ## _c(const int16_t *src, \
                                                     uint8_t *dest, int dstW, \
                                                     const uint8_t *dither, \
                                                     int offset) \
{ \
    yuv2plane1_ ## tmpl ## _c_template((const in_t *) src, \
                                       (uint16_t *) dest, dstW, \
                                       be_flag, depth); \
} \
static void yuv2planeX_ ## depth ## endian_tag ## _c(const int16_t *filter, \
                                                     int filterSize, \
                                                     const int16_t **src, \
                                                     uint8_t *dest, int dstW, \
                                                     const uint8_t *dither, \
                                                     int offset) \
{ \
    yuv2planeX_ ## tmpl ## _c_template(filter, filterSize, \
                                       (const in_t **) src, \
                                       (uint16_t *) dest, dstW, \
                                       be_flag, depth); \
}
371 
372 yuv2NBPS( 9, BE, 1, 10, int16_t)
373 yuv2NBPS( 9, LE, 0, 10, int16_t)
374 yuv2NBPS(10, BE, 1, 10, int16_t)
375 yuv2NBPS(10, LE, 0, 10, int16_t)
376 yuv2NBPS(12, BE, 1, 10, int16_t)
377 yuv2NBPS(12, LE, 0, 10, int16_t)
378 yuv2NBPS(14, BE, 1, 10, int16_t)
379 yuv2NBPS(14, LE, 0, 10, int16_t)
380 yuv2NBPS(16, BE, 1, 16, int32_t)
381 yuv2NBPS(16, LE, 0, 16, int32_t)
382 
383 
/**
 * Little-endian 16-bit interleaved chroma writer: forwards to
 * yuv2nv12cX_16_c_template with big_endian = 0 and output_bits = 16.
 * dstFormat is not used.
 */
static void yuv2nv12cX_16LE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
                              const int16_t *chrFilter, int chrFilterSize,
                              const int16_t **chrUSrc, const int16_t **chrVSrc,
                              uint8_t *dest8, int chrDstW)
{
    yuv2nv12cX_16_c_template(0, chrDither,
                             chrFilter, chrFilterSize,
                             chrUSrc, chrVSrc,
                             dest8, chrDstW, 16);
}
391 
/**
 * Big-endian 16-bit interleaved chroma writer: forwards to
 * yuv2nv12cX_16_c_template with big_endian = 1 and output_bits = 16.
 * dstFormat is not used.
 */
static void yuv2nv12cX_16BE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
                              const int16_t *chrFilter, int chrFilterSize,
                              const int16_t **chrUSrc, const int16_t **chrVSrc,
                              uint8_t *dest8, int chrDstW)
{
    yuv2nv12cX_16_c_template(1, chrDither,
                             chrFilter, chrFilterSize,
                             chrUSrc, chrVSrc,
                             dest8, chrDstW, 16);
}
399 
/**
 * Write one line of 8-bit samples by vertically filtering filterSize lines
 * of 16-bit intermediates, with dithering.
 *
 * For each output position the sum starts at the dither value for column
 * (position + offset) & 7 shifted left by 12, the filtered inputs are added,
 * and the result is shifted right by 19 and clipped to 0..255.
 *
 * @param filter     filterSize coefficients
 * @param filterSize number of taps / input lines
 * @param src        filterSize input lines, each with at least dstW samples
 * @param dest       output bytes, at least dstW entries
 * @param dstW       number of samples to write
 * @param dither     8 dither values
 * @param offset     added to the position before selecting a dither value
 */
static void yuv2planeX_8_c(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x, tap;

    for (x = 0; x < dstW; x++) {
        int sum = dither[(x + offset) & 7] << 12;

        for (tap = 0; tap < filterSize; tap++)
            sum += src[tap][x] * filter[tap];

        dest[x] = av_clip_uint8(sum >> 19);
    }
}
414 
/**
 * Write one line of 8-bit samples from 16-bit intermediates with dithering
 * (no vertical filter).
 *
 * Each input has the dither value for column (position + offset) & 7 added,
 * is shifted right by 7 and clipped to 0..255.
 *
 * @param src    input samples, at least dstW entries
 * @param dest   output bytes, at least dstW entries
 * @param dstW   number of samples to write
 * @param dither 8 dither values
 * @param offset added to the position before selecting a dither value
 */
static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    int x;

    for (x = 0; x < dstW; x++) {
        int dithered = src[x] + dither[(x + offset) & 7];

        dest[x] = av_clip_uint8(dithered >> 7);
    }
}
424 
/**
 * Write one line of interleaved 8-bit chroma by vertically filtering
 * chrFilterSize lines of each chroma plane, with dithering.
 *
 * When isSwappedChroma(dstFormat) is false each output pair is (U, V);
 * otherwise it is (V, U). U uses the dither value at column position & 7,
 * V the one at (position + 3) & 7, regardless of the output order. Sums
 * start at dither << 12 and are shifted right by 19 and clipped to 0..255.
 *
 * @param dstFormat     destination format, only passed to isSwappedChroma()
 * @param chrDither     8 dither values
 * @param chrFilter     chrFilterSize coefficients
 * @param chrFilterSize number of taps / input lines per plane
 * @param chrUSrc       U input lines
 * @param chrVSrc       V input lines
 * @param dest          output bytes, 2 * chrDstW entries
 * @param chrDstW       number of chroma pairs to write
 */
static void yuv2nv12cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
                         const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest, int chrDstW)
{
    /* Plane and dither column offset feeding the even / odd output byte. */
    const int16_t **evenSrc, **oddSrc;
    int evenOff, oddOff;
    int x, tap;

    if (!isSwappedChroma(dstFormat)) {
        evenSrc = chrUSrc; evenOff = 0;
        oddSrc  = chrVSrc; oddOff  = 3;
    } else {
        evenSrc = chrVSrc; evenOff = 3;
        oddSrc  = chrUSrc; oddOff  = 0;
    }

    for (x = 0; x < chrDstW; x++) {
        int even = chrDither[(x + evenOff) & 7] << 12;
        int odd  = chrDither[(x + oddOff)  & 7] << 12;

        for (tap = 0; tap < chrFilterSize; tap++) {
            even += evenSrc[tap][x] * chrFilter[tap];
            odd  += oddSrc[tap][x]  * chrFilter[tap];
        }

        dest[2 * x]     = av_clip_uint8(even >> 19);
        dest[2 * x + 1] = av_clip_uint8(odd >> 19);
    }
}
459 
460 
/*
 * Store one 16-bit sample at pos with the 10-bit value in the upper bits.
 *
 * val is shifted right by the caller's local "shift", clipped to 10
 * unsigned bits with av_clip_uintp2(), and shifted left by 6. The caller's
 * local "big_endian" selects AV_WB16 (non-zero) or AV_WL16 (zero).
 *
 * "shift" and "big_endian" are taken from the enclosing scope by name. The
 * body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else), and val is parenthesized so
 * compound expressions expand as intended.
 */
#define output_pixel(pos, val) \
    do { \
        if (big_endian) { \
            AV_WB16(pos, av_clip_uintp2((val) >> shift, 10) << 6); \
        } else { \
            AV_WL16(pos, av_clip_uintp2((val) >> shift, 10) << 6); \
        } \
    } while (0)
467 
/**
 * Write one line of luma through the P010 output_pixel() macro from 16-bit
 * intermediates (no vertical filter).
 *
 * Each input gets a rounding term of 1 << (shift - 1) added, with shift = 5;
 * output_pixel() then shifts, clips to 10 bits, shifts left by 6 and stores
 * in the requested byte order.
 *
 * @param src        input samples, at least dstW entries
 * @param dest       output samples, at least dstW entries
 * @param dstW       number of samples to write
 * @param big_endian non-zero for big-endian stores, zero for little-endian
 */
static void yuv2p010l1_c(const int16_t *src,
                         uint16_t *dest, int dstW,
                         int big_endian)
{
    /* "shift" and "big_endian" are read by name inside output_pixel(). */
    int shift = 5;
    int x;

    for (x = 0; x < dstW; x++) {
        int rounded = src[x] + (1 << (shift - 1));

        output_pixel(&dest[x], rounded);
    }
}
480 
/**
 * Write one line of luma through the P010 output_pixel() macro by
 * vertically filtering filterSize lines of 16-bit intermediates.
 *
 * The sum starts at the rounding term 1 << (shift - 1), with shift = 17;
 * output_pixel() then shifts, clips to 10 bits, shifts left by 6 and stores
 * in the requested byte order.
 *
 * @param filter     filterSize coefficients
 * @param filterSize number of taps / input lines
 * @param src        filterSize input lines, each with at least dstW samples
 * @param dest       output samples, at least dstW entries
 * @param dstW       number of samples to write
 * @param big_endian non-zero for big-endian stores, zero for little-endian
 */
static void yuv2p010lX_c(const int16_t *filter, int filterSize,
                         const int16_t **src, uint16_t *dest, int dstW,
                         int big_endian)
{
    /* "shift" and "big_endian" are read by name inside output_pixel(). */
    int shift = 17;
    int x, tap;

    for (x = 0; x < dstW; x++) {
        int sum = 1 << (shift - 1);

        for (tap = 0; tap < filterSize; tap++)
            sum += src[tap][x] * filter[tap];

        output_pixel(&dest[x], sum);
    }
}
497 
/**
 * Write one line of interleaved chroma (U at even, V at odd output
 * positions) through the P010 output_pixel() macro by vertically filtering
 * chrFilterSize lines of each plane. chrDither is not used.
 *
 * @param big_endian    non-zero for big-endian stores, zero for little-endian
 * @param chrDither     unused
 * @param chrFilter     chrFilterSize coefficients
 * @param chrFilterSize number of taps / input lines per plane
 * @param chrUSrc       U input lines
 * @param chrVSrc       V input lines
 * @param dest8         output buffer, written as 2 * chrDstW 16-bit samples
 * @param chrDstW       number of U/V pairs to write
 */
static void yuv2p010cX_c(int big_endian, const uint8_t *chrDither,
                         const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest8, int chrDstW)
{
    uint16_t *out16 = (uint16_t*)dest8;
    /* "shift" and "big_endian" are read by name inside output_pixel(). */
    int shift = 17;
    int x, tap;

    for (x = 0; x < chrDstW; x++) {
        int u = 1 << (shift - 1);
        int v = 1 << (shift - 1);

        for (tap = 0; tap < chrFilterSize; tap++) {
            u += chrUSrc[tap][x] * chrFilter[tap];
            v += chrVSrc[tap][x] * chrFilter[tap];
        }

        output_pixel(&out16[2 * x],     u);
        output_pixel(&out16[2 * x + 1], v);
    }
}
520 
/** Little-endian wrapper around yuv2p010l1_c(); dither and offset are unused. */
static void yuv2p010l1_LE_c(const int16_t *src,
                            uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    uint16_t *out16 = (uint16_t*)dest;

    yuv2p010l1_c(src, out16, dstW, 0);
}
527 
/** Big-endian wrapper around yuv2p010l1_c(); dither and offset are unused. */
static void yuv2p010l1_BE_c(const int16_t *src,
                            uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    uint16_t *out16 = (uint16_t*)dest;

    yuv2p010l1_c(src, out16, dstW, 1);
}
534 
/** Little-endian wrapper around yuv2p010lX_c(); dither and offset are unused. */
static void yuv2p010lX_LE_c(const int16_t *filter, int filterSize,
                            const int16_t **src, uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    uint16_t *out16 = (uint16_t*)dest;

    yuv2p010lX_c(filter, filterSize, src, out16, dstW, 0);
}
541 
/** Big-endian wrapper around yuv2p010lX_c(); dither and offset are unused. */
static void yuv2p010lX_BE_c(const int16_t *filter, int filterSize,
                            const int16_t **src, uint8_t *dest, int dstW,
                            const uint8_t *dither, int offset)
{
    uint16_t *out16 = (uint16_t*)dest;

    yuv2p010lX_c(filter, filterSize, src, out16, dstW, 1);
}
548 
/** Little-endian wrapper around yuv2p010cX_c(); dstFormat is unused. */
static void yuv2p010cX_LE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
                            const int16_t *chrFilter, int chrFilterSize,
                            const int16_t **chrUSrc, const int16_t **chrVSrc,
                            uint8_t *dest8, int chrDstW)
{
    yuv2p010cX_c(0, chrDither,
                 chrFilter, chrFilterSize,
                 chrUSrc, chrVSrc,
                 dest8, chrDstW);
}
556 
/** Big-endian wrapper around yuv2p010cX_c(); dstFormat is unused. */
static void yuv2p010cX_BE_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
                            const int16_t *chrFilter, int chrFilterSize,
                            const int16_t **chrUSrc, const int16_t **chrVSrc,
                            uint8_t *dest8, int chrDstW)
{
    yuv2p010cX_c(1, chrDither,
                 chrFilter, chrFilterSize,
                 chrUSrc, chrVSrc,
                 dest8, chrDstW);
}
564 
565 #undef output_pixel
566 
567 
/*
 * Shift acc left by one and set its lowest bit when val >= 234.
 *
 * Wrapped in do { } while (0) so both statements stay together when the
 * macro is used as the body of an unbraced if/else or loop.
 */
#define accumulate_bit(acc, val) \
    do { \
        (acc) <<= 1; \
        (acc) |= (val) >= 234; \
    } while (0)
/*
 * Store the accumulated bits acc into pos: unchanged when the caller's
 * local "target" is AV_PIX_FMT_MONOBLACK, bitwise-inverted otherwise.
 *
 * pos is expanded exactly once on whichever branch runs, so an argument
 * such as *dest++ advances only once. Wrapped in do { } while (0) so the
 * macro behaves as a single statement (safe inside an unbraced if/else).
 */
#define output_pixel(pos, acc) \
    do { \
        if (target == AV_PIX_FMT_MONOBLACK) { \
            pos = (acc); \
        } else { \
            pos = ~(acc); \
        } \
    } while (0)
577 
578 static av_always_inline void
yuv2mono_X_c_template(SwsContext * c,const int16_t * lumFilter,const int16_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int16_t ** chrUSrc,const int16_t ** chrVSrc,int chrFilterSize,const int16_t ** alpSrc,uint8_t * dest,int dstW,int y,enum AVPixelFormat target)579 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
580                       const int16_t **lumSrc, int lumFilterSize,
581                       const int16_t *chrFilter, const int16_t **chrUSrc,
582                       const int16_t **chrVSrc, int chrFilterSize,
583                       const int16_t **alpSrc, uint8_t *dest, int dstW,
584                       int y, enum AVPixelFormat target)
585 {
586     const uint8_t * const d128 = ff_dither_8x8_220[y&7];
587     int i;
588     unsigned acc = 0;
589     int err = 0;
590 
591     for (i = 0; i < dstW; i += 2) {
592         int j;
593         int Y1 = 1 << 18;
594         int Y2 = 1 << 18;
595 
596         for (j = 0; j < lumFilterSize; j++) {
597             Y1 += lumSrc[j][i]   * lumFilter[j];
598             Y2 += lumSrc[j][i+1] * lumFilter[j];
599         }
600         Y1 >>= 19;
601         Y2 >>= 19;
602         if ((Y1 | Y2) & 0x100) {
603             Y1 = av_clip_uint8(Y1);
604             Y2 = av_clip_uint8(Y2);
605         }
606         if (c->dither == SWS_DITHER_ED) {
607             Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
608             c->dither_error[0][i] = err;
609             acc = 2*acc + (Y1 >= 128);
610             Y1 -= 220*(acc&1);
611 
612             err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4);
613             c->dither_error[0][i+1] = Y1;
614             acc = 2*acc + (err >= 128);
615             err -= 220*(acc&1);
616         } else {
617             accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
618             accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
619         }
620         if ((i & 7) == 6) {
621             output_pixel(*dest++, acc);
622         }
623     }
624     c->dither_error[0][i] = err;
625 
626     if (i & 6) {
627         output_pixel(*dest, acc);
628     }
629 }
630 
631 static av_always_inline void
yuv2mono_2_c_template(SwsContext * c,const int16_t * buf[2],const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf[2],uint8_t * dest,int dstW,int yalpha,int uvalpha,int y,enum AVPixelFormat target)632 yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
633                       const int16_t *ubuf[2], const int16_t *vbuf[2],
634                       const int16_t *abuf[2], uint8_t *dest, int dstW,
635                       int yalpha, int uvalpha, int y,
636                       enum AVPixelFormat target)
637 {
638     const int16_t *buf0  = buf[0],  *buf1  = buf[1];
639     const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
640     int  yalpha1 = 4096 - yalpha;
641     int i;
642     av_assert2(yalpha  <= 4096U);
643 
644     if (c->dither == SWS_DITHER_ED) {
645         int err = 0;
646         unsigned acc = 0;
647         for (i = 0; i < dstW; i +=2) {
648             int Y;
649 
650             Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
651             Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
652             c->dither_error[0][i] = err;
653             acc = 2*acc + (Y >= 128);
654             Y -= 220*(acc&1);
655 
656             err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
657             err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
658             c->dither_error[0][i+1] = Y;
659             acc = 2*acc + (err >= 128);
660             err -= 220*(acc&1);
661 
662             if ((i & 7) == 6)
663                 output_pixel(*dest++, acc);
664         }
665         c->dither_error[0][i] = err;
666     } else {
667         for (i = 0; i < dstW; i += 8) {
668             int Y;
669             unsigned acc = 0;
670 
671             Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
672             accumulate_bit(acc, Y + d128[0]);
673             Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
674             accumulate_bit(acc, Y + d128[1]);
675             Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19;
676             accumulate_bit(acc, Y + d128[2]);
677             Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19;
678             accumulate_bit(acc, Y + d128[3]);
679             Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19;
680             accumulate_bit(acc, Y + d128[4]);
681             Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19;
682             accumulate_bit(acc, Y + d128[5]);
683             Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19;
684             accumulate_bit(acc, Y + d128[6]);
685             Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19;
686             accumulate_bit(acc, Y + d128[7]);
687 
688             output_pixel(*dest++, acc);
689         }
690     }
691 }
692 
693 static av_always_inline void
yuv2mono_1_c_template(SwsContext * c,const int16_t * buf0,const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf0,uint8_t * dest,int dstW,int uvalpha,int y,enum AVPixelFormat target)694 yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
695                       const int16_t *ubuf[2], const int16_t *vbuf[2],
696                       const int16_t *abuf0, uint8_t *dest, int dstW,
697                       int uvalpha, int y, enum AVPixelFormat target)
698 {
699     const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
700     int i;
701 
702     if (c->dither == SWS_DITHER_ED) {
703         int err = 0;
704         unsigned acc = 0;
705         for (i = 0; i < dstW; i +=2) {
706             int Y;
707 
708             Y = ((buf0[i + 0] + 64) >> 7);
709             Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2] + 8 - 256)>>4;
710             c->dither_error[0][i] = err;
711             acc = 2*acc + (Y >= 128);
712             Y -= 220*(acc&1);
713 
714             err = ((buf0[i + 1] + 64) >> 7);
715             err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3] + 8 - 256)>>4;
716             c->dither_error[0][i+1] = Y;
717             acc = 2*acc + (err >= 128);
718             err -= 220*(acc&1);
719 
720             if ((i & 7) == 6)
721                 output_pixel(*dest++, acc);
722         }
723         c->dither_error[0][i] = err;
724     } else {
725         for (i = 0; i < dstW; i += 8) {
726             unsigned acc = 0;
727             accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
728             accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
729             accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
730             accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]);
731             accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]);
732             accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]);
733             accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]);
734             accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]);
735 
736             output_pixel(*dest++, acc);
737         }
738     }
739 }
740 
741 #undef output_pixel
742 #undef accumulate_bit
743 
/*
 * Generate the three non-template entry points for one packed output format.
 *
 * The expansion defines name##ext##_X_c, name##ext##_2_c and name##ext##_1_c.
 * Each one forwards all of its arguments, followed by the pixel format
 * constant fmt, to the corresponding name##base##_{X,2,1}_c_template
 * function. base may be empty (as in the yuv2mono instantiations below).
 */
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, uint8_t *dest, int dstW, \
                                int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, fmt); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *buf[2], \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf[2], uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
                                  dest, dstW, yalpha, uvalpha, y, fmt); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *buf0, \
                                const int16_t *ubuf[2], const int16_t *vbuf[2], \
                                const int16_t *abuf0, uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, \
                                  abuf0, dest, dstW, uvalpha, \
                                  y, fmt); \
}

/* Wrappers around the yuv2mono templates for both monochrome polarities. */
YUV2PACKEDWRAPPER(yuv2mono,, white, AV_PIX_FMT_MONOWHITE)
YUV2PACKEDWRAPPER(yuv2mono,, black, AV_PIX_FMT_MONOBLACK)
778 
/*
 * Store one pixel pair (two luma samples sharing one U/V pair) as four
 * consecutive bytes starting at dest[pos], in the byte order selected by
 * target: Y1 U Y2 V for YUYV422, Y1 V Y2 U for YVYU422, and U Y1 V Y2 for
 * every other target (UYVY422).
 *
 * Expects `dest` and `target` to be in scope at the expansion site.
 * The body is wrapped in do { } while (0) so that the expansion is exactly
 * one statement and cannot capture a following `else`; pos and the value
 * arguments are parenthesized so arbitrary expressions may be passed.
 */
#define output_pixels(pos, Y1, U, Y2, V) \
    do { \
        if (target == AV_PIX_FMT_YUYV422) { \
            dest[(pos) + 0] = (Y1); \
            dest[(pos) + 1] = (U);  \
            dest[(pos) + 2] = (Y2); \
            dest[(pos) + 3] = (V);  \
        } else if (target == AV_PIX_FMT_YVYU422) { \
            dest[(pos) + 0] = (Y1); \
            dest[(pos) + 1] = (V);  \
            dest[(pos) + 2] = (Y2); \
            dest[(pos) + 3] = (U);  \
        } else { /* AV_PIX_FMT_UYVY422 */ \
            dest[(pos) + 0] = (U);  \
            dest[(pos) + 1] = (Y1); \
            dest[(pos) + 2] = (V);  \
            dest[(pos) + 3] = (Y2); \
        } \
    } while (0)
796 
797 static av_always_inline void
yuv2422_X_c_template(SwsContext * c,const int16_t * lumFilter,const int16_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int16_t ** chrUSrc,const int16_t ** chrVSrc,int chrFilterSize,const int16_t ** alpSrc,uint8_t * dest,int dstW,int y,enum AVPixelFormat target)798 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
799                      const int16_t **lumSrc, int lumFilterSize,
800                      const int16_t *chrFilter, const int16_t **chrUSrc,
801                      const int16_t **chrVSrc, int chrFilterSize,
802                      const int16_t **alpSrc, uint8_t *dest, int dstW,
803                      int y, enum AVPixelFormat target)
804 {
805     int i;
806 
807     for (i = 0; i < ((dstW + 1) >> 1); i++) {
808         int j;
809         int Y1 = 1 << 18;
810         int Y2 = 1 << 18;
811         int U  = 1 << 18;
812         int V  = 1 << 18;
813 
814         for (j = 0; j < lumFilterSize; j++) {
815             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
816             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
817         }
818         for (j = 0; j < chrFilterSize; j++) {
819             U += chrUSrc[j][i] * chrFilter[j];
820             V += chrVSrc[j][i] * chrFilter[j];
821         }
822         Y1 >>= 19;
823         Y2 >>= 19;
824         U  >>= 19;
825         V  >>= 19;
826         if ((Y1 | Y2 | U | V) & 0x100) {
827             Y1 = av_clip_uint8(Y1);
828             Y2 = av_clip_uint8(Y2);
829             U  = av_clip_uint8(U);
830             V  = av_clip_uint8(V);
831         }
832         output_pixels(4*i, Y1, U, Y2, V);
833     }
834 }
835 
836 static av_always_inline void
yuv2422_2_c_template(SwsContext * c,const int16_t * buf[2],const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf[2],uint8_t * dest,int dstW,int yalpha,int uvalpha,int y,enum AVPixelFormat target)837 yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
838                      const int16_t *ubuf[2], const int16_t *vbuf[2],
839                      const int16_t *abuf[2], uint8_t *dest, int dstW,
840                      int yalpha, int uvalpha, int y,
841                      enum AVPixelFormat target)
842 {
843     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
844                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
845                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
846     int  yalpha1 = 4096 - yalpha;
847     int uvalpha1 = 4096 - uvalpha;
848     int i;
849     av_assert2(yalpha  <= 4096U);
850     av_assert2(uvalpha <= 4096U);
851 
852     for (i = 0; i < ((dstW + 1) >> 1); i++) {
853         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
854         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
855         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
856         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
857 
858         if ((Y1 | Y2 | U | V) & 0x100) {
859             Y1 = av_clip_uint8(Y1);
860             Y2 = av_clip_uint8(Y2);
861             U  = av_clip_uint8(U);
862             V  = av_clip_uint8(V);
863         }
864 
865         output_pixels(i * 4, Y1, U, Y2, V);
866     }
867 }
868 
869 static av_always_inline void
yuv2422_1_c_template(SwsContext * c,const int16_t * buf0,const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf0,uint8_t * dest,int dstW,int uvalpha,int y,enum AVPixelFormat target)870 yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
871                      const int16_t *ubuf[2], const int16_t *vbuf[2],
872                      const int16_t *abuf0, uint8_t *dest, int dstW,
873                      int uvalpha, int y, enum AVPixelFormat target)
874 {
875     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
876     int i;
877 
878     if (uvalpha < 2048) {
879         for (i = 0; i < ((dstW + 1) >> 1); i++) {
880             int Y1 = (buf0[i * 2    ]+64) >> 7;
881             int Y2 = (buf0[i * 2 + 1]+64) >> 7;
882             int U  = (ubuf0[i]       +64) >> 7;
883             int V  = (vbuf0[i]       +64) >> 7;
884 
885             if ((Y1 | Y2 | U | V) & 0x100) {
886                 Y1 = av_clip_uint8(Y1);
887                 Y2 = av_clip_uint8(Y2);
888                 U  = av_clip_uint8(U);
889                 V  = av_clip_uint8(V);
890             }
891 
892             output_pixels(i * 4, Y1, U, Y2, V);
893         }
894     } else {
895         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
896         for (i = 0; i < ((dstW + 1) >> 1); i++) {
897             int Y1 = (buf0[i * 2    ]    + 64) >> 7;
898             int Y2 = (buf0[i * 2 + 1]    + 64) >> 7;
899             int U  = (ubuf0[i] + ubuf1[i]+128) >> 8;
900             int V  = (vbuf0[i] + vbuf1[i]+128) >> 8;
901 
902             if ((Y1 | Y2 | U | V) & 0x100) {
903                 Y1 = av_clip_uint8(Y1);
904                 Y2 = av_clip_uint8(Y2);
905                 U  = av_clip_uint8(U);
906                 V  = av_clip_uint8(V);
907             }
908 
909             output_pixels(i * 4, Y1, U, Y2, V);
910         }
911     }
912 }
913 
914 #undef output_pixels
915 
/*
 * Instantiate the _X_c / _2_c / _1_c wrappers around the yuv2422 templates,
 * one set per packed 4:2:2 byte order (yuv2yuyv422_*, yuv2yvyu422_*,
 * yuv2uyvy422_*).
 */
YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, AV_PIX_FMT_YUYV422)
YUV2PACKEDWRAPPER(yuv2, 422, yvyu422, AV_PIX_FMT_YVYU422)
YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422)
919 
/*
 * Component-order helpers for the 16-bit RGB writers. Both expect the local
 * variables R and B and the parameter `target` to be in scope.
 * For the RGB48 / RGBA64 targets R_B yields R and B_R yields B; for every
 * other target the two are swapped (R_B yields B, B_R yields R).
 */
#define R_B ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? R : B)
#define B_R ((target == AV_PIX_FMT_RGB48LE || target == AV_PIX_FMT_RGB48BE || target == AV_PIX_FMT_RGBA64LE || target == AV_PIX_FMT_RGBA64BE) ? B : R)
/*
 * Store val at pos with AV_WB16 when isBE(target) is true and with AV_WL16
 * otherwise. Expands to a bare if/else statement, so it must be used as a
 * complete statement.
 */
#define output_pixel(pos, val) \
    if (isBE(target)) { \
        AV_WB16(pos, val); \
    } else { \
        AV_WL16(pos, val); \
    }
928 
929 static av_always_inline void
yuv2ya16_X_c_template(SwsContext * c,const int16_t * lumFilter,const int32_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int32_t ** unused_chrUSrc,const int32_t ** unused_chrVSrc,int unused_chrFilterSize,const int32_t ** alpSrc,uint16_t * dest,int dstW,int y,enum AVPixelFormat target,int unused_hasAlpha,int unused_eightbytes)930 yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter,
931                         const int32_t **lumSrc, int lumFilterSize,
932                         const int16_t *chrFilter, const int32_t **unused_chrUSrc,
933                         const int32_t **unused_chrVSrc, int unused_chrFilterSize,
934                         const int32_t **alpSrc, uint16_t *dest, int dstW,
935                         int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
936 {
937     int hasAlpha = !!alpSrc;
938     int i;
939 
940     for (i = 0; i < dstW; i++) {
941         int j;
942         int Y = -0x40000000;
943         int A = 0xffff;
944 
945         for (j = 0; j < lumFilterSize; j++)
946             Y += lumSrc[j][i] * lumFilter[j];
947 
948         Y >>= 15;
949         Y += (1<<3) + 0x8000;
950         Y = av_clip_uint16(Y);
951 
952         if (hasAlpha) {
953             A = -0x40000000 + (1<<14);
954             for (j = 0; j < lumFilterSize; j++)
955                 A += alpSrc[j][i] * lumFilter[j];
956 
957             A >>= 15;
958             A += 0x8000;
959             A = av_clip_uint16(A);
960         }
961 
962         output_pixel(&dest[2 * i    ], Y);
963         output_pixel(&dest[2 * i + 1], A);
964     }
965 }
966 
967 static av_always_inline void
yuv2ya16_2_c_template(SwsContext * c,const int32_t * buf[2],const int32_t * unused_ubuf[2],const int32_t * unused_vbuf[2],const int32_t * abuf[2],uint16_t * dest,int dstW,int yalpha,int unused_uvalpha,int y,enum AVPixelFormat target,int unused_hasAlpha,int unused_eightbytes)968 yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2],
969                         const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
970                         const int32_t *abuf[2], uint16_t *dest, int dstW,
971                         int yalpha, int unused_uvalpha, int y,
972                         enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
973 {
974     int hasAlpha = abuf && abuf[0] && abuf[1];
975     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
976     *abuf0 = hasAlpha ? abuf[0] : NULL,
977     *abuf1 = hasAlpha ? abuf[1] : NULL;
978     int  yalpha1 = 4096 - yalpha;
979     int i;
980 
981     av_assert2(yalpha  <= 4096U);
982 
983     for (i = 0; i < dstW; i++) {
984         int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 15;
985         int A;
986 
987         Y = av_clip_uint16(Y);
988 
989         if (hasAlpha) {
990             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 15;
991             A = av_clip_uint16(A);
992         }
993 
994         output_pixel(&dest[2 * i    ], Y);
995         output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
996     }
997 }
998 
999 static av_always_inline void
yuv2ya16_1_c_template(SwsContext * c,const int32_t * buf0,const int32_t * unused_ubuf[2],const int32_t * unused_vbuf[2],const int32_t * abuf0,uint16_t * dest,int dstW,int unused_uvalpha,int y,enum AVPixelFormat target,int unused_hasAlpha,int unused_eightbytes)1000 yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0,
1001                         const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2],
1002                         const int32_t *abuf0, uint16_t *dest, int dstW,
1003                         int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes)
1004 {
1005     int hasAlpha = !!abuf0;
1006     int i;
1007 
1008     for (i = 0; i < dstW; i++) {
1009         int Y = buf0[i] >> 3;/* 19 - 16 */
1010         int A;
1011 
1012         Y = av_clip_uint16(Y);
1013 
1014         if (hasAlpha) {
1015             A = abuf0[i] >> 3;
1016             if (A & 0x100)
1017                 A = av_clip_uint16(A);
1018         }
1019 
1020         output_pixel(&dest[2 * i    ], Y);
1021         output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535);
1022     }
1023 }
1024 
1025 static av_always_inline void
yuv2rgba64_X_c_template(SwsContext * c,const int16_t * lumFilter,const int32_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int32_t ** chrUSrc,const int32_t ** chrVSrc,int chrFilterSize,const int32_t ** alpSrc,uint16_t * dest,int dstW,int y,enum AVPixelFormat target,int hasAlpha,int eightbytes)1026 yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter,
1027                        const int32_t **lumSrc, int lumFilterSize,
1028                        const int16_t *chrFilter, const int32_t **chrUSrc,
1029                        const int32_t **chrVSrc, int chrFilterSize,
1030                        const int32_t **alpSrc, uint16_t *dest, int dstW,
1031                        int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
1032 {
1033     int i;
1034     int A1 = 0xffff<<14, A2 = 0xffff<<14;
1035 
1036     for (i = 0; i < ((dstW + 1) >> 1); i++) {
1037         int j;
1038         int Y1 = -0x40000000;
1039         int Y2 = -0x40000000;
1040         int U  = -(128 << 23); // 19
1041         int V  = -(128 << 23);
1042         int R, G, B;
1043 
1044         for (j = 0; j < lumFilterSize; j++) {
1045             Y1 += lumSrc[j][i * 2]     * (unsigned)lumFilter[j];
1046             Y2 += lumSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
1047         }
1048         for (j = 0; j < chrFilterSize; j++) {;
1049             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
1050             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
1051         }
1052 
1053         if (hasAlpha) {
1054             A1 = -0x40000000;
1055             A2 = -0x40000000;
1056             for (j = 0; j < lumFilterSize; j++) {
1057                 A1 += alpSrc[j][i * 2]     * (unsigned)lumFilter[j];
1058                 A2 += alpSrc[j][i * 2 + 1] * (unsigned)lumFilter[j];
1059             }
1060             A1 >>= 1;
1061             A1 += 0x20002000;
1062             A2 >>= 1;
1063             A2 += 0x20002000;
1064         }
1065 
1066         // 8 bits: 12+15=27; 16 bits: 12+19=31
1067         Y1 >>= 14; // 10
1068         Y1 += 0x10000;
1069         Y2 >>= 14;
1070         Y2 += 0x10000;
1071         U  >>= 14;
1072         V  >>= 14;
1073 
1074         // 8 bits: 27 -> 17 bits, 16 bits: 31 - 14 = 17 bits
1075         Y1 -= c->yuv2rgb_y_offset;
1076         Y2 -= c->yuv2rgb_y_offset;
1077         Y1 *= c->yuv2rgb_y_coeff;
1078         Y2 *= c->yuv2rgb_y_coeff;
1079         Y1 += (1 << 13) - (1 << 29); // 21
1080         Y2 += (1 << 13) - (1 << 29);
1081         // 8 bits: 17 + 13 bits = 30 bits, 16 bits: 17 + 13 bits = 30 bits
1082 
1083         R = V * c->yuv2rgb_v2r_coeff;
1084         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1085         B =                            U * c->yuv2rgb_u2b_coeff;
1086 
1087         // 8 bits: 30 - 22 = 8 bits, 16 bits: 30 bits - 14 = 16 bits
1088         output_pixel(&dest[0], av_clip_uintp2(((R_B + Y1) >> 14) + (1<<15), 16));
1089         output_pixel(&dest[1], av_clip_uintp2(((  G + Y1) >> 14) + (1<<15), 16));
1090         output_pixel(&dest[2], av_clip_uintp2(((B_R + Y1) >> 14) + (1<<15), 16));
1091         if (eightbytes) {
1092             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
1093             output_pixel(&dest[4], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
1094             output_pixel(&dest[5], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
1095             output_pixel(&dest[6], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
1096             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
1097             dest += 8;
1098         } else {
1099             output_pixel(&dest[3], av_clip_uintp2(((R_B + Y2) >> 14) + (1<<15), 16));
1100             output_pixel(&dest[4], av_clip_uintp2(((  G + Y2) >> 14) + (1<<15), 16));
1101             output_pixel(&dest[5], av_clip_uintp2(((B_R + Y2) >> 14) + (1<<15), 16));
1102             dest += 6;
1103         }
1104     }
1105 }
1106 
1107 static av_always_inline void
yuv2rgba64_2_c_template(SwsContext * c,const int32_t * buf[2],const int32_t * ubuf[2],const int32_t * vbuf[2],const int32_t * abuf[2],uint16_t * dest,int dstW,int yalpha,int uvalpha,int y,enum AVPixelFormat target,int hasAlpha,int eightbytes)1108 yuv2rgba64_2_c_template(SwsContext *c, const int32_t *buf[2],
1109                        const int32_t *ubuf[2], const int32_t *vbuf[2],
1110                        const int32_t *abuf[2], uint16_t *dest, int dstW,
1111                        int yalpha, int uvalpha, int y,
1112                        enum AVPixelFormat target, int hasAlpha, int eightbytes)
1113 {
1114     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
1115                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1116                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1117                   *abuf0 = hasAlpha ? abuf[0] : NULL,
1118                   *abuf1 = hasAlpha ? abuf[1] : NULL;
1119     int  yalpha1 = 4096 - yalpha;
1120     int uvalpha1 = 4096 - uvalpha;
1121     int i;
1122     int A1 = 0xffff<<14, A2 = 0xffff<<14;
1123 
1124     av_assert2(yalpha  <= 4096U);
1125     av_assert2(uvalpha <= 4096U);
1126 
1127     for (i = 0; i < ((dstW + 1) >> 1); i++) {
1128         unsigned Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha) >> 14;
1129         unsigned Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha) >> 14;
1130         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha - (128 << 23)) >> 14;
1131         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha - (128 << 23)) >> 14;
1132         int R, G, B;
1133 
1134         Y1 -= c->yuv2rgb_y_offset;
1135         Y2 -= c->yuv2rgb_y_offset;
1136         Y1 *= c->yuv2rgb_y_coeff;
1137         Y2 *= c->yuv2rgb_y_coeff;
1138         Y1 += (1 << 13) - (1 << 29);
1139         Y2 += (1 << 13) - (1 << 29);
1140 
1141         R = V * c->yuv2rgb_v2r_coeff;
1142         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1143         B =                            U * c->yuv2rgb_u2b_coeff;
1144 
1145         if (hasAlpha) {
1146             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 1;
1147             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 1;
1148 
1149             A1 += 1 << 13;
1150             A2 += 1 << 13;
1151         }
1152 
1153         output_pixel(&dest[0], av_clip_uintp2(((int)(R_B + Y1) >> 14) + (1<<15), 16));
1154         output_pixel(&dest[1], av_clip_uintp2(((int)(  G + Y1) >> 14) + (1<<15), 16));
1155         output_pixel(&dest[2], av_clip_uintp2(((int)(B_R + Y1) >> 14) + (1<<15), 16));
1156         if (eightbytes) {
1157             output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
1158             output_pixel(&dest[4], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
1159             output_pixel(&dest[5], av_clip_uintp2(((int)(  G + Y2) >> 14) + (1<<15), 16));
1160             output_pixel(&dest[6], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
1161             output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
1162             dest += 8;
1163         } else {
1164             output_pixel(&dest[3], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
1165             output_pixel(&dest[4], av_clip_uintp2(((int)(  G + Y2) >> 14) + (1<<15), 16));
1166             output_pixel(&dest[5], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
1167             dest += 6;
1168         }
1169     }
1170 }
1171 
1172 static av_always_inline void
yuv2rgba64_1_c_template(SwsContext * c,const int32_t * buf0,const int32_t * ubuf[2],const int32_t * vbuf[2],const int32_t * abuf0,uint16_t * dest,int dstW,int uvalpha,int y,enum AVPixelFormat target,int hasAlpha,int eightbytes)1173 yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0,
1174                        const int32_t *ubuf[2], const int32_t *vbuf[2],
1175                        const int32_t *abuf0, uint16_t *dest, int dstW,
1176                        int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
1177 {
1178     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
1179     int i;
1180     int A1 = 0xffff<<14, A2= 0xffff<<14;
1181 
1182     if (uvalpha < 2048) {
1183         for (i = 0; i < ((dstW + 1) >> 1); i++) {
1184             SUINT Y1 = (buf0[i * 2]    ) >> 2;
1185             SUINT Y2 = (buf0[i * 2 + 1]) >> 2;
1186             int U  = (ubuf0[i] - (128 << 11)) >> 2;
1187             int V  = (vbuf0[i] - (128 << 11)) >> 2;
1188             int R, G, B;
1189 
1190             Y1 -= c->yuv2rgb_y_offset;
1191             Y2 -= c->yuv2rgb_y_offset;
1192             Y1 *= c->yuv2rgb_y_coeff;
1193             Y2 *= c->yuv2rgb_y_coeff;
1194             Y1 += (1 << 13) - (1 << 29);
1195             Y2 += (1 << 13) - (1 << 29);
1196 
1197             if (hasAlpha) {
1198                 A1 = abuf0[i * 2    ] << 11;
1199                 A2 = abuf0[i * 2 + 1] << 11;
1200 
1201                 A1 += 1 << 13;
1202                 A2 += 1 << 13;
1203             }
1204 
1205             R = V * c->yuv2rgb_v2r_coeff;
1206             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1207             B =                            U * c->yuv2rgb_u2b_coeff;
1208 
1209             output_pixel(&dest[0], av_clip_uintp2(((int)(R_B + Y1) >> 14) + (1<<15), 16));
1210             output_pixel(&dest[1], av_clip_uintp2(((int)(  G + Y1) >> 14) + (1<<15), 16));
1211             output_pixel(&dest[2], av_clip_uintp2(((int)(B_R + Y1) >> 14) + (1<<15), 16));
1212             if (eightbytes) {
1213                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
1214                 output_pixel(&dest[4], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
1215                 output_pixel(&dest[5], av_clip_uintp2(((int)(  G + Y2) >> 14) + (1<<15), 16));
1216                 output_pixel(&dest[6], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
1217                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
1218                 dest += 8;
1219             } else {
1220                 output_pixel(&dest[3], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
1221                 output_pixel(&dest[4], av_clip_uintp2(((int)(  G + Y2) >> 14) + (1<<15), 16));
1222                 output_pixel(&dest[5], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
1223                 dest += 6;
1224             }
1225         }
1226     } else {
1227         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
1228         int A1 = 0xffff<<14, A2 = 0xffff<<14;
1229         for (i = 0; i < ((dstW + 1) >> 1); i++) {
1230             SUINT Y1 = (buf0[i * 2]    ) >> 2;
1231             SUINT Y2 = (buf0[i * 2 + 1]) >> 2;
1232             int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
1233             int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
1234             int R, G, B;
1235 
1236             Y1 -= c->yuv2rgb_y_offset;
1237             Y2 -= c->yuv2rgb_y_offset;
1238             Y1 *= c->yuv2rgb_y_coeff;
1239             Y2 *= c->yuv2rgb_y_coeff;
1240             Y1 += (1 << 13) - (1 << 29);
1241             Y2 += (1 << 13) - (1 << 29);
1242 
1243             if (hasAlpha) {
1244                 A1 = abuf0[i * 2    ] << 11;
1245                 A2 = abuf0[i * 2 + 1] << 11;
1246 
1247                 A1 += 1 << 13;
1248                 A2 += 1 << 13;
1249             }
1250 
1251             R = V * c->yuv2rgb_v2r_coeff;
1252             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1253             B =                            U * c->yuv2rgb_u2b_coeff;
1254 
1255             output_pixel(&dest[0], av_clip_uintp2(((int)(R_B + Y1) >> 14) + (1<<15), 16));
1256             output_pixel(&dest[1], av_clip_uintp2(((int)(  G + Y1) >> 14) + (1<<15), 16));
1257             output_pixel(&dest[2], av_clip_uintp2(((int)(B_R + Y1) >> 14) + (1<<15), 16));
1258             if (eightbytes) {
1259                 output_pixel(&dest[3], av_clip_uintp2(A1      , 30) >> 14);
1260                 output_pixel(&dest[4], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
1261                 output_pixel(&dest[5], av_clip_uintp2(((int)(  G + Y2) >> 14) + (1<<15), 16));
1262                 output_pixel(&dest[6], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
1263                 output_pixel(&dest[7], av_clip_uintp2(A2      , 30) >> 14);
1264                 dest += 8;
1265             } else {
1266                 output_pixel(&dest[3], av_clip_uintp2(((int)(R_B + Y2) >> 14) + (1<<15), 16));
1267                 output_pixel(&dest[4], av_clip_uintp2(((int)(  G + Y2) >> 14) + (1<<15), 16));
1268                 output_pixel(&dest[5], av_clip_uintp2(((int)(B_R + Y2) >> 14) + (1<<15), 16));
1269                 dest += 6;
1270             }
1271         }
1272     }
1273 }
1274 
1275 static av_always_inline void
yuv2rgba64_full_X_c_template(SwsContext * c,const int16_t * lumFilter,const int32_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int32_t ** chrUSrc,const int32_t ** chrVSrc,int chrFilterSize,const int32_t ** alpSrc,uint16_t * dest,int dstW,int y,enum AVPixelFormat target,int hasAlpha,int eightbytes)1276 yuv2rgba64_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
1277                        const int32_t **lumSrc, int lumFilterSize,
1278                        const int16_t *chrFilter, const int32_t **chrUSrc,
1279                        const int32_t **chrVSrc, int chrFilterSize,
1280                        const int32_t **alpSrc, uint16_t *dest, int dstW,
1281                        int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
1282 {
1283     int i;
1284     int A = 0xffff<<14;
1285 
1286     for (i = 0; i < dstW; i++) {
1287         int j;
1288         int Y  = -0x40000000;
1289         int U  = -(128 << 23); // 19
1290         int V  = -(128 << 23);
1291         int R, G, B;
1292 
1293         for (j = 0; j < lumFilterSize; j++) {
1294             Y += lumSrc[j][i]  * (unsigned)lumFilter[j];
1295         }
1296         for (j = 0; j < chrFilterSize; j++) {;
1297             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
1298             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
1299         }
1300 
1301         if (hasAlpha) {
1302             A = -0x40000000;
1303             for (j = 0; j < lumFilterSize; j++) {
1304                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
1305             }
1306             A >>= 1;
1307             A += 0x20002000;
1308         }
1309 
1310         // 8bit: 12+15=27; 16-bit: 12+19=31
1311         Y  >>= 14; // 10
1312         Y += 0x10000;
1313         U  >>= 14;
1314         V  >>= 14;
1315 
1316         // 8bit: 27 -> 17bit, 16bit: 31 - 14 = 17bit
1317         Y -= c->yuv2rgb_y_offset;
1318         Y *= c->yuv2rgb_y_coeff;
1319         Y += (1 << 13) - (1<<29); // 21
1320         // 8bit: 17 + 13bit = 30bit, 16bit: 17 + 13bit = 30bit
1321 
1322         R = V * c->yuv2rgb_v2r_coeff;
1323         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1324         B =                            U * c->yuv2rgb_u2b_coeff;
1325 
1326         // 8bit: 30 - 22 = 8bit, 16bit: 30bit - 14 = 16bit
1327         output_pixel(&dest[0], av_clip_uintp2(((R_B + Y)>>14) + (1<<15), 16));
1328         output_pixel(&dest[1], av_clip_uintp2(((  G + Y)>>14) + (1<<15), 16));
1329         output_pixel(&dest[2], av_clip_uintp2(((B_R + Y)>>14) + (1<<15), 16));
1330         if (eightbytes) {
1331             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
1332             dest += 4;
1333         } else {
1334             dest += 3;
1335         }
1336     }
1337 }
1338 
1339 static av_always_inline void
yuv2rgba64_full_2_c_template(SwsContext * c,const int32_t * buf[2],const int32_t * ubuf[2],const int32_t * vbuf[2],const int32_t * abuf[2],uint16_t * dest,int dstW,int yalpha,int uvalpha,int y,enum AVPixelFormat target,int hasAlpha,int eightbytes)1340 yuv2rgba64_full_2_c_template(SwsContext *c, const int32_t *buf[2],
1341                        const int32_t *ubuf[2], const int32_t *vbuf[2],
1342                        const int32_t *abuf[2], uint16_t *dest, int dstW,
1343                        int yalpha, int uvalpha, int y,
1344                        enum AVPixelFormat target, int hasAlpha, int eightbytes)
1345 {
1346     const int32_t *buf0  = buf[0],  *buf1  = buf[1],
1347                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1348                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1349                   *abuf0 = hasAlpha ? abuf[0] : NULL,
1350                   *abuf1 = hasAlpha ? abuf[1] : NULL;
1351     int  yalpha1 = 4096 - yalpha;
1352     int uvalpha1 = 4096 - uvalpha;
1353     int i;
1354     int A = 0xffff<<14;
1355 
1356     av_assert2(yalpha  <= 4096U);
1357     av_assert2(uvalpha <= 4096U);
1358 
1359     for (i = 0; i < dstW; i++) {
1360         int Y  = (buf0[i]     * yalpha1  + buf1[i]     * yalpha) >> 14;
1361         int U  = (ubuf0[i]   * uvalpha1 + ubuf1[i]     * uvalpha - (128 << 23)) >> 14;
1362         int V  = (vbuf0[i]   * uvalpha1 + vbuf1[i]     * uvalpha - (128 << 23)) >> 14;
1363         int R, G, B;
1364 
1365         Y -= c->yuv2rgb_y_offset;
1366         Y *= c->yuv2rgb_y_coeff;
1367         Y += (1 << 13) - (1 << 29);
1368 
1369         R = V * c->yuv2rgb_v2r_coeff;
1370         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1371         B =                            U * c->yuv2rgb_u2b_coeff;
1372 
1373         if (hasAlpha) {
1374             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 1;
1375 
1376             A += 1 << 13;
1377         }
1378 
1379         output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
1380         output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
1381         output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
1382         if (eightbytes) {
1383             output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
1384             dest += 4;
1385         } else {
1386             dest += 3;
1387         }
1388     }
1389 }
1390 
1391 static av_always_inline void
yuv2rgba64_full_1_c_template(SwsContext * c,const int32_t * buf0,const int32_t * ubuf[2],const int32_t * vbuf[2],const int32_t * abuf0,uint16_t * dest,int dstW,int uvalpha,int y,enum AVPixelFormat target,int hasAlpha,int eightbytes)1392 yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0,
1393                        const int32_t *ubuf[2], const int32_t *vbuf[2],
1394                        const int32_t *abuf0, uint16_t *dest, int dstW,
1395                        int uvalpha, int y, enum AVPixelFormat target, int hasAlpha, int eightbytes)
1396 {
1397     const int32_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
1398     int i;
1399     int A = 0xffff<<14;
1400 
1401     if (uvalpha < 2048) {
1402         for (i = 0; i < dstW; i++) {
1403             int Y  = (buf0[i]) >> 2;
1404             int U  = (ubuf0[i] - (128 << 11)) >> 2;
1405             int V  = (vbuf0[i] - (128 << 11)) >> 2;
1406             int R, G, B;
1407 
1408             Y -= c->yuv2rgb_y_offset;
1409             Y *= c->yuv2rgb_y_coeff;
1410             Y += (1 << 13) - (1 << 29);
1411 
1412             if (hasAlpha) {
1413                 A = abuf0[i] << 11;
1414 
1415                 A += 1 << 13;
1416             }
1417 
1418             R = V * c->yuv2rgb_v2r_coeff;
1419             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1420             B =                            U * c->yuv2rgb_u2b_coeff;
1421 
1422             output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
1423             output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
1424             output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
1425             if (eightbytes) {
1426                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
1427                 dest += 4;
1428             } else {
1429                 dest += 3;
1430             }
1431         }
1432     } else {
1433         const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
1434         int A = 0xffff<<14;
1435         for (i = 0; i < dstW; i++) {
1436             int Y  = (buf0[i]    ) >> 2;
1437             int U  = (ubuf0[i] + ubuf1[i] - (128 << 12)) >> 3;
1438             int V  = (vbuf0[i] + vbuf1[i] - (128 << 12)) >> 3;
1439             int R, G, B;
1440 
1441             Y -= c->yuv2rgb_y_offset;
1442             Y *= c->yuv2rgb_y_coeff;
1443             Y += (1 << 13) - (1 << 29);
1444 
1445             if (hasAlpha) {
1446                 A = abuf0[i] << 11;
1447 
1448                 A += 1 << 13;
1449             }
1450 
1451             R = V * c->yuv2rgb_v2r_coeff;
1452             G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
1453             B =                            U * c->yuv2rgb_u2b_coeff;
1454 
1455             output_pixel(&dest[0], av_clip_uintp2(((R_B + Y) >> 14) + (1<<15), 16));
1456             output_pixel(&dest[1], av_clip_uintp2(((  G + Y) >> 14) + (1<<15), 16));
1457             output_pixel(&dest[2], av_clip_uintp2(((B_R + Y) >> 14) + (1<<15), 16));
1458             if (eightbytes) {
1459                 output_pixel(&dest[3], av_clip_uintp2(A, 30) >> 14);
1460                 dest += 4;
1461             } else {
1462                 dest += 3;
1463             }
1464         }
1465     }
1466 }
1467 
/* Retire the helper macros of the 16-bit packed writers above. Those writers
 * pick the channel order through the upper-case R_B / B_R names, so undefine
 * those as well; the lower-case names are kept because undefining a macro
 * that is not defined is harmless. */
#undef output_pixel
#undef r_b
#undef b_r
#undef R_B
#undef B_R
1471 
/*
 * Define the three entry points name##ext##_X_c, name##ext##_2_c and
 * name##ext##_1_c for one 16-bit-per-component packed output format.
 *
 * Each entry point takes int16_t / uint8_t typed pointers, reinterprets the
 * sample pointers as int32_t and the destination as uint16_t, and forwards to
 * the matching name##base##_{X,2,1}_c_template() with fmt, hasAlpha and
 * eightbytes appended as trailing arguments.
 */
#define YUV2PACKED16WRAPPER(name, base, ext, fmt, hasAlpha, eightbytes) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
                        const int16_t **_lumSrc, int lumFilterSize, \
                        const int16_t *chrFilter, const int16_t **_chrUSrc, \
                        const int16_t **_chrVSrc, int chrFilterSize, \
                        const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
                        int y) \
{ \
    name ## base ## _X_c_template(c, lumFilter, \
                                  (const int32_t **) _lumSrc, lumFilterSize, \
                                  chrFilter, \
                                  (const int32_t **) _chrUSrc, \
                                  (const int32_t **) _chrVSrc, chrFilterSize, \
                                  (const int32_t **) _alpSrc, \
                                  (uint16_t *) _dest, dstW, y, \
                                  fmt, hasAlpha, eightbytes); \
} \
 \
static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
                        int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, \
                                  (const int32_t **) _buf, \
                                  (const int32_t **) _ubuf, \
                                  (const int32_t **) _vbuf, \
                                  (const int32_t **) _abuf, \
                                  (uint16_t *) _dest, dstW, \
                                  yalpha, uvalpha, y, \
                                  fmt, hasAlpha, eightbytes); \
} \
 \
static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
                        const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
                        const int16_t *_abuf0, uint8_t *_dest, int dstW, \
                        int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, \
                                  (const int32_t *)  _buf0, \
                                  (const int32_t **) _ubuf, \
                                  (const int32_t **) _vbuf, \
                                  (const int32_t *)  _abuf0, \
                                  (uint16_t *) _dest, dstW, uvalpha, y, \
                                  fmt, hasAlpha, eightbytes); \
}
1517 
1518 YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48be, AV_PIX_FMT_RGB48BE, 0, 0)
1519 YUV2PACKED16WRAPPER(yuv2, rgba64, rgb48le, AV_PIX_FMT_RGB48LE, 0, 0)
1520 YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48be, AV_PIX_FMT_BGR48BE, 0, 0)
1521 YUV2PACKED16WRAPPER(yuv2, rgba64, bgr48le, AV_PIX_FMT_BGR48LE, 0, 0)
1522 YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64be, AV_PIX_FMT_RGBA64BE, 1, 1)
1523 YUV2PACKED16WRAPPER(yuv2, rgba64, rgba64le, AV_PIX_FMT_RGBA64LE, 1, 1)
1524 YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64be, AV_PIX_FMT_RGBA64BE, 0, 1)
1525 YUV2PACKED16WRAPPER(yuv2, rgba64, rgbx64le, AV_PIX_FMT_RGBA64LE, 0, 1)
1526 YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1)
1527 YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1)
1528 YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1)
1529 YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1)
1530 YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0)
1531 YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0)
1532 
1533 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0)
1534 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0)
1535 YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48be_full, AV_PIX_FMT_BGR48BE, 0, 0)
1536 YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgr48le_full, AV_PIX_FMT_BGR48LE, 0, 0)
1537 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64be_full, AV_PIX_FMT_RGBA64BE, 1, 1)
1538 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgba64le_full, AV_PIX_FMT_RGBA64LE, 1, 1)
1539 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64be_full, AV_PIX_FMT_RGBA64BE, 0, 1)
1540 YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgbx64le_full, AV_PIX_FMT_RGBA64LE, 0, 1)
1541 YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64be_full, AV_PIX_FMT_BGRA64BE, 1, 1)
1542 YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgra64le_full, AV_PIX_FMT_BGRA64LE, 1, 1)
1543 YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64be_full, AV_PIX_FMT_BGRA64BE, 0, 1)
1544 YUV2PACKED16WRAPPER(yuv2, rgba64_full, bgrx64le_full, AV_PIX_FMT_BGRA64LE, 0, 1)
1545 
1546 /*
1547  * Write out 2 RGB pixels in the target pixel format. This function takes a
1548  * R/G/B LUT as generated by ff_yuv2rgb_c_init_tables(), which takes care of
1549  * things like endianness conversion and shifting. The caller takes care of
1550  * setting the correct offset in these tables from the chroma (U/V) values.
1551  * This function then uses the luminance (Y1/Y2) values to write out the
1552  * correct RGB values into the destination buffer.
1553  */
1554 static av_always_inline void
yuv2rgb_write(uint8_t * _dest,int i,int Y1,int Y2,unsigned A1,unsigned A2,const void * _r,const void * _g,const void * _b,int y,enum AVPixelFormat target,int hasAlpha)1555 yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
1556               unsigned A1, unsigned A2,
1557               const void *_r, const void *_g, const void *_b, int y,
1558               enum AVPixelFormat target, int hasAlpha)
1559 {
1560     if (target == AV_PIX_FMT_ARGB || target == AV_PIX_FMT_RGBA ||
1561         target == AV_PIX_FMT_ABGR || target == AV_PIX_FMT_BGRA) {
1562         uint32_t *dest = (uint32_t *) _dest;
1563         const uint32_t *r = (const uint32_t *) _r;
1564         const uint32_t *g = (const uint32_t *) _g;
1565         const uint32_t *b = (const uint32_t *) _b;
1566 
1567 #if CONFIG_SMALL
1568         int sh = hasAlpha ? ((target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24) : 0;
1569 
1570         dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (hasAlpha ? A1 << sh : 0);
1571         dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (hasAlpha ? A2 << sh : 0);
1572 #else
1573         if (hasAlpha) {
1574             int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
1575 
1576             av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0);
1577             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1] + (A1 << sh);
1578             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2] + (A2 << sh);
1579         } else {
1580 #if defined(ASSERT_LEVEL) && ASSERT_LEVEL > 1
1581             int sh = (target == AV_PIX_FMT_RGB32_1 || target == AV_PIX_FMT_BGR32_1) ? 0 : 24;
1582 
1583             av_assert2((((r[Y1] + g[Y1] + b[Y1]) >> sh) & 0xFF) == 0xFF);
1584 #endif
1585             dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1586             dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1587         }
1588 #endif
1589     } else if (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) {
1590         uint8_t *dest = (uint8_t *) _dest;
1591         const uint8_t *r = (const uint8_t *) _r;
1592         const uint8_t *g = (const uint8_t *) _g;
1593         const uint8_t *b = (const uint8_t *) _b;
1594 
1595 #define r_b ((target == AV_PIX_FMT_RGB24) ? r : b)
1596 #define b_r ((target == AV_PIX_FMT_RGB24) ? b : r)
1597 
1598         dest[i * 6 + 0] = r_b[Y1];
1599         dest[i * 6 + 1] =   g[Y1];
1600         dest[i * 6 + 2] = b_r[Y1];
1601         dest[i * 6 + 3] = r_b[Y2];
1602         dest[i * 6 + 4] =   g[Y2];
1603         dest[i * 6 + 5] = b_r[Y2];
1604 #undef r_b
1605 #undef b_r
1606     } else if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565 ||
1607                target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555 ||
1608                target == AV_PIX_FMT_RGB444 || target == AV_PIX_FMT_BGR444) {
1609         uint16_t *dest = (uint16_t *) _dest;
1610         const uint16_t *r = (const uint16_t *) _r;
1611         const uint16_t *g = (const uint16_t *) _g;
1612         const uint16_t *b = (const uint16_t *) _b;
1613         int dr1, dg1, db1, dr2, dg2, db2;
1614 
1615         if (target == AV_PIX_FMT_RGB565 || target == AV_PIX_FMT_BGR565) {
1616             dr1 = ff_dither_2x2_8[ y & 1     ][0];
1617             dg1 = ff_dither_2x2_4[ y & 1     ][0];
1618             db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
1619             dr2 = ff_dither_2x2_8[ y & 1     ][1];
1620             dg2 = ff_dither_2x2_4[ y & 1     ][1];
1621             db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
1622         } else if (target == AV_PIX_FMT_RGB555 || target == AV_PIX_FMT_BGR555) {
1623             dr1 = ff_dither_2x2_8[ y & 1     ][0];
1624             dg1 = ff_dither_2x2_8[ y & 1     ][1];
1625             db1 = ff_dither_2x2_8[(y & 1) ^ 1][0];
1626             dr2 = ff_dither_2x2_8[ y & 1     ][1];
1627             dg2 = ff_dither_2x2_8[ y & 1     ][0];
1628             db2 = ff_dither_2x2_8[(y & 1) ^ 1][1];
1629         } else {
1630             dr1 = ff_dither_4x4_16[ y & 3     ][0];
1631             dg1 = ff_dither_4x4_16[ y & 3     ][1];
1632             db1 = ff_dither_4x4_16[(y & 3) ^ 3][0];
1633             dr2 = ff_dither_4x4_16[ y & 3     ][1];
1634             dg2 = ff_dither_4x4_16[ y & 3     ][0];
1635             db2 = ff_dither_4x4_16[(y & 3) ^ 3][1];
1636         }
1637 
1638         dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1639         dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1640     } else if (target == AV_PIX_FMT_X2RGB10 || target == AV_PIX_FMT_X2BGR10) {
1641         uint32_t *dest = (uint32_t *) _dest;
1642         const uint32_t *r = (const uint32_t *) _r;
1643         const uint32_t *g = (const uint32_t *) _g;
1644         const uint32_t *b = (const uint32_t *) _b;
1645         dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1];
1646         dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2];
1647     } else /* 8/4 bits */ {
1648         uint8_t *dest = (uint8_t *) _dest;
1649         const uint8_t *r = (const uint8_t *) _r;
1650         const uint8_t *g = (const uint8_t *) _g;
1651         const uint8_t *b = (const uint8_t *) _b;
1652         int dr1, dg1, db1, dr2, dg2, db2;
1653 
1654         if (target == AV_PIX_FMT_RGB8 || target == AV_PIX_FMT_BGR8) {
1655             const uint8_t * const d64 = ff_dither_8x8_73[y & 7];
1656             const uint8_t * const d32 = ff_dither_8x8_32[y & 7];
1657             dr1 = dg1 = d32[(i * 2 + 0) & 7];
1658             db1 =       d64[(i * 2 + 0) & 7];
1659             dr2 = dg2 = d32[(i * 2 + 1) & 7];
1660             db2 =       d64[(i * 2 + 1) & 7];
1661         } else {
1662             const uint8_t * const d64  = ff_dither_8x8_73 [y & 7];
1663             const uint8_t * const d128 = ff_dither_8x8_220[y & 7];
1664             dr1 = db1 = d128[(i * 2 + 0) & 7];
1665             dg1 =        d64[(i * 2 + 0) & 7];
1666             dr2 = db2 = d128[(i * 2 + 1) & 7];
1667             dg2 =        d64[(i * 2 + 1) & 7];
1668         }
1669 
1670         if (target == AV_PIX_FMT_RGB4 || target == AV_PIX_FMT_BGR4) {
1671             dest[i] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1] +
1672                     ((r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]) << 4);
1673         } else {
1674             dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1];
1675             dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2];
1676         }
1677     }
1678 }
1679 
1680 static av_always_inline void
yuv2rgb_X_c_template(SwsContext * c,const int16_t * lumFilter,const int16_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int16_t ** chrUSrc,const int16_t ** chrVSrc,int chrFilterSize,const int16_t ** alpSrc,uint8_t * dest,int dstW,int y,enum AVPixelFormat target,int hasAlpha)1681 yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
1682                      const int16_t **lumSrc, int lumFilterSize,
1683                      const int16_t *chrFilter, const int16_t **chrUSrc,
1684                      const int16_t **chrVSrc, int chrFilterSize,
1685                      const int16_t **alpSrc, uint8_t *dest, int dstW,
1686                      int y, enum AVPixelFormat target, int hasAlpha)
1687 {
1688     int i;
1689 
1690     for (i = 0; i < ((dstW + 1) >> 1); i++) {
1691         int j, A1, A2;
1692         int Y1 = 1 << 18;
1693         int Y2 = 1 << 18;
1694         int U  = 1 << 18;
1695         int V  = 1 << 18;
1696         const void *r, *g, *b;
1697 
1698         for (j = 0; j < lumFilterSize; j++) {
1699             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
1700             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
1701         }
1702         for (j = 0; j < chrFilterSize; j++) {
1703             U += chrUSrc[j][i] * chrFilter[j];
1704             V += chrVSrc[j][i] * chrFilter[j];
1705         }
1706         Y1 >>= 19;
1707         Y2 >>= 19;
1708         U  >>= 19;
1709         V  >>= 19;
1710         if (hasAlpha) {
1711             A1 = 1 << 18;
1712             A2 = 1 << 18;
1713             for (j = 0; j < lumFilterSize; j++) {
1714                 A1 += alpSrc[j][i * 2    ] * lumFilter[j];
1715                 A2 += alpSrc[j][i * 2 + 1] * lumFilter[j];
1716             }
1717             A1 >>= 19;
1718             A2 >>= 19;
1719             if ((A1 | A2) & 0x100) {
1720                 A1 = av_clip_uint8(A1);
1721                 A2 = av_clip_uint8(A2);
1722             }
1723         }
1724 
1725         r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM];
1726         g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
1727         b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
1728 
1729         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1730                       r, g, b, y, target, hasAlpha);
1731     }
1732 }
1733 
1734 static av_always_inline void
yuv2rgb_2_c_template(SwsContext * c,const int16_t * buf[2],const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf[2],uint8_t * dest,int dstW,int yalpha,int uvalpha,int y,enum AVPixelFormat target,int hasAlpha)1735 yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
1736                      const int16_t *ubuf[2], const int16_t *vbuf[2],
1737                      const int16_t *abuf[2], uint8_t *dest, int dstW,
1738                      int yalpha, int uvalpha, int y,
1739                      enum AVPixelFormat target, int hasAlpha)
1740 {
1741     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
1742                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
1743                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
1744                   *abuf0 = hasAlpha ? abuf[0] : NULL,
1745                   *abuf1 = hasAlpha ? abuf[1] : NULL;
1746     int  yalpha1 = 4096 - yalpha;
1747     int uvalpha1 = 4096 - uvalpha;
1748     int i;
1749     av_assert2(yalpha  <= 4096U);
1750     av_assert2(uvalpha <= 4096U);
1751 
1752     for (i = 0; i < ((dstW + 1) >> 1); i++) {
1753         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
1754         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
1755         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
1756         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
1757         int A1, A2;
1758         const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
1759                    *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
1760                    *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
1761 
1762         if (hasAlpha) {
1763             A1 = (abuf0[i * 2    ] * yalpha1 + abuf1[i * 2    ] * yalpha) >> 19;
1764             A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
1765             A1 = av_clip_uint8(A1);
1766             A2 = av_clip_uint8(A2);
1767         }
1768 
1769         yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1770                       r, g, b, y, target, hasAlpha);
1771     }
1772 }
1773 
1774 static av_always_inline void
yuv2rgb_1_c_template(SwsContext * c,const int16_t * buf0,const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf0,uint8_t * dest,int dstW,int uvalpha,int y,enum AVPixelFormat target,int hasAlpha)1775 yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
1776                      const int16_t *ubuf[2], const int16_t *vbuf[2],
1777                      const int16_t *abuf0, uint8_t *dest, int dstW,
1778                      int uvalpha, int y, enum AVPixelFormat target,
1779                      int hasAlpha)
1780 {
1781     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
1782     int i;
1783 
1784     if (uvalpha < 2048) {
1785         for (i = 0; i < ((dstW + 1) >> 1); i++) {
1786             int Y1 = (buf0[i * 2    ] + 64) >> 7;
1787             int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
1788             int U  = (ubuf0[i]        + 64) >> 7;
1789             int V  = (vbuf0[i]        + 64) >> 7;
1790             int A1, A2;
1791             const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
1792                        *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
1793                        *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
1794 
1795             if (hasAlpha) {
1796                 A1 = abuf0[i * 2    ] * 255 + 16384 >> 15;
1797                 A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15;
1798                 A1 = av_clip_uint8(A1);
1799                 A2 = av_clip_uint8(A2);
1800             }
1801 
1802             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1803                           r, g, b, y, target, hasAlpha);
1804         }
1805     } else {
1806         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
1807         for (i = 0; i < ((dstW + 1) >> 1); i++) {
1808             int Y1 = (buf0[i * 2    ]     +  64) >> 7;
1809             int Y2 = (buf0[i * 2 + 1]     +  64) >> 7;
1810             int U  = (ubuf0[i] + ubuf1[i] + 128) >> 8;
1811             int V  = (vbuf0[i] + vbuf1[i] + 128) >> 8;
1812             int A1, A2;
1813             const void *r =  c->table_rV[V + YUVRGB_TABLE_HEADROOM],
1814                        *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
1815                        *b =  c->table_bU[U + YUVRGB_TABLE_HEADROOM];
1816 
1817             if (hasAlpha) {
1818                 A1 = (abuf0[i * 2    ] + 64) >> 7;
1819                 A2 = (abuf0[i * 2 + 1] + 64) >> 7;
1820                 A1 = av_clip_uint8(A1);
1821                 A2 = av_clip_uint8(A2);
1822             }
1823 
1824             yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
1825                           r, g, b, y, target, hasAlpha);
1826         }
1827     }
1828 }
1829 
/*
 * Define name##ext##_X_c(): a thin forwarder to name##base##_X_c_template()
 * that appends fmt and hasAlpha to the argument list. hasAlpha may be an
 * expression that refers to the context parameter c.
 */
#define YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _X_c(SwsContext *c, \
                                const int16_t *lumFilter, \
                                const int16_t **lumSrc, int lumFilterSize, \
                                const int16_t *chrFilter, \
                                const int16_t **chrUSrc, \
                                const int16_t **chrVSrc, int chrFilterSize, \
                                const int16_t **alpSrc, \
                                uint8_t *dest, int dstW, int y) \
{ \
    name ## base ## _X_c_template(c, \
                                  lumFilter, lumSrc, lumFilterSize, \
                                  chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
                                  alpSrc, dest, dstW, y, \
                                  fmt, hasAlpha); \
}
1842 
/*
 * Everything YUV2RGBWRAPPERX() defines, plus name##ext##_2_c(): a thin
 * forwarder to name##base##_2_c_template() that appends fmt and hasAlpha.
 */
#define YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _2_c(SwsContext *c, \
                                const int16_t *buf[2], \
                                const int16_t *ubuf[2], \
                                const int16_t *vbuf[2], \
                                const int16_t *abuf[2], \
                                uint8_t *dest, int dstW, \
                                int yalpha, int uvalpha, int y) \
{ \
    name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, dest, dstW, \
                                  yalpha, uvalpha, y, \
                                  fmt, hasAlpha); \
}
1853 
/*
 * Everything YUV2RGBWRAPPERX2() defines, plus name##ext##_1_c(): a thin
 * forwarder to name##base##_1_c_template() that appends fmt and hasAlpha.
 */
#define YUV2RGBWRAPPER(name, base, ext, fmt, hasAlpha) \
YUV2RGBWRAPPERX2(name, base, ext, fmt, hasAlpha) \
static void name ## ext ## _1_c(SwsContext *c, \
                                const int16_t *buf0, \
                                const int16_t *ubuf[2], \
                                const int16_t *vbuf[2], \
                                const int16_t *abuf0, \
                                uint8_t *dest, int dstW, \
                                int uvalpha, int y) \
{ \
    name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, dstW, \
                                  uvalpha, y, \
                                  fmt, hasAlpha); \
}
1864 
1865 #if CONFIG_SMALL
1866 YUV2RGBWRAPPER(yuv2rgb,,  32_1,  AV_PIX_FMT_RGB32_1,   CONFIG_SWSCALE_ALPHA && c->needAlpha)
1867 YUV2RGBWRAPPER(yuv2rgb,,  32,    AV_PIX_FMT_RGB32,     CONFIG_SWSCALE_ALPHA && c->needAlpha)
1868 #else
1869 #if CONFIG_SWSCALE_ALPHA
1870 YUV2RGBWRAPPER(yuv2rgb,, a32_1,  AV_PIX_FMT_RGB32_1,   1)
1871 YUV2RGBWRAPPER(yuv2rgb,, a32,    AV_PIX_FMT_RGB32,     1)
1872 #endif
1873 YUV2RGBWRAPPER(yuv2rgb,, x32_1,  AV_PIX_FMT_RGB32_1,   0)
1874 YUV2RGBWRAPPER(yuv2rgb,, x32,    AV_PIX_FMT_RGB32,     0)
1875 #endif
1876 YUV2RGBWRAPPER(yuv2, rgb, rgb24, AV_PIX_FMT_RGB24,   0)
1877 YUV2RGBWRAPPER(yuv2, rgb, bgr24, AV_PIX_FMT_BGR24,   0)
1878 YUV2RGBWRAPPER(yuv2rgb,,  16,    AV_PIX_FMT_RGB565,    0)
1879 YUV2RGBWRAPPER(yuv2rgb,,  15,    AV_PIX_FMT_RGB555,    0)
1880 YUV2RGBWRAPPER(yuv2rgb,,  12,    AV_PIX_FMT_RGB444,    0)
1881 YUV2RGBWRAPPER(yuv2rgb,,   8,    AV_PIX_FMT_RGB8,      0)
1882 YUV2RGBWRAPPER(yuv2rgb,,   4,    AV_PIX_FMT_RGB4,      0)
1883 YUV2RGBWRAPPER(yuv2rgb,,   4b,   AV_PIX_FMT_RGB4_BYTE, 0)
1884 YUV2RGBWRAPPER(yuv2, rgb, x2rgb10, AV_PIX_FMT_X2RGB10, 0)
1885 YUV2RGBWRAPPER(yuv2, rgb, x2bgr10, AV_PIX_FMT_X2BGR10, 0)
1886 
yuv2rgb_write_full(SwsContext * c,uint8_t * dest,int i,int Y,int A,int U,int V,int y,enum AVPixelFormat target,int hasAlpha,int err[4])1887 static av_always_inline void yuv2rgb_write_full(SwsContext *c,
1888     uint8_t *dest, int i, int Y, int A, int U, int V,
1889     int y, enum AVPixelFormat target, int hasAlpha, int err[4])
1890 {
1891     int R, G, B;
1892     int isrgb8 = target == AV_PIX_FMT_BGR8 || target == AV_PIX_FMT_RGB8;
1893 
1894     Y -= c->yuv2rgb_y_offset;
1895     Y *= c->yuv2rgb_y_coeff;
1896     Y += 1 << 21;
1897     R = (unsigned)Y + V*c->yuv2rgb_v2r_coeff;
1898     G = (unsigned)Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;
1899     B = (unsigned)Y +                          U*c->yuv2rgb_u2b_coeff;
1900     if ((R | G | B) & 0xC0000000) {
1901         R = av_clip_uintp2(R, 30);
1902         G = av_clip_uintp2(G, 30);
1903         B = av_clip_uintp2(B, 30);
1904     }
1905 
1906     switch(target) {
1907     case AV_PIX_FMT_ARGB:
1908         dest[0] = hasAlpha ? A : 255;
1909         dest[1] = R >> 22;
1910         dest[2] = G >> 22;
1911         dest[3] = B >> 22;
1912         break;
1913     case AV_PIX_FMT_RGB24:
1914         dest[0] = R >> 22;
1915         dest[1] = G >> 22;
1916         dest[2] = B >> 22;
1917         break;
1918     case AV_PIX_FMT_RGBA:
1919         dest[0] = R >> 22;
1920         dest[1] = G >> 22;
1921         dest[2] = B >> 22;
1922         dest[3] = hasAlpha ? A : 255;
1923         break;
1924     case AV_PIX_FMT_ABGR:
1925         dest[0] = hasAlpha ? A : 255;
1926         dest[1] = B >> 22;
1927         dest[2] = G >> 22;
1928         dest[3] = R >> 22;
1929         break;
1930     case AV_PIX_FMT_BGR24:
1931         dest[0] = B >> 22;
1932         dest[1] = G >> 22;
1933         dest[2] = R >> 22;
1934         break;
1935     case AV_PIX_FMT_BGRA:
1936         dest[0] = B >> 22;
1937         dest[1] = G >> 22;
1938         dest[2] = R >> 22;
1939         dest[3] = hasAlpha ? A : 255;
1940         break;
1941     case AV_PIX_FMT_BGR4_BYTE:
1942     case AV_PIX_FMT_RGB4_BYTE:
1943     case AV_PIX_FMT_BGR8:
1944     case AV_PIX_FMT_RGB8:
1945     {
1946         int r,g,b;
1947 
1948         switch (c->dither) {
1949         case SWS_DITHER_NONE:
1950             if (isrgb8) {
1951                 r = av_clip_uintp2(R >> 27, 3);
1952                 g = av_clip_uintp2(G >> 27, 3);
1953                 b = av_clip_uintp2(B >> 28, 2);
1954             } else {
1955                 r = av_clip_uintp2(R >> 29, 1);
1956                 g = av_clip_uintp2(G >> 28, 2);
1957                 b = av_clip_uintp2(B >> 29, 1);
1958             }
1959             break;
1960         default:
1961         case SWS_DITHER_AUTO:
1962         case SWS_DITHER_ED:
1963             R >>= 22;
1964             G >>= 22;
1965             B >>= 22;
1966             R += (7*err[0] + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
1967             G += (7*err[1] + 1*c->dither_error[1][i] + 5*c->dither_error[1][i+1] + 3*c->dither_error[1][i+2])>>4;
1968             B += (7*err[2] + 1*c->dither_error[2][i] + 5*c->dither_error[2][i+1] + 3*c->dither_error[2][i+2])>>4;
1969             c->dither_error[0][i] = err[0];
1970             c->dither_error[1][i] = err[1];
1971             c->dither_error[2][i] = err[2];
1972             r = R >> (isrgb8 ? 5 : 7);
1973             g = G >> (isrgb8 ? 5 : 6);
1974             b = B >> (isrgb8 ? 6 : 7);
1975             r = av_clip(r, 0, isrgb8 ? 7 : 1);
1976             g = av_clip(g, 0, isrgb8 ? 7 : 3);
1977             b = av_clip(b, 0, isrgb8 ? 3 : 1);
1978             err[0] = R - r*(isrgb8 ? 36 : 255);
1979             err[1] = G - g*(isrgb8 ? 36 : 85);
1980             err[2] = B - b*(isrgb8 ? 85 : 255);
1981             break;
1982         case SWS_DITHER_A_DITHER:
1983             if (isrgb8) {
1984   /* see http://pippin.gimp.org/a_dither/ for details/origin */
1985 #define A_DITHER(u,v)   (((((u)+((v)*236))*119)&0xff))
1986                 r = (((R >> 19) + A_DITHER(i,y)  -96)>>8);
1987                 g = (((G >> 19) + A_DITHER(i + 17,y) - 96)>>8);
1988                 b = (((B >> 20) + A_DITHER(i + 17*2,y) -96)>>8);
1989                 r = av_clip_uintp2(r, 3);
1990                 g = av_clip_uintp2(g, 3);
1991                 b = av_clip_uintp2(b, 2);
1992             } else {
1993                 r = (((R >> 21) + A_DITHER(i,y)-256)>>8);
1994                 g = (((G >> 19) + A_DITHER(i + 17,y)-256)>>8);
1995                 b = (((B >> 21) + A_DITHER(i + 17*2,y)-256)>>8);
1996                 r = av_clip_uintp2(r, 1);
1997                 g = av_clip_uintp2(g, 2);
1998                 b = av_clip_uintp2(b, 1);
1999             }
2000             break;
2001         case SWS_DITHER_X_DITHER:
2002             if (isrgb8) {
2003   /* see http://pippin.gimp.org/a_dither/ for details/origin */
2004 #define X_DITHER(u,v)   (((((u)^((v)*237))*181)&0x1ff)/2)
2005                 r = (((R >> 19) + X_DITHER(i,y) - 96)>>8);
2006                 g = (((G >> 19) + X_DITHER(i + 17,y) - 96)>>8);
2007                 b = (((B >> 20) + X_DITHER(i + 17*2,y) - 96)>>8);
2008                 r = av_clip_uintp2(r, 3);
2009                 g = av_clip_uintp2(g, 3);
2010                 b = av_clip_uintp2(b, 2);
2011             } else {
2012                 r = (((R >> 21) + X_DITHER(i,y)-256)>>8);
2013                 g = (((G >> 19) + X_DITHER(i + 17,y)-256)>>8);
2014                 b = (((B >> 21) + X_DITHER(i + 17*2,y)-256)>>8);
2015                 r = av_clip_uintp2(r, 1);
2016                 g = av_clip_uintp2(g, 2);
2017                 b = av_clip_uintp2(b, 1);
2018             }
2019 
2020             break;
2021         }
2022 
2023         if(target == AV_PIX_FMT_BGR4_BYTE) {
2024             dest[0] = r + 2*g + 8*b;
2025         } else if(target == AV_PIX_FMT_RGB4_BYTE) {
2026             dest[0] = b + 2*g + 8*r;
2027         } else if(target == AV_PIX_FMT_BGR8) {
2028             dest[0] = r + 8*g + 64*b;
2029         } else if(target == AV_PIX_FMT_RGB8) {
2030             dest[0] = b + 4*g + 32*r;
2031         } else
2032             av_assert2(0);
2033         break;}
2034     }
2035 }
2036 
2037 static av_always_inline void
yuv2rgb_full_X_c_template(SwsContext * c,const int16_t * lumFilter,const int16_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int16_t ** chrUSrc,const int16_t ** chrVSrc,int chrFilterSize,const int16_t ** alpSrc,uint8_t * dest,int dstW,int y,enum AVPixelFormat target,int hasAlpha)2038 yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
2039                           const int16_t **lumSrc, int lumFilterSize,
2040                           const int16_t *chrFilter, const int16_t **chrUSrc,
2041                           const int16_t **chrVSrc, int chrFilterSize,
2042                           const int16_t **alpSrc, uint8_t *dest,
2043                           int dstW, int y, enum AVPixelFormat target, int hasAlpha)
2044 {
2045     int i;
2046     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
2047     int err[4] = {0};
2048     int A = 0; //init to silence warning
2049 
2050     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
2051        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
2052         step = 1;
2053 
2054     for (i = 0; i < dstW; i++) {
2055         int j;
2056         int Y = 1<<9;
2057         int U = (1<<9)-(128 << 19);
2058         int V = (1<<9)-(128 << 19);
2059 
2060         for (j = 0; j < lumFilterSize; j++) {
2061             Y += lumSrc[j][i] * lumFilter[j];
2062         }
2063         for (j = 0; j < chrFilterSize; j++) {
2064             U += chrUSrc[j][i] * chrFilter[j];
2065             V += chrVSrc[j][i] * chrFilter[j];
2066         }
2067         Y >>= 10;
2068         U >>= 10;
2069         V >>= 10;
2070         if (hasAlpha) {
2071             A = 1 << 18;
2072             for (j = 0; j < lumFilterSize; j++) {
2073                 A += alpSrc[j][i] * lumFilter[j];
2074             }
2075             A >>= 19;
2076             if (A & 0x100)
2077                 A = av_clip_uint8(A);
2078         }
2079         yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
2080         dest += step;
2081     }
2082     c->dither_error[0][i] = err[0];
2083     c->dither_error[1][i] = err[1];
2084     c->dither_error[2][i] = err[2];
2085 }
2086 
2087 static av_always_inline void
yuv2rgb_full_2_c_template(SwsContext * c,const int16_t * buf[2],const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf[2],uint8_t * dest,int dstW,int yalpha,int uvalpha,int y,enum AVPixelFormat target,int hasAlpha)2088 yuv2rgb_full_2_c_template(SwsContext *c, const int16_t *buf[2],
2089                      const int16_t *ubuf[2], const int16_t *vbuf[2],
2090                      const int16_t *abuf[2], uint8_t *dest, int dstW,
2091                      int yalpha, int uvalpha, int y,
2092                      enum AVPixelFormat target, int hasAlpha)
2093 {
2094     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
2095                   *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
2096                   *vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
2097                   *abuf0 = hasAlpha ? abuf[0] : NULL,
2098                   *abuf1 = hasAlpha ? abuf[1] : NULL;
2099     int  yalpha1 = 4096 - yalpha;
2100     int uvalpha1 = 4096 - uvalpha;
2101     int i;
2102     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
2103     int err[4] = {0};
2104     int A = 0; // init to silcene warning
2105 
2106     av_assert2(yalpha  <= 4096U);
2107     av_assert2(uvalpha <= 4096U);
2108 
2109     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
2110        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
2111         step = 1;
2112 
2113     for (i = 0; i < dstW; i++) {
2114         int Y = ( buf0[i] * yalpha1  +  buf1[i] * yalpha             ) >> 10; //FIXME rounding
2115         int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha-(128 << 19)) >> 10;
2116         int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha-(128 << 19)) >> 10;
2117 
2118         if (hasAlpha) {
2119             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha + (1<<18)) >> 19;
2120             if (A & 0x100)
2121                 A = av_clip_uint8(A);
2122         }
2123 
2124         yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
2125         dest += step;
2126     }
2127     c->dither_error[0][i] = err[0];
2128     c->dither_error[1][i] = err[1];
2129     c->dither_error[2][i] = err[2];
2130 }
2131 
2132 static av_always_inline void
yuv2rgb_full_1_c_template(SwsContext * c,const int16_t * buf0,const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf0,uint8_t * dest,int dstW,int uvalpha,int y,enum AVPixelFormat target,int hasAlpha)2133 yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0,
2134                      const int16_t *ubuf[2], const int16_t *vbuf[2],
2135                      const int16_t *abuf0, uint8_t *dest, int dstW,
2136                      int uvalpha, int y, enum AVPixelFormat target,
2137                      int hasAlpha)
2138 {
2139     const int16_t *ubuf0 = ubuf[0], *vbuf0 = vbuf[0];
2140     int i;
2141     int step = (target == AV_PIX_FMT_RGB24 || target == AV_PIX_FMT_BGR24) ? 3 : 4;
2142     int err[4] = {0};
2143 
2144     if(   target == AV_PIX_FMT_BGR4_BYTE || target == AV_PIX_FMT_RGB4_BYTE
2145        || target == AV_PIX_FMT_BGR8      || target == AV_PIX_FMT_RGB8)
2146         step = 1;
2147 
2148     if (uvalpha < 2048) {
2149         int A = 0; //init to silence warning
2150         for (i = 0; i < dstW; i++) {
2151             int Y = buf0[i] * 4;
2152             int U = (ubuf0[i] - (128<<7)) * 4;
2153             int V = (vbuf0[i] - (128<<7)) * 4;
2154 
2155             if (hasAlpha) {
2156                 A = (abuf0[i] + 64) >> 7;
2157                 if (A & 0x100)
2158                     A = av_clip_uint8(A);
2159             }
2160 
2161             yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
2162             dest += step;
2163         }
2164     } else {
2165         const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
2166         int A = 0; //init to silence warning
2167         for (i = 0; i < dstW; i++) {
2168             int Y = buf0[i] * 4;
2169             int U = (ubuf0[i] + ubuf1[i] - (128<<8)) * 2;
2170             int V = (vbuf0[i] + vbuf1[i] - (128<<8)) * 2;
2171 
2172             if (hasAlpha) {
2173                 A = (abuf0[i] + 64) >> 7;
2174                 if (A & 0x100)
2175                     A = av_clip_uint8(A);
2176             }
2177 
2178             yuv2rgb_write_full(c, dest, i, Y, A, U, V, y, target, hasAlpha, err);
2179             dest += step;
2180         }
2181     }
2182 
2183     c->dither_error[0][i] = err[0];
2184     c->dither_error[1][i] = err[1];
2185     c->dither_error[2][i] = err[2];
2186 }
2187 
/*
 * Instantiate the full-chroma RGB writers for every packed destination format.
 *
 * YUV2RGBWRAPPER is defined outside this part of the file; presumably each use
 * generates the yuv2<name>_X_c / _2_c / _1_c entry points (such as
 * yuv2rgba32_full_X_c, installed by ff_sws_init_output_funcs()) on top of the
 * yuv2rgb_full_{X,2,1}_c_template functions, with the last macro argument
 * used as the hasAlpha value -- confirm against the macro definition.
 *
 * With CONFIG_SMALL a single variant per 32-bit format decides about alpha at
 * run time through c->needAlpha. Otherwise separate variants are emitted with
 * the last argument fixed to 1 (only if CONFIG_SWSCALE_ALPHA) and to 0.
 */
#if CONFIG_SMALL
YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  CONFIG_SWSCALE_ALPHA && c->needAlpha)
#else
#if CONFIG_SWSCALE_ALPHA
YUV2RGBWRAPPER(yuv2, rgb_full, bgra32_full, AV_PIX_FMT_BGRA,  1)
YUV2RGBWRAPPER(yuv2, rgb_full, abgr32_full, AV_PIX_FMT_ABGR,  1)
YUV2RGBWRAPPER(yuv2, rgb_full, rgba32_full, AV_PIX_FMT_RGBA,  1)
YUV2RGBWRAPPER(yuv2, rgb_full, argb32_full, AV_PIX_FMT_ARGB,  1)
#endif
YUV2RGBWRAPPER(yuv2, rgb_full, bgrx32_full, AV_PIX_FMT_BGRA,  0)
YUV2RGBWRAPPER(yuv2, rgb_full, xbgr32_full, AV_PIX_FMT_ABGR,  0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgbx32_full, AV_PIX_FMT_RGBA,  0)
YUV2RGBWRAPPER(yuv2, rgb_full, xrgb32_full, AV_PIX_FMT_ARGB,  0)
#endif
/* 24-bit packed formats: the last argument is always 0 */
YUV2RGBWRAPPER(yuv2, rgb_full, bgr24_full,  AV_PIX_FMT_BGR24, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgb24_full,  AV_PIX_FMT_RGB24, 0)

/* one-byte-per-pixel formats (the templates use a 1-byte step for these) */
YUV2RGBWRAPPER(yuv2, rgb_full, bgr4_byte_full,  AV_PIX_FMT_BGR4_BYTE, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgb4_byte_full,  AV_PIX_FMT_RGB4_BYTE, 0)
YUV2RGBWRAPPER(yuv2, rgb_full, bgr8_full,   AV_PIX_FMT_BGR8,  0)
YUV2RGBWRAPPER(yuv2, rgb_full, rgb8_full,   AV_PIX_FMT_RGB8,  0)
2212 
2213 static void
yuv2gbrp_full_X_c(SwsContext * c,const int16_t * lumFilter,const int16_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int16_t ** chrUSrc,const int16_t ** chrVSrc,int chrFilterSize,const int16_t ** alpSrc,uint8_t ** dest,int dstW,int y)2214 yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter,
2215                   const int16_t **lumSrc, int lumFilterSize,
2216                   const int16_t *chrFilter, const int16_t **chrUSrc,
2217                   const int16_t **chrVSrc, int chrFilterSize,
2218                   const int16_t **alpSrc, uint8_t **dest,
2219                   int dstW, int y)
2220 {
2221     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
2222     int i;
2223     int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrc;
2224     uint16_t **dest16 = (uint16_t**)dest;
2225     int SH = 22 + 8 - desc->comp[0].depth;
2226     int A = 0; // init to silence warning
2227 
2228     for (i = 0; i < dstW; i++) {
2229         int j;
2230         int Y = 1 << 9;
2231         int U = (1 << 9) - (128 << 19);
2232         int V = (1 << 9) - (128 << 19);
2233         int R, G, B;
2234 
2235         for (j = 0; j < lumFilterSize; j++)
2236             Y += lumSrc[j][i] * lumFilter[j];
2237 
2238         for (j = 0; j < chrFilterSize; j++) {
2239             U += chrUSrc[j][i] * chrFilter[j];
2240             V += chrVSrc[j][i] * chrFilter[j];
2241         }
2242 
2243         Y >>= 10;
2244         U >>= 10;
2245         V >>= 10;
2246 
2247         if (hasAlpha) {
2248             A = 1 << 18;
2249 
2250             for (j = 0; j < lumFilterSize; j++)
2251                 A += alpSrc[j][i] * lumFilter[j];
2252 
2253             if (A & 0xF8000000)
2254                 A =  av_clip_uintp2(A, 27);
2255         }
2256 
2257         Y -= c->yuv2rgb_y_offset;
2258         Y *= c->yuv2rgb_y_coeff;
2259         Y += 1 << (SH-1);
2260         R = Y + V * c->yuv2rgb_v2r_coeff;
2261         G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
2262         B = Y +                            U * c->yuv2rgb_u2b_coeff;
2263 
2264         if ((R | G | B) & 0xC0000000) {
2265             R = av_clip_uintp2(R, 30);
2266             G = av_clip_uintp2(G, 30);
2267             B = av_clip_uintp2(B, 30);
2268         }
2269 
2270         if (SH != 22) {
2271             dest16[0][i] = G >> SH;
2272             dest16[1][i] = B >> SH;
2273             dest16[2][i] = R >> SH;
2274             if (hasAlpha)
2275                 dest16[3][i] = A >> (SH - 3);
2276         } else {
2277             dest[0][i] = G >> 22;
2278             dest[1][i] = B >> 22;
2279             dest[2][i] = R >> 22;
2280             if (hasAlpha)
2281                 dest[3][i] = A >> 19;
2282         }
2283     }
2284     if (SH != 22 && (!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
2285         for (i = 0; i < dstW; i++) {
2286             dest16[0][i] = av_bswap16(dest16[0][i]);
2287             dest16[1][i] = av_bswap16(dest16[1][i]);
2288             dest16[2][i] = av_bswap16(dest16[2][i]);
2289             if (hasAlpha)
2290                 dest16[3][i] = av_bswap16(dest16[3][i]);
2291         }
2292     }
2293 }
2294 
2295 static void
yuv2gbrp16_full_X_c(SwsContext * c,const int16_t * lumFilter,const int16_t ** lumSrcx,int lumFilterSize,const int16_t * chrFilter,const int16_t ** chrUSrcx,const int16_t ** chrVSrcx,int chrFilterSize,const int16_t ** alpSrcx,uint8_t ** dest,int dstW,int y)2296 yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter,
2297                     const int16_t **lumSrcx, int lumFilterSize,
2298                     const int16_t *chrFilter, const int16_t **chrUSrcx,
2299                     const int16_t **chrVSrcx, int chrFilterSize,
2300                     const int16_t **alpSrcx, uint8_t **dest,
2301                     int dstW, int y)
2302 {
2303     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
2304     int i;
2305     int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx;
2306     uint16_t **dest16 = (uint16_t**)dest;
2307     const int32_t **lumSrc  = (const int32_t**)lumSrcx;
2308     const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
2309     const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
2310     const int32_t **alpSrc  = (const int32_t**)alpSrcx;
2311 
2312     for (i = 0; i < dstW; i++) {
2313         int j;
2314         int Y = -0x40000000;
2315         int U = -(128 << 23);
2316         int V = -(128 << 23);
2317         int R, G, B, A;
2318 
2319         for (j = 0; j < lumFilterSize; j++)
2320             Y += lumSrc[j][i] * (unsigned)lumFilter[j];
2321 
2322         for (j = 0; j < chrFilterSize; j++) {
2323             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
2324             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
2325         }
2326 
2327         Y >>= 14;
2328         Y += 0x10000;
2329         U >>= 14;
2330         V >>= 14;
2331 
2332         if (hasAlpha) {
2333             A = -0x40000000;
2334 
2335             for (j = 0; j < lumFilterSize; j++)
2336                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
2337 
2338             A >>= 1;
2339             A += 0x20002000;
2340         }
2341 
2342         Y -= c->yuv2rgb_y_offset;
2343         Y *= c->yuv2rgb_y_coeff;
2344         Y += (1 << 13) - (1 << 29);
2345         R = V * c->yuv2rgb_v2r_coeff;
2346         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
2347         B =                            U * c->yuv2rgb_u2b_coeff;
2348 
2349         dest16[2][i] = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16);
2350         dest16[0][i] = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16);
2351         dest16[1][i] = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16);
2352 
2353         if (hasAlpha)
2354             dest16[3][i] = av_clip_uintp2(A, 30) >> 14;
2355     }
2356     if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
2357         for (i = 0; i < dstW; i++) {
2358             dest16[0][i] = av_bswap16(dest16[0][i]);
2359             dest16[1][i] = av_bswap16(dest16[1][i]);
2360             dest16[2][i] = av_bswap16(dest16[2][i]);
2361             if (hasAlpha)
2362                 dest16[3][i] = av_bswap16(dest16[3][i]);
2363         }
2364     }
2365 }
2366 
2367 static void
yuv2gbrpf32_full_X_c(SwsContext * c,const int16_t * lumFilter,const int16_t ** lumSrcx,int lumFilterSize,const int16_t * chrFilter,const int16_t ** chrUSrcx,const int16_t ** chrVSrcx,int chrFilterSize,const int16_t ** alpSrcx,uint8_t ** dest,int dstW,int y)2368 yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter,
2369                     const int16_t **lumSrcx, int lumFilterSize,
2370                     const int16_t *chrFilter, const int16_t **chrUSrcx,
2371                     const int16_t **chrVSrcx, int chrFilterSize,
2372                     const int16_t **alpSrcx, uint8_t **dest,
2373                     int dstW, int y)
2374 {
2375     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat);
2376     int i;
2377     int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx;
2378     uint32_t **dest32 = (uint32_t**)dest;
2379     const int32_t **lumSrc  = (const int32_t**)lumSrcx;
2380     const int32_t **chrUSrc = (const int32_t**)chrUSrcx;
2381     const int32_t **chrVSrc = (const int32_t**)chrVSrcx;
2382     const int32_t **alpSrc  = (const int32_t**)alpSrcx;
2383     static const float float_mult = 1.0f / 65535.0f;
2384 
2385     for (i = 0; i < dstW; i++) {
2386         int j;
2387         int Y = -0x40000000;
2388         int U = -(128 << 23);
2389         int V = -(128 << 23);
2390         int R, G, B, A;
2391 
2392         for (j = 0; j < lumFilterSize; j++)
2393             Y += lumSrc[j][i] * (unsigned)lumFilter[j];
2394 
2395         for (j = 0; j < chrFilterSize; j++) {
2396             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
2397             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
2398         }
2399 
2400         Y >>= 14;
2401         Y += 0x10000;
2402         U >>= 14;
2403         V >>= 14;
2404 
2405         if (hasAlpha) {
2406             A = -0x40000000;
2407 
2408             for (j = 0; j < lumFilterSize; j++)
2409                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
2410 
2411             A >>= 1;
2412             A += 0x20002000;
2413         }
2414 
2415         Y -= c->yuv2rgb_y_offset;
2416         Y *= c->yuv2rgb_y_coeff;
2417         Y += (1 << 13) - (1 << 29);
2418         R = V * c->yuv2rgb_v2r_coeff;
2419         G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff;
2420         B =                            U * c->yuv2rgb_u2b_coeff;
2421 
2422         R = av_clip_uintp2(((Y + R) >> 14) + (1<<15), 16);
2423         G = av_clip_uintp2(((Y + G) >> 14) + (1<<15), 16);
2424         B = av_clip_uintp2(((Y + B) >> 14) + (1<<15), 16);
2425 
2426         dest32[0][i] = av_float2int(float_mult * (float)G);
2427         dest32[1][i] = av_float2int(float_mult * (float)B);
2428         dest32[2][i] = av_float2int(float_mult * (float)R);
2429         if (hasAlpha)
2430             dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14));
2431     }
2432     if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) {
2433         for (i = 0; i < dstW; i++) {
2434             dest32[0][i] = av_bswap32(dest32[0][i]);
2435             dest32[1][i] = av_bswap32(dest32[1][i]);
2436             dest32[2][i] = av_bswap32(dest32[2][i]);
2437             if (hasAlpha)
2438                 dest32[3][i] = av_bswap32(dest32[3][i]);
2439         }
2440     }
2441 }
2442 
2443 static void
yuv2ya8_1_c(SwsContext * c,const int16_t * buf0,const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf0,uint8_t * dest,int dstW,int uvalpha,int y)2444 yuv2ya8_1_c(SwsContext *c, const int16_t *buf0,
2445             const int16_t *ubuf[2], const int16_t *vbuf[2],
2446             const int16_t *abuf0, uint8_t *dest, int dstW,
2447             int uvalpha, int y)
2448 {
2449     int hasAlpha = !!abuf0;
2450     int i;
2451 
2452     for (i = 0; i < dstW; i++) {
2453         int Y = (buf0[i] + 64) >> 7;
2454         int A;
2455 
2456         Y = av_clip_uint8(Y);
2457 
2458         if (hasAlpha) {
2459             A = (abuf0[i] + 64) >> 7;
2460             if (A & 0x100)
2461                 A = av_clip_uint8(A);
2462         }
2463 
2464         dest[i * 2    ] = Y;
2465         dest[i * 2 + 1] = hasAlpha ? A : 255;
2466     }
2467 }
2468 
2469 static void
yuv2ya8_2_c(SwsContext * c,const int16_t * buf[2],const int16_t * ubuf[2],const int16_t * vbuf[2],const int16_t * abuf[2],uint8_t * dest,int dstW,int yalpha,int uvalpha,int y)2470 yuv2ya8_2_c(SwsContext *c, const int16_t *buf[2],
2471             const int16_t *ubuf[2], const int16_t *vbuf[2],
2472             const int16_t *abuf[2], uint8_t *dest, int dstW,
2473             int yalpha, int uvalpha, int y)
2474 {
2475     int hasAlpha = abuf && abuf[0] && abuf[1];
2476     const int16_t *buf0  = buf[0],  *buf1  = buf[1],
2477                   *abuf0 = hasAlpha ? abuf[0] : NULL,
2478                   *abuf1 = hasAlpha ? abuf[1] : NULL;
2479     int  yalpha1 = 4096 - yalpha;
2480     int i;
2481 
2482     av_assert2(yalpha  <= 4096U);
2483 
2484     for (i = 0; i < dstW; i++) {
2485         int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 19;
2486         int A;
2487 
2488         Y = av_clip_uint8(Y);
2489 
2490         if (hasAlpha) {
2491             A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 19;
2492             A = av_clip_uint8(A);
2493         }
2494 
2495         dest[i * 2    ] = Y;
2496         dest[i * 2 + 1] = hasAlpha ? A : 255;
2497     }
2498 }
2499 
2500 static void
yuv2ya8_X_c(SwsContext * c,const int16_t * lumFilter,const int16_t ** lumSrc,int lumFilterSize,const int16_t * chrFilter,const int16_t ** chrUSrc,const int16_t ** chrVSrc,int chrFilterSize,const int16_t ** alpSrc,uint8_t * dest,int dstW,int y)2501 yuv2ya8_X_c(SwsContext *c, const int16_t *lumFilter,
2502             const int16_t **lumSrc, int lumFilterSize,
2503             const int16_t *chrFilter, const int16_t **chrUSrc,
2504             const int16_t **chrVSrc, int chrFilterSize,
2505             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
2506 {
2507     int hasAlpha = !!alpSrc;
2508     int i;
2509 
2510     for (i = 0; i < dstW; i++) {
2511         int j;
2512         int Y = 1 << 18, A = 1 << 18;
2513 
2514         for (j = 0; j < lumFilterSize; j++)
2515             Y += lumSrc[j][i] * lumFilter[j];
2516 
2517         Y >>= 19;
2518         if (Y  & 0x100)
2519             Y = av_clip_uint8(Y);
2520 
2521         if (hasAlpha) {
2522             for (j = 0; j < lumFilterSize; j++)
2523                 A += alpSrc[j][i] * lumFilter[j];
2524 
2525             A >>= 19;
2526 
2527             if (A & 0x100)
2528                 A = av_clip_uint8(A);
2529         }
2530 
2531         dest[2 * i    ] = Y;
2532         dest[2 * i + 1] = hasAlpha ? A : 255;
2533     }
2534 }
2535 
2536 static void
yuv2ayuv64le_X_c(SwsContext * c,const int16_t * lumFilter,const int16_t ** _lumSrc,int lumFilterSize,const int16_t * chrFilter,const int16_t ** _chrUSrc,const int16_t ** _chrVSrc,int chrFilterSize,const int16_t ** _alpSrc,uint8_t * dest,int dstW,int y)2537 yuv2ayuv64le_X_c(SwsContext *c, const int16_t *lumFilter,
2538                  const int16_t **_lumSrc, int lumFilterSize,
2539                  const int16_t *chrFilter, const int16_t **_chrUSrc,
2540                  const int16_t **_chrVSrc, int chrFilterSize,
2541                  const int16_t **_alpSrc, uint8_t *dest, int dstW, int y)
2542 {
2543     const int32_t **lumSrc  = (const int32_t **) _lumSrc,
2544                   **chrUSrc = (const int32_t **) _chrUSrc,
2545                   **chrVSrc = (const int32_t **) _chrVSrc,
2546                   **alpSrc  = (const int32_t **) _alpSrc;
2547     int hasAlpha = !!alpSrc;
2548     int i;
2549 
2550     for (i = 0; i < dstW; i++) {
2551         int Y = 1 << 14, U = 1 << 14;
2552         int V = 1 << 14, A = 1 << 14;
2553         int j;
2554 
2555         Y -= 0x40000000;
2556         U -= 0x40000000;
2557         V -= 0x40000000;
2558         A -= 0x40000000;
2559 
2560         for (j = 0; j < lumFilterSize; j++)
2561             Y += lumSrc[j][i] * (unsigned)lumFilter[j];
2562 
2563         for (j = 0; j < chrFilterSize; j++)
2564             U += chrUSrc[j][i] * (unsigned)chrFilter[j];
2565 
2566         for (j = 0; j < chrFilterSize; j++)
2567             V += chrVSrc[j][i] * (unsigned)chrFilter[j];
2568 
2569         if (hasAlpha)
2570             for (j = 0; j < lumFilterSize; j++)
2571                 A += alpSrc[j][i] * (unsigned)lumFilter[j];
2572 
2573         Y = 0x8000 + av_clip_int16(Y >> 15);
2574         U = 0x8000 + av_clip_int16(U >> 15);
2575         V = 0x8000 + av_clip_int16(V >> 15);
2576         A = 0x8000 + av_clip_int16(A >> 15);
2577 
2578         AV_WL16(dest + 8 * i, hasAlpha ? A : 65535);
2579         AV_WL16(dest + 8 * i + 2, Y);
2580         AV_WL16(dest + 8 * i + 4, U);
2581         AV_WL16(dest + 8 * i + 6, V);
2582     }
2583 }
2584 
ff_sws_init_output_funcs(SwsContext * c,yuv2planar1_fn * yuv2plane1,yuv2planarX_fn * yuv2planeX,yuv2interleavedX_fn * yuv2nv12cX,yuv2packed1_fn * yuv2packed1,yuv2packed2_fn * yuv2packed2,yuv2packedX_fn * yuv2packedX,yuv2anyX_fn * yuv2anyX)2585 av_cold void ff_sws_init_output_funcs(SwsContext *c,
2586                                       yuv2planar1_fn *yuv2plane1,
2587                                       yuv2planarX_fn *yuv2planeX,
2588                                       yuv2interleavedX_fn *yuv2nv12cX,
2589                                       yuv2packed1_fn *yuv2packed1,
2590                                       yuv2packed2_fn *yuv2packed2,
2591                                       yuv2packedX_fn *yuv2packedX,
2592                                       yuv2anyX_fn *yuv2anyX)
2593 {
2594     enum AVPixelFormat dstFormat = c->dstFormat;
2595     const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dstFormat);
2596 
2597     if (isSemiPlanarYUV(dstFormat) && isDataInHighBits(dstFormat)) {
2598         av_assert0(desc->comp[0].depth == 10);
2599         *yuv2plane1 = isBE(dstFormat) ? yuv2p010l1_BE_c : yuv2p010l1_LE_c;
2600         *yuv2planeX = isBE(dstFormat) ? yuv2p010lX_BE_c : yuv2p010lX_LE_c;
2601         *yuv2nv12cX = isBE(dstFormat) ? yuv2p010cX_BE_c : yuv2p010cX_LE_c;
2602     } else if (is16BPS(dstFormat)) {
2603         *yuv2planeX = isBE(dstFormat) ? yuv2planeX_16BE_c  : yuv2planeX_16LE_c;
2604         *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_16BE_c  : yuv2plane1_16LE_c;
2605         if (isSemiPlanarYUV(dstFormat)) {
2606           *yuv2nv12cX = isBE(dstFormat) ? yuv2nv12cX_16BE_c : yuv2nv12cX_16LE_c;
2607         }
2608     } else if (isNBPS(dstFormat)) {
2609         if (desc->comp[0].depth == 9) {
2610             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_9BE_c  : yuv2planeX_9LE_c;
2611             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_9BE_c  : yuv2plane1_9LE_c;
2612         } else if (desc->comp[0].depth == 10) {
2613             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_10BE_c  : yuv2planeX_10LE_c;
2614             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_10BE_c  : yuv2plane1_10LE_c;
2615         } else if (desc->comp[0].depth == 12) {
2616             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_12BE_c  : yuv2planeX_12LE_c;
2617             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_12BE_c  : yuv2plane1_12LE_c;
2618         } else if (desc->comp[0].depth == 14) {
2619             *yuv2planeX = isBE(dstFormat) ? yuv2planeX_14BE_c  : yuv2planeX_14LE_c;
2620             *yuv2plane1 = isBE(dstFormat) ? yuv2plane1_14BE_c  : yuv2plane1_14LE_c;
2621         } else
2622             av_assert0(0);
2623     } else if (dstFormat == AV_PIX_FMT_GRAYF32BE) {
2624         *yuv2planeX = yuv2planeX_floatBE_c;
2625         *yuv2plane1 = yuv2plane1_floatBE_c;
2626     } else if (dstFormat == AV_PIX_FMT_GRAYF32LE) {
2627         *yuv2planeX = yuv2planeX_floatLE_c;
2628         *yuv2plane1 = yuv2plane1_floatLE_c;
2629     } else {
2630         *yuv2plane1 = yuv2plane1_8_c;
2631         *yuv2planeX = yuv2planeX_8_c;
2632         if (isSemiPlanarYUV(dstFormat))
2633             *yuv2nv12cX = yuv2nv12cX_c;
2634     }
2635 
2636     if(c->flags & SWS_FULL_CHR_H_INT) {
2637         switch (dstFormat) {
2638             case AV_PIX_FMT_RGBA:
2639 #if CONFIG_SMALL
2640                 *yuv2packedX = yuv2rgba32_full_X_c;
2641                 *yuv2packed2 = yuv2rgba32_full_2_c;
2642                 *yuv2packed1 = yuv2rgba32_full_1_c;
2643 #else
2644 #if CONFIG_SWSCALE_ALPHA
2645                 if (c->needAlpha) {
2646                     *yuv2packedX = yuv2rgba32_full_X_c;
2647                     *yuv2packed2 = yuv2rgba32_full_2_c;
2648                     *yuv2packed1 = yuv2rgba32_full_1_c;
2649                 } else
2650 #endif /* CONFIG_SWSCALE_ALPHA */
2651                 {
2652                     *yuv2packedX = yuv2rgbx32_full_X_c;
2653                     *yuv2packed2 = yuv2rgbx32_full_2_c;
2654                     *yuv2packed1 = yuv2rgbx32_full_1_c;
2655                 }
2656 #endif /* !CONFIG_SMALL */
2657                 break;
2658             case AV_PIX_FMT_ARGB:
2659 #if CONFIG_SMALL
2660                 *yuv2packedX = yuv2argb32_full_X_c;
2661                 *yuv2packed2 = yuv2argb32_full_2_c;
2662                 *yuv2packed1 = yuv2argb32_full_1_c;
2663 #else
2664 #if CONFIG_SWSCALE_ALPHA
2665                 if (c->needAlpha) {
2666                     *yuv2packedX = yuv2argb32_full_X_c;
2667                     *yuv2packed2 = yuv2argb32_full_2_c;
2668                     *yuv2packed1 = yuv2argb32_full_1_c;
2669                 } else
2670 #endif /* CONFIG_SWSCALE_ALPHA */
2671                 {
2672                     *yuv2packedX = yuv2xrgb32_full_X_c;
2673                     *yuv2packed2 = yuv2xrgb32_full_2_c;
2674                     *yuv2packed1 = yuv2xrgb32_full_1_c;
2675                 }
2676 #endif /* !CONFIG_SMALL */
2677                 break;
2678             case AV_PIX_FMT_BGRA:
2679 #if CONFIG_SMALL
2680                 *yuv2packedX = yuv2bgra32_full_X_c;
2681                 *yuv2packed2 = yuv2bgra32_full_2_c;
2682                 *yuv2packed1 = yuv2bgra32_full_1_c;
2683 #else
2684 #if CONFIG_SWSCALE_ALPHA
2685                 if (c->needAlpha) {
2686                     *yuv2packedX = yuv2bgra32_full_X_c;
2687                     *yuv2packed2 = yuv2bgra32_full_2_c;
2688                     *yuv2packed1 = yuv2bgra32_full_1_c;
2689                 } else
2690 #endif /* CONFIG_SWSCALE_ALPHA */
2691                 {
2692                     *yuv2packedX = yuv2bgrx32_full_X_c;
2693                     *yuv2packed2 = yuv2bgrx32_full_2_c;
2694                     *yuv2packed1 = yuv2bgrx32_full_1_c;
2695                 }
2696 #endif /* !CONFIG_SMALL */
2697                 break;
2698             case AV_PIX_FMT_ABGR:
2699 #if CONFIG_SMALL
2700                 *yuv2packedX = yuv2abgr32_full_X_c;
2701                 *yuv2packed2 = yuv2abgr32_full_2_c;
2702                 *yuv2packed1 = yuv2abgr32_full_1_c;
2703 #else
2704 #if CONFIG_SWSCALE_ALPHA
2705                 if (c->needAlpha) {
2706                     *yuv2packedX = yuv2abgr32_full_X_c;
2707                     *yuv2packed2 = yuv2abgr32_full_2_c;
2708                     *yuv2packed1 = yuv2abgr32_full_1_c;
2709                 } else
2710 #endif /* CONFIG_SWSCALE_ALPHA */
2711                 {
2712                     *yuv2packedX = yuv2xbgr32_full_X_c;
2713                     *yuv2packed2 = yuv2xbgr32_full_2_c;
2714                     *yuv2packed1 = yuv2xbgr32_full_1_c;
2715                 }
2716 #endif /* !CONFIG_SMALL */
2717                 break;
2718         case AV_PIX_FMT_RGBA64LE:
2719 #if CONFIG_SWSCALE_ALPHA
2720             if (c->needAlpha) {
2721                 *yuv2packedX = yuv2rgba64le_full_X_c;
2722                 *yuv2packed2 = yuv2rgba64le_full_2_c;
2723                 *yuv2packed1 = yuv2rgba64le_full_1_c;
2724             } else
2725 #endif /* CONFIG_SWSCALE_ALPHA */
2726             {
2727                 *yuv2packedX = yuv2rgbx64le_full_X_c;
2728                 *yuv2packed2 = yuv2rgbx64le_full_2_c;
2729                 *yuv2packed1 = yuv2rgbx64le_full_1_c;
2730             }
2731             break;
2732         case AV_PIX_FMT_RGBA64BE:
2733 #if CONFIG_SWSCALE_ALPHA
2734             if (c->needAlpha) {
2735                 *yuv2packedX = yuv2rgba64be_full_X_c;
2736                 *yuv2packed2 = yuv2rgba64be_full_2_c;
2737                 *yuv2packed1 = yuv2rgba64be_full_1_c;
2738             } else
2739 #endif /* CONFIG_SWSCALE_ALPHA */
2740             {
2741                 *yuv2packedX = yuv2rgbx64be_full_X_c;
2742                 *yuv2packed2 = yuv2rgbx64be_full_2_c;
2743                 *yuv2packed1 = yuv2rgbx64be_full_1_c;
2744             }
2745             break;
2746         case AV_PIX_FMT_BGRA64LE:
2747 #if CONFIG_SWSCALE_ALPHA
2748             if (c->needAlpha) {
2749                 *yuv2packedX = yuv2bgra64le_full_X_c;
2750                 *yuv2packed2 = yuv2bgra64le_full_2_c;
2751                 *yuv2packed1 = yuv2bgra64le_full_1_c;
2752             } else
2753 #endif /* CONFIG_SWSCALE_ALPHA */
2754             {
2755                 *yuv2packedX = yuv2bgrx64le_full_X_c;
2756                 *yuv2packed2 = yuv2bgrx64le_full_2_c;
2757                 *yuv2packed1 = yuv2bgrx64le_full_1_c;
2758             }
2759             break;
2760         case AV_PIX_FMT_BGRA64BE:
2761 #if CONFIG_SWSCALE_ALPHA
2762             if (c->needAlpha) {
2763                 *yuv2packedX = yuv2bgra64be_full_X_c;
2764                 *yuv2packed2 = yuv2bgra64be_full_2_c;
2765                 *yuv2packed1 = yuv2bgra64be_full_1_c;
2766             } else
2767 #endif /* CONFIG_SWSCALE_ALPHA */
2768             {
2769                 *yuv2packedX = yuv2bgrx64be_full_X_c;
2770                 *yuv2packed2 = yuv2bgrx64be_full_2_c;
2771                 *yuv2packed1 = yuv2bgrx64be_full_1_c;
2772             }
2773             break;
2774 
2775         case AV_PIX_FMT_RGB24:
2776             *yuv2packedX = yuv2rgb24_full_X_c;
2777             *yuv2packed2 = yuv2rgb24_full_2_c;
2778             *yuv2packed1 = yuv2rgb24_full_1_c;
2779             break;
2780         case AV_PIX_FMT_BGR24:
2781             *yuv2packedX = yuv2bgr24_full_X_c;
2782             *yuv2packed2 = yuv2bgr24_full_2_c;
2783             *yuv2packed1 = yuv2bgr24_full_1_c;
2784             break;
2785         case AV_PIX_FMT_RGB48LE:
2786             *yuv2packedX = yuv2rgb48le_full_X_c;
2787             *yuv2packed2 = yuv2rgb48le_full_2_c;
2788             *yuv2packed1 = yuv2rgb48le_full_1_c;
2789             break;
2790         case AV_PIX_FMT_BGR48LE:
2791             *yuv2packedX = yuv2bgr48le_full_X_c;
2792             *yuv2packed2 = yuv2bgr48le_full_2_c;
2793             *yuv2packed1 = yuv2bgr48le_full_1_c;
2794             break;
2795         case AV_PIX_FMT_RGB48BE:
2796             *yuv2packedX = yuv2rgb48be_full_X_c;
2797             *yuv2packed2 = yuv2rgb48be_full_2_c;
2798             *yuv2packed1 = yuv2rgb48be_full_1_c;
2799             break;
2800         case AV_PIX_FMT_BGR48BE:
2801             *yuv2packedX = yuv2bgr48be_full_X_c;
2802             *yuv2packed2 = yuv2bgr48be_full_2_c;
2803             *yuv2packed1 = yuv2bgr48be_full_1_c;
2804             break;
2805         case AV_PIX_FMT_BGR4_BYTE:
2806             *yuv2packedX = yuv2bgr4_byte_full_X_c;
2807             *yuv2packed2 = yuv2bgr4_byte_full_2_c;
2808             *yuv2packed1 = yuv2bgr4_byte_full_1_c;
2809             break;
2810         case AV_PIX_FMT_RGB4_BYTE:
2811             *yuv2packedX = yuv2rgb4_byte_full_X_c;
2812             *yuv2packed2 = yuv2rgb4_byte_full_2_c;
2813             *yuv2packed1 = yuv2rgb4_byte_full_1_c;
2814             break;
2815         case AV_PIX_FMT_BGR8:
2816             *yuv2packedX = yuv2bgr8_full_X_c;
2817             *yuv2packed2 = yuv2bgr8_full_2_c;
2818             *yuv2packed1 = yuv2bgr8_full_1_c;
2819             break;
2820         case AV_PIX_FMT_RGB8:
2821             *yuv2packedX = yuv2rgb8_full_X_c;
2822             *yuv2packed2 = yuv2rgb8_full_2_c;
2823             *yuv2packed1 = yuv2rgb8_full_1_c;
2824             break;
2825         case AV_PIX_FMT_GBRP:
2826         case AV_PIX_FMT_GBRP9BE:
2827         case AV_PIX_FMT_GBRP9LE:
2828         case AV_PIX_FMT_GBRP10BE:
2829         case AV_PIX_FMT_GBRP10LE:
2830         case AV_PIX_FMT_GBRP12BE:
2831         case AV_PIX_FMT_GBRP12LE:
2832         case AV_PIX_FMT_GBRP14BE:
2833         case AV_PIX_FMT_GBRP14LE:
2834         case AV_PIX_FMT_GBRAP:
2835         case AV_PIX_FMT_GBRAP10BE:
2836         case AV_PIX_FMT_GBRAP10LE:
2837         case AV_PIX_FMT_GBRAP12BE:
2838         case AV_PIX_FMT_GBRAP12LE:
2839             *yuv2anyX = yuv2gbrp_full_X_c;
2840             break;
2841         case AV_PIX_FMT_GBRP16BE:
2842         case AV_PIX_FMT_GBRP16LE:
2843         case AV_PIX_FMT_GBRAP16BE:
2844         case AV_PIX_FMT_GBRAP16LE:
2845             *yuv2anyX = yuv2gbrp16_full_X_c;
2846             break;
2847         case AV_PIX_FMT_GBRPF32BE:
2848         case AV_PIX_FMT_GBRPF32LE:
2849         case AV_PIX_FMT_GBRAPF32BE:
2850         case AV_PIX_FMT_GBRAPF32LE:
2851             *yuv2anyX = yuv2gbrpf32_full_X_c;
2852             break;
2853         }
2854         if (!*yuv2packedX && !*yuv2anyX)
2855             goto YUV_PACKED;
2856     } else {
2857         YUV_PACKED:
2858         switch (dstFormat) {
2859         case AV_PIX_FMT_RGBA64LE:
2860 #if CONFIG_SWSCALE_ALPHA
2861             if (c->needAlpha) {
2862                 *yuv2packed1 = yuv2rgba64le_1_c;
2863                 *yuv2packed2 = yuv2rgba64le_2_c;
2864                 *yuv2packedX = yuv2rgba64le_X_c;
2865             } else
2866 #endif /* CONFIG_SWSCALE_ALPHA */
2867             {
2868                 *yuv2packed1 = yuv2rgbx64le_1_c;
2869                 *yuv2packed2 = yuv2rgbx64le_2_c;
2870                 *yuv2packedX = yuv2rgbx64le_X_c;
2871             }
2872             break;
2873         case AV_PIX_FMT_RGBA64BE:
2874 #if CONFIG_SWSCALE_ALPHA
2875             if (c->needAlpha) {
2876                 *yuv2packed1 = yuv2rgba64be_1_c;
2877                 *yuv2packed2 = yuv2rgba64be_2_c;
2878                 *yuv2packedX = yuv2rgba64be_X_c;
2879             } else
2880 #endif /* CONFIG_SWSCALE_ALPHA */
2881             {
2882                 *yuv2packed1 = yuv2rgbx64be_1_c;
2883                 *yuv2packed2 = yuv2rgbx64be_2_c;
2884                 *yuv2packedX = yuv2rgbx64be_X_c;
2885             }
2886             break;
2887         case AV_PIX_FMT_BGRA64LE:
2888 #if CONFIG_SWSCALE_ALPHA
2889             if (c->needAlpha) {
2890                 *yuv2packed1 = yuv2bgra64le_1_c;
2891                 *yuv2packed2 = yuv2bgra64le_2_c;
2892                 *yuv2packedX = yuv2bgra64le_X_c;
2893             } else
2894 #endif /* CONFIG_SWSCALE_ALPHA */
2895             {
2896                 *yuv2packed1 = yuv2bgrx64le_1_c;
2897                 *yuv2packed2 = yuv2bgrx64le_2_c;
2898                 *yuv2packedX = yuv2bgrx64le_X_c;
2899             }
2900             break;
2901         case AV_PIX_FMT_BGRA64BE:
2902 #if CONFIG_SWSCALE_ALPHA
2903             if (c->needAlpha) {
2904                 *yuv2packed1 = yuv2bgra64be_1_c;
2905                 *yuv2packed2 = yuv2bgra64be_2_c;
2906                 *yuv2packedX = yuv2bgra64be_X_c;
2907             } else
2908 #endif /* CONFIG_SWSCALE_ALPHA */
2909             {
2910                 *yuv2packed1 = yuv2bgrx64be_1_c;
2911                 *yuv2packed2 = yuv2bgrx64be_2_c;
2912                 *yuv2packedX = yuv2bgrx64be_X_c;
2913             }
2914             break;
2915         case AV_PIX_FMT_RGB48LE:
2916             *yuv2packed1 = yuv2rgb48le_1_c;
2917             *yuv2packed2 = yuv2rgb48le_2_c;
2918             *yuv2packedX = yuv2rgb48le_X_c;
2919             break;
2920         case AV_PIX_FMT_RGB48BE:
2921             *yuv2packed1 = yuv2rgb48be_1_c;
2922             *yuv2packed2 = yuv2rgb48be_2_c;
2923             *yuv2packedX = yuv2rgb48be_X_c;
2924             break;
2925         case AV_PIX_FMT_BGR48LE:
2926             *yuv2packed1 = yuv2bgr48le_1_c;
2927             *yuv2packed2 = yuv2bgr48le_2_c;
2928             *yuv2packedX = yuv2bgr48le_X_c;
2929             break;
2930         case AV_PIX_FMT_BGR48BE:
2931             *yuv2packed1 = yuv2bgr48be_1_c;
2932             *yuv2packed2 = yuv2bgr48be_2_c;
2933             *yuv2packedX = yuv2bgr48be_X_c;
2934             break;
2935         case AV_PIX_FMT_RGB32:
2936         case AV_PIX_FMT_BGR32:
2937 #if CONFIG_SMALL
2938             *yuv2packed1 = yuv2rgb32_1_c;
2939             *yuv2packed2 = yuv2rgb32_2_c;
2940             *yuv2packedX = yuv2rgb32_X_c;
2941 #else
2942 #if CONFIG_SWSCALE_ALPHA
2943                 if (c->needAlpha) {
2944                     *yuv2packed1 = yuv2rgba32_1_c;
2945                     *yuv2packed2 = yuv2rgba32_2_c;
2946                     *yuv2packedX = yuv2rgba32_X_c;
2947                 } else
2948 #endif /* CONFIG_SWSCALE_ALPHA */
2949                 {
2950                     *yuv2packed1 = yuv2rgbx32_1_c;
2951                     *yuv2packed2 = yuv2rgbx32_2_c;
2952                     *yuv2packedX = yuv2rgbx32_X_c;
2953                 }
2954 #endif /* !CONFIG_SMALL */
2955             break;
2956         case AV_PIX_FMT_RGB32_1:
2957         case AV_PIX_FMT_BGR32_1:
2958 #if CONFIG_SMALL
2959                 *yuv2packed1 = yuv2rgb32_1_1_c;
2960                 *yuv2packed2 = yuv2rgb32_1_2_c;
2961                 *yuv2packedX = yuv2rgb32_1_X_c;
2962 #else
2963 #if CONFIG_SWSCALE_ALPHA
2964                 if (c->needAlpha) {
2965                     *yuv2packed1 = yuv2rgba32_1_1_c;
2966                     *yuv2packed2 = yuv2rgba32_1_2_c;
2967                     *yuv2packedX = yuv2rgba32_1_X_c;
2968                 } else
2969 #endif /* CONFIG_SWSCALE_ALPHA */
2970                 {
2971                     *yuv2packed1 = yuv2rgbx32_1_1_c;
2972                     *yuv2packed2 = yuv2rgbx32_1_2_c;
2973                     *yuv2packedX = yuv2rgbx32_1_X_c;
2974                 }
2975 #endif /* !CONFIG_SMALL */
2976                 break;
2977         case AV_PIX_FMT_RGB24:
2978             *yuv2packed1 = yuv2rgb24_1_c;
2979             *yuv2packed2 = yuv2rgb24_2_c;
2980             *yuv2packedX = yuv2rgb24_X_c;
2981             break;
2982         case AV_PIX_FMT_BGR24:
2983             *yuv2packed1 = yuv2bgr24_1_c;
2984             *yuv2packed2 = yuv2bgr24_2_c;
2985             *yuv2packedX = yuv2bgr24_X_c;
2986             break;
2987         case AV_PIX_FMT_RGB565LE:
2988         case AV_PIX_FMT_RGB565BE:
2989         case AV_PIX_FMT_BGR565LE:
2990         case AV_PIX_FMT_BGR565BE:
2991             *yuv2packed1 = yuv2rgb16_1_c;
2992             *yuv2packed2 = yuv2rgb16_2_c;
2993             *yuv2packedX = yuv2rgb16_X_c;
2994             break;
2995         case AV_PIX_FMT_RGB555LE:
2996         case AV_PIX_FMT_RGB555BE:
2997         case AV_PIX_FMT_BGR555LE:
2998         case AV_PIX_FMT_BGR555BE:
2999             *yuv2packed1 = yuv2rgb15_1_c;
3000             *yuv2packed2 = yuv2rgb15_2_c;
3001             *yuv2packedX = yuv2rgb15_X_c;
3002             break;
3003         case AV_PIX_FMT_RGB444LE:
3004         case AV_PIX_FMT_RGB444BE:
3005         case AV_PIX_FMT_BGR444LE:
3006         case AV_PIX_FMT_BGR444BE:
3007             *yuv2packed1 = yuv2rgb12_1_c;
3008             *yuv2packed2 = yuv2rgb12_2_c;
3009             *yuv2packedX = yuv2rgb12_X_c;
3010             break;
3011         case AV_PIX_FMT_RGB8:
3012         case AV_PIX_FMT_BGR8:
3013             *yuv2packed1 = yuv2rgb8_1_c;
3014             *yuv2packed2 = yuv2rgb8_2_c;
3015             *yuv2packedX = yuv2rgb8_X_c;
3016             break;
3017         case AV_PIX_FMT_RGB4:
3018         case AV_PIX_FMT_BGR4:
3019             *yuv2packed1 = yuv2rgb4_1_c;
3020             *yuv2packed2 = yuv2rgb4_2_c;
3021             *yuv2packedX = yuv2rgb4_X_c;
3022             break;
3023         case AV_PIX_FMT_RGB4_BYTE:
3024         case AV_PIX_FMT_BGR4_BYTE:
3025             *yuv2packed1 = yuv2rgb4b_1_c;
3026             *yuv2packed2 = yuv2rgb4b_2_c;
3027             *yuv2packedX = yuv2rgb4b_X_c;
3028             break;
3029         case AV_PIX_FMT_X2RGB10LE:
3030         case AV_PIX_FMT_X2RGB10BE:
3031             *yuv2packed1 = yuv2x2rgb10_1_c;
3032             *yuv2packed2 = yuv2x2rgb10_2_c;
3033             *yuv2packedX = yuv2x2rgb10_X_c;
3034             break;
3035         case AV_PIX_FMT_X2BGR10LE:
3036         case AV_PIX_FMT_X2BGR10BE:
3037             *yuv2packed1 = yuv2x2bgr10_1_c;
3038             *yuv2packed2 = yuv2x2bgr10_2_c;
3039             *yuv2packedX = yuv2x2bgr10_X_c;
3040             break;
3041         }
3042     }
3043     switch (dstFormat) {
3044     case AV_PIX_FMT_MONOWHITE:
3045         *yuv2packed1 = yuv2monowhite_1_c;
3046         *yuv2packed2 = yuv2monowhite_2_c;
3047         *yuv2packedX = yuv2monowhite_X_c;
3048         break;
3049     case AV_PIX_FMT_MONOBLACK:
3050         *yuv2packed1 = yuv2monoblack_1_c;
3051         *yuv2packed2 = yuv2monoblack_2_c;
3052         *yuv2packedX = yuv2monoblack_X_c;
3053         break;
3054     case AV_PIX_FMT_YUYV422:
3055         *yuv2packed1 = yuv2yuyv422_1_c;
3056         *yuv2packed2 = yuv2yuyv422_2_c;
3057         *yuv2packedX = yuv2yuyv422_X_c;
3058         break;
3059     case AV_PIX_FMT_YVYU422:
3060         *yuv2packed1 = yuv2yvyu422_1_c;
3061         *yuv2packed2 = yuv2yvyu422_2_c;
3062         *yuv2packedX = yuv2yvyu422_X_c;
3063         break;
3064     case AV_PIX_FMT_UYVY422:
3065         *yuv2packed1 = yuv2uyvy422_1_c;
3066         *yuv2packed2 = yuv2uyvy422_2_c;
3067         *yuv2packedX = yuv2uyvy422_X_c;
3068         break;
3069     case AV_PIX_FMT_YA8:
3070         *yuv2packed1 = yuv2ya8_1_c;
3071         *yuv2packed2 = yuv2ya8_2_c;
3072         *yuv2packedX = yuv2ya8_X_c;
3073         break;
3074     case AV_PIX_FMT_YA16LE:
3075         *yuv2packed1 = yuv2ya16le_1_c;
3076         *yuv2packed2 = yuv2ya16le_2_c;
3077         *yuv2packedX = yuv2ya16le_X_c;
3078         break;
3079     case AV_PIX_FMT_YA16BE:
3080         *yuv2packed1 = yuv2ya16be_1_c;
3081         *yuv2packed2 = yuv2ya16be_2_c;
3082         *yuv2packedX = yuv2ya16be_X_c;
3083         break;
3084     case AV_PIX_FMT_AYUV64LE:
3085         *yuv2packedX = yuv2ayuv64le_X_c;
3086         break;
3087     }
3088 }
3089