1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12
13 #include <assert.h>
14 #include <string.h> // For memcpy and memset.
15
16 #include "libyuv/basic_types.h"
17 #include "libyuv/convert_argb.h" // For kYuvI601Constants
18
19 #ifdef __cplusplus
20 namespace libyuv {
21 extern "C" {
22 #endif
23
24 #ifdef __cplusplus
25 #define STATIC_CAST(type, expr) static_cast<type>(expr)
26 #else
27 #define STATIC_CAST(type, expr) (type)(expr)
28 #endif
29
30 // This macro controls YUV to RGB using unsigned math to extend range of
31 // YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
32 // LIBYUV_UNLIMITED_DATA
33
34 // Macros to enable unlimited data for each colorspace
35 // LIBYUV_UNLIMITED_BT601
36 // LIBYUV_UNLIMITED_BT709
37 // LIBYUV_UNLIMITED_BT2020
38
39 // The following macro from row_win makes the C code match the row_win code,
40 // which is 7 bit fixed point for ARGBToI420:
41 #if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
42 defined(_MSC_VER) && !defined(__clang__) && \
43 (defined(_M_IX86) || defined(_M_X64))
44 #define LIBYUV_RGB7 1
45 #endif
46
47 #if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
48 defined(__i386__) || defined(_M_IX86))
49 #define LIBYUV_ARGBTOUV_PAVGB 1
50 #define LIBYUV_RGBTOU_TRUNCATE 1
51 #endif
52 #if defined(LIBYUV_BIT_EXACT)
53 #define LIBYUV_UNATTENUATE_DUP 1
54 #endif
55
56 // llvm x86 is poor at ternary operator, so use branchless min/max.
57
#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
// mask is all ones when v >= 0, so the value passes through; else 0.
static __inline int32_t clamp0(int32_t v) {
  int32_t mask = -(v >= 0);
  return v & mask;
}
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  int32_t mask = -(v >= 255);  // all ones when v >= 255
  return (v | mask) & 255;
}

static __inline int32_t clamp1023(int32_t v) {
  int32_t mask = -(v >= 1023);
  return (v | mask) & 1023;
}

// clamp to max (max is expected to be 2^n - 1)
static __inline int32_t ClampMax(int32_t v, int32_t max) {
  int32_t mask = -(v >= max);
  return (v | mask) & max;
}

// Branchless absolute value: add the sign mask, then flip the bits.
static __inline uint32_t Abs(int32_t v) {
  int neg = -(v < 0);
  return (v + neg) ^ neg;
}
#else  // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  if (v < 0) {
    return 0;
  }
  return v;
}

static __inline int32_t clamp255(int32_t v) {
  if (v > 255) {
    return 255;
  }
  return v;
}

static __inline int32_t clamp1023(int32_t v) {
  if (v > 1023) {
    return 1023;
  }
  return v;
}

static __inline int32_t ClampMax(int32_t v, int32_t max) {
  if (v > max) {
    return max;
  }
  return v;
}

static __inline uint32_t Abs(int32_t v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
#endif  // USE_BRANCHLESS
// Clamp val to the 8 bit range [0, 255].
static __inline uint32_t Clamp(int32_t val) {
  return (uint32_t)(clamp255(clamp0(val)));
}
106
// Clamp val to the 10 bit range [0, 1023].
static __inline uint32_t Clamp10(int32_t val) {
  return (uint32_t)(clamp1023(clamp0(val)));
}
111
112 // Little Endian
113 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
114 defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
115 (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
116 #define WRITEWORD(p, v) *(uint32_t*)(p) = v
117 #else
// Store a 32 bit value to p one byte at a time, least significant first,
// so the in-memory layout is little endian regardless of host order.
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  int i;
  for (i = 0; i < 4; ++i) {
    p[i] = (uint8_t)(v & 255);
    v >>= 8;
  }
}
124 #endif
125
// Expand 24 bit RGB24 (B, G, R byte order) to 32 bit ARGB, opaque alpha.
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Read the source pixel before writing any destination bytes.
    uint8_t blue = src_rgb24[0];
    uint8_t green = src_rgb24[1];
    uint8_t red = src_rgb24[2];
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;  // alpha: fully opaque
    src_rgb24 += 3;
    dst_argb += 4;
  }
}
140
// Expand 24 bit RAW (R, G, B byte order) to 32 bit ARGB, opaque alpha.
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Read the source pixel before writing any destination bytes.
    uint8_t red = src_raw[0];
    uint8_t green = src_raw[1];
    uint8_t blue = src_raw[2];
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;  // alpha: fully opaque
    src_raw += 3;
    dst_argb += 4;
  }
}
155
// Expand 24 bit RAW (R, G, B byte order) to 32 bit RGBA, opaque alpha.
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Read the source pixel before writing any destination bytes.
    uint8_t red = src_raw[0];
    uint8_t green = src_raw[1];
    uint8_t blue = src_raw[2];
    dst_rgba[0] = 255u;  // alpha: fully opaque
    dst_rgba[1] = blue;
    dst_rgba[2] = green;
    dst_rgba[3] = red;
    src_raw += 3;
    dst_rgba += 4;
  }
}
170
// Swap R and B of 24 bit RAW to produce RGB24.
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Read the source pixel before writing any destination bytes.
    uint8_t red = src_raw[0];
    uint8_t green = src_raw[1];
    uint8_t blue = src_raw[2];
    dst_rgb24[0] = blue;
    dst_rgb24[1] = green;
    dst_rgb24[2] = red;
    src_raw += 3;
    dst_rgb24 += 3;
  }
}
184
RGB565ToARGBRow_C(const uint8_t * src_rgb565,uint8_t * dst_argb,int width)185 void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
186 uint8_t* dst_argb,
187 int width) {
188 int x;
189 for (x = 0; x < width; ++x) {
190 uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
191 uint8_t g = STATIC_CAST(
192 uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
193 uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
194 dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
195 dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
196 dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
197 dst_argb[3] = 255u;
198 dst_argb += 4;
199 src_rgb565 += 2;
200 }
201 }
202
ARGB1555ToARGBRow_C(const uint8_t * src_argb1555,uint8_t * dst_argb,int width)203 void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
204 uint8_t* dst_argb,
205 int width) {
206 int x;
207 for (x = 0; x < width; ++x) {
208 uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
209 uint8_t g = STATIC_CAST(
210 uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
211 uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
212 uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7);
213 dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
214 dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
215 dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
216 dst_argb[3] = -a;
217 dst_argb += 4;
218 src_argb1555 += 2;
219 }
220 }
221
ARGB4444ToARGBRow_C(const uint8_t * src_argb4444,uint8_t * dst_argb,int width)222 void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
223 uint8_t* dst_argb,
224 int width) {
225 int x;
226 for (x = 0; x < width; ++x) {
227 uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f);
228 uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4);
229 uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f);
230 uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4);
231 dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b);
232 dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g);
233 dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r);
234 dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a);
235 dst_argb += 4;
236 src_argb4444 += 2;
237 }
238 }
239
// Convert AR30 (2 bit alpha, 10:10:10 RGB in a little endian word) to
// 8 bit ARGB.  Each color channel keeps its top 8 bits.
// Both the load and the store use memcpy: src/dst are byte pointers that
// may not be 4 byte aligned, and a direct *(uint32_t*) access would be a
// type-punned, potentially misaligned write (undefined behavior).
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t argb = b | (g << 8) | (r << 16) | (a << 24);
    memcpy(dst_argb, &argb, sizeof argb);
    dst_argb += 4;
    src_ar30 += 4;
  }
}
254
// Convert AR30 to 8 bit ABGR (R and B swapped relative to ARGB output).
// Both the load and the store use memcpy: src/dst are byte pointers that
// may not be 4 byte aligned, and a direct *(uint32_t*) access would be a
// type-punned, potentially misaligned write (undefined behavior).
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t abgr = r | (g << 8) | (b << 16) | (a << 24);
    memcpy(dst_abgr, &abgr, sizeof abgr);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}
269
// Swap the 10 bit R and B channels of AR30 to produce AB30; green and
// alpha stay in place.
// Both the load and the store use memcpy: src/dst are byte pointers that
// may not be 4 byte aligned, and a direct *(uint32_t*) access would be a
// type-punned, potentially misaligned write (undefined behavior).
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;  // green (bits 10-19) and alpha (30-31)
    uint32_t r = (ar30 >> 20) & 0x3ff;
    uint32_t ab30 = r | ga | (b << 20);
    memcpy(dst_ab30, &ab30, sizeof ab30);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}
283
// Swap R and B channels: ARGB -> ABGR.
void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint8_t blue = src_argb[0];
    uint8_t green = src_argb[1];
    uint8_t red = src_argb[2];
    uint8_t alpha = src_argb[3];
    dst_abgr[0] = red;
    dst_abgr[1] = green;
    dst_abgr[2] = blue;
    dst_abgr[3] = alpha;
    src_argb += 4;
    dst_abgr += 4;
    --n;
  }
}
299
// Reverse the byte order of each pixel: ARGB -> BGRA.
void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint8_t blue = src_argb[0];
    uint8_t green = src_argb[1];
    uint8_t red = src_argb[2];
    uint8_t alpha = src_argb[3];
    dst_bgra[0] = alpha;
    dst_bgra[1] = red;
    dst_bgra[2] = green;
    dst_bgra[3] = blue;
    src_argb += 4;
    dst_bgra += 4;
    --n;
  }
}
315
// Rotate alpha to the first byte: ARGB -> RGBA.
void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint8_t blue = src_argb[0];
    uint8_t green = src_argb[1];
    uint8_t red = src_argb[2];
    uint8_t alpha = src_argb[3];
    dst_rgba[0] = alpha;
    dst_rgba[1] = blue;
    dst_rgba[2] = green;
    dst_rgba[3] = red;
    src_argb += 4;
    dst_rgba += 4;
    --n;
  }
}
331
// Drop alpha: ARGB -> RGB24 (B, G, R byte order preserved).
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int n = width;
  while (n > 0) {
    // Read the color bytes before writing.
    uint8_t blue = src_argb[0];
    uint8_t green = src_argb[1];
    uint8_t red = src_argb[2];
    dst_rgb[0] = blue;
    dst_rgb[1] = green;
    dst_rgb[2] = red;
    src_argb += 4;
    dst_rgb += 3;
    --n;
  }
}
345
// Drop alpha and swap R/B: ARGB -> RAW (R, G, B byte order).
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int n = width;
  while (n > 0) {
    // Read the color bytes before writing.
    uint8_t blue = src_argb[0];
    uint8_t green = src_argb[1];
    uint8_t red = src_argb[2];
    dst_rgb[0] = red;
    dst_rgb[1] = green;
    dst_rgb[2] = blue;
    src_argb += 4;
    dst_rgb += 3;
    --n;
  }
}
359
// Rotate alpha to the last byte: RGBA -> ARGB.
void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint8_t alpha = src_rgba[0];
    uint8_t blue = src_rgba[1];
    uint8_t green = src_rgba[2];
    uint8_t red = src_rgba[3];
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
    src_rgba += 4;
    dst_argb += 4;
    --n;
  }
}
375
ARGBToRGB565Row_C(const uint8_t * src_argb,uint8_t * dst_rgb,int width)376 void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
377 int x;
378 for (x = 0; x < width - 1; x += 2) {
379 uint8_t b0 = src_argb[0] >> 3;
380 uint8_t g0 = src_argb[1] >> 2;
381 uint8_t r0 = src_argb[2] >> 3;
382 uint8_t b1 = src_argb[4] >> 3;
383 uint8_t g1 = src_argb[5] >> 2;
384 uint8_t r1 = src_argb[6] >> 3;
385 WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
386 (r1 << 27));
387 dst_rgb += 4;
388 src_argb += 8;
389 }
390 if (width & 1) {
391 uint8_t b0 = src_argb[0] >> 3;
392 uint8_t g0 = src_argb[1] >> 2;
393 uint8_t r0 = src_argb[2] >> 3;
394 *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
395 }
396 }
397
398 // dither4 is a row of 4 values from 4x4 dither matrix.
399 // The 4x4 matrix contains values to increase RGB. When converting to
400 // fewer bits (565) this provides an ordered dither.
401 // The order in the 4x4 matrix in first byte is upper left.
402 // The 4 values are passed as an int, then referenced as an array, so
// endian will not affect the order of the original matrix. But dither4
// will contain the first pixel's value in the lower byte for little
// endian, or in the upper byte for big endian.
ARGBToRGB565DitherRow_C(const uint8_t * src_argb,uint8_t * dst_rgb,uint32_t dither4,int width)406 void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
407 uint8_t* dst_rgb,
408 uint32_t dither4,
409 int width) {
410 int x;
411 for (x = 0; x < width - 1; x += 2) {
412 int dither0 = ((const unsigned char*)(&dither4))[x & 3];
413 int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
414 uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
415 uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
416 uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
417 uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3);
418 uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2);
419 uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3);
420 *(uint16_t*)(dst_rgb + 0) =
421 STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
422 *(uint16_t*)(dst_rgb + 2) =
423 STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
424 dst_rgb += 4;
425 src_argb += 8;
426 }
427 if (width & 1) {
428 int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
429 uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
430 uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
431 uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
432 *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
433 }
434 }
435
ARGBToARGB1555Row_C(const uint8_t * src_argb,uint8_t * dst_rgb,int width)436 void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
437 int x;
438 for (x = 0; x < width - 1; x += 2) {
439 uint8_t b0 = src_argb[0] >> 3;
440 uint8_t g0 = src_argb[1] >> 3;
441 uint8_t r0 = src_argb[2] >> 3;
442 uint8_t a0 = src_argb[3] >> 7;
443 uint8_t b1 = src_argb[4] >> 3;
444 uint8_t g1 = src_argb[5] >> 3;
445 uint8_t r1 = src_argb[6] >> 3;
446 uint8_t a1 = src_argb[7] >> 7;
447 *(uint16_t*)(dst_rgb + 0) =
448 STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
449 *(uint16_t*)(dst_rgb + 2) =
450 STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15));
451 dst_rgb += 4;
452 src_argb += 8;
453 }
454 if (width & 1) {
455 uint8_t b0 = src_argb[0] >> 3;
456 uint8_t g0 = src_argb[1] >> 3;
457 uint8_t r0 = src_argb[2] >> 3;
458 uint8_t a0 = src_argb[3] >> 7;
459 *(uint16_t*)(dst_rgb) =
460 STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
461 }
462 }
463
ARGBToARGB4444Row_C(const uint8_t * src_argb,uint8_t * dst_rgb,int width)464 void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
465 int x;
466 for (x = 0; x < width - 1; x += 2) {
467 uint8_t b0 = src_argb[0] >> 4;
468 uint8_t g0 = src_argb[1] >> 4;
469 uint8_t r0 = src_argb[2] >> 4;
470 uint8_t a0 = src_argb[3] >> 4;
471 uint8_t b1 = src_argb[4] >> 4;
472 uint8_t g1 = src_argb[5] >> 4;
473 uint8_t r1 = src_argb[6] >> 4;
474 uint8_t a1 = src_argb[7] >> 4;
475 *(uint16_t*)(dst_rgb + 0) =
476 STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
477 *(uint16_t*)(dst_rgb + 2) =
478 STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12));
479 dst_rgb += 4;
480 src_argb += 8;
481 }
482 if (width & 1) {
483 uint8_t b0 = src_argb[0] >> 4;
484 uint8_t g0 = src_argb[1] >> 4;
485 uint8_t r0 = src_argb[2] >> 4;
486 uint8_t a0 = src_argb[3] >> 4;
487 *(uint16_t*)(dst_rgb) =
488 STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
489 }
490 }
491
// Convert 8 bit ABGR to AR30 (2 bit alpha, 10 bit RGB, little endian).
// Each color channel is widened 8 -> 10 bits by replicating its top 2
// bits into the low bits; alpha keeps only its top 2 bits.
// The packed word is stored with memcpy: dst is a byte pointer that may
// not be 4 byte aligned, and a direct *(uint32_t*) write would be a
// type-punned, potentially misaligned access (undefined behavior) —
// this also matches the memcpy loads used by the AR30 readers.
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}
505
// Convert 8 bit ARGB to AR30 (2 bit alpha, 10 bit RGB, little endian).
// Each color channel is widened 8 -> 10 bits by replicating its top 2
// bits into the low bits; alpha keeps only its top 2 bits.
// The packed word is stored with memcpy: dst is a byte pointer that may
// not be 4 byte aligned, and a direct *(uint32_t*) write would be a
// type-punned, potentially misaligned access (undefined behavior) —
// this also matches the memcpy loads used by the AR30 readers.
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
519
// Widen 8 bit ARGB to 16 bit AR64 by replicating each byte (x * 0x0101).
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint16_t blue = src_argb[0] * 0x0101;
    uint16_t green = src_argb[1] * 0x0101;
    uint16_t red = src_argb[2] * 0x0101;
    uint16_t alpha = src_argb[3] * 0x0101;
    dst_ar64[0] = blue;
    dst_ar64[1] = green;
    dst_ar64[2] = red;
    dst_ar64[3] = alpha;
    src_argb += 4;
    dst_ar64 += 4;
    --n;
  }
}
535
// Widen 8 bit ARGB to 16 bit AB64 (R and B swapped) by replicating bytes.
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint16_t blue = src_argb[0] * 0x0101;
    uint16_t green = src_argb[1] * 0x0101;
    uint16_t red = src_argb[2] * 0x0101;
    uint16_t alpha = src_argb[3] * 0x0101;
    dst_ab64[0] = red;
    dst_ab64[1] = green;
    dst_ab64[2] = blue;
    dst_ab64[3] = alpha;
    src_argb += 4;
    dst_ab64 += 4;
    --n;
  }
}
551
// Narrow 16 bit AR64 to 8 bit ARGB by keeping each channel's high byte.
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint8_t blue = src_ar64[0] >> 8;
    uint8_t green = src_ar64[1] >> 8;
    uint8_t red = src_ar64[2] >> 8;
    uint8_t alpha = src_ar64[3] >> 8;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
    src_ar64 += 4;
    dst_argb += 4;
    --n;
  }
}
567
// Narrow 16 bit AB64 to 8 bit ARGB, swapping R and B while keeping each
// channel's high byte.
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint8_t red = src_ab64[0] >> 8;
    uint8_t green = src_ab64[1] >> 8;
    uint8_t blue = src_ab64[2] >> 8;
    uint8_t alpha = src_ab64[3] >> 8;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = alpha;
    src_ab64 += 4;
    dst_argb += 4;
    --n;
  }
}
583
// Swap the 16 bit R and B channels: AR64 -> AB64.
void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
  int n = width;
  while (n > 0) {
    // Read the whole pixel before writing.
    uint16_t blue = src_ar64[0];
    uint16_t green = src_ar64[1];
    uint16_t red = src_ar64[2];
    uint16_t alpha = src_ar64[3];
    dst_ab64[0] = red;
    dst_ab64[1] = green;
    dst_ab64[2] = blue;
    dst_ab64[3] = alpha;
    src_ar64 += 4;
    dst_ab64 += 4;
    --n;
  }
}
599
600 // TODO(fbarchard): Make shuffle compatible with SIMD versions
// TODO(fbarchard): Make shuffle compatible with SIMD versions
// Reorder the 16 bit channels of AR64 pixels.  The shuffler gives byte
// offsets; the even entries are halved to index 16 bit channels.
void AR64ShuffleRow_C(const uint8_t* src_ar64,
                      uint8_t* dst_ar64,
                      const uint8_t* shuffler,
                      int width) {
  const uint16_t* src16 = (const uint16_t*)src_ar64;
  uint16_t* dst16 = (uint16_t*)dst_ar64;
  int idx0 = shuffler[0] / 2;
  int idx1 = shuffler[2] / 2;
  int idx2 = shuffler[4] / 2;
  int idx3 = shuffler[6] / 2;
  int x;
  for (x = 0; x < width / 2; ++x) {
    // Read all four channels before writing to support in-place use.
    uint16_t c0 = src16[idx0];
    uint16_t c1 = src16[idx1];
    uint16_t c2 = src16[idx2];
    uint16_t c3 = src16[idx3];
    dst16[0] = c0;
    dst16[1] = c1;
    dst16[2] = c2;
    dst16[3] = c3;
    src16 += 4;
    dst16 += 4;
  }
}
627
628 #ifdef LIBYUV_RGB7
629 // Old 7 bit math for compatibility on unsupported platforms.
RGBToY(uint8_t r,uint8_t g,uint8_t b)630 static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
631 return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16);
632 }
633 #else
634 // 8 bit
635 // Intel SSE/AVX uses the following equivalent formula
636 // 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
637 // return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
638 // 0x7e80) >> 8;
639
RGBToY(uint8_t r,uint8_t g,uint8_t b)640 static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
641 return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
642 }
643 #endif
644
645 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
646
647 // LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
648 #ifdef LIBYUV_RGBTOU_TRUNCATE
RGBToU(uint8_t r,uint8_t g,uint8_t b)649 static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
650 return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
651 }
RGBToV(uint8_t r,uint8_t g,uint8_t b)652 static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
653 return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
654 }
655 #else
656 // TODO(fbarchard): Add rounding to x86 SIMD and use this
RGBToU(uint8_t r,uint8_t g,uint8_t b)657 static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
658 return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
659 }
RGBToV(uint8_t r,uint8_t g,uint8_t b)660 static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
661 return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
662 }
663 #endif
664
665 // LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
666 #if !defined(LIBYUV_ARGBTOUV_PAVGB)
RGB2xToU(uint16_t r,uint16_t g,uint16_t b)667 static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
668 return STATIC_CAST(
669 uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8);
670 }
RGB2xToV(uint16_t r,uint16_t g,uint16_t b)671 static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
672 return STATIC_CAST(
673 uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8);
674 }
675 #endif
676
677 // ARGBToY_C and ARGBToUV_C
678 // Intel version mimic SSE/AVX which does 2 pavgb
679 #if LIBYUV_ARGBTOUV_PAVGB
// Generates NAME##ToYRow_C and NAME##ToUVRow_C for a packed 8 bit RGB
// format.  R, G and B are the byte offsets of each channel within a pixel
// and BPP is bytes per pixel.  The UV row reads two rows (src_rgb and
// src_rgb + src_stride_rgb) and averages each 2x2 block with nested
// rounded averages (mimicking x86's 2 pavgb — see comment above) before
// converting to U and V.  An odd final column averages vertically only.
#define MAKEROWY(NAME, R, G, B, BPP) \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
      src_rgb += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
      dst_u[0] = RGBToU(ar, ag, ab); \
      dst_v[0] = RGBToV(ar, ag, ab); \
      src_rgb += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
      dst_u[0] = RGBToU(ar, ag, ab); \
      dst_v[0] = RGBToV(ar, ag, ab); \
    } \
  }
715 #else
716 // ARM version does sum / 2 then multiply by 2x smaller coefficients
// Non-pavgb variant (see comment above): sums each 2x2 block with a
// rounding term and halves it to 9 bits, then uses RGB2xToU/V which take
// 2x-scaled values with halved coefficients.  An odd final column passes
// the unhalved vertical sum.
#define MAKEROWY(NAME, R, G, B, BPP) \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]); \
      src_rgb += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
                       uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
                     src_rgb1[B + BPP] + 1) >> \
                    1; \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
                     src_rgb1[G + BPP] + 1) >> \
                    1; \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
                     src_rgb1[R + BPP] + 1) >> \
                    1; \
      dst_u[0] = RGB2xToU(ar, ag, ab); \
      dst_v[0] = RGB2xToV(ar, ag, ab); \
      src_rgb += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint16_t ab = src_rgb[B] + src_rgb1[B]; \
      uint16_t ag = src_rgb[G] + src_rgb1[G]; \
      uint16_t ar = src_rgb[R] + src_rgb1[R]; \
      dst_u[0] = RGB2xToU(ar, ag, ab); \
      dst_v[0] = RGB2xToV(ar, ag, ab); \
    } \
  }
755 #endif
756
// Instantiate <Fmt>ToYRow_C / <Fmt>ToUVRow_C.  Arguments are
// (NAME, R, G, B, BPP): the byte offset of each channel and bytes/pixel.
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
764
765 // JPeg uses a variation on BT.601-1 full range
766 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b
767 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
768 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
769 // BT.601 Mpeg range uses:
770 // b 0.1016 * 255 = 25.908 = 25
771 // g 0.5078 * 255 = 129.489 = 129
772 // r 0.2578 * 255 = 65.739 = 66
773 // JPeg 7 bit Y (deprecated)
774 // b 0.11400 * 128 = 14.592 = 15
775 // g 0.58700 * 128 = 75.136 = 75
776 // r 0.29900 * 128 = 38.272 = 38
777 // JPeg 8 bit Y:
778 // b 0.11400 * 256 = 29.184 = 29
779 // g 0.58700 * 256 = 150.272 = 150
780 // r 0.29900 * 256 = 76.544 = 77
781 // JPeg 8 bit U:
782 // b 0.50000 * 255 = 127.5 = 127
783 // g -0.33126 * 255 = -84.4713 = -84
784 // r -0.16874 * 255 = -43.0287 = -43
785 // JPeg 8 bit V:
786 // b -0.08131 * 255 = -20.73405 = -20
787 // g -0.41869 * 255 = -106.76595 = -107
788 // r 0.50000 * 255 = 127.5 = 127
789
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  int yj = 38 * r + 75 * g + 15 * b + 64;
  return (uint8_t)(yj >> 7);
}
#else
// 8 bit coefficients with 0.5 (128) added for rounding.
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  int yj = 77 * r + 150 * g + 29 * b + 128;
  return (uint8_t)(yj >> 8);
}
#endif
801
#if defined(LIBYUV_ARGBTOUV_PAVGB)
// Full range (JPeg) chroma from averaged 8 bit channels; 0x8080 biases
// by 128 and adds 0x80 for rounding.
static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  int u = 127 * b - 84 * g - 43 * r + 0x8080;
  return (uint8_t)(u >> 8);
}
static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  int v = 127 * r - 107 * g - 20 * b + 0x8080;
  return (uint8_t)(v >> 8);
}
#else
// Variants taking 2x-summed channel values with halved coefficients.
static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  int u = (127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080;
  return (uint8_t)(u >> 8);
}
static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
  int v = (127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080;
  return (uint8_t)(v >> 8);
}
#endif
817
818 // ARGBToYJ_C and ARGBToUVJ_C
819 // Intel version mimic SSE/AVX which does 2 pavgb
820 #if LIBYUV_ARGBTOUV_PAVGB
// Full range (JPeg) version of MAKEROWY.  Generates NAME##ToYJRow_C and
// NAME##ToUVJRow_C using the J coefficients; R, G, B are channel byte
// offsets and BPP is bytes per pixel.  The UVJ row averages each 2x2
// block with nested rounded averages (mimicking x86's 2 pavgb); an odd
// final column averages vertically only.
#define MAKEROWYJ(NAME, R, G, B, BPP) \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
      src_rgb += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
                        uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]), \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP])); \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]), \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP])); \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]), \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP])); \
      dst_u[0] = RGBToUJ(ar, ag, ab); \
      dst_v[0] = RGBToVJ(ar, ag, ab); \
      src_rgb += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]); \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]); \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]); \
      dst_u[0] = RGBToUJ(ar, ag, ab); \
      dst_v[0] = RGBToVJ(ar, ag, ab); \
    } \
  }
856 #else
857 // ARM version does sum / 2 then multiply by 2x smaller coefficients
// Non-pavgb full range (JPeg) variant: sums each 2x2 block with rounding,
// halves it to 9 bits, then uses RGB2xToUJ/VJ which take 2x-scaled values
// with halved coefficients.  An odd final column passes the unhalved
// vertical sum.
#define MAKEROWYJ(NAME, R, G, B, BPP) \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x; \
    for (x = 0; x < width; ++x) { \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]); \
      src_rgb += BPP; \
      dst_y += 1; \
    } \
  } \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb, \
                        uint8_t* dst_u, uint8_t* dst_v, int width) { \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb; \
    int x; \
    for (x = 0; x < width - 1; x += 2) { \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] + \
                     src_rgb1[B + BPP] + 1) >> \
                    1; \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] + \
                     src_rgb1[G + BPP] + 1) >> \
                    1; \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] + \
                     src_rgb1[R + BPP] + 1) >> \
                    1; \
      dst_u[0] = RGB2xToUJ(ar, ag, ab); \
      dst_v[0] = RGB2xToVJ(ar, ag, ab); \
      src_rgb += BPP * 2; \
      src_rgb1 += BPP * 2; \
      dst_u += 1; \
      dst_v += 1; \
    } \
    if (width & 1) { \
      uint16_t ab = (src_rgb[B] + src_rgb1[B]); \
      uint16_t ag = (src_rgb[G] + src_rgb1[G]); \
      uint16_t ar = (src_rgb[R] + src_rgb1[R]); \
      dst_u[0] = RGB2xToUJ(ar, ag, ab); \
      dst_v[0] = RGB2xToVJ(ar, ag, ab); \
    } \
  }

896
897 #endif
898
// Instantiate full range (JPeg) <Fmt>ToYJRow_C / <Fmt>ToUVJRow_C.
// Arguments are (NAME, R, G, B, BPP) channel byte offsets and bytes/pixel.
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
905
RGB565ToYRow_C(const uint8_t * src_rgb565,uint8_t * dst_y,int width)906 void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
907 int x;
908 for (x = 0; x < width; ++x) {
909 uint8_t b = src_rgb565[0] & 0x1f;
910 uint8_t g = STATIC_CAST(
911 uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
912 uint8_t r = src_rgb565[1] >> 3;
913 b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
914 g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
915 r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
916 dst_y[0] = RGBToY(r, g, b);
917 src_rgb565 += 2;
918 dst_y += 1;
919 }
920 }
921
ARGB1555ToYRow_C(const uint8_t * src_argb1555,uint8_t * dst_y,int width)922 void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
923 int x;
924 for (x = 0; x < width; ++x) {
925 uint8_t b = src_argb1555[0] & 0x1f;
926 uint8_t g = STATIC_CAST(
927 uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
928 uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
929 b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
930 g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
931 r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
932 dst_y[0] = RGBToY(r, g, b);
933 src_argb1555 += 2;
934 dst_y += 1;
935 }
936 }
937
ARGB4444ToYRow_C(const uint8_t * src_argb4444,uint8_t * dst_y,int width)938 void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
939 int x;
940 for (x = 0; x < width; ++x) {
941 uint8_t b = src_argb4444[0] & 0x0f;
942 uint8_t g = src_argb4444[0] >> 4;
943 uint8_t r = src_argb4444[1] & 0x0f;
944 b = STATIC_CAST(uint8_t, (b << 4) | b);
945 g = STATIC_CAST(uint8_t, (g << 4) | g);
946 r = STATIC_CAST(uint8_t, (r << 4) | r);
947 dst_y[0] = RGBToY(r, g, b);
948 src_argb4444 += 2;
949 dst_y += 1;
950 }
951 }
952
// 2x2 subsample a pair of RGB565 rows into one U and one V sample per
// block. src_stride_rgb565 is the byte offset from the first row to the
// second row.
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the four pixels of the 2x2 block: 5 bit B, 6 bit G, 5 bit R.
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
    uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
    uint8_t g1 = STATIC_CAST(
        uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
    uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
    uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
    uint8_t g3 = STATIC_CAST(
        uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
    uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);

    // Expand to 8 bits by replicating the high bits into the low bits.
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
    g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
    g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));

// Average the 2x2 block. The x86 path mimics pavgb (rounded byte
// averages); the other path keeps a rounded 2x sum for the RGB2xTo*
// helpers which use half-scale coefficients.
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  // Odd width: average the last column vertically only.
  if (width & 1) {
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
1041
// 2x2 subsample a pair of ARGB1555 rows into one U and one V sample per
// block. The alpha bit is ignored.
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the four pixels of the 2x2 block (5 bits per channel).
    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
    uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f);
    uint8_t g1 = STATIC_CAST(
        uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3));
    uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2);
    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
    uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f);
    uint8_t g3 = STATIC_CAST(
        uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3));
    uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2);

    // Expand 5 bits to 8 bits by replicating the high bits.
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
    g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2));
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
    g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2));
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));

// Average the 2x2 block. The x86 path mimics pavgb; the other path keeps
// a rounded 2x sum for the half-scale RGB2xTo* helpers.
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  // Odd width: average the last column vertically only.
  if (width & 1) {
    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);

    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
1131
// 2x2 subsample a pair of ARGB4444 rows into one U and one V sample per
// block. The alpha nibble is ignored.
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the four pixels of the 2x2 block (4 bits per channel).
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b1 = src_argb4444[2] & 0x0f;
    uint8_t g1 = src_argb4444[2] >> 4;
    uint8_t r1 = src_argb4444[3] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b3 = next_argb4444[2] & 0x0f;
    uint8_t g3 = next_argb4444[2] >> 4;
    uint8_t r3 = next_argb4444[3] & 0x0f;

    // Expand 4 bits to 8 bits by duplicating each nibble.
    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
    b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1);
    g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1);
    r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1);
    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
    b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3);
    g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3);
    r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3);

// Average the 2x2 block. The x86 path mimics pavgb; the other path keeps
// a rounded 2x sum for the half-scale RGB2xTo* helpers.
#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb4444 += 4;
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  // Odd width: average the last column vertically only.
  if (width & 1) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;

    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
1215
// Convert a row of ARGB to full resolution (4:4:4) U and V planes;
// one U and one V sample per pixel, no subsampling.
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; ++i, src_argb += 4) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    dst_u[i] = RGBToU(red, green, blue);
    dst_v[i] = RGBToV(red, green, blue);
  }
}
1232
// Convert a row of ARGB to grey scale: B, G and R all become the full
// range (JPEG) luma; alpha passes through unchanged.
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t luma = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
1243
1244 // Convert a row of image to Sepia tone.
ARGBSepiaRow_C(uint8_t * dst_argb,int width)1245 void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
1246 int x;
1247 for (x = 0; x < width; ++x) {
1248 int b = dst_argb[0];
1249 int g = dst_argb[1];
1250 int r = dst_argb[2];
1251 int sb = (b * 17 + g * 68 + r * 35) >> 7;
1252 int sg = (b * 22 + g * 88 + r * 45) >> 7;
1253 int sr = (b * 24 + g * 98 + r * 50) >> 7;
1254 // b does not over flow. a is preserved from original.
1255 dst_argb[0] = STATIC_CAST(uint8_t, sb);
1256 dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg));
1257 dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr));
1258 dst_argb += 4;
1259 }
1260 }
1261
1262 // Apply color matrix to a row of image. Matrix is signed.
1263 // TODO(fbarchard): Consider adding rounding (+32).
ARGBColorMatrixRow_C(const uint8_t * src_argb,uint8_t * dst_argb,const int8_t * matrix_argb,int width)1264 void ARGBColorMatrixRow_C(const uint8_t* src_argb,
1265 uint8_t* dst_argb,
1266 const int8_t* matrix_argb,
1267 int width) {
1268 int x;
1269 for (x = 0; x < width; ++x) {
1270 int b = src_argb[0];
1271 int g = src_argb[1];
1272 int r = src_argb[2];
1273 int a = src_argb[3];
1274 int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
1275 a * matrix_argb[3]) >>
1276 6;
1277 int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
1278 a * matrix_argb[7]) >>
1279 6;
1280 int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
1281 a * matrix_argb[11]) >>
1282 6;
1283 int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
1284 a * matrix_argb[15]) >>
1285 6;
1286 dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb));
1287 dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg));
1288 dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr));
1289 dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa));
1290 src_argb += 4;
1291 dst_argb += 4;
1292 }
1293 }
1294
// Apply color table to a row of image, in place. table_argb holds 256
// interleaved BGRA entries; each channel looks up its own replacement.
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = table_argb[dst_argb[0] * 4 + 0];
    dst_argb[1] = table_argb[dst_argb[1] * 4 + 1];
    dst_argb[2] = table_argb[dst_argb[2] * 4 + 2];
    dst_argb[3] = table_argb[dst_argb[3] * 4 + 3];
    dst_argb += 4;
  }
}
1312
// Apply color table to a row of image, in place. Same as
// ARGBColorTableRow_C but alpha is left untouched.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = table_argb[dst_argb[0] * 4 + 0];
    dst_argb[1] = table_argb[dst_argb[1] * 4 + 1];
    dst_argb[2] = table_argb[dst_argb[2] * 4 + 2];
    dst_argb += 4;
  }
}
1328
ARGBQuantizeRow_C(uint8_t * dst_argb,int scale,int interval_size,int interval_offset,int width)1329 void ARGBQuantizeRow_C(uint8_t* dst_argb,
1330 int scale,
1331 int interval_size,
1332 int interval_offset,
1333 int width) {
1334 int x;
1335 for (x = 0; x < width; ++x) {
1336 int b = dst_argb[0];
1337 int g = dst_argb[1];
1338 int r = dst_argb[2];
1339 dst_argb[0] = STATIC_CAST(
1340 uint8_t, (b * scale >> 16) * interval_size + interval_offset);
1341 dst_argb[1] = STATIC_CAST(
1342 uint8_t, (g * scale >> 16) * interval_size + interval_offset);
1343 dst_argb[2] = STATIC_CAST(
1344 uint8_t, (r * scale >> 16) * interval_size + interval_offset);
1345 dst_argb += 4;
1346 }
1347 }
1348
// REPEAT8 replicates an 8 bit value into 16 bits (v * 0x101).
// Arguments and expansions are fully parenthesized (CERT PRE01-C/PRE02-C)
// so the macros stay hygienic for any call site.
#define REPEAT8(v) ((v) | ((v) << 8))
// SHADE multiplies two replicated 16 bit values and keeps the top byte.
#define SHADE(f, v) (((v) * (f)) >> 24)

// Shade (multiply) a row of ARGB by a constant ARGB 'value'.
// value is packed as A<<24 | R<<16 | G<<8 | B; 255 in a channel is
// (near-)identity, 0 zeroes the channel.
void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  // Replicate each shade channel to 16 bits so the product below is a
  // 16x16 -> 32 bit multiply whose top byte is the scaled result.
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);

  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
1377
1378 #define REPEAT8(v) (v) | ((v) << 8)
1379 #define SHADE(f, v) v* f >> 16
1380
ARGBMultiplyRow_C(const uint8_t * src_argb,const uint8_t * src_argb1,uint8_t * dst_argb,int width)1381 void ARGBMultiplyRow_C(const uint8_t* src_argb,
1382 const uint8_t* src_argb1,
1383 uint8_t* dst_argb,
1384 int width) {
1385 int i;
1386 for (i = 0; i < width; ++i) {
1387 const uint32_t b = REPEAT8(src_argb[0]);
1388 const uint32_t g = REPEAT8(src_argb[1]);
1389 const uint32_t r = REPEAT8(src_argb[2]);
1390 const uint32_t a = REPEAT8(src_argb[3]);
1391 const uint32_t b_scale = src_argb1[0];
1392 const uint32_t g_scale = src_argb1[1];
1393 const uint32_t r_scale = src_argb1[2];
1394 const uint32_t a_scale = src_argb1[3];
1395 dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale));
1396 dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale));
1397 dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale));
1398 dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale));
1399 src_argb += 4;
1400 src_argb1 += 4;
1401 dst_argb += 4;
1402 }
1403 }
1404 #undef REPEAT8
1405 #undef SHADE
1406
1407 #define SHADE(f, v) clamp255(v + f)
1408
ARGBAddRow_C(const uint8_t * src_argb,const uint8_t * src_argb1,uint8_t * dst_argb,int width)1409 void ARGBAddRow_C(const uint8_t* src_argb,
1410 const uint8_t* src_argb1,
1411 uint8_t* dst_argb,
1412 int width) {
1413 int i;
1414 for (i = 0; i < width; ++i) {
1415 const int b = src_argb[0];
1416 const int g = src_argb[1];
1417 const int r = src_argb[2];
1418 const int a = src_argb[3];
1419 const int b_add = src_argb1[0];
1420 const int g_add = src_argb1[1];
1421 const int r_add = src_argb1[2];
1422 const int a_add = src_argb1[3];
1423 dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add));
1424 dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add));
1425 dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add));
1426 dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add));
1427 src_argb += 4;
1428 src_argb1 += 4;
1429 dst_argb += 4;
1430 }
1431 }
1432 #undef SHADE
1433
1434 #define SHADE(f, v) clamp0(f - v)
1435
ARGBSubtractRow_C(const uint8_t * src_argb,const uint8_t * src_argb1,uint8_t * dst_argb,int width)1436 void ARGBSubtractRow_C(const uint8_t* src_argb,
1437 const uint8_t* src_argb1,
1438 uint8_t* dst_argb,
1439 int width) {
1440 int i;
1441 for (i = 0; i < width; ++i) {
1442 const int b = src_argb[0];
1443 const int g = src_argb[1];
1444 const int r = src_argb[2];
1445 const int a = src_argb[3];
1446 const int b_sub = src_argb1[0];
1447 const int g_sub = src_argb1[1];
1448 const int r_sub = src_argb1[2];
1449 const int a_sub = src_argb1[3];
1450 dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub));
1451 dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub));
1452 dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub));
1453 dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub));
1454 src_argb += 4;
1455 src_argb1 += 4;
1456 dst_argb += 4;
1457 }
1458 }
1459 #undef SHADE
1460
// Sobel functions which mimics SSSE3.
// Horizontal Sobel: difference between columns i and i + 2, smoothed
// vertically with a [1 2 1] kernel across three rows.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int d0 = src_y0[i] - src_y0[i + 2];
    const int d1 = src_y1[i] - src_y1[i + 2];
    const int d2 = src_y2[i] - src_y2[i + 2];
    dst_sobelx[i] = (uint8_t)(clamp255(Abs(d0 + d1 * 2 + d2)));
  }
}
1482
// Vertical Sobel: difference between two rows, smoothed horizontally with
// a [1 2 1] kernel across three columns.
void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int d0 = src_y0[i + 0] - src_y1[i + 0];
    const int d1 = src_y0[i + 1] - src_y1[i + 1];
    const int d2 = src_y0[i + 2] - src_y1[i + 2];
    dst_sobely[i] = (uint8_t)(clamp255(Abs(d0 + d1 * 2 + d2)));
  }
}
1502
// Combine X and Y Sobel magnitudes into a grey ARGB row with opaque alpha.
void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int s = clamp255(src_sobelx[i] + src_sobely[i]);
    dst_argb[0] = (uint8_t)(s);
    dst_argb[1] = (uint8_t)(s);
    dst_argb[2] = (uint8_t)(s);
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;
  }
}
1519
// Combine X and Y Sobel magnitudes into a single grey plane.
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint8_t)(clamp255(src_sobelx[i] + src_sobely[i]));
  }
}
1532
// Pack Sobel gradients into ARGB: B = Y gradient, R = X gradient,
// G = clamped sum of both, alpha opaque.
void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int sx = src_sobelx[i];
    const int sy = src_sobely[i];
    dst_argb[0] = (uint8_t)(sy);
    dst_argb[1] = (uint8_t)(clamp255(sx + sy));
    dst_argb[2] = (uint8_t)(sx);
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;
  }
}
1549
// Convert a row of grey (J400) to ARGB: replicate each sample into
// B, G and R with opaque alpha.
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t y = src_y[i];
    dst_argb[0] = y;
    dst_argb[1] = y;
    dst_argb[2] = y;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
1561
1562 // Macros to create SIMD specific yuv to rgb conversion constants.
1563
1564 // clang-format off
1565
1566 #if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1567 // Bias values include subtract 128 from U and V, bias from Y and rounding.
1568 // For B and R bias is negative. For G bias is positive.
1569 #define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
1570 {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
1571 {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
1572 0, 0}}
1573 #else
1574 #define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR) \
1575 {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, \
1576 UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, \
1577 {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, \
1578 UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
1579 {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, \
1580 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, \
1581 {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
1582 {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
1583 #endif
1584
1585 // clang-format on
1586
// Emit a pair of aligned constant tables for one colorspace:
// kYuv<name>Constants, and kYvu<name>Constants with the U and V
// coefficients swapped for VU-ordered formats.
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR) \
  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR); \
  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
1592
1593 // TODO(fbarchard): Generate SIMD structures from float matrix.
1594
1595 // BT.601 limited range YUV to RGB reference
1596 // R = (Y - 16) * 1.164 + V * 1.596
1597 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
1598 // B = (Y - 16) * 1.164 + U * 2.018
1599 // KR = 0.299; KB = 0.114
1600
1601 // U and V contributions to R,G,B.
1602 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
1603 #define UB 129 /* round(2.018 * 64) */
1604 #else
1605 #define UB 128 /* max(128, round(2.018 * 64)) */
1606 #endif
1607 #define UG 25 /* round(0.391 * 64) */
1608 #define VG 52 /* round(0.813 * 64) */
1609 #define VR 102 /* round(1.596 * 64) */
1610
1611 // Y contribution to R,G,B. Scale and bias.
1612 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1613 #define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1614
MAKEYUVCONSTANTS(I601,YG,YB,UB,UG,VG,VR)1615 MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
1616
1617 #undef YG
1618 #undef YB
1619 #undef UB
1620 #undef UG
1621 #undef VG
1622 #undef VR
1623
1624 // BT.601 full range YUV to RGB reference (aka JPEG)
1625 // * R = Y + V * 1.40200
1626 // * G = Y - U * 0.34414 - V * 0.71414
1627 // * B = Y + U * 1.77200
1628 // KR = 0.299; KB = 0.114
1629
1630 // U and V contributions to R,G,B.
1631 #define UB 113 /* round(1.77200 * 64) */
1632 #define UG 22 /* round(0.34414 * 64) */
1633 #define VG 46 /* round(0.71414 * 64) */
1634 #define VR 90 /* round(1.40200 * 64) */
1635
1636 // Y contribution to R,G,B. Scale and bias.
1637 #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
1638 #define YB 32 /* 64 / 2 */
1639
1640 MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
1641
1642 #undef YG
1643 #undef YB
1644 #undef UB
1645 #undef UG
1646 #undef VG
1647 #undef VR
1648
1649 // BT.709 limited range YUV to RGB reference
1650 // R = (Y - 16) * 1.164 + V * 1.793
1651 // G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
1652 // B = (Y - 16) * 1.164 + U * 2.112
1653 // KR = 0.2126, KB = 0.0722
1654
1655 // U and V contributions to R,G,B.
1656 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
1657 #define UB 135 /* round(2.112 * 64) */
1658 #else
1659 #define UB 128 /* max(128, round(2.112 * 64)) */
1660 #endif
1661 #define UG 14 /* round(0.213 * 64) */
1662 #define VG 34 /* round(0.533 * 64) */
1663 #define VR 115 /* round(1.793 * 64) */
1664
1665 // Y contribution to R,G,B. Scale and bias.
1666 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1667 #define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1668
1669 MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
1670
1671 #undef YG
1672 #undef YB
1673 #undef UB
1674 #undef UG
1675 #undef VG
1676 #undef VR
1677
1678 // BT.709 full range YUV to RGB reference
1679 // R = Y + V * 1.5748
1680 // G = Y - U * 0.18732 - V * 0.46812
1681 // B = Y + U * 1.8556
1682 // KR = 0.2126, KB = 0.0722
1683
1684 // U and V contributions to R,G,B.
1685 #define UB 119 /* round(1.8556 * 64) */
1686 #define UG 12 /* round(0.18732 * 64) */
1687 #define VG 30 /* round(0.46812 * 64) */
1688 #define VR 101 /* round(1.5748 * 64) */
1689
1690 // Y contribution to R,G,B. Scale and bias. (same as jpeg)
1691 #define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1692 #define YB 32 /* 64 / 2 */
1693
1694 MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
1695
1696 #undef YG
1697 #undef YB
1698 #undef UB
1699 #undef UG
1700 #undef VG
1701 #undef VR
1702
1703 // BT.2020 limited range YUV to RGB reference
1704 // R = (Y - 16) * 1.164384 + V * 1.67867
1705 // G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
1706 // B = (Y - 16) * 1.164384 + U * 2.14177
1707 // KR = 0.2627; KB = 0.0593
1708
1709 // U and V contributions to R,G,B.
1710 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
1711 #define UB 137 /* round(2.142 * 64) */
1712 #else
1713 #define UB 128 /* max(128, round(2.142 * 64)) */
1714 #endif
1715 #define UG 12 /* round(0.187326 * 64) */
1716 #define VG 42 /* round(0.65042 * 64) */
1717 #define VR 107 /* round(1.67867 * 64) */
1718
1719 // Y contribution to R,G,B. Scale and bias.
1720 #define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
1721 #define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
1722
1723 MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
1724
1725 #undef YG
1726 #undef YB
1727 #undef UB
1728 #undef UG
1729 #undef VG
1730 #undef VR
1731
1732 // BT.2020 full range YUV to RGB reference
1733 // R = Y + V * 1.474600
1734 // G = Y - U * 0.164553 - V * 0.571353
1735 // B = Y + U * 1.881400
1736 // KR = 0.2627; KB = 0.0593
1737
1738 #define UB 120 /* round(1.881400 * 64) */
1739 #define UG 11 /* round(0.164553 * 64) */
1740 #define VG 37 /* round(0.571353 * 64) */
1741 #define VR 94 /* round(1.474600 * 64) */
1742
1743 // Y contribution to R,G,B. Scale and bias. (same as jpeg)
1744 #define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1745 #define YB 32 /* 64 / 2 */
1746
1747 MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
1748
1749 #undef YG
1750 #undef YB
1751 #undef UB
1752 #undef UG
1753 #undef VG
1754 #undef VR
1755
// NOTE(review): BB, BG and BR are not defined anywhere in this section;
// these #undefs appear to be vestigial - confirm before removing.
#undef BB
#undef BG
#undef BR

#undef MAKEYUVCONSTANTS
1761
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
// Pull the coefficients laid out by the ARM/RISCV YUVCONSTANTSBODY variant
// into locals. Per the table comments above, the bb/bg/br biases fold in
// the subtract-128 recentering of U/V and the Y bias.
#define LOAD_YUV_CONSTANTS \
  int ub = yuvconstants->kUVCoeff[0]; \
  int vr = yuvconstants->kUVCoeff[1]; \
  int ug = yuvconstants->kUVCoeff[2]; \
  int vg = yuvconstants->kUVCoeff[3]; \
  int yg = yuvconstants->kRGBCoeffBias[0]; \
  int bb = yuvconstants->kRGBCoeffBias[1]; \
  int bg = yuvconstants->kRGBCoeffBias[2]; \
  int br = yuvconstants->kRGBCoeffBias[3]

// Expects y32 (Y replicated to 16 bits), u and v in scope; produces
// b16/g16/r16 as unclamped 6 bit fixed point values.
#define CALC_RGB16 \
  int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
  int b16 = y1 + (u * ub) - bb; \
  int g16 = y1 + bg - (u * ug + v * vg); \
  int r16 = y1 + (v * vr) - br
#else
#define LOAD_YUV_CONSTANTS \
  int ub = yuvconstants->kUVToB[0]; \
  int ug = yuvconstants->kUVToG[0]; \
  int vg = yuvconstants->kUVToG[1]; \
  int vr = yuvconstants->kUVToR[1]; \
  int yg = yuvconstants->kYToRgb[0]; \
  int yb = yuvconstants->kYBiasToRgb[0]

// Recenters u and v around zero via int8 conversion and -0x80, then
// applies the coefficients; produces unclamped 6 bit fixed point values.
#define CALC_RGB16 \
  int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
  int8_t ui = (int8_t)u; \
  int8_t vi = (int8_t)v; \
  ui -= 0x80; \
  vi -= 0x80; \
  int b16 = y1 + (ui * ub); \
  int g16 = y1 - (ui * ug + vi * vg); \
  int r16 = y1 + (vi * vr)
#endif
1797
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and converts to clamped 8 bit RGB.
static __inline void YuvPixel(uint8_t y,
                              uint8_t u,
                              uint8_t v,
                              uint8_t* b,
                              uint8_t* g,
                              uint8_t* r,
                              const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Replicate y to 16 bits so the yg multiply uses the full range.
  uint32_t y32 = y * 0x0101;
  CALC_RGB16;
  // Drop the 6 fractional bits and clamp to 0..255.
  *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
  *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
  *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
}
1814
// Reads one 8 bit YUV sample and leaves the result as unclamped 16 bit
// (10.6 fixed point) B, G and R for callers that pack wider formats.
static __inline void YuvPixel8_16(uint8_t y,
                                  uint8_t u,
                                  uint8_t v,
                                  int* b,
                                  int* g,
                                  int* r,
                                  const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Replicate Y into 16 bits for the fixed point multiply in CALC_RGB16.
  uint32_t y32 = y * 0x0101;
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1830
// C reference code that mimics the YUV 16 bit assembly.
// Reads one 10 bit YUV sample and leaves the result as unclamped 16 bit
// (10.6 fixed point) B, G and R.
static __inline void YuvPixel10_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Replicate the 10 bit luma into 16 bits ((y << 6) fills the high bits,
  // (y >> 4) repeats the top bits into the low 6).
  uint32_t y32 = (y << 6) | (y >> 4);
  // Reduce 10 bit chroma to 8 bits; clamp255 guards out-of-range input.
  u = STATIC_CAST(uint8_t, clamp255(u >> 2));
  v = STATIC_CAST(uint8_t, clamp255(v >> 2));
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1849
// C reference code that mimics the YUV 16 bit assembly.
// Reads one 12 bit YUV sample and leaves the result as unclamped 16 bit
// (10.6 fixed point) B, G and R.
// NOTE(review): parameters are int16_t here, unlike the uint16_t of the
// 10 bit variant — confirm the signedness is intended for 12 bit input.
static __inline void YuvPixel12_16(int16_t y,
                                   int16_t u,
                                   int16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Replicate the 12 bit luma into 16 bits.
  uint32_t y32 = (y << 4) | (y >> 8);
  // Reduce 12 bit chroma to 8 bits; clamp255 guards out-of-range input.
  u = STATIC_CAST(uint8_t, clamp255(u >> 4));
  v = STATIC_CAST(uint8_t, clamp255(v >> 4));
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1868
1869 // C reference code that mimics the YUV 10 bit assembly.
1870 // Reads 10 bit YUV and clamps down to 8 bit RGB.
YuvPixel10(uint16_t y,uint16_t u,uint16_t v,uint8_t * b,uint8_t * g,uint8_t * r,const struct YuvConstants * yuvconstants)1871 static __inline void YuvPixel10(uint16_t y,
1872 uint16_t u,
1873 uint16_t v,
1874 uint8_t* b,
1875 uint8_t* g,
1876 uint8_t* r,
1877 const struct YuvConstants* yuvconstants) {
1878 int b16;
1879 int g16;
1880 int r16;
1881 YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
1882 *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
1883 *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
1884 *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
1885 }
1886
1887 // C reference code that mimics the YUV 12 bit assembly.
1888 // Reads 12 bit YUV and clamps down to 8 bit RGB.
YuvPixel12(uint16_t y,uint16_t u,uint16_t v,uint8_t * b,uint8_t * g,uint8_t * r,const struct YuvConstants * yuvconstants)1889 static __inline void YuvPixel12(uint16_t y,
1890 uint16_t u,
1891 uint16_t v,
1892 uint8_t* b,
1893 uint8_t* g,
1894 uint8_t* r,
1895 const struct YuvConstants* yuvconstants) {
1896 int b16;
1897 int g16;
1898 int r16;
1899 YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
1900 *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
1901 *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
1902 *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
1903 }
1904
// C reference code that mimics the YUV 16 bit assembly.
// Reads one 16 bit YUV sample (10/12 bit data in the msbs, P010/P210
// style) and writes 8 bit clamped B, G and R.
static __inline void YuvPixel16_8(uint16_t y,
                                  uint16_t u,
                                  uint16_t v,
                                  uint8_t* b,
                                  uint8_t* g,
                                  uint8_t* r,
                                  const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Luma is already 16 bit; no replication needed.
  uint32_t y32 = y;
  // Keep only the top 8 bits of chroma; clamp255 guards the shift result.
  u = STATIC_CAST(uint16_t, clamp255(u >> 8));
  v = STATIC_CAST(uint16_t, clamp255(v >> 8));
  CALC_RGB16;
  // b16/g16/r16 are 10.6 fixed point; >> 6 drops the fraction bits.
  *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
  *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
  *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
}
1923
// C reference code that mimics the YUV 16 bit assembly.
// Reads one 16 bit YUV sample and leaves the result as unclamped 16 bit
// (10.6 fixed point) B, G and R.
static __inline void YuvPixel16_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Luma is already 16 bit; no replication needed.
  uint32_t y32 = y;
  // Keep only the top 8 bits of chroma; clamp255 guards the shift result.
  u = STATIC_CAST(uint16_t, clamp255(u >> 8));
  v = STATIC_CAST(uint16_t, clamp255(v >> 8));
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1942
// C reference code that mimics the YUV assembly.
// Reads 8 bit luma only and writes the same gray value to B, G and R.
static __inline void YPixel(uint8_t y,
                            uint8_t* b,
                            uint8_t* g,
                            uint8_t* r,
                            const struct YuvConstants* yuvconstants) {
  // Constant layout differs per architecture; see LOAD_YUV_CONSTANTS.
#if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
  int yg = yuvconstants->kRGBCoeffBias[0];
  int ygb = yuvconstants->kRGBCoeffBias[4];
#else
  int ygb = yuvconstants->kYBiasToRgb[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  // Replicate Y to 16 bits, scale by the luma coefficient, add the bias
  // and drop the 6 fixed point fraction bits.
  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
  uint8_t b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
  *b = b8;
  *g = b8;
  *r = b8;
}
1963
// Convert a row of non-subsampled 8 bit YUV (I444) to ARGB.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 4 * i;  // 4 bytes per ARGB pixel.
    YuvPixel(src_y[i], src_u[i], src_v[i], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
1981
// Convert a row of non-subsampled 8 bit YUV (I444) to RGB24.
void I444ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 3 * i;  // 3 bytes per RGB24 pixel.
    YuvPixel(src_y[i], src_u[i], src_v[i], out + 0, out + 1, out + 2,
             yuvconstants);
  }
}
1998
// Convert a row of 2x1 subsampled 8 bit YUV (I422) to ARGB.
// Also used for 420: each U/V sample is shared by a pair of Y samples.
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 4 * i;
    // i >> 1 selects the chroma sample shared by this pixel pair.
    YuvPixel(src_y[i], src_u[i >> 1], src_v[i >> 1], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2025
// Convert a row of 2x1 subsampled 10 bit YUV (I210) to 8 bit ARGB.
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 4 * i;
    // i >> 1 selects the chroma sample shared by this pixel pair.
    YuvPixel10(src_y[i], src_u[i >> 1], src_v[i >> 1], out + 0, out + 1,
               out + 2, yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2052
// Convert a row of non-subsampled 10 bit YUV (I410) to 8 bit ARGB.
void I410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 4 * i;
    YuvPixel10(src_y[i], src_u[i], src_v[i], out + 0, out + 1, out + 2,
               yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2070
I210AlphaToARGBRow_C(const uint16_t * src_y,const uint16_t * src_u,const uint16_t * src_v,const uint16_t * src_a,uint8_t * rgb_buf,const struct YuvConstants * yuvconstants,int width)2071 void I210AlphaToARGBRow_C(const uint16_t* src_y,
2072 const uint16_t* src_u,
2073 const uint16_t* src_v,
2074 const uint16_t* src_a,
2075 uint8_t* rgb_buf,
2076 const struct YuvConstants* yuvconstants,
2077 int width) {
2078 int x;
2079 for (x = 0; x < width - 1; x += 2) {
2080 YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2081 rgb_buf + 2, yuvconstants);
2082 rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2083 YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2084 rgb_buf + 6, yuvconstants);
2085 rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2));
2086 src_y += 2;
2087 src_u += 1;
2088 src_v += 1;
2089 src_a += 2;
2090 rgb_buf += 8; // Advance 2 pixels.
2091 }
2092 if (width & 1) {
2093 YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2094 rgb_buf + 2, yuvconstants);
2095 rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2096 }
2097 }
2098
I410AlphaToARGBRow_C(const uint16_t * src_y,const uint16_t * src_u,const uint16_t * src_v,const uint16_t * src_a,uint8_t * rgb_buf,const struct YuvConstants * yuvconstants,int width)2099 void I410AlphaToARGBRow_C(const uint16_t* src_y,
2100 const uint16_t* src_u,
2101 const uint16_t* src_v,
2102 const uint16_t* src_a,
2103 uint8_t* rgb_buf,
2104 const struct YuvConstants* yuvconstants,
2105 int width) {
2106 int x;
2107 for (x = 0; x < width; ++x) {
2108 YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2109 rgb_buf + 2, yuvconstants);
2110 rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2111 src_y += 1;
2112 src_u += 1;
2113 src_v += 1;
2114 src_a += 1;
2115 rgb_buf += 4; // Advance 1 pixels.
2116 }
2117 }
2118
// Convert a row of 2x1 subsampled 12 bit YUV (I212) to 8 bit ARGB.
void I212ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 4 * i;
    // i >> 1 selects the chroma sample shared by this pixel pair.
    YuvPixel12(src_y[i], src_u[i >> 1], src_v[i >> 1], out + 0, out + 1,
               out + 2, yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2145
// Pack 10.6 fixed point B, G, R into a 2:10:10:10 AR30 pixel with the
// 2 bit alpha forced opaque.
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  // >> 4 converts 10.6 fixed point to 10 bit; Clamp10 bounds the result.
  const uint32_t b10 = (uint32_t)Clamp10(b >> 4);
  const uint32_t g10 = (uint32_t)Clamp10(g >> 4);
  const uint32_t r10 = (uint32_t)Clamp10(r >> 4);
  *(uint32_t*)rgb_buf = 0xc0000000u | (r10 << 20) | (g10 << 10) | b10;
}
2157
// Convert a row of 2x1 subsampled 10 bit YUV (I210) to 10 bit AR30.
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b, g, r;
    YuvPixel10_16(src_y[i], src_u[i >> 1], src_v[i >> 1], &b, &g, &r,
                  yuvconstants);
    StoreAR30(rgb_buf + 4 * i, b, g, r);
  }
}
2184
// Convert a row of 2x1 subsampled 12 bit YUV (I212) to 10 bit AR30.
void I212ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b, g, r;
    YuvPixel12_16(src_y[i], src_u[i >> 1], src_v[i >> 1], &b, &g, &r,
                  yuvconstants);
    StoreAR30(rgb_buf + 4 * i, b, g, r);
  }
}
2211
// Convert a row of non-subsampled 10 bit YUV (I410) to 10 bit AR30.
void I410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b, g, r;
    YuvPixel10_16(src_y[i], src_u[i], src_v[i], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4 * i, b, g, r);
  }
}
2231
// P210 has 10 bits in the msbs of a 16 bit NV12 style interleaved layout.
// Convert one row to 8 bit ARGB.
void P210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int uv = i & ~1;  // U/V pair shared by two horizontal pixels.
    uint8_t* out = dst_argb + 4 * i;
    YuvPixel16_8(src_y[i], src_uv[uv], src_uv[uv + 1], out + 0, out + 1,
                 out + 2, yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2256
// P410: non-subsampled 16 bit interleaved UV.  Convert one row to ARGB.
void P410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = dst_argb + 4 * i;
    YuvPixel16_8(src_y[i], src_uv[2 * i], src_uv[2 * i + 1], out + 0, out + 1,
                 out + 2, yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2272
// Convert a row of P210 (10 bit in msbs, NV12 style UV) to 10 bit AR30.
void P210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int uv = i & ~1;  // U/V pair shared by two horizontal pixels.
    int b, g, r;
    YuvPixel16_16(src_y[i], src_uv[uv], src_uv[uv + 1], &b, &g, &r,
                  yuvconstants);
    StoreAR30(dst_ar30 + 4 * i, b, g, r);
  }
}
2296
// Convert a row of P410 (non-subsampled interleaved UV) to 10 bit AR30.
void P410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b, g, r;
    YuvPixel16_16(src_y[i], src_uv[2 * i], src_uv[2 * i + 1], &b, &g, &r,
                  yuvconstants);
    StoreAR30(dst_ar30 + 4 * i, b, g, r);
  }
}
2314
// 8 bit YUV to 10 bit AR30.
// The fixed point intermediate already carries enough precision, so the
// 8 bit path shares StoreAR30 with the 10 bit paths.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int b, g, r;
    YuvPixel8_16(src_y[i], src_u[i >> 1], src_v[i >> 1], &b, &g, &r,
                 yuvconstants);
    StoreAR30(rgb_buf + 4 * i, b, g, r);
  }
}
2342
// Convert a row of non-subsampled 8 bit YUV plus alpha plane to ARGB.
void I444AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 4 * i;
    YuvPixel(src_y[i], src_u[i], src_v[i], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = src_a[i];  // Alpha copied straight through.
  }
}
2362
// Convert a row of 2x1 subsampled 8 bit YUV plus alpha plane to ARGB.
void I422AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 4 * i;
    YuvPixel(src_y[i], src_u[i >> 1], src_v[i >> 1], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = src_a[i];  // Alpha copied straight through.
  }
}
2390
// Convert a row of 2x1 subsampled 8 bit YUV (I422) to RGB24.
void I422ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 3 * i;  // 3 bytes per RGB24 pixel.
    YuvPixel(src_y[i], src_u[i >> 1], src_v[i >> 1], out + 0, out + 1, out + 2,
             yuvconstants);
  }
}
2413
I422ToARGB4444Row_C(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb4444,const struct YuvConstants * yuvconstants,int width)2414 void I422ToARGB4444Row_C(const uint8_t* src_y,
2415 const uint8_t* src_u,
2416 const uint8_t* src_v,
2417 uint8_t* dst_argb4444,
2418 const struct YuvConstants* yuvconstants,
2419 int width) {
2420 uint8_t b0;
2421 uint8_t g0;
2422 uint8_t r0;
2423 uint8_t b1;
2424 uint8_t g1;
2425 uint8_t r1;
2426 int x;
2427 for (x = 0; x < width - 1; x += 2) {
2428 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2429 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2430 b0 = b0 >> 4;
2431 g0 = g0 >> 4;
2432 r0 = r0 >> 4;
2433 b1 = b1 >> 4;
2434 g1 = g1 >> 4;
2435 r1 = r1 >> 4;
2436 *(uint16_t*)(dst_argb4444 + 0) =
2437 STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
2438 *(uint16_t*)(dst_argb4444 + 2) =
2439 STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000);
2440 src_y += 2;
2441 src_u += 1;
2442 src_v += 1;
2443 dst_argb4444 += 4; // Advance 2 pixels.
2444 }
2445 if (width & 1) {
2446 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2447 b0 = b0 >> 4;
2448 g0 = g0 >> 4;
2449 r0 = r0 >> 4;
2450 *(uint16_t*)(dst_argb4444) =
2451 STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
2452 }
2453 }
2454
I422ToARGB1555Row_C(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb1555,const struct YuvConstants * yuvconstants,int width)2455 void I422ToARGB1555Row_C(const uint8_t* src_y,
2456 const uint8_t* src_u,
2457 const uint8_t* src_v,
2458 uint8_t* dst_argb1555,
2459 const struct YuvConstants* yuvconstants,
2460 int width) {
2461 uint8_t b0;
2462 uint8_t g0;
2463 uint8_t r0;
2464 uint8_t b1;
2465 uint8_t g1;
2466 uint8_t r1;
2467 int x;
2468 for (x = 0; x < width - 1; x += 2) {
2469 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2470 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2471 b0 = b0 >> 3;
2472 g0 = g0 >> 3;
2473 r0 = r0 >> 3;
2474 b1 = b1 >> 3;
2475 g1 = g1 >> 3;
2476 r1 = r1 >> 3;
2477 *(uint16_t*)(dst_argb1555 + 0) =
2478 STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
2479 *(uint16_t*)(dst_argb1555 + 2) =
2480 STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000);
2481 src_y += 2;
2482 src_u += 1;
2483 src_v += 1;
2484 dst_argb1555 += 4; // Advance 2 pixels.
2485 }
2486 if (width & 1) {
2487 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2488 b0 = b0 >> 3;
2489 g0 = g0 >> 3;
2490 r0 = r0 >> 3;
2491 *(uint16_t*)(dst_argb1555) =
2492 STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
2493 }
2494 }
2495
I422ToRGB565Row_C(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)2496 void I422ToRGB565Row_C(const uint8_t* src_y,
2497 const uint8_t* src_u,
2498 const uint8_t* src_v,
2499 uint8_t* dst_rgb565,
2500 const struct YuvConstants* yuvconstants,
2501 int width) {
2502 uint8_t b0;
2503 uint8_t g0;
2504 uint8_t r0;
2505 uint8_t b1;
2506 uint8_t g1;
2507 uint8_t r1;
2508 int x;
2509 for (x = 0; x < width - 1; x += 2) {
2510 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2511 YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2512 b0 = b0 >> 3;
2513 g0 = g0 >> 2;
2514 r0 = r0 >> 3;
2515 b1 = b1 >> 3;
2516 g1 = g1 >> 2;
2517 r1 = r1 >> 3;
2518 *(uint16_t*)(dst_rgb565 + 0) =
2519 STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
2520 *(uint16_t*)(dst_rgb565 + 2) =
2521 STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
2522 src_y += 2;
2523 src_u += 1;
2524 src_v += 1;
2525 dst_rgb565 += 4; // Advance 2 pixels.
2526 }
2527 if (width & 1) {
2528 YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2529 b0 = b0 >> 3;
2530 g0 = g0 >> 2;
2531 r0 = r0 >> 3;
2532 *(uint16_t*)(dst_rgb565 + 0) =
2533 STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
2534 }
2535 }
2536
// Convert a row of NV12 (8 bit Y plane plus interleaved UV) to ARGB.
void NV12ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int uv = i & ~1;  // UV pair shared by two horizontal pixels.
    uint8_t* out = rgb_buf + 4 * i;
    YuvPixel(src_y[i], src_uv[uv], src_uv[uv + 1], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2560
// Convert a row of NV21 (8 bit Y plane plus interleaved VU) to ARGB.
void NV21ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_vu,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int vu = i & ~1;  // VU pair shared by two horizontal pixels.
    uint8_t* out = rgb_buf + 4 * i;
    // NV21 stores V first, so swap the chroma order for YuvPixel.
    YuvPixel(src_y[i], src_vu[vu + 1], src_vu[vu], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2584
// Convert a row of NV12 (8 bit Y plane plus interleaved UV) to RGB24.
void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int uv = i & ~1;  // UV pair shared by two horizontal pixels.
    uint8_t* out = rgb_buf + 3 * i;
    YuvPixel(src_y[i], src_uv[uv], src_uv[uv + 1], out + 0, out + 1, out + 2,
             yuvconstants);
  }
}
2605
// Convert a row of NV21 (8 bit Y plane plus interleaved VU) to RGB24.
void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int vu = i & ~1;  // VU pair shared by two horizontal pixels.
    uint8_t* out = rgb_buf + 3 * i;
    // NV21 stores V first, so swap the chroma order for YuvPixel.
    YuvPixel(src_y[i], src_vu[vu + 1], src_vu[vu], out + 0, out + 1, out + 2,
             yuvconstants);
  }
}
2626
NV12ToRGB565Row_C(const uint8_t * src_y,const uint8_t * src_uv,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)2627 void NV12ToRGB565Row_C(const uint8_t* src_y,
2628 const uint8_t* src_uv,
2629 uint8_t* dst_rgb565,
2630 const struct YuvConstants* yuvconstants,
2631 int width) {
2632 uint8_t b0;
2633 uint8_t g0;
2634 uint8_t r0;
2635 uint8_t b1;
2636 uint8_t g1;
2637 uint8_t r1;
2638 int x;
2639 for (x = 0; x < width - 1; x += 2) {
2640 YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2641 YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
2642 b0 = b0 >> 3;
2643 g0 = g0 >> 2;
2644 r0 = r0 >> 3;
2645 b1 = b1 >> 3;
2646 g1 = g1 >> 2;
2647 r1 = r1 >> 3;
2648 *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) |
2649 STATIC_CAST(uint16_t, g0 << 5) |
2650 STATIC_CAST(uint16_t, r0 << 11);
2651 *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) |
2652 STATIC_CAST(uint16_t, g1 << 5) |
2653 STATIC_CAST(uint16_t, r1 << 11);
2654 src_y += 2;
2655 src_uv += 2;
2656 dst_rgb565 += 4; // Advance 2 pixels.
2657 }
2658 if (width & 1) {
2659 YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2660 b0 = b0 >> 3;
2661 g0 = g0 >> 2;
2662 r0 = r0 >> 3;
2663 *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) |
2664 STATIC_CAST(uint16_t, g0 << 5) |
2665 STATIC_CAST(uint16_t, r0 << 11);
2666 }
2667 }
2668
// Convert a row of packed YUY2 (Y0 U Y1 V) to ARGB.
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int pair = (i >> 1) * 4;  // Byte offset of this pixel's YUYV group.
    uint8_t* out = rgb_buf + 4 * i;
    YuvPixel(src_yuy2[2 * i], src_yuy2[pair + 1], src_yuy2[pair + 3], out + 0,
             out + 1, out + 2, yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2690
// Convert a row of packed UYVY (U Y0 V Y1) to ARGB.
void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int pair = (i >> 1) * 4;  // Byte offset of this pixel's UYVY group.
    uint8_t* out = rgb_buf + 4 * i;
    YuvPixel(src_uyvy[2 * i + 1], src_uyvy[pair + 0], src_uyvy[pair + 2],
             out + 0, out + 1, out + 2, yuvconstants);
    out[3] = 255;  // Opaque alpha.
  }
}
2712
// Convert a row of 2x1 subsampled 8 bit YUV to RGBA (alpha byte first).
void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + 4 * i;
    // RGBA byte order puts alpha at offset 0 and color at offsets 1..3.
    YuvPixel(src_y[i], src_u[i >> 1], src_v[i >> 1], out + 1, out + 2, out + 3,
             yuvconstants);
    out[0] = 255;  // Opaque alpha.
  }
}
2738
// Convert a luma-only (I400) row to ARGB, one pixel at a time; alpha is 255.
void I400ToARGBRow_C(const uint8_t* src_y,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    YPixel(src_y[i], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    rgb_buf += 4;
  }
}
2757
// Reverse a row of bytes: dst[i] = src[width - 1 - i].
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint8_t* s = src + width - 1;  // last source byte
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = *s--;
  }
}
2770
// Reverse a row of 16-bit samples: dst[i] = src[width - 1 - i].
void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) {
  const uint16_t* s = src + width - 1;  // last source sample
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = *s--;
  }
}
2783
// Mirror a row of interleaved UV pairs; each pair keeps its internal UV order.
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
  int i;
  src_uv += (width - 1) * 2;  // start at the last pair
  for (i = 0; i < width; ++i) {
    dst_uv[0] = src_uv[0];
    dst_uv[1] = src_uv[1];
    dst_uv += 2;
    src_uv -= 2;
  }
}
2794
// Mirror a row of interleaved UV while splitting it into separate U and V
// planes: dst_u[i]/dst_v[i] come from pair (width - 1 - i).
void MirrorSplitUVRow_C(const uint8_t* src_uv,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int i;
  src_uv += (width - 1) * 2;  // start at the last UV pair
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_uv[0];
    dst_v[i] = src_uv[1];
    src_uv -= 2;
  }
}
2813
// Mirror a row of ARGB pixels, moving whole 32-bit pixels at a time.
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint32_t* s = (const uint32_t*)(src) + width - 1;  // last pixel
  uint32_t* d = (uint32_t*)(dst);
  int i;
  for (i = 0; i < width; ++i) {
    d[i] = *s--;
  }
}
2828
// Mirror a row of 3-byte RGB24 pixels; the B,G,R order within a pixel stays.
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
  int i;
  src_rgb24 += (width - 1) * 3;  // start at the last pixel
  for (i = 0; i < width; ++i) {
    dst_rgb24[0] = src_rgb24[0];
    dst_rgb24[1] = src_rgb24[1];
    dst_rgb24[2] = src_rgb24[2];
    dst_rgb24 += 3;
    src_rgb24 -= 3;
  }
}
2843
// De-interleave a UV row into separate U and V planes.
void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_uv[0];
    dst_v[i] = src_uv[1];
    src_uv += 2;
  }
}
2861
// Interleave separate U and V planes into a single UV row.
void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = src_u[i];
    dst_uv[2 * i + 1] = src_v[i];
  }
}
2879
// Gather one row of a tiled surface: copy 16 bytes from each tile (tiles are
// src_tile_stride bytes apart), plus any sub-16 remainder.
void DetileRow_C(const uint8_t* src,
                 ptrdiff_t src_tile_stride,
                 uint8_t* dst,
                 int width) {
  const int remainder = width & 15;
  int x;
  for (x = 0; x < width - 15; x += 16) {
    memcpy(dst, src, 16);
    dst += 16;
    src += src_tile_stride;
  }
  if (remainder) {
    memcpy(dst, src, remainder);
  }
}
2894
// 16-bit variant of DetileRow_C: 16 samples per tile, stride in samples.
void DetileRow_16_C(const uint16_t* src,
                    ptrdiff_t src_tile_stride,
                    uint16_t* dst,
                    int width) {
  const int remainder = width & 15;
  int x;
  for (x = 0; x < width - 15; x += 16) {
    memcpy(dst, src, 16 * sizeof(uint16_t));
    dst += 16;
    src += src_tile_stride;
  }
  if (remainder) {
    memcpy(dst, src, remainder * sizeof(uint16_t));
  }
}
2909
// De-tile an NV12-style UV plane: each 16-byte tile row holds 8 UV pairs,
// which are split into the U and V planes.
void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  const int leftover = width & 15;  // trailing bytes in a partial tile
  int x;
  for (x = 0; x < width - 15; x += 16) {
    SplitUVRow_C(src_uv, dst_u, dst_v, 8);
    dst_u += 8;
    dst_v += 8;
    src_uv += src_tile_stride;
  }
  if (leftover) {
    SplitUVRow_C(src_uv, dst_u, dst_v, (leftover + 1) / 2);
  }
}
2926
// Interleave tiled Y and UV planes into YUY2 (Y U Y V), 16 pixels per tile
// row. Widths are processed in multiples of 16, matching the tile width.
void DetileToYUY2_C(const uint8_t* src_y,
                    ptrdiff_t src_y_tile_stride,
                    const uint8_t* src_uv,
                    ptrdiff_t src_uv_tile_stride,
                    uint8_t* dst_yuy2,
                    int width) {
  int x, i;
  for (x = 0; x < width - 15; x += 16) {
    for (i = 0; i < 16; i += 2) {
      dst_yuy2[0] = src_y[i];
      dst_yuy2[1] = src_uv[i];
      dst_yuy2[2] = src_y[i + 1];
      dst_yuy2[3] = src_uv[i + 1];
      dst_yuy2 += 4;
    }
    src_y += src_y_tile_stride;
    src_uv += src_uv_tile_stride;
  }
}
2947
2948 // Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded
2949 // in 80 byte blocks representing 64 pixels each. The first 16 bytes of the
2950 // block contain all of the lower 2 bits of each pixel packed together, and the
2951 // next 64 bytes represent all the upper 8 bits of the pixel. The lower bits are
2952 // packed into 1x4 blocks, whereas the upper bits are packed in normal raster
2953 // order.
// Unpack MT2T 80-byte blocks (64 pixels each): 16 bytes of packed 2-bit low
// parts followed by 64 bytes of 8-bit high parts (see comment above).
void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) {
  size_t block;
  for (block = 0; block < size; block += 80) {
    const uint8_t* lower = src;       // 2-bit low parts, 4 pixels per byte
    const uint8_t* upper = src + 16;  // 8-bit high parts, raster order
    int j, k;
    for (j = 0; j < 4; ++j) {
      for (k = 0; k < 16; ++k) {
        const uint16_t hi = *upper++;
        const uint16_t lo = (uint16_t)((lower[k] >> (2 * j)) & 0x3);
        // 10 significant bits placed in the top of the 16-bit word; the high
        // bits are replicated into the bottom to fill the full range.
        *dst++ = (uint16_t)((lo << 6) | (hi << 8) | (hi >> 2));
      }
    }
    src += 80;
  }
}
2971
// Split packed RGB24 into three planes (R, G, B).
void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  int i;
  for (i = 0; i < width; ++i, src_rgb += 3) {
    dst_r[i] = src_rgb[0];
    dst_g[i] = src_rgb[1];
    dst_b[i] = src_rgb[2];
  }
}
2985
// Merge three planes (R, G, B) into packed RGB24.
void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  int i;
  for (i = 0; i < width; ++i, dst_rgb += 3) {
    dst_rgb[0] = src_r[i];
    dst_rgb[1] = src_g[i];
    dst_rgb[2] = src_b[i];
  }
}
2999
// Split packed ARGB (bytes B,G,R,A in memory) into four planes.
void SplitARGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    uint8_t* dst_a,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_b[i] = src_argb[4 * i + 0];
    dst_g[i] = src_argb[4 * i + 1];
    dst_r[i] = src_argb[4 * i + 2];
    dst_a[i] = src_argb[4 * i + 3];
  }
}
3015
// Merge four planes into packed ARGB (bytes B,G,R,A in memory).
void MergeARGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    const uint8_t* src_a,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i, dst_argb += 4) {
    dst_argb[0] = src_b[i];
    dst_argb[1] = src_g[i];
    dst_argb[2] = src_r[i];
    dst_argb[3] = src_a[i];
  }
}
3031
// Merge 10..16-bit R, G, B planes into AR30 (10 bits per channel, 2-bit
// alpha forced on). Samples are reduced to 10 bits then clamped.
void MergeXR30Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint8_t* dst_ar30,
                    int depth,
                    int width) {
  const int shift = depth - 10;  // bits to drop to reach 10-bit channels
  uint32_t* dst = (uint32_t*)dst_ar30;
  int i;
  assert(depth >= 10);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    const uint32_t r = clamp1023(src_r[i] >> shift);
    const uint32_t g = clamp1023(src_g[i] >> shift);
    const uint32_t b = clamp1023(src_b[i] >> shift);
    // Little-endian AR30: B bits 0..9, G 10..19, R 20..29, A = 3 (opaque).
    dst[i] = b | (g << 10) | (r << 20) | 0xc0000000;
  }
}
3050
// Merge `depth`-bit R, G, B, A planes into msb-justified 16-bit AR64
// (stored B,G,R,A). Values above the depth's maximum are clamped.
void MergeAR64Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    const uint16_t* src_a,
                    uint16_t* dst_ar64,
                    int depth,
                    int width) {
  const int shift = 16 - depth;      // msb-justify each sample
  const int max = (1 << depth) - 1;  // clamp ceiling for this depth
  int i;
  assert(depth >= 1);
  assert(depth <= 16);
  for (i = 0; i < width; ++i, dst_ar64 += 4) {
    dst_ar64[0] = (uint16_t)(ClampMax(src_b[i], max) << shift);
    dst_ar64[1] = (uint16_t)(ClampMax(src_g[i], max) << shift);
    dst_ar64[2] = (uint16_t)(ClampMax(src_r[i], max) << shift);
    dst_ar64[3] = (uint16_t)(ClampMax(src_a[i], max) << shift);
  }
}
3071
// Merge `depth`-bit (8..16) R, G, B, A planes into 8-bit packed ARGB
// (stored B,G,R,A), truncating low bits and clamping to 255.
void MergeARGB16To8Row_C(const uint16_t* src_r,
                         const uint16_t* src_g,
                         const uint16_t* src_b,
                         const uint16_t* src_a,
                         uint8_t* dst_argb,
                         int depth,
                         int width) {
  const int shift = depth - 8;  // bits to drop to reach 8-bit channels
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i, dst_argb += 4) {
    dst_argb[0] = (uint8_t)clamp255(src_b[i] >> shift);
    dst_argb[1] = (uint8_t)clamp255(src_g[i] >> shift);
    dst_argb[2] = (uint8_t)clamp255(src_r[i] >> shift);
    dst_argb[3] = (uint8_t)clamp255(src_a[i] >> shift);
  }
}
3091
// Merge `depth`-bit R, G, B planes into msb-justified 16-bit AR64
// (stored B,G,R,A) with alpha forced fully opaque.
void MergeXR64Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint16_t* dst_ar64,
                    int depth,
                    int width) {
  const int shift = 16 - depth;      // msb-justify each sample
  const int max = (1 << depth) - 1;  // clamp ceiling for this depth
  int i;
  assert(depth >= 1);
  assert(depth <= 16);
  for (i = 0; i < width; ++i, dst_ar64 += 4) {
    dst_ar64[0] = (uint16_t)(ClampMax(src_b[i], max) << shift);
    dst_ar64[1] = (uint16_t)(ClampMax(src_g[i], max) << shift);
    dst_ar64[2] = (uint16_t)(ClampMax(src_r[i], max) << shift);
    dst_ar64[3] = 0xffff;  // opaque alpha
  }
}
3111
// Merge `depth`-bit (8..16) R, G, B planes into 8-bit packed ARGB
// (stored B,G,R,A) with alpha forced to 255.
void MergeXRGB16To8Row_C(const uint16_t* src_r,
                         const uint16_t* src_g,
                         const uint16_t* src_b,
                         uint8_t* dst_argb,
                         int depth,
                         int width) {
  const int shift = depth - 8;  // bits to drop to reach 8-bit channels
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i, dst_argb += 4) {
    dst_argb[0] = (uint8_t)clamp255(src_b[i] >> shift);
    dst_argb[1] = (uint8_t)clamp255(src_g[i] >> shift);
    dst_argb[2] = (uint8_t)clamp255(src_r[i] >> shift);
    dst_argb[3] = 0xff;  // opaque alpha
  }
}
3130
// Split packed ARGB into R, G, B planes, discarding alpha.
void SplitXRGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_b[i] = src_argb[4 * i + 0];
    dst_g[i] = src_argb[4 * i + 1];
    dst_r[i] = src_argb[4 * i + 2];
  }
}
3144
// Merge R, G, B planes into packed ARGB with alpha forced to 255.
void MergeXRGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i, dst_argb += 4) {
    dst_argb[0] = src_b[i];
    dst_argb[1] = src_g[i];
    dst_argb[2] = src_r[i];
    dst_argb[3] = 255;  // opaque alpha
  }
}
3159
3160 // Convert lsb formats to msb, depending on sample depth.
// Interleave U and V, shifting lsb-justified `depth`-bit samples up to msb
// justification in 16 bits.
void MergeUVRow_16_C(const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint16_t* dst_uv,
                     int depth,
                     int width) {
  const int shift = 16 - depth;  // msb-justify
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = (uint16_t)(src_u[i] << shift);
    dst_uv[2 * i + 1] = (uint16_t)(src_v[i] << shift);
  }
}
3176
3177 // Convert msb formats to lsb, depending on sample depth.
// De-interleave UV, shifting msb-justified samples down to lsb-justified
// `depth`-bit values.
void SplitUVRow_16_C(const uint16_t* src_uv,
                     uint16_t* dst_u,
                     uint16_t* dst_v,
                     int depth,
                     int width) {
  const int shift = 16 - depth;  // bits to drop for lsb justification
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_u[i] = (uint16_t)(src_uv[2 * i + 0] >> shift);
    dst_v[i] = (uint16_t)(src_uv[2 * i + 1] >> shift);
  }
}
3193
// Multiply each 16-bit sample by scale (result truncated to 16 bits).
void MultiplyRow_16_C(const uint16_t* src_y,
                      uint16_t* dst_y,
                      int scale,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint16_t)(src_y[i] * scale);
  }
}
3203
// Scale each 16-bit sample by scale/65536 (fixed-point multiply).
void DivideRow_16_C(const uint16_t* src_y,
                    uint16_t* dst_y,
                    int scale,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint16_t)((src_y[i] * scale) >> 16);
  }
}
3213
3214 // Use scale to convert lsb formats to msb, depending how many bits there are:
3215 // 32768 = 9 bits
3216 // 16384 = 10 bits
3217 // 4096 = 12 bits
3218 // 256 = 16 bits
3219 // TODO(fbarchard): change scale to bits
3220 #define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
3221
// Scale 16-bit samples down to 8 bits: dst = clamp255((v * scale) >> 16).
// See the scale table above for the scale value per source bit depth.
void Convert16To8Row_C(const uint16_t* src_y,
                       uint8_t* dst_y,
                       int scale,
                       int width) {
  int i;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint8_t)C16TO8(src_y[i], scale);
  }
}
3234
3235 // Use scale to convert lsb formats to msb, depending how many bits there are:
3236 // 1024 = 10 bits
// Widen 8-bit samples using a replicated-byte multiply: scale * 0x0101
// spreads each source byte into both halves before the fixed-point scale.
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  const int replicated = scale * 0x0101;  // replicates the byte.
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint16_t)((src_y[i] * replicated) >> 16);
  }
}
3247
// Copy a row of bytes.
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  memcpy(dst, src, (size_t)count);
}
3251
// Copy a row of 16-bit samples.
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  memcpy(dst, src, (size_t)count * sizeof(uint16_t));
}
3255
// Fill a row with a single byte value.
void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  memset(dst, v8, (size_t)width);
}
3259
// Store a 32-bit ARGB value into every pixel; memcpy keeps the store
// alignment-safe regardless of dst_argb's alignment.
void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  int i;
  for (i = 0; i < width; ++i, dst_argb += sizeof(v32)) {
    memcpy(dst_argb, &v32, sizeof(v32));
  }
}
3266
3267 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
// Filter 2 rows of YUY2 chroma (422) down to one row of 420 U and V,
// averaging vertically with rounding.
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* src2 = src_yuy2 + src_stride_yuy2;  // row below
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = (uint8_t)((src_yuy2[1] + src2[1] + 1) >> 1);
    *dst_v++ = (uint8_t)((src_yuy2[3] + src2[3] + 1) >> 1);
    src_yuy2 += 4;
    src2 += 4;
  }
}
3283
3284 // Filter 2 rows of YUY2 UV's (422) into UV (NV12).
// Filter 2 rows of YUY2 chroma (422) down to one interleaved UV row (NV12),
// averaging vertically with rounding.
void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
                     int src_stride_yuy2,
                     uint8_t* dst_uv,
                     int width) {
  const uint8_t* src2 = src_yuy2 + src_stride_yuy2;  // row below
  int x;
  for (x = 0; x < width; x += 2) {
    dst_uv[0] = (uint8_t)((src_yuy2[1] + src2[1] + 1) >> 1);
    dst_uv[1] = (uint8_t)((src_yuy2[3] + src2[3] + 1) >> 1);
    src_yuy2 += 4;
    src2 += 4;
    dst_uv += 2;
  }
}
3298
3299 // Copy row of YUY2 UV's (422) into U and V (422).
// Copy the chroma of one YUY2 row into separate U and V planes (422 stays
// 422, no filtering).
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;
  }
}
3314
3315 // Copy row of YUY2 Y's (422) into Y (420/422).
// Extract the Y channel from YUY2: luma is every even byte.
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = src_yuy2[2 * i];
  }
}
3328
3329 // Filter 2 rows of UYVY UV's (422) into U and V (420).
// Filter 2 rows of UYVY chroma (422) down to one row of 420 U and V,
// averaging vertically with rounding.
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* src2 = src_uyvy + src_stride_uyvy;  // row below
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = (uint8_t)((src_uyvy[0] + src2[0] + 1) >> 1);
    *dst_v++ = (uint8_t)((src_uyvy[2] + src2[2] + 1) >> 1);
    src_uyvy += 4;
    src2 += 4;
  }
}
3345
3346 // Copy row of UYVY UV's (422) into U and V (422).
// Copy the chroma of one UYVY row into separate U and V planes (422 stays
// 422, no filtering).
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x;
  for (x = 0; x < width; x += 2) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
  }
}
3361
3362 // Copy row of UYVY Y's (422) into Y (420/422).
// Extract the Y channel from UYVY: luma is every odd byte.
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = src_uyvy[2 * i + 1];
  }
}
3375
3376 #define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
3377
3378 // Blend src_argb over src_argb1 and store to dst_argb.
3379 // dst_argb may be src_argb or src_argb1.
3380 // This code mimics the SSSE3 version for better testability.
void ARGBBlendRow_C(const uint8_t* src_argb,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  int x;
  // Two pixels per iteration to mirror the SIMD code's loop structure.
  // BLEND adds the foreground channel at full strength and scales the
  // background by (256 - a), i.e. the foreground is treated as already
  // premultiplied by its alpha.
  for (x = 0; x < width - 1; x += 2) {
    uint32_t fb = src_argb[0];   // foreground blue
    uint32_t fg = src_argb[1];   // foreground green
    uint32_t fr = src_argb[2];   // foreground red
    uint32_t a = src_argb[3];    // foreground alpha
    uint32_t bb = src_argb1[0];  // background blue
    uint32_t bg = src_argb1[1];  // background green
    uint32_t br = src_argb1[2];  // background red
    dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
    dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
    dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
    dst_argb[3] = 255u;  // output alpha is forced opaque

    // Second pixel of the pair.
    fb = src_argb[4 + 0];
    fg = src_argb[4 + 1];
    fr = src_argb[4 + 2];
    a = src_argb[4 + 3];
    bb = src_argb1[4 + 0];
    bg = src_argb1[4 + 1];
    br = src_argb1[4 + 2];
    dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
    dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
    dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
    dst_argb[4 + 3] = 255u;
    src_argb += 8;
    src_argb1 += 8;
    dst_argb += 8;
  }

  // Single trailing pixel for odd widths.
  if (width & 1) {
    uint32_t fb = src_argb[0];
    uint32_t fg = src_argb[1];
    uint32_t fr = src_argb[2];
    uint32_t a = src_argb[3];
    uint32_t bb = src_argb1[0];
    uint32_t bg = src_argb1[1];
    uint32_t br = src_argb1[2];
    dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
    dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
    dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
    dst_argb[3] = 255u;
  }
}
3429 #undef BLEND
3430
3431 #define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
// Alpha-blend two planes per pixel: dst = (a*src0 + (255-a)*src1 + 255) >> 8.
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int a = alpha[i];
    dst[i] = (uint8_t)((a * src0[i] + (255 - a) * src1[i] + 255) >> 8);
  }
}
3450 #undef UBLEND
3451
3452 #define ATTENUATE(f, a) (f * a + 255) >> 8
3453
3454 // Multiply source RGB by alpha and store to destination.
// Premultiply each pixel's RGB by its alpha: c = (c * a + 255) >> 8.
// Alpha itself is copied through unchanged.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t a = src_argb[3];
    dst_argb[0] = (uint8_t)((src_argb[0] * a + 255) >> 8);
    dst_argb[1] = (uint8_t)((src_argb[1] * a + 255) >> 8);
    dst_argb[2] = (uint8_t)((src_argb[2] * a + 255) >> 8);
    dst_argb[3] = (uint8_t)a;
    src_argb += 4;
    dst_argb += 4;
  }
}
3489 #undef ATTENUATE
3490
3491 // Divide source RGB by alpha and store to destination.
3492 // b = (b * 255 + (a / 2)) / a;
3493 // g = (g * 255 + (a / 2)) / a;
3494 // r = (r * 255 + (a / 2)) / a;
3495 // Reciprocal method is off by 1 on some values. ie 125
3496 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
#define T(a) 0x01000000 + (0x10000 / a)
// Each entry packs 1.0 in the upper 16 bits and 65536/a (8.8 fixed point)
// in the lower 16. The endpoints are special-cased: a=0 yields reciprocal 0
// (leave fully transparent pixels untouched), a=1 yields 0xffff, and a=255
// yields 0x0100.
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03),   T(0x04), T(0x05), T(0x06),
    T(0x07),    T(0x08),    T(0x09), T(0x0a),   T(0x0b), T(0x0c), T(0x0d),
    T(0x0e),    T(0x0f),    T(0x10), T(0x11),   T(0x12), T(0x13), T(0x14),
    T(0x15),    T(0x16),    T(0x17), T(0x18),   T(0x19), T(0x1a), T(0x1b),
    T(0x1c),    T(0x1d),    T(0x1e), T(0x1f),   T(0x20), T(0x21), T(0x22),
    T(0x23),    T(0x24),    T(0x25), T(0x26),   T(0x27), T(0x28), T(0x29),
    T(0x2a),    T(0x2b),    T(0x2c), T(0x2d),   T(0x2e), T(0x2f), T(0x30),
    T(0x31),    T(0x32),    T(0x33), T(0x34),   T(0x35), T(0x36), T(0x37),
    T(0x38),    T(0x39),    T(0x3a), T(0x3b),   T(0x3c), T(0x3d), T(0x3e),
    T(0x3f),    T(0x40),    T(0x41), T(0x42),   T(0x43), T(0x44), T(0x45),
    T(0x46),    T(0x47),    T(0x48), T(0x49),   T(0x4a), T(0x4b), T(0x4c),
    T(0x4d),    T(0x4e),    T(0x4f), T(0x50),   T(0x51), T(0x52), T(0x53),
    T(0x54),    T(0x55),    T(0x56), T(0x57),   T(0x58), T(0x59), T(0x5a),
    T(0x5b),    T(0x5c),    T(0x5d), T(0x5e),   T(0x5f), T(0x60), T(0x61),
    T(0x62),    T(0x63),    T(0x64), T(0x65),   T(0x66), T(0x67), T(0x68),
    T(0x69),    T(0x6a),    T(0x6b), T(0x6c),   T(0x6d), T(0x6e), T(0x6f),
    T(0x70),    T(0x71),    T(0x72), T(0x73),   T(0x74), T(0x75), T(0x76),
    T(0x77),    T(0x78),    T(0x79), T(0x7a),   T(0x7b), T(0x7c), T(0x7d),
    T(0x7e),    T(0x7f),    T(0x80), T(0x81),   T(0x82), T(0x83), T(0x84),
    T(0x85),    T(0x86),    T(0x87), T(0x88),   T(0x89), T(0x8a), T(0x8b),
    T(0x8c),    T(0x8d),    T(0x8e), T(0x8f),   T(0x90), T(0x91), T(0x92),
    T(0x93),    T(0x94),    T(0x95), T(0x96),   T(0x97), T(0x98), T(0x99),
    T(0x9a),    T(0x9b),    T(0x9c), T(0x9d),   T(0x9e), T(0x9f), T(0xa0),
    T(0xa1),    T(0xa2),    T(0xa3), T(0xa4),   T(0xa5), T(0xa6), T(0xa7),
    T(0xa8),    T(0xa9),    T(0xaa), T(0xab),   T(0xac), T(0xad), T(0xae),
    T(0xaf),    T(0xb0),    T(0xb1), T(0xb2),   T(0xb3), T(0xb4), T(0xb5),
    T(0xb6),    T(0xb7),    T(0xb8), T(0xb9),   T(0xba), T(0xbb), T(0xbc),
    T(0xbd),    T(0xbe),    T(0xbf), T(0xc0),   T(0xc1), T(0xc2), T(0xc3),
    T(0xc4),    T(0xc5),    T(0xc6), T(0xc7),   T(0xc8), T(0xc9), T(0xca),
    T(0xcb),    T(0xcc),    T(0xcd), T(0xce),   T(0xcf), T(0xd0), T(0xd1),
    T(0xd2),    T(0xd3),    T(0xd4), T(0xd5),   T(0xd6), T(0xd7), T(0xd8),
    T(0xd9),    T(0xda),    T(0xdb), T(0xdc),   T(0xdd), T(0xde), T(0xdf),
    T(0xe0),    T(0xe1),    T(0xe2), T(0xe3),   T(0xe4), T(0xe5), T(0xe6),
    T(0xe7),    T(0xe8),    T(0xe9), T(0xea),   T(0xeb), T(0xec), T(0xed),
    T(0xee),    T(0xef),    T(0xf0), T(0xf1),   T(0xf2), T(0xf3), T(0xf4),
    T(0xf5),    T(0xf6),    T(0xf7), T(0xf8),   T(0xf9), T(0xfa), T(0xfb),
    T(0xfc),    T(0xfd),    T(0xfe), 0x01000100};
#undef T
3537
3538 #if LIBYUV_UNATTENUATE_DUP
3539 // This code mimics the Intel SIMD version for better testability.
3540 #define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
3541 #else
3542 #define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
3543 #endif
3544
3545 // mimics the Intel SIMD code for exactness.
void ARGBUnattenuateRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          int width) {
  int i;
  // Divide out premultiplied alpha using the 8.8 fixed-point reciprocal
  // table instead of a per-pixel divide. UNATTENUATE's definition depends on
  // LIBYUV_UNATTENUATE_DUP (byte-duplicating form matches the Intel SIMD
  // rounding); alpha is copied through unchanged.
  for (i = 0; i < width; ++i) {
    uint32_t b = src_argb[0];
    uint32_t g = src_argb[1];
    uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point

    // Clamping should not be necessary but is free in assembly.
    dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia));
    dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia));
    dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia));
    dst_argb[3] = STATIC_CAST(uint8_t, a);
    src_argb += 4;
    dst_argb += 4;
  }
}
3566
// Integral-image row: cumsum[x] holds the running sum of this row's pixels
// 0..x (per channel) plus the cumulative values from the row above.
void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t sum_b = 0;
  int32_t sum_g = 0;
  int32_t sum_r = 0;
  int32_t sum_a = 0;
  int x;
  for (x = 0; x < width; ++x) {
    sum_b += row[x * 4 + 0];
    sum_g += row[x * 4 + 1];
    sum_r += row[x * 4 + 2];
    sum_a += row[x * 4 + 3];
    cumsum[x * 4 + 0] = sum_b + previous_cumsum[x * 4 + 0];
    cumsum[x * 4 + 1] = sum_g + previous_cumsum[x * 4 + 1];
    cumsum[x * 4 + 2] = sum_r + previous_cumsum[x * 4 + 2];
    cumsum[x * 4 + 3] = sum_a + previous_cumsum[x * 4 + 3];
  }
}
3584
// Average pixels over a box using a summed-area table. tl/bl point at the
// top-left and bottom-left corners; w is the corner offset (in int32 lanes)
// to the right edge, area is the box's pixel count.
void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  float ooa;
  int i, c;
  assert(area != 0);

  ooa = 1.0f / (float)area;  // reciprocal: multiply beats divide per pixel
  for (i = 0; i < count; ++i) {
    for (c = 0; c < 4; ++c) {
      // Box sum from the integral image: bl[w] + tl[0] - bl[0] - tl[w].
      dst[c] = (uint8_t)((float)(bl[w + c] + tl[c] - bl[c] - tl[w + c]) * ooa);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
3614
3615 // Copy pixels from rotated source to destination row with a slope.
3616 LIBYUV_API
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  // Walk the source along (u,v) with per-pixel deltas (du,dv), copying one
  // ARGB pixel per step. memcpy keeps the 4-byte load/store alignment-safe.
  float u = uv_dudv[0];
  float v = uv_dudv[1];
  const float du = uv_dudv[2];
  const float dv = uv_dudv[3];
  int i;
  for (i = 0; i < width; ++i) {
    const int x = (int)(u);
    const int y = (int)(v);
    memcpy(dst_argb, src_argb + y * src_argb_stride + x * 4, 4);
    dst_argb += 4;
    u += du;
    v += dv;
  }
}
3637
3638 // Blend 2 rows into 1.
// Blend 2 rows into 1: per-byte rounded average of a row and the row
// src_uv_stride bytes below it.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  const uint8_t* src2 = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = (uint8_t)((src_uv[i] + src2[i] + 1) >> 1);
  }
}
3648
// 16-bit variant of HalfRow_C: rounded average of two rows of samples.
static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  const uint16_t* src2 = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = (uint16_t)((src_uv[i] + src2[i] + 1) >> 1);
  }
}
3658
HalfRow_16To8_C(const uint16_t * src_uv,ptrdiff_t src_uv_stride,uint8_t * dst_uv,int scale,int width)3659 static void HalfRow_16To8_C(const uint16_t* src_uv,
3660 ptrdiff_t src_uv_stride,
3661 uint8_t* dst_uv,
3662 int scale,
3663 int width) {
3664 int x;
3665 for (x = 0; x < width; ++x) {
3666 dst_uv[x] = STATIC_CAST(
3667 uint8_t,
3668 C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale));
3669 }
3670 }
3671
3672 // C version 2x2 -> 2x1.
InterpolateRow_C(uint8_t * dst_ptr,const uint8_t * src_ptr,ptrdiff_t src_stride,int width,int source_y_fraction)3673 void InterpolateRow_C(uint8_t* dst_ptr,
3674 const uint8_t* src_ptr,
3675 ptrdiff_t src_stride,
3676 int width,
3677 int source_y_fraction) {
3678 int y1_fraction = source_y_fraction;
3679 int y0_fraction = 256 - y1_fraction;
3680 const uint8_t* src_ptr1 = src_ptr + src_stride;
3681 int x;
3682 assert(source_y_fraction >= 0);
3683 assert(source_y_fraction < 256);
3684
3685 if (y1_fraction == 0) {
3686 memcpy(dst_ptr, src_ptr, width);
3687 return;
3688 }
3689 if (y1_fraction == 128) {
3690 HalfRow_C(src_ptr, src_stride, dst_ptr, width);
3691 return;
3692 }
3693 for (x = 0; x < width; ++x) {
3694 dst_ptr[0] = STATIC_CAST(
3695 uint8_t,
3696 (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
3697 ++src_ptr;
3698 ++src_ptr1;
3699 ++dst_ptr;
3700 }
3701 }
3702
3703 // C version 2x2 -> 2x1.
InterpolateRow_16_C(uint16_t * dst_ptr,const uint16_t * src_ptr,ptrdiff_t src_stride,int width,int source_y_fraction)3704 void InterpolateRow_16_C(uint16_t* dst_ptr,
3705 const uint16_t* src_ptr,
3706 ptrdiff_t src_stride,
3707 int width,
3708 int source_y_fraction) {
3709 int y1_fraction = source_y_fraction;
3710 int y0_fraction = 256 - y1_fraction;
3711 const uint16_t* src_ptr1 = src_ptr + src_stride;
3712 int x;
3713 assert(source_y_fraction >= 0);
3714 assert(source_y_fraction < 256);
3715
3716 if (y1_fraction == 0) {
3717 memcpy(dst_ptr, src_ptr, width * 2);
3718 return;
3719 }
3720 if (y1_fraction == 128) {
3721 HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
3722 return;
3723 }
3724 for (x = 0; x < width; ++x) {
3725 dst_ptr[0] = STATIC_CAST(
3726 uint16_t,
3727 (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
3728 ++src_ptr;
3729 ++src_ptr1;
3730 ++dst_ptr;
3731 }
3732 }
3733
3734 // C version 2x2 16 bit-> 2x1 8 bit.
3735 // Use scale to convert lsb formats to msb, depending how many bits there are:
3736 // 32768 = 9 bits
3737 // 16384 = 10 bits
3738 // 4096 = 12 bits
3739 // 256 = 16 bits
3740 // TODO(fbarchard): change scale to bits
3741
InterpolateRow_16To8_C(uint8_t * dst_ptr,const uint16_t * src_ptr,ptrdiff_t src_stride,int scale,int width,int source_y_fraction)3742 void InterpolateRow_16To8_C(uint8_t* dst_ptr,
3743 const uint16_t* src_ptr,
3744 ptrdiff_t src_stride,
3745 int scale,
3746 int width,
3747 int source_y_fraction) {
3748 int y1_fraction = source_y_fraction;
3749 int y0_fraction = 256 - y1_fraction;
3750 const uint16_t* src_ptr1 = src_ptr + src_stride;
3751 int x;
3752 assert(source_y_fraction >= 0);
3753 assert(source_y_fraction < 256);
3754
3755 if (source_y_fraction == 0) {
3756 Convert16To8Row_C(src_ptr, dst_ptr, scale, width);
3757 return;
3758 }
3759 if (source_y_fraction == 128) {
3760 HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width);
3761 return;
3762 }
3763 for (x = 0; x < width; ++x) {
3764 dst_ptr[0] = STATIC_CAST(
3765 uint8_t,
3766 C16TO8(
3767 (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
3768 scale));
3769 src_ptr += 1;
3770 src_ptr1 += 1;
3771 dst_ptr += 1;
3772 }
3773 }
3774
3775 // Use first 4 shuffler values to reorder ARGB channels.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  // Reorder ARGB channels; the first 4 shuffler values give the source
  // channel index for each output channel.
  const int i0 = shuffler[0];
  const int i1 = shuffler[1];
  const int i2 = shuffler[2];
  const int i3 = shuffler[3];
  int x;
  for (x = 0; x < width; ++x) {
    // Read all 4 channels before writing so in-place conversion works.
    uint8_t c0 = src_argb[i0];
    uint8_t c1 = src_argb[i1];
    uint8_t c2 = src_argb[i2];
    uint8_t c3 = src_argb[i3];
    dst_argb[0] = c0;
    dst_argb[1] = c1;
    dst_argb[2] = c2;
    dst_argb[3] = c3;
    src_argb += 4;
    dst_argb += 4;
  }
}
3800
// Interleave I422 planes into packed YUY2 (Y0 U Y1 V per pixel pair).
void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[1];
    dst_frame[3] = src_v[0];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
  if (width & 1) {
    // Odd trailing pixel: the second luma slot is written as 0.
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;
    dst_frame[3] = src_v[0];
  }
}
3824
// Interleave I422 planes into packed UYVY (U Y0 V Y1 per pixel pair).
void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
  if (width & 1) {
    // Odd trailing pixel: the second luma slot is written as 0.
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;
  }
}
3848
ARGBPolynomialRow_C(const uint8_t * src_argb,uint8_t * dst_argb,const float * poly,int width)3849 void ARGBPolynomialRow_C(const uint8_t* src_argb,
3850 uint8_t* dst_argb,
3851 const float* poly,
3852 int width) {
3853 int i;
3854 for (i = 0; i < width; ++i) {
3855 float b = (float)(src_argb[0]);
3856 float g = (float)(src_argb[1]);
3857 float r = (float)(src_argb[2]);
3858 float a = (float)(src_argb[3]);
3859 float b2 = b * b;
3860 float g2 = g * g;
3861 float r2 = r * r;
3862 float a2 = a * a;
3863 float db = poly[0] + poly[4] * b;
3864 float dg = poly[1] + poly[5] * g;
3865 float dr = poly[2] + poly[6] * r;
3866 float da = poly[3] + poly[7] * a;
3867 float b3 = b2 * b;
3868 float g3 = g2 * g;
3869 float r3 = r2 * r;
3870 float a3 = a2 * a;
3871 db += poly[8] * b2;
3872 dg += poly[9] * g2;
3873 dr += poly[10] * r2;
3874 da += poly[11] * a2;
3875 db += poly[12] * b3;
3876 dg += poly[13] * g3;
3877 dr += poly[14] * r3;
3878 da += poly[15] * a3;
3879
3880 dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db)));
3881 dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg)));
3882 dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr)));
3883 dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da)));
3884 src_argb += 4;
3885 dst_argb += 4;
3886 }
3887 }
3888
3889 // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor
3890 // adjust the source integer range to the half float range desired.
3891
3892 // This magic constant is 2^-112. Multiplying by this
3893 // is the same as subtracting 112 from the exponent, which
3894 // is the difference in exponent bias between 32-bit and
3895 // 16-bit floats. Once we've done this subtraction, we can
3896 // simply extract the low bits of the exponent and the high
3897 // bits of the mantissa from our float and we're done.
3898
3899 // Work around GCC 7 punning warning -Wstrict-aliasing
3900 #if defined(__GNUC__)
3901 typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
3902 #else
3903 typedef uint32_t uint32_alias_t;
3904 #endif
3905
void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  // Folding 2^-112 into the multiplier rebiases the float32 exponent so the
  // half-float bit pattern can be extracted with a single 13-bit shift.
  const float mult = 1.9259299444e-34f * scale;
  int i;
  for (i = 0; i < width; ++i) {
    float value = src[i] * mult;
    uint32_t bits;
    memcpy(&bits, &value, sizeof(bits));  // type-pun without aliasing issues
    dst[i] = (uint16_t)(bits >> 13);
  }
}
3917
// Widen bytes to floats, scaling each sample.
void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src[i] * scale;
  }
}
3925
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  // Remap B, G and R through a luma-dependent lookup table; alpha passes
  // through unchanged.  lumacoeff packs the B, G, R weights low-to-high.
  const uint32_t bc = lumacoeff & 0xff;
  const uint32_t gc = (lumacoeff >> 8) & 0xff;
  const uint32_t rc = (lumacoeff >> 16) & 0xff;
  int i;
  for (i = 0; i < width; ++i) {
    // Luminance selects the table row; channel values index its columns.
    uint32_t lum =
        (src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u;
    const uint8_t* table = lum + luma;
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
3967
// Copy only the alpha channel (byte 3 of each ARGB pixel) from src to dst.
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[4 * i + 3] = src[4 * i + 3];
  }
}
3980
// Extract the alpha channel of each ARGB pixel into a planar alpha row.
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[i] = src_argb[4 * i + 3];
  }
}
3993
// Copy a planar Y row into the alpha channel of each ARGB pixel.
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[4 * i + 3] = src[i];
  }
}
4006
4007 // Maximum temporary width for wrappers to process at a time, in pixels.
4008 #define MAXTWIDTH 2048
4009
#if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Two-step: convert a chunk of I422 to ARGB, then pack ARGB to RGB565.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, n);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, n);
    src_y += n;
    src_u += n / 2;
    src_v += n / 2;
    dst_rgb565 += n * 2;
    width -= n;
  }
}
#endif
4032
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Two-step: convert a chunk of I422 to ARGB, then pack ARGB to ARGB1555.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, n);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, n);
    src_y += n;
    src_u += n / 2;
    src_v += n / 2;
    dst_argb1555 += n * 2;
    width -= n;
  }
}
#endif
4054
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Two-step: convert a chunk of I422 to ARGB, then pack ARGB to ARGB4444.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, n);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, n);
    src_y += n;
    src_u += n / 2;
    src_v += n / 2;
    dst_argb4444 += n * 2;
    width -= n;
  }
}
#endif
4076
#if defined(HAS_NV12TORGB565ROW_SSSE3)
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_uv,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Two-step: convert a chunk of NV12 to ARGB, then pack ARGB to RGB565.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, n);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, n);
    src_y += n;
    src_uv += n;
    dst_rgb565 += n * 2;
    width -= n;
  }
}
#endif
4096
#if defined(HAS_NV12TORGB24ROW_SSSE3)
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Two-step: convert a chunk of NV12 to ARGB, then pack ARGB to RGB24.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, n);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, n);
    src_y += n;
    src_uv += n;
    dst_rgb24 += n * 3;
    width -= n;
  }
}
#endif
4116
#if defined(HAS_NV21TORGB24ROW_SSSE3)
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Two-step: convert a chunk of NV21 to ARGB, then pack ARGB to RGB24.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, n);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, n);
    src_y += n;
    src_vu += n;
    dst_rgb24 += n * 3;
    width -= n;
  }
}
#endif
4136
#if defined(HAS_NV12TORGB24ROW_AVX2)
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Two-step: convert a chunk of NV12 to ARGB, then pack ARGB to RGB24.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, n);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, n);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, n);
#endif
    src_y += n;
    src_uv += n;
    dst_rgb24 += n * 3;
    width -= n;
  }
}
#endif
4160
#if defined(HAS_NV21TORGB24ROW_AVX2)
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Two-step: convert a chunk of NV21 to ARGB, then pack ARGB to RGB24.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, n);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, n);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, n);
#endif
    src_y += n;
    src_vu += n;
    dst_rgb24 += n * 3;
    width -= n;
  }
}
#endif
4184
#if defined(HAS_I422TORGB565ROW_AVX2)
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Two-step: convert a chunk of I422 to ARGB, then pack ARGB to RGB565.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, n);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, n);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, n);
#endif
    src_y += n;
    src_u += n / 2;
    src_v += n / 2;
    dst_rgb565 += n * 2;
    width -= n;
  }
}
#endif
4209
#if defined(HAS_I422TOARGB1555ROW_AVX2)
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Two-step: convert a chunk of I422 to ARGB, then pack ARGB to ARGB1555.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, n);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, n);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, n);
#endif
    src_y += n;
    src_u += n / 2;
    src_v += n / 2;
    dst_argb1555 += n * 2;
    width -= n;
  }
}
#endif
4235
#if defined(HAS_I422TOARGB4444ROW_AVX2)
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Two-step: convert a chunk of I422 to ARGB, then pack ARGB to ARGB4444.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, n);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, n);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, n);
#endif
    src_y += n;
    src_u += n / 2;
    src_v += n / 2;
    dst_argb4444 += n * 2;
    width -= n;
  }
}
#endif
4261
#if defined(HAS_I422TORGB24ROW_AVX2)
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Two-step: convert a chunk of I422 to ARGB, then pack ARGB to RGB24.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, n);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, n);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, n);
#endif
    src_y += n;
    src_u += n / 2;
    src_v += n / 2;
    dst_rgb24 += n * 3;
    width -= n;
  }
}
#endif
4287
#if defined(HAS_I444TORGB24ROW_AVX2)
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Two-step: convert a chunk of I444 to ARGB, then pack ARGB to RGB24.
  // I444 chroma is full resolution, so u/v advance a full chunk per pass.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, n);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, n);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, n);
#endif
    src_y += n;
    src_u += n;
    src_v += n;
    dst_rgb24 += n * 3;
    width -= n;
  }
}
#endif
4313
#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Two-step: convert a chunk of NV12 to ARGB, then pack ARGB to RGB565.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, n);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, n);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, n);
#endif
    src_y += n;
    src_uv += n;
    dst_rgb565 += n * 2;
    width -= n;
  }
}
#endif
4337
#ifdef HAS_RGB24TOYJROW_AVX2
// Convert RGB24 pixels to YJ values via an intermediate ARGB row.
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, n);
    ARGBToYJRow_AVX2(row, dst_yj, n);
    src_rgb24 += n * 3;
    dst_yj += n;
    width -= n;
  }
}
#endif  // HAS_RGB24TOYJROW_AVX2
4353
#ifdef HAS_RAWTOYJROW_AVX2
// Convert RAW pixels to YJ values via an intermediate ARGB row.
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    RAWToARGBRow_SSSE3(src_raw, row, n);
    ARGBToYJRow_AVX2(row, dst_yj, n);
    src_raw += n * 3;
    dst_yj += n;
    width -= n;
  }
}
#endif  // HAS_RAWTOYJROW_AVX2
4369
#ifdef HAS_RGB24TOYJROW_SSSE3
// Convert RGB24 pixels to YJ values via an intermediate ARGB row.
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, n);
    ARGBToYJRow_SSSE3(row, dst_yj, n);
    src_rgb24 += n * 3;
    dst_yj += n;
    width -= n;
  }
}
#endif  // HAS_RGB24TOYJROW_SSSE3
4385
#ifdef HAS_RAWTOYJROW_SSSE3
// Convert RAW pixels to YJ values via an intermediate ARGB row.
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    RAWToARGBRow_SSSE3(src_raw, row, n);
    ARGBToYJRow_SSSE3(row, dst_yj, n);
    src_raw += n * 3;
    dst_yj += n;
    width -= n;
  }
}
#endif  // HAS_RAWTOYJROW_SSSE3
4401
#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
                               const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               int scale,
                               int width,
                               int source_y_fraction) {
  // Two-step: blend 16 bit rows in C, then narrow to 8 bit with AVX2.
  SIMD_ALIGNED(uint16_t row[MAXTWIDTH]);
  while (width > 0) {
    int n = width < MAXTWIDTH ? width : MAXTWIDTH;
    InterpolateRow_16_C(row, src_ptr, src_stride, n, source_y_fraction);
    Convert16To8Row_AVX2(row, dst_ptr, scale, n);
    src_ptr += n;
    dst_ptr += n;
    width -= n;
  }
}
#endif  // HAS_INTERPOLATEROW_16TO8_AVX2
4421
// Scale each sample into dst and return the sum of squares of the
// unscaled input samples.
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float sum_sq = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = src[i];
    sum_sq += v * v;
    dst[i] = v * scale;
  }
  return sum_sq;
}
4432
// Scale each sample into dst and return the maximum of the unscaled
// input samples (starting from 0, so never negative).
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float max_val = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = src[i];
    if (v > max_val) {
      max_val = v;
    }
    dst[i] = v * scale;
  }
  return max_val;
}
4444
// Multiply each sample by scale.
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src[i] * scale;
  }
}
4451
// Horizontal 5-tap Gaussian (1, 4, 6, 4, 1), normalized by 256 with rounding.
// Reads width + 4 source values.
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint32_t sum =
        src[i] + src[i + 1] * 4 + src[i + 2] * 6 + src[i + 3] * 4 + src[i + 4];
    dst[i] = (uint16_t)((sum + 128) >> 8);
  }
}
4461
4462 // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  // Weighted vertical sum; normalization happens in the row pass.
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src0[i] + src1[i] * 4 + src2[i] * 6 + src3[i] * 4 + src4[i];
  }
}
4475
// Horizontal 5-tap Gaussian (1, 4, 6, 4, 1) on floats, normalized by 256.
// Reads width + 4 source values.
void GaussRow_F32_C(const float* src, float* dst, int width) {
  const float kNorm = 1.0f / 256.0f;
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = (src[i] + src[i + 1] * 4 + src[i + 2] * 6 + src[i + 3] * 4 +
              src[i + 4]) *
             kNorm;
  }
}
4484
4485 // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  // Weighted vertical sum; normalization happens in the row pass.
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src0[i] + src1[i] * 4 + src2[i] * 6 + src3[i] * 4 + src4[i];
  }
}
4498
4499 // Convert biplanar NV21 to packed YUV24
// Convert biplanar NV21 (Y plane + interleaved VU) to packed YUV24 (V U Y).
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    // One VU pair is shared by two luma samples.
    uint8_t v = src_vu[0];
    uint8_t u = src_vu[1];
    dst_yuv24[0] = v;
    dst_yuv24[1] = u;
    dst_yuv24[2] = src_y[0];  // Y0
    dst_yuv24[3] = v;
    dst_yuv24[4] = u;
    dst_yuv24[5] = src_y[1];  // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];
    dst_yuv24[1] = src_vu[1];
    dst_yuv24[2] = src_y[0];
  }
}
4522
4523 // Filter 2 rows of AYUV UV's (444) into UV (420).
4524 // AYUV is VUYA in memory. UV for NV12 is UV order in memory.
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Average 2x2 blocks of AYUV (VUYA byte order) chroma into one UV pair.
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    // U is byte 1, V is byte 0 of each 4-byte VUYA pixel.
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + row1[1] + row1[5] + 2) >> 2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + row1[0] + row1[4] + 2) >> 2;
    src_ayuv += 8;
    row1 += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    // Odd trailing column: average vertically only.
    dst_uv[0] = (src_ayuv[1] + row1[1] + 1) >> 1;
    dst_uv[1] = (src_ayuv[0] + row1[0] + 1) >> 1;
  }
}
4546
4547 // Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Average 2x2 blocks of AYUV (VUYA byte order) chroma into one VU pair.
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    // V is byte 0, U is byte 1 of each 4-byte VUYA pixel.
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + row1[0] + row1[4] + 2) >> 2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + row1[1] + row1[5] + 2) >> 2;
    src_ayuv += 8;
    row1 += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    // Odd trailing column: average vertically only.
    dst_vu[0] = (src_ayuv[0] + row1[0] + 1) >> 1;
    dst_vu[1] = (src_ayuv[1] + row1[1] + 1) >> 1;
  }
}
4569
4570 // Copy row of AYUV Y's into Y
// Extract the Y channel (byte 2 of each VUYA pixel) into a planar Y row.
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[4 * x + 2];
  }
}
4579
4580 // Convert UV plane of NV12 to VU of NV21.
// Swap the byte order of each chroma pair (NV12 UV <-> NV21 VU).
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Read both bytes before writing so in-place swapping works.
    uint8_t u = src_uv[2 * x + 0];
    uint8_t v = src_uv[2 * x + 1];
    dst_vu[2 * x + 0] = v;
    dst_vu[2 * x + 1] = u;
  }
}
4592
// Downsample two rows of planar U and V by 2x2 averaging into interleaved UV.
void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  const uint8_t* u1 = src_u + src_stride_u;
  const uint8_t* v1 = src_v + src_stride_v;
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    dst_uv[0] = (src_u[0] + src_u[1] + u1[0] + u1[1] + 2) >> 2;
    dst_uv[1] = (src_v[0] + src_v[1] + v1[0] + v1[1] + 2) >> 2;
    src_u += 2;
    u1 += 2;
    src_v += 2;
    v1 += 2;
    dst_uv += 2;
  }
  if (width & 1) {
    // Odd trailing column: average vertically only.
    dst_uv[0] = (src_u[0] + u1[0] + 1) >> 1;
    dst_uv[1] = (src_v[0] + v1[0] + 1) >> 1;
  }
}
4616
4617 #undef STATIC_CAST
4618
4619 #ifdef __cplusplus
4620 } // extern "C"
4621 } // namespace libyuv
4622 #endif
4623