1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12
13 #include <assert.h>
14 #include <string.h> // For memcpy and memset.
15
16 #include "libyuv/basic_types.h"
17 #include "libyuv/convert_argb.h" // For kYuvI601Constants
18
19 #ifdef __cplusplus
20 namespace libyuv {
21 extern "C" {
22 #endif
23
24 // This macro controls YUV to RGB using unsigned math to extend range of
25 // YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
26 // LIBYUV_UNLIMITED_DATA
27
28 // Macros to enable unlimited data for each colorspace
29 // LIBYUV_UNLIMITED_BT601
30 // LIBYUV_UNLIMITED_BT709
31 // LIBYUV_UNLIMITED_BT2020
32
33 // The following macro from row_win makes the C code match the row_win code,
34 // which is 7 bit fixed point for ARGBToI420:
35 #if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
36 defined(_MSC_VER) && !defined(__clang__) && \
37 (defined(_M_IX86) || defined(_M_X64))
38 #define LIBYUV_RGB7 1
39 #endif
40
41 #if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
42 defined(__i386__) || defined(_M_IX86))
43 #define LIBYUV_ARGBTOUV_PAVGB 1
44 #define LIBYUV_RGBTOU_TRUNCATE 1
45 #define LIBYUV_ATTENUATE_DUP 1
46 #endif
47 #if defined(LIBYUV_BIT_EXACT)
48 #define LIBYUV_UNATTENUATE_DUP 1
49 #endif
50
51 // llvm x86 is poor at ternary operator, so use branchless min/max.
52
53 #define USE_BRANCHLESS 1
54 #if USE_BRANCHLESS
// Branchless clamps: -(cond) yields an all-ones mask when cond is true
// (cond is 0 or 1), so the mask selects or suppresses the value.
static __inline int32_t clamp0(int32_t x) {
  // 0 for negative x, x otherwise.
  return x & -(x >= 0);
}
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t x) {
  return 255 & (x | -(x >= 255));
}

static __inline int32_t clamp1023(int32_t x) {
  return 1023 & (x | -(x >= 1023));
}

// Clamp to an arbitrary maximum using the same mask trick.
static __inline int32_t ClampMax(int32_t x, int32_t max) {
  return max & (x | -(x >= max));
}

static __inline uint32_t Abs(int32_t x) {
  int sign = -(x < 0);  // all ones when x is negative
  return (x + sign) ^ sign;
}
76 #else // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  // Lower bound at zero.
  if (v < 0) {
    return 0;
  }
  return v;
}

static __inline int32_t clamp255(int32_t v) {
  if (v > 255) {
    return 255;
  }
  return v;
}

static __inline int32_t clamp1023(int32_t v) {
  if (v > 1023) {
    return 1023;
  }
  return v;
}

static __inline int32_t ClampMax(int32_t v, int32_t max) {
  if (v > max) {
    return max;
  }
  return v;
}

static __inline uint32_t Abs(int32_t v) {
  if (v < 0) {
    return (uint32_t)(-v);
  }
  return (uint32_t)v;
}
96 #endif // USE_BRANCHLESS
// Clamp val into [0, 255].
static __inline uint32_t Clamp(int32_t val) {
  return (uint32_t)clamp255(clamp0(val));
}

// Clamp val into [0, 1023] (10 bit range).
static __inline uint32_t Clamp10(int32_t val) {
  return (uint32_t)clamp1023(clamp0(val));
}
106
107 // Little Endian
108 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
109 defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \
110 (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
111 #define WRITEWORD(p, v) *(uint32_t*)(p) = v
112 #else
// Store a 32 bit word byte-by-byte in little endian order (big endian
// and unknown-endian fallback for the macro version above).
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  int i;
  for (i = 0; i < 4; ++i) {
    p[i] = (uint8_t)(v & 255);
    v >>= 8;
  }
}
119 #endif
120
// Expand 3 byte RGB24 pixels (B,G,R in memory) to 4 byte ARGB with
// opaque alpha.
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = src_rgb24[0];  // B
    dst_argb[1] = src_rgb24[1];  // G
    dst_argb[2] = src_rgb24[2];  // R
    dst_argb[3] = 255u;          // A: fully opaque
    src_rgb24 += 3;
    dst_argb += 4;
  }
}
135
// Convert RAW (R,G,B in memory) to ARGB (B,G,R,A): swap R/B and add
// opaque alpha.
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = src_raw[2];  // B
    dst_argb[1] = src_raw[1];  // G
    dst_argb[2] = src_raw[0];  // R
    dst_argb[3] = 255u;        // A
    src_raw += 3;
    dst_argb += 4;
  }
}
150
// Convert RAW (R,G,B in memory) to RGBA (A,B,G,R in memory): opaque
// alpha first, then the channels reversed.
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgba[0] = 255u;        // A
    dst_rgba[1] = src_raw[2];  // B
    dst_rgba[2] = src_raw[1];  // G
    dst_rgba[3] = src_raw[0];  // R
    src_raw += 3;
    dst_rgba += 4;
  }
}
165
// Convert RAW (R,G,B in memory) to RGB24 (B,G,R in memory): reverse
// the 3 bytes of each pixel.
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb24[0] = src_raw[2];  // B
    dst_rgb24[1] = src_raw[1];  // G
    dst_rgb24[2] = src_raw[0];  // R
    src_raw += 3;
    dst_rgb24 += 3;
  }
}
179
// Unpack 16 bit RGB565 (little endian) to 8 bit ARGB with opaque alpha.
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel, then split into 5:6:5 fields.
    uint32_t v = src_rgb565[0] | ((uint32_t)src_rgb565[1] << 8);
    uint32_t b = v & 0x1f;
    uint32_t g = (v >> 5) & 0x3f;
    uint32_t r = (v >> 11) & 0x1f;
    // Expand to 8 bits by replicating the top bits into the bottom.
    dst_argb[0] = (uint8_t)((b << 3) | (b >> 2));
    dst_argb[1] = (uint8_t)((g << 2) | (g >> 4));
    dst_argb[2] = (uint8_t)((r << 3) | (r >> 2));
    dst_argb[3] = 255u;
    src_rgb565 += 2;
    dst_argb += 4;
  }
}
196
// Unpack 16 bit ARGB1555 (little endian) to 8 bit ARGB.  The 1 bit
// alpha becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel, then split into 1:5:5:5 fields.
    uint32_t v = src_argb1555[0] | ((uint32_t)src_argb1555[1] << 8);
    uint32_t b = v & 0x1f;
    uint32_t g = (v >> 5) & 0x1f;
    uint32_t r = (v >> 10) & 0x1f;
    dst_argb[0] = (uint8_t)((b << 3) | (b >> 2));
    dst_argb[1] = (uint8_t)((g << 3) | (g >> 2));
    dst_argb[2] = (uint8_t)((r << 3) | (r >> 2));
    dst_argb[3] = (uint8_t)((v >> 15) ? 255u : 0u);
    src_argb1555 += 2;
    dst_argb += 4;
  }
}
214
// Unpack 16 bit ARGB4444 to 8 bit ARGB by duplicating each nibble
// (0xA -> 0xAA, i.e. n * 0x11).
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint32_t v = src_argb4444[0] | ((uint32_t)src_argb4444[1] << 8);
    dst_argb[0] = (uint8_t)((v & 0x0f) * 0x11);          // B
    dst_argb[1] = (uint8_t)(((v >> 4) & 0x0f) * 0x11);   // G
    dst_argb[2] = (uint8_t)(((v >> 8) & 0x0f) * 0x11);   // R
    dst_argb[3] = (uint8_t)(((v >> 12) & 0x0f) * 0x11);  // A
    src_argb4444 += 2;
    dst_argb += 4;
  }
}
232
// Convert a row of AR30 (2 bit A, 10:10:10 RGB, little endian dword)
// to 8 bit ARGB.  Takes the top 8 of each 10 bit channel.
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t argb = b | (g << 8) | (r << 16) | (a << 24);
    // Store via memcpy, matching the load: the previous cast store was
    // unaligned / strict-aliasing unsafe.
    memcpy(dst_argb, &argb, sizeof argb);
    dst_argb += 4;
    src_ar30 += 4;
  }
}
247
// Convert a row of AR30 to 8 bit ABGR (R and B swapped vs ARGB).
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t abgr = r | (g << 8) | (b << 16) | (a << 24);
    // Store via memcpy, matching the load: the previous cast store was
    // unaligned / strict-aliasing unsafe.
    memcpy(dst_abgr, &abgr, sizeof abgr);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}
262
// Swap the 10 bit R and B channels of AR30, keeping G and A in place.
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;  // G and A stay where they are.
    uint32_t r = (ar30 >> 20) & 0x3ff;
    uint32_t ab30 = r | ga | (b << 20);
    // Store via memcpy, matching the load: the previous cast store was
    // unaligned / strict-aliasing unsafe.
    memcpy(dst_ab30, &ab30, sizeof ab30);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}
276
// Drop the alpha channel: ARGB (B,G,R,A) to RGB24 (B,G,R).
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[0] = src_argb[0];  // B
    dst_rgb[1] = src_argb[1];  // G
    dst_rgb[2] = src_argb[2];  // R
    dst_rgb += 3;
    src_argb += 4;
  }
}
290
// Drop alpha and swap R/B: ARGB (B,G,R,A) to RAW (R,G,B).
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[0] = src_argb[2];  // R
    dst_rgb[1] = src_argb[1];  // G
    dst_rgb[2] = src_argb[0];  // B
    dst_rgb += 3;
    src_argb += 4;
  }
}
304
// Pack 8 bit ARGB down to 16 bit RGB565, two pixels per 32 bit store.
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    // Truncate each channel to 5:6:5 and pack a pair of pixels.
    uint32_t p0 = (uint32_t)(src_argb[0] >> 3) |
                  ((uint32_t)(src_argb[1] >> 2) << 5) |
                  ((uint32_t)(src_argb[2] >> 3) << 11);
    uint32_t p1 = (uint32_t)(src_argb[4] >> 3) |
                  ((uint32_t)(src_argb[5] >> 2) << 5) |
                  ((uint32_t)(src_argb[6] >> 3) << 11);
    WRITEWORD(dst_rgb, p0 | (p1 << 16));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    // Trailing odd pixel: 16 bit store.
    *(uint16_t*)(dst_rgb) =
        (uint16_t)((src_argb[0] >> 3) | ((src_argb[1] >> 2) << 5) |
                   ((src_argb[2] >> 3) << 11));
  }
}
326
327 // dither4 is a row of 4 values from 4x4 dither matrix.
328 // The 4x4 matrix contains values to increase RGB. When converting to
329 // fewer bits (565) this provides an ordered dither.
330 // The order in the 4x4 matrix in first byte is upper left.
331 // The 4 values are passed as an int, then referenced as an array, so
332 // endian will not affect order of the original matrix. But the dither4
333 // will containing the first pixel in the lower byte for little endian
334 // or the upper byte for big endian.
// Pack ARGB to RGB565 while adding an ordered dither before truncation.
// dither4 holds 4 bytes of the dither matrix row; it is indexed by
// (x & 3) so the pattern repeats every 4 pixels.  Each dither byte is
// added to B, G and R, clamped to 255, then the channel is truncated
// to 5/6/5 bits.  Two pixels are produced per loop iteration.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             const uint32_t dither4,
                             int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Byte-index into dither4; byte order of the int gives the matrix
    // order regardless of how the constant was built (see note above).
    int dither0 = ((const unsigned char*)(&dither4))[x & 3];
    int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
    uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
    uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
    uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
    *(uint16_t*)(dst_rgb + 0) = b0 | (g0 << 5) | (r0 << 11);
    *(uint16_t*)(dst_rgb + 2) = b1 | (g1 << 5) | (r1 << 11);
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    // Trailing odd pixel uses the dither entry for its column.
    int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
  }
}
362
// Pack 8 bit ARGB down to 16 bit ARGB1555 (1 bit alpha, 5 bits each
// for R, G, B), two pixels per iteration.
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    *(uint16_t*)(dst_rgb + 0) = (uint16_t)(
        (src_argb[0] >> 3) | ((src_argb[1] >> 3) << 5) |
        ((src_argb[2] >> 3) << 10) | ((src_argb[3] >> 7) << 15));
    *(uint16_t*)(dst_rgb + 2) = (uint16_t)(
        (src_argb[4] >> 3) | ((src_argb[5] >> 3) << 5) |
        ((src_argb[6] >> 3) << 10) | ((src_argb[7] >> 7) << 15));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    // Trailing odd pixel.
    *(uint16_t*)(dst_rgb) = (uint16_t)(
        (src_argb[0] >> 3) | ((src_argb[1] >> 3) << 5) |
        ((src_argb[2] >> 3) << 10) | ((src_argb[3] >> 7) << 15));
  }
}
387
// Pack 8 bit ARGB down to 16 bit ARGB4444 (4 bits per channel), two
// pixels per iteration.
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    *(uint16_t*)(dst_rgb + 0) = (uint16_t)(
        (src_argb[0] >> 4) | ((src_argb[1] >> 4) << 4) |
        ((src_argb[2] >> 4) << 8) | ((src_argb[3] >> 4) << 12));
    *(uint16_t*)(dst_rgb + 2) = (uint16_t)(
        (src_argb[4] >> 4) | ((src_argb[5] >> 4) << 4) |
        ((src_argb[6] >> 4) << 8) | ((src_argb[7] >> 4) << 12));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    // Trailing odd pixel.
    *(uint16_t*)(dst_rgb) = (uint16_t)(
        (src_argb[0] >> 4) | ((src_argb[1] >> 4) << 4) |
        ((src_argb[2] >> 4) << 8) | ((src_argb[3] >> 4) << 12));
  }
}
412
// Convert a row of 8 bit ABGR to AR30 (2 bit A, 10:10:10 RGB).
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Widen each 8 bit channel to 10 bits by replicating the top 2 bits.
    uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);  // alpha keeps only its top 2 bits
    uint32_t ar30 = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
    // Store via memcpy: the previous cast store was unaligned /
    // strict-aliasing unsafe (the AR30 readers already load this way).
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}
425
// Convert a row of 8 bit ARGB to AR30 (2 bit A, 10:10:10 RGB).
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Widen each 8 bit channel to 10 bits by replicating the top 2 bits.
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);  // alpha keeps only its top 2 bits
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    // Store via memcpy: the previous cast store was unaligned /
    // strict-aliasing unsafe (the AR30 readers already load this way).
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
438
// Widen 8 bit ARGB to 16 bit AR64 by replicating each byte into both
// halves of the 16 bit lane (0xAB -> 0xABAB, i.e. * 0x0101).
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_ar64[c] = (uint16_t)(src_argb[c] * 0x0101);
    }
    dst_ar64 += 4;
    src_argb += 4;
  }
}
450
// Widen 8 bit ARGB to 16 bit AB64, swapping R and B while replicating
// each byte into both halves of the lane.
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint16_t r = (uint16_t)(src_argb[2] * 0x0101);
    uint16_t g = (uint16_t)(src_argb[1] * 0x0101);
    uint16_t b = (uint16_t)(src_argb[0] * 0x0101);
    uint16_t a = (uint16_t)(src_argb[3] * 0x0101);
    dst_ab64[0] = r;
    dst_ab64[1] = g;
    dst_ab64[2] = b;
    dst_ab64[3] = a;
    dst_ab64 += 4;
    src_argb += 4;
  }
}
462
// Narrow 16 bit AR64 to 8 bit ARGB by keeping the high byte of each
// lane.
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (uint8_t)(src_ar64[c] >> 8);
    }
    dst_argb += 4;
    src_ar64 += 4;
  }
}
474
// Narrow 16 bit AB64 to 8 bit ARGB: keep the high byte of each lane
// and swap R and B back into ARGB order.
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t b = (uint8_t)(src_ab64[2] >> 8);
    uint8_t g = (uint8_t)(src_ab64[1] >> 8);
    uint8_t r = (uint8_t)(src_ab64[0] >> 8);
    uint8_t a = (uint8_t)(src_ab64[3] >> 8);
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    dst_argb += 4;
    src_ab64 += 4;
  }
}
486
487 // TODO(fbarchard): Make shuffle compatible with SIMD versions
// Reorder the four 16 bit lanes of each AR64 pixel.  shuffler gives
// byte offsets (as for the 8 bit SIMD shuffles); only even entries are
// used and are halved to get 16 bit lane indices.
void AR64ShuffleRow_C(const uint8_t* src_ar64,
                      uint8_t* dst_ar64,
                      const uint8_t* shuffler,
                      int width) {
  const uint16_t* src16 = (const uint16_t*)src_ar64;
  uint16_t* dst16 = (uint16_t*)dst_ar64;
  int idx[4];
  int i;
  for (i = 0; i < 4; ++i) {
    idx[i] = shuffler[i * 2] / 2;
  }
  // Two pixels are consumed per byte-width unit, hence width / 2.
  for (i = 0; i < width / 2; ++i) {
    // Read all four lanes before writing to support in-place use.
    uint16_t lane0 = src16[idx[0]];
    uint16_t lane1 = src16[idx[1]];
    uint16_t lane2 = src16[idx[2]];
    uint16_t lane3 = src16[idx[3]];
    dst16[0] = lane0;
    dst16[1] = lane1;
    dst16[2] = lane2;
    dst16[3] = lane3;
    src16 += 4;
    dst16 += 4;
  }
}
514
515 #ifdef LIBYUV_RGB7
516 // Old 7 bit math for compatibility on unsupported platforms.
// 7 bit fixed point BT.601 luma with +16 offset (legacy compatibility).
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  int luma7 = 33 * r + 65 * g + 13 * b;
  return (luma7 >> 7) + 16;
}
520 #else
521 // 8 bit
522 // Intel SSE/AVX uses the following equivalent formula
523 // 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
524 // return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
525 // 0x7e80) >> 8;
526
// 8 bit fixed point BT.601 luma; 0x1080 folds in the +16 offset and
// rounding (see equivalence note above).
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  int luma8 = 66 * r + 129 * g + 25 * b;
  return (luma8 + 0x1080) >> 8;
}
530 #endif
531
532 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
533
534 // LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
535 #ifdef LIBYUV_RGBTOU_TRUNCATE
// Truncating (unrounded) chroma, matching x86 SIMD output; 0x8000
// biases to the 128 chroma midpoint.
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  int u = 112 * b - 74 * g - 38 * r;
  return (u + 0x8000) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  int v = 112 * r - 94 * g - 18 * b;
  return (v + 0x8000) >> 8;
}
542 #else
543 // TODO(fbarchard): Add rounding to x86 SIMD and use this
// Rounded chroma: 0x8080 is the 128 midpoint bias plus 0x80 rounding.
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  int u = 112 * b - 74 * g - 38 * r;
  return (u + 0x8080) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  int v = 112 * r - 94 * g - 18 * b;
  return (v + 0x8080) >> 8;
}
550 #endif
551
552 // LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
553 #if !defined(LIBYUV_ARGBTOUV_PAVGB)
// Chroma from 2x-scaled (summed, halved) channel values: coefficients
// are halved so the result matches RGBToU/RGBToV on averaged input.
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
  int u = (112 / 2) * b - (74 / 2) * g - (38 / 2) * r;
  return (u + 0x8080) >> 8;
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
  int v = (112 / 2) * r - (94 / 2) * g - (18 / 2) * b;
  return (v + 0x8080) >> 8;
}
560 #endif
561
562 // ARGBToY_C and ARGBToUV_C
563 // Intel version mimic SSE/AVX which does 2 pavgb
564 #if LIBYUV_ARGBTOUV_PAVGB
// Generates NAME##ToYRow_C and NAME##ToUVRow_C for a packed RGB format.
// R/G/B are the byte offsets of each channel within a pixel and BPP is
// bytes per pixel.  UV is subsampled 2x2: each 2x2 quad (two pixels on
// two rows) is averaged with nested rounding averages (AVGB), matching
// the pavgb behavior of the x86 SIMD code, then converted with
// RGBToU/RGBToV.  A trailing odd column is averaged vertically only.
// NOTE: no // comments inside the macro body — they would break the
// backslash line continuations.
#define MAKEROWY(NAME, R, G, B, BPP)                                         \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) {   \
    int x;                                                                   \
    for (x = 0; x < width; ++x) {                                            \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);                 \
      src_rgb += BPP;                                                        \
      dst_y += 1;                                                            \
    }                                                                        \
  }                                                                          \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,           \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                      \
    int x;                                                                   \
    for (x = 0; x < width - 1; x += 2) {                                     \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                       \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));          \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                       \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));          \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                       \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));          \
      dst_u[0] = RGBToU(ar, ag, ab);                                         \
      dst_v[0] = RGBToV(ar, ag, ab);                                         \
      src_rgb += BPP * 2;                                                    \
      src_rgb1 += BPP * 2;                                                   \
      dst_u += 1;                                                            \
      dst_v += 1;                                                            \
    }                                                                        \
    if (width & 1) {                                                         \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                            \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                            \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                            \
      dst_u[0] = RGBToU(ar, ag, ab);                                         \
      dst_v[0] = RGBToV(ar, ag, ab);                                         \
    }                                                                        \
  }
600 #else
601 // ARM version does sum / 2 then multiply by 2x smaller coefficients
// ARM-flavored variant of MAKEROWY: the 2x2 quad is summed and halved
// (rounding) to a 9 bit value, then converted with the half-sized
// coefficients in RGB2xToU/RGB2xToV.  The trailing odd column keeps
// the unhalved vertical sum, which RGB2xToU/V expect.
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                 \
    for (x = 0; x < width; ++x) {                                          \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                      \
      dst_y += 1;                                                          \
    }                                                                      \
  }                                                                        \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
    int x;                                                                 \
    for (x = 0; x < width - 1; x += 2) {                                   \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +         \
                     src_rgb1[B + BPP] + 1) >>                             \
                    1;                                                     \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +         \
                     src_rgb1[G + BPP] + 1) >>                             \
                    1;                                                     \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +         \
                     src_rgb1[R + BPP] + 1) >>                             \
                    1;                                                     \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                     \
      src_rgb += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                 \
      dst_u += 1;                                                          \
      dst_v += 1;                                                          \
    }                                                                      \
    if (width & 1) {                                                       \
      uint16_t ab = src_rgb[B] + src_rgb1[B];                              \
      uint16_t ag = src_rgb[G] + src_rgb1[G];                              \
      uint16_t ar = src_rgb[R] + src_rgb1[R];                              \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                     \
    }                                                                      \
  }
640 #endif
641
// Instantiate Y and UV row converters for each packed layout:
// arguments are (name, R offset, G offset, B offset, bytes per pixel).
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
649
650 // JPeg uses a variation on BT.601-1 full range
651 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b
652 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
653 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
654 // BT.601 Mpeg range uses:
655 // b 0.1016 * 255 = 25.908 = 25
656 // g 0.5078 * 255 = 129.489 = 129
657 // r 0.2578 * 255 = 65.739 = 66
658 // JPeg 7 bit Y (deprecated)
659 // b 0.11400 * 128 = 14.592 = 15
660 // g 0.58700 * 128 = 75.136 = 75
661 // r 0.29900 * 128 = 38.272 = 38
662 // JPeg 8 bit Y:
663 // b 0.11400 * 256 = 29.184 = 29
664 // g 0.58700 * 256 = 150.272 = 150
665 // r 0.29900 * 256 = 76.544 = 77
666 // JPeg 8 bit U:
667 // b 0.50000 * 255 = 127.5 = 127
668 // g -0.33126 * 255 = -84.4713 = -84
669 // r -0.16874 * 255 = -43.0287 = -43
670 // JPeg 8 bit V:
671 // b -0.08131 * 255 = -20.73405 = -20
672 // g -0.41869 * 255 = -106.76595 = -107
673 // r 0.50000 * 255 = 127.5 = 127
674
675 #ifdef LIBYUV_RGB7
676 // Old 7 bit math for compatibility on unsupported platforms.
// 7 bit fixed point JPeg (full range) luma, legacy compatibility.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  int luma7 = 38 * r + 75 * g + 15 * b;
  return (luma7 + 64) >> 7;
}
680 #else
681 // 8 bit
// 8 bit fixed point JPeg (full range) luma with rounding.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  int luma8 = 77 * r + 150 * g + 29 * b;
  return (luma8 + 128) >> 8;
}
685 #endif
686
687 #if defined(LIBYUV_ARGBTOUV_PAVGB)
// JPeg (full range) chroma with rounding; 0x8080 biases to the 128
// midpoint and rounds.
static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  int u = 127 * b - 84 * g - 43 * r;
  return (u + 0x8080) >> 8;
}
static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  int v = 127 * r - 107 * g - 20 * b;
  return (v + 0x8080) >> 8;
}
694 #else
// JPeg chroma from 2x-scaled (summed, halved) channel values;
// coefficients are halved to compensate.
static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  int u = (127 / 2) * b - (84 / 2) * g - (43 / 2) * r;
  return (u + 0x8080) >> 8;
}
static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
  int v = (127 / 2) * r - (107 / 2) * g - (20 / 2) * b;
  return (v + 0x8080) >> 8;
}
701 #endif
702
703 // ARGBToYJ_C and ARGBToUVJ_C
704 // Intel version mimic SSE/AVX which does 2 pavgb
705 #if LIBYUV_ARGBTOUV_PAVGB
// JPeg (full range) version of MAKEROWY: generates NAME##ToYJRow_C and
// NAME##ToUVJRow_C.  Same 2x2 pavgb-style subsampling as MAKEROWY but
// converts with the full range RGBToYJ/RGBToUJ/RGBToVJ coefficients.
// NOTE: no // comments inside the macro body — they would break the
// backslash line continuations.
#define MAKEROWYJ(NAME, R, G, B, BPP)                                        \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) {  \
    int x;                                                                   \
    for (x = 0; x < width; ++x) {                                            \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);                \
      src_rgb += BPP;                                                        \
      dst_y += 1;                                                            \
    }                                                                        \
  }                                                                          \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,          \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {         \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                      \
    int x;                                                                   \
    for (x = 0; x < width - 1; x += 2) {                                     \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                       \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));          \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                       \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));          \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                       \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));          \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                        \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                        \
      src_rgb += BPP * 2;                                                    \
      src_rgb1 += BPP * 2;                                                   \
      dst_u += 1;                                                            \
      dst_v += 1;                                                            \
    }                                                                        \
    if (width & 1) {                                                         \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                            \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                            \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                            \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                        \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                        \
    }                                                                        \
  }
741 #else
742 // ARM version does sum / 2 then multiply by 2x smaller coefficients
// ARM-flavored JPeg variant: 2x2 quad is summed and halved (rounding),
// then converted with the half-sized RGB2xToUJ/RGB2xToVJ coefficients.
// The trailing odd column keeps the unhalved vertical sum.
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                  \
    for (x = 0; x < width; ++x) {                                           \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                       \
      dst_y += 1;                                                           \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
    int x;                                                                  \
    for (x = 0; x < width - 1; x += 2) {                                    \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +          \
                     src_rgb1[B + BPP] + 1) >>                              \
                    1;                                                      \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +          \
                     src_rgb1[G + BPP] + 1) >>                              \
                    1;                                                      \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +          \
                     src_rgb1[R + BPP] + 1) >>                              \
                    1;                                                      \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                     \
      src_rgb += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                  \
      dst_u += 1;                                                           \
      dst_v += 1;                                                           \
    }                                                                       \
    if (width & 1) {                                                        \
      uint16_t ab = (src_rgb[B] + src_rgb1[B]);                             \
      uint16_t ag = (src_rgb[G] + src_rgb1[G]);                             \
      uint16_t ar = (src_rgb[R] + src_rgb1[R]);                             \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                     \
    }                                                                       \
  }
781
782 #endif
783
// Instantiate JPeg (full range) YJ and UVJ row converters:
// arguments are (name, R offset, G offset, B offset, bytes per pixel).
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
789
// Convert a row of RGB565 to 8 bit BT.601 luma.
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel and split into 5:6:5 fields.
    uint32_t v = src_rgb565[0] | ((uint32_t)src_rgb565[1] << 8);
    uint32_t b = v & 0x1f;
    uint32_t g = (v >> 5) & 0x3f;
    uint32_t r = (v >> 11) & 0x1f;
    // Expand each field to 8 bits by bit replication, then take luma.
    uint8_t b8 = (uint8_t)((b << 3) | (b >> 2));
    uint8_t g8 = (uint8_t)((g << 2) | (g >> 4));
    uint8_t r8 = (uint8_t)((r << 3) | (r >> 2));
    dst_y[0] = RGBToY(r8, g8, b8);
    src_rgb565 += 2;
    dst_y += 1;
  }
}
804
// Convert a row of ARGB1555 to 8 bit BT.601 luma (alpha ignored).
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel and split into 5:5:5 color fields.
    uint32_t v = src_argb1555[0] | ((uint32_t)src_argb1555[1] << 8);
    uint32_t b = v & 0x1f;
    uint32_t g = (v >> 5) & 0x1f;
    uint32_t r = (v >> 10) & 0x1f;
    // Expand each field to 8 bits by bit replication, then take luma.
    uint8_t b8 = (uint8_t)((b << 3) | (b >> 2));
    uint8_t g8 = (uint8_t)((g << 3) | (g >> 2));
    uint8_t r8 = (uint8_t)((r << 3) | (r >> 2));
    dst_y[0] = RGBToY(r8, g8, b8);
    src_argb1555 += 2;
    dst_y += 1;
  }
}
819
// Convert a row of ARGB4444 to 8 bit BT.601 luma (alpha ignored).
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Assemble the 16 bit pixel and split into 4 bit color fields.
    uint32_t v = src_argb4444[0] | ((uint32_t)src_argb4444[1] << 8);
    // Duplicate each nibble to expand to 8 bits (0xA -> 0xAA).
    uint8_t b8 = (uint8_t)((v & 0x0f) * 0x11);
    uint8_t g8 = (uint8_t)(((v >> 4) & 0x0f) * 0x11);
    uint8_t r8 = (uint8_t)(((v >> 8) & 0x0f) * 0x11);
    dst_y[0] = RGBToY(r8, g8, b8);
    src_argb4444 += 2;
    dst_y += 1;
  }
}
834
// Convert two rows of RGB565 to one row of 2x2 subsampled U and V.
// Each output U/V pair comes from a 2x2 quad (two pixels on each of
// two rows): fields are unpacked, expanded to 8 bits by bit
// replication, averaged (pavgb-style on x86, sum/2 elsewhere), then
// converted to chroma.  A trailing odd column averages vertically only.
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  // Second input row, src_stride_rgb565 bytes below the first.
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack 5:6:5 fields for the 2x2 quad (0/1 = top, 2/3 = bottom).
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b1 = src_rgb565[2] & 0x1f;
    uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
    uint8_t r1 = src_rgb565[3] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;
    uint8_t b3 = next_rgb565[2] & 0x1f;
    uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
    uint8_t r3 = next_rgb565[3] >> 3;

    // Expand each field to 8 bits by replicating top bits into the
    // bottom.
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 2) | (g1 >> 4);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 2) | (g3 >> 4);
    r3 = (r3 << 3) | (r3 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    // Nested rounding averages match x86 SIMD pavgb subsampling.
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Sum of 4 halved to a 2x-scaled value for RGB2xToU/V.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Trailing odd column: average the two vertical neighbors only.
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Unhalved vertical sum is already 2x-scaled for RGB2xToU/V.
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
918
// Subsample a 2-row strip of ARGB1555 pixels to one U and one V sample per
// 2x2 block, mimicking the SIMD row functions bit-exactly.
// src_stride_argb1555 locates the second row of each 2x2 block.
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack 5-bit B, G, R from two pixels on each row.
    // Little-endian layout: byte0 = gggbbbbb, byte1 = arrrrrgg.
    // The alpha bit (top of byte 1) is ignored.
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b1 = src_argb1555[2] & 0x1f;
    uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b3 = next_argb1555[2] & 0x1f;
    uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;

    // Expand each 5-bit channel to 8 bits by replicating the top bits:
    // abcde -> abcdeabc.
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 3) | (g1 >> 2);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 3) | (g3 >> 2);
    r3 = (r3 << 3) | (r3 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    // Average the 2x2 block with cascaded rounding averages (mimics pavgb).
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Sum the 4 samples into a rounded 2x (9-bit) value for RGB2xToU/V.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  // Odd width: the final column is averaged vertically only.
  if (width & 1) {
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
1002
// Subsample a 2-row strip of ARGB4444 pixels to one U and one V sample per
// 2x2 block, mimicking the SIMD row functions bit-exactly.
// src_stride_argb4444 locates the second row of each 2x2 block.
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack 4-bit B, G, R from two pixels on each row.
    // Little-endian layout: byte0 = ggggbbbb, byte1 = aaaarrrr.
    // The alpha nibble is ignored.
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b1 = src_argb4444[2] & 0x0f;
    uint8_t g1 = src_argb4444[2] >> 4;
    uint8_t r1 = src_argb4444[3] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b3 = next_argb4444[2] & 0x0f;
    uint8_t g3 = next_argb4444[2] >> 4;
    uint8_t r3 = next_argb4444[3] & 0x0f;

    // Expand each 4-bit channel to 8 bits by duplicating the nibble.
    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b1 = (b1 << 4) | b1;
    g1 = (g1 << 4) | g1;
    r1 = (r1 << 4) | r1;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;
    b3 = (b3 << 4) | b3;
    g3 = (g3 << 4) | g3;
    r3 = (r3 << 4) | r3;

#if LIBYUV_ARGBTOUV_PAVGB
    // Average the 2x2 block with cascaded rounding averages (mimics pavgb).
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Sum the 4 samples into a rounded 2x (9-bit) value for RGB2xToU/V.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb4444 += 4;
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  // Odd width: the final column is averaged vertically only.
  if (width & 1) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;

    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
1086
// Compute one U and one V sample per ARGB pixel (4:4:4 — no subsampling).
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    *dst_u++ = RGBToU(red, green, blue);
    *dst_v++ = RGBToV(red, green, blue);
    src_argb += 4;  // Alpha byte is skipped.
  }
}
1103
// Replace B, G and R of each pixel with its luma (via RGBToYJ, defined
// earlier in this file); alpha passes through unchanged.
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t luma = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
1114
// Convert a row of image to Sepia tone, in place. Alpha is untouched.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = dst_argb[0];
    const int g = dst_argb[1];
    const int r = dst_argb[2];
    // 7-bit fixed-point sepia weights.
    const int sepia_b = (b * 17 + g * 68 + r * 35) >> 7;
    const int sepia_g = (b * 22 + g * 88 + r * 45) >> 7;
    const int sepia_r = (b * 24 + g * 98 + r * 50) >> 7;
    // Blue cannot overflow: its weights sum to 120 < 128, so the shifted
    // result is always <= 239.  Green and red need clamping.
    dst_argb[0] = (uint8_t)sepia_b;
    dst_argb[1] = clamp255(sepia_g);
    dst_argb[2] = clamp255(sepia_r);
    dst_argb += 4;
  }
}
1132
// Apply color matrix to a row of image. Matrix is signed.
// TODO(fbarchard): Consider adding rounding (+32).
// matrix_argb is 16 signed 6-bit fixed-point coefficients, row-major:
// 4 taps (B, G, R, A) per output channel, in B, G, R, A output order.
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          const int8_t* matrix_argb,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = src_argb[0];
    int g = src_argb[1];
    int r = src_argb[2];
    int a = src_argb[3];
    // Each output channel is a dot product of the pixel with one matrix
    // row, truncated from 6-bit fixed point.
    int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
              a * matrix_argb[3]) >>
             6;
    int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
              a * matrix_argb[7]) >>
             6;
    int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
              a * matrix_argb[11]) >>
             6;
    int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
              a * matrix_argb[15]) >>
             6;
    dst_argb[0] = Clamp(sb);
    dst_argb[1] = Clamp(sg);
    dst_argb[2] = Clamp(sr);
    dst_argb[3] = Clamp(sa);
    src_argb += 4;
    dst_argb += 4;
  }
}
1165
// Apply color table to a row of image, in place.  table_argb holds 256
// four-byte entries; channel c of a pixel is replaced by byte c of the
// entry selected by that channel's value.
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  uint8_t* pixel = dst_argb;
  uint8_t* const row_end = dst_argb + width * 4;
  for (; pixel < row_end; pixel += 4) {
    const int b = pixel[0];
    const int g = pixel[1];
    const int r = pixel[2];
    const int a = pixel[3];
    pixel[0] = table_argb[b * 4 + 0];
    pixel[1] = table_argb[g * 4 + 1];
    pixel[2] = table_argb[r * 4 + 2];
    pixel[3] = table_argb[a * 4 + 3];
  }
}
1183
// Apply color table to a row of image, in place.  Same as
// ARGBColorTableRow_C but alpha is left untouched.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  uint8_t* pixel = dst_argb;
  uint8_t* const row_end = dst_argb + width * 4;
  for (; pixel < row_end; pixel += 4) {
    const int b = pixel[0];
    const int g = pixel[1];
    const int r = pixel[2];
    pixel[0] = table_argb[b * 4 + 0];
    pixel[1] = table_argb[g * 4 + 1];
    pixel[2] = table_argb[r * 4 + 2];
  }
}
1199
// Quantize B, G and R of each pixel into evenly spaced buckets, in place.
// scale is a 16.16 fixed-point multiplier (typically ~65536/interval_size);
// the bucket index times interval_size plus interval_offset is stored.
// Alpha is untouched.  No clamping — callers choose parameters that fit.
void ARGBQuantizeRow_C(uint8_t* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int channel;
    for (channel = 0; channel < 3; ++channel) {
      const int value = dst_argb[channel];
      dst_argb[channel] =
          (uint8_t)((value * scale >> 16) * interval_size + interval_offset);
    }
    dst_argb += 4;
  }
}
1216
// Widen an 8-bit value to 16 bits by repetition, so 0xff acts as ~1.0.
// Parenthesized per CERT PRE01-C/PRE02-C to avoid precedence surprises.
#define REPEAT8(v) ((v) | ((v) << 8))
// 16x16 -> top 8 bits of the product (mimics a high multiply).
#define SHADE(f, v) (((v) * (f)) >> 24)

// Scale each channel of src_argb by the matching channel of |value|
// (B in the low byte through A in the high byte).  A channel value of
// 0xff is an identity scale; 0x00 zeroes the channel.
void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);

  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
1245
// Widen an 8-bit value to 16 bits by repetition, so 0xff acts as ~1.0.
// Parenthesized per CERT PRE01-C/PRE02-C to avoid precedence surprises.
#define REPEAT8(v) ((v) | ((v) << 8))
// 16-bit value times 8-bit scale, keeping the top 8 bits of the product
// (mimics SSE pmulhuw-style multiply).
#define SHADE(f, v) (((v) * (f)) >> 16)

// Multiply two ARGB rows channel-by-channel into dst_argb.
void ARGBMultiplyRow_C(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    const uint32_t b_scale = src_argb1[0];
    const uint32_t g_scale = src_argb1[1];
    const uint32_t r_scale = src_argb1[2];
    const uint32_t a_scale = src_argb1[3];
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
1274
// Saturating add of two channels.  Parenthesized per CERT PRE01-C.
#define SHADE(f, v) clamp255((v) + (f))

// Add two ARGB rows channel-by-channel with saturation at 255.
void ARGBAddRow_C(const uint8_t* src_argb,
                  const uint8_t* src_argb1,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = src_argb[0];
    const int g = src_argb[1];
    const int r = src_argb[2];
    const int a = src_argb[3];
    const int b_add = src_argb1[0];
    const int g_add = src_argb1[1];
    const int r_add = src_argb1[2];
    const int a_add = src_argb1[3];
    dst_argb[0] = SHADE(b, b_add);
    dst_argb[1] = SHADE(g, g_add);
    dst_argb[2] = SHADE(r, r_add);
    dst_argb[3] = SHADE(a, a_add);
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE
1301
// Saturating subtract (floor at 0).  Parenthesized per CERT PRE01-C.
#define SHADE(f, v) clamp0((f) - (v))

// Subtract src_argb1 from src_argb channel-by-channel, clamping at 0.
void ARGBSubtractRow_C(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = src_argb[0];
    const int g = src_argb[1];
    const int r = src_argb[2];
    const int a = src_argb[3];
    const int b_sub = src_argb1[0];
    const int g_sub = src_argb1[1];
    const int r_sub = src_argb1[2];
    const int a_sub = src_argb1[3];
    dst_argb[0] = SHADE(b, b_sub);
    dst_argb[1] = SHADE(g, g_sub);
    dst_argb[2] = SHADE(r, r_sub);
    dst_argb[3] = SHADE(a, a_sub);
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE
1328
// Sobel functions which mimics SSSE3.
// Horizontal Sobel from three consecutive rows:
//   |(y0[i]-y0[i+2]) + 2*(y1[i]-y1[i+2]) + (y2[i]-y2[i+2])|, clamped to 255.
// NOTE(review): reads src_y*[i + 2], i.e. 2 bytes beyond |width| on each
// row — callers must guarantee those bytes are readable.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int a = src_y0[i];
    int b = src_y1[i];
    int c = src_y2[i];
    int a_sub = src_y0[i + 2];
    int b_sub = src_y1[i + 2];
    int c_sub = src_y2[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
    // 1-2-1 weighted gradient, absolute value, saturated to 8 bits.
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    dst_sobelx[i] = (uint8_t)(clamp255(sobel));
  }
}
1350
// Vertical Sobel from two rows, using columns i..i+2:
//   |(y0[i]-y1[i]) + 2*(y0[i+1]-y1[i+1]) + (y0[i+2]-y1[i+2])|, clamped.
// NOTE(review): reads 2 bytes beyond |width| on each row — callers must
// guarantee those bytes are readable.
void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int a = src_y0[i + 0];
    int b = src_y0[i + 1];
    int c = src_y0[i + 2];
    int a_sub = src_y1[i + 0];
    int b_sub = src_y1[i + 1];
    int c_sub = src_y1[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
    // 1-2-1 weighted gradient, absolute value, saturated to 8 bits.
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    dst_sobely[i] = (uint8_t)(clamp255(sobel));
  }
}
1370
// Combine X and Y Sobel magnitudes into a grey, opaque ARGB pixel.
void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int magnitude = clamp255(src_sobelx[i] + src_sobely[i]);
    dst_argb[0] = (uint8_t)magnitude;
    dst_argb[1] = (uint8_t)magnitude;
    dst_argb[2] = (uint8_t)magnitude;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
1387
// Combine X and Y Sobel magnitudes into a single grey plane.
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint8_t)clamp255(src_sobelx[i] + src_sobely[i]);
  }
}
1400
// Pack Sobel components into ARGB: B = Y edge, G = clamped sum,
// R = X edge, A = opaque.
void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int sx = src_sobelx[i];
    const int sy = src_sobely[i];
    dst_argb[0] = (uint8_t)sy;
    dst_argb[1] = (uint8_t)clamp255(sx + sy);
    dst_argb[2] = (uint8_t)sx;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
1417
// Duplicate each luma sample into B, G and R with opaque alpha.
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t luma = src_y[i];
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
1429
// Macros to create SIMD specific yuv to rgb conversion constants.

// clang-format off

#if defined(__aarch64__) || defined(__arm__)
// Bias values include subtract 128 from U and V, bias from Y and rounding.
// For B and R bias is negative. For G bias is positive.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                             \
  {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},                    \
   {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
    0, 0}}
#else
// Non-ARM layout: each coefficient is broadcast (UB) or interleaved in
// pairs (UG/VG, 0/VR) across the vector-width tables consumed by the
// x86 SIMD paths.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                     \
  {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,          \
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},         \
   {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,  \
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
   {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,          \
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},         \
   {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
   {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif

// clang-format on

// Instantiates kYuv<name>Constants plus kYvu<name>Constants, the latter
// with U and V roles swapped (UB<->VR, UG<->VG).
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR)            \
  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR);                   \
  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
1460
1461 // TODO(fbarchard): Generate SIMD structures from float matrix.
1462
1463 // BT.601 limited range YUV to RGB reference
1464 // R = (Y - 16) * 1.164 + V * 1.596
1465 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
1466 // B = (Y - 16) * 1.164 + U * 2.018
1467 // KR = 0.299; KB = 0.114
1468
1469 // U and V contributions to R,G,B.
1470 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
1471 #define UB 129 /* round(2.018 * 64) */
1472 #else
1473 #define UB 128 /* max(128, round(2.018 * 64)) */
1474 #endif
1475 #define UG 25 /* round(0.391 * 64) */
1476 #define VG 52 /* round(0.813 * 64) */
1477 #define VR 102 /* round(1.596 * 64) */
1478
1479 // Y contribution to R,G,B. Scale and bias.
1480 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1481 #define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1482
MAKEYUVCONSTANTS(I601,YG,YB,UB,UG,VG,VR)1483 MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
1484
1485 #undef YG
1486 #undef YB
1487 #undef UB
1488 #undef UG
1489 #undef VG
1490 #undef VR
1491
1492 // BT.601 full range YUV to RGB reference (aka JPEG)
1493 // * R = Y + V * 1.40200
1494 // * G = Y - U * 0.34414 - V * 0.71414
1495 // * B = Y + U * 1.77200
1496 // KR = 0.299; KB = 0.114
1497
1498 // U and V contributions to R,G,B.
1499 #define UB 113 /* round(1.77200 * 64) */
1500 #define UG 22 /* round(0.34414 * 64) */
1501 #define VG 46 /* round(0.71414 * 64) */
1502 #define VR 90 /* round(1.40200 * 64) */
1503
1504 // Y contribution to R,G,B. Scale and bias.
1505 #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
1506 #define YB 32 /* 64 / 2 */
1507
1508 MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
1509
1510 #undef YG
1511 #undef YB
1512 #undef UB
1513 #undef UG
1514 #undef VG
1515 #undef VR
1516
1517 // BT.709 limited range YUV to RGB reference
1518 // R = (Y - 16) * 1.164 + V * 1.793
1519 // G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
1520 // B = (Y - 16) * 1.164 + U * 2.112
1521 // KR = 0.2126, KB = 0.0722
1522
1523 // U and V contributions to R,G,B.
1524 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
1525 #define UB 135 /* round(2.112 * 64) */
1526 #else
1527 #define UB 128 /* max(128, round(2.112 * 64)) */
1528 #endif
1529 #define UG 14 /* round(0.213 * 64) */
1530 #define VG 34 /* round(0.533 * 64) */
1531 #define VR 115 /* round(1.793 * 64) */
1532
1533 // Y contribution to R,G,B. Scale and bias.
1534 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1535 #define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1536
1537 MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
1538
1539 #undef YG
1540 #undef YB
1541 #undef UB
1542 #undef UG
1543 #undef VG
1544 #undef VR
1545
1546 // BT.709 full range YUV to RGB reference
1547 // R = Y + V * 1.5748
1548 // G = Y - U * 0.18732 - V * 0.46812
1549 // B = Y + U * 1.8556
1550 // KR = 0.2126, KB = 0.0722
1551
1552 // U and V contributions to R,G,B.
1553 #define UB 119 /* round(1.8556 * 64) */
1554 #define UG 12 /* round(0.18732 * 64) */
1555 #define VG 30 /* round(0.46812 * 64) */
1556 #define VR 101 /* round(1.5748 * 64) */
1557
1558 // Y contribution to R,G,B. Scale and bias. (same as jpeg)
1559 #define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1560 #define YB 32 /* 64 / 2 */
1561
1562 MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
1563
1564 #undef YG
1565 #undef YB
1566 #undef UB
1567 #undef UG
1568 #undef VG
1569 #undef VR
1570
1571 // BT.2020 limited range YUV to RGB reference
1572 // R = (Y - 16) * 1.164384 + V * 1.67867
1573 // G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
1574 // B = (Y - 16) * 1.164384 + U * 2.14177
1575 // KR = 0.2627; KB = 0.0593
1576
1577 // U and V contributions to R,G,B.
1578 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
1579 #define UB 137 /* round(2.142 * 64) */
1580 #else
1581 #define UB 128 /* max(128, round(2.142 * 64)) */
1582 #endif
1583 #define UG 12 /* round(0.187326 * 64) */
1584 #define VG 42 /* round(0.65042 * 64) */
1585 #define VR 107 /* round(1.67867 * 64) */
1586
1587 // Y contribution to R,G,B. Scale and bias.
1588 #define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
1589 #define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
1590
1591 MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
1592
1593 #undef YG
1594 #undef YB
1595 #undef UB
1596 #undef UG
1597 #undef VG
1598 #undef VR
1599
1600 // BT.2020 full range YUV to RGB reference
1601 // R = Y + V * 1.474600
1602 // G = Y - U * 0.164553 - V * 0.571353
1603 // B = Y + U * 1.881400
1604 // KR = 0.2627; KB = 0.0593
1605
1606 #define UB 120 /* round(1.881400 * 64) */
1607 #define UG 11 /* round(0.164553 * 64) */
1608 #define VG 37 /* round(0.571353 * 64) */
1609 #define VR 94 /* round(1.474600 * 64) */
1610
1611 // Y contribution to R,G,B. Scale and bias. (same as jpeg)
1612 #define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1613 #define YB 32 /* 64 / 2 */
1614
1615 MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
1616
1617 #undef YG
1618 #undef YB
1619 #undef UB
1620 #undef UG
1621 #undef VG
1622 #undef VR
1623
1624 #undef BB
1625 #undef BG
1626 #undef BR
1627
1628 #undef MAKEYUVCONSTANTS
1629
#if defined(__aarch64__) || defined(__arm__)
// ARM variant: four UV coefficients plus precomputed per-channel biases
// (bb/bg/br fold the -128 chroma offset and Y bias into one constant).
#define LOAD_YUV_CONSTANTS           \
  int ub = yuvconstants->kUVCoeff[0]; \
  int vr = yuvconstants->kUVCoeff[1]; \
  int ug = yuvconstants->kUVCoeff[2]; \
  int vg = yuvconstants->kUVCoeff[3]; \
  int yg = yuvconstants->kRGBCoeffBias[0]; \
  int bb = yuvconstants->kRGBCoeffBias[1]; \
  int bg = yuvconstants->kRGBCoeffBias[2]; \
  int br = yuvconstants->kRGBCoeffBias[3]

// Produces b16/g16/r16 in 6-bit fixed point from y32 (16-bit y) and
// unsigned u/v; the bias constants already account for the chroma offset.
#define CALC_RGB16 \
  int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
  int b16 = y1 + (u * ub) - bb;            \
  int g16 = y1 + bg - (u * ug + v * vg);   \
  int r16 = y1 + (v * vr) - br
#else
#define LOAD_YUV_CONSTANTS          \
  int ub = yuvconstants->kUVToB[0]; \
  int ug = yuvconstants->kUVToG[0]; \
  int vg = yuvconstants->kUVToG[1]; \
  int vr = yuvconstants->kUVToR[1]; \
  int yg = yuvconstants->kYToRgb[0]; \
  int yb = yuvconstants->kYBiasToRgb[0]

// Non-ARM variant: subtracts the 0x80 chroma offset here via int8_t
// narrowing, matching the SIMD signed-byte math (relies on the usual
// two's-complement conversion — NOTE(review): implementation-defined
// pre-C23, but uniform on supported compilers).
#define CALC_RGB16                                \
  int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
  int8_t ui = u;                                  \
  int8_t vi = v;                                  \
  ui -= 0x80;                                     \
  vi -= 0x80;                                     \
  int b16 = y1 + (ui * ub);                       \
  int g16 = y1 - (ui * ug + vi * vg);             \
  int r16 = y1 + (vi * vr)
#endif
1665
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and clamps the result down to 8 bit B, G, R.
// (Previous comment said "leaves result as 16 bit", which described the
// _16 variants below, not this function.)
static __inline void YuvPixel(uint8_t y,
                              uint8_t u,
                              uint8_t v,
                              uint8_t* b,
                              uint8_t* g,
                              uint8_t* r,
                              const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  uint32_t y32 = y * 0x0101;  // Replicate y to 16 bits for the 16.16 scale.
  CALC_RGB16;
  *b = Clamp((int32_t)(b16) >> 6);  // Results are 6-bit fixed point.
  *g = Clamp((int32_t)(g16) >> 6);
  *r = Clamp((int32_t)(r16) >> 6);
}
1682
// Reads 8 bit YUV and leaves result as 16 bit (6-bit fixed point,
// unclamped) for callers that post-process before narrowing.
static __inline void YuvPixel8_16(uint8_t y,
                                  uint8_t u,
                                  uint8_t v,
                                  int* b,
                                  int* g,
                                  int* r,
                                  const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  uint32_t y32 = y * 0x0101;  // Replicate y to 16 bits for the 16.16 scale.
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1698
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit (lsb-aligned) YUV and leaves result as 16 bit.
static __inline void YuvPixel10_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  uint32_t y32 = y << 6;      // Promote 10-bit y to 16 bits.
  u = clamp255(u >> 2);       // Use the top 8 of the 10 chroma bits.
  v = clamp255(v >> 2);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1717
// C reference code that mimics the YUV 16 bit assembly.
// Reads 12 bit YUV and leaves result as 16 bit.
// NOTE(review): y is int16_t here (unlike the uint16_t 10-bit variant);
// `y << 4` assumes y holds a non-negative 12-bit value — left-shifting a
// negative value would be undefined behavior.  TODO confirm callers.
static __inline void YuvPixel12_16(int16_t y,
                                   int16_t u,
                                   int16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  uint32_t y32 = y << 4;      // Promote 12-bit y to 16 bits.
  u = clamp255(u >> 4);       // Use the top 8 of the 12 chroma bits.
  v = clamp255(v >> 4);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1736
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int b16;
  int g16;
  int r16;
  YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
  // Narrow the 6-bit fixed-point intermediates to clamped 8-bit channels.
  *b = Clamp(b16 >> 6);
  *g = Clamp(g16 >> 6);
  *r = Clamp(r16 >> 6);
}
1754
// C reference code that mimics the YUV 12 bit assembly.
// Reads 12 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel12(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int b16;
  int g16;
  int r16;
  YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
  // Narrow the 6-bit fixed-point intermediates to clamped 8-bit channels.
  *b = Clamp(b16 >> 6);
  *g = Clamp(g16 >> 6);
  *r = Clamp(r16 >> 6);
}
1772
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and clamps the result down to 8 bit.
static __inline void YuvPixel16_8(uint16_t y,
                                  uint16_t u,
                                  uint16_t v,
                                  uint8_t* b,
                                  uint8_t* g,
                                  uint8_t* r,
                                  const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  uint32_t y32 = y;           // y is already 16 bit.
  u = clamp255(u >> 8);       // Use the top 8 of the 16 chroma bits.
  v = clamp255(v >> 8);
  CALC_RGB16;
  *b = Clamp((int32_t)(b16) >> 6);  // Results are 6-bit fixed point.
  *g = Clamp((int32_t)(g16) >> 6);
  *r = Clamp((int32_t)(r16) >> 6);
}
1791
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 16 bit (6-bit fixed point,
// unclamped).
static __inline void YuvPixel16_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  uint32_t y32 = y;           // y is already 16 bit.
  u = clamp255(u >> 8);       // Use the top 8 of the 16 chroma bits.
  v = clamp255(v >> 8);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1810
1811 // C reference code that mimics the YUV assembly.
1812 // Reads 8 bit YUV and leaves result as 8 bit.
YPixel(uint8_t y,uint8_t * b,uint8_t * g,uint8_t * r,const struct YuvConstants * yuvconstants)1813 static __inline void YPixel(uint8_t y,
1814 uint8_t* b,
1815 uint8_t* g,
1816 uint8_t* r,
1817 const struct YuvConstants* yuvconstants) {
1818 #if defined(__aarch64__) || defined(__arm__)
1819 int yg = yuvconstants->kRGBCoeffBias[0];
1820 int ygb = yuvconstants->kRGBCoeffBias[4];
1821 #else
1822 int ygb = yuvconstants->kYBiasToRgb[0];
1823 int yg = yuvconstants->kYToRgb[0];
1824 #endif
1825 uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1826 *b = Clamp(((int32_t)(y1) + ygb) >> 6);
1827 *g = Clamp(((int32_t)(y1) + ygb) >> 6);
1828 *r = Clamp(((int32_t)(y1) + ygb) >> 6);
1829 }
1830
// Convert a row of 4:4:4 planar YUV to ARGB (one chroma pair per pixel).
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    YuvPixel(src_y[i], src_u[i], src_v[i], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    rgb_buf += 4;      // Advance 1 pixel.
  }
}
1848
// Also used for 420
// Convert a row of 4:2:2 planar YUV to ARGB.  Each U/V pair is shared by
// two horizontally adjacent Y samples.
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Handle the final pixel of an odd-width row.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1875
// 10 bit YUV to ARGB
// Convert a row of 10-bit (lsb-aligned) 4:2:2 planar YUV to 8-bit ARGB.
// Each U/V pair is shared by two horizontally adjacent Y samples.
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  // Handle the final pixel of an odd-width row.
  if (width & 1) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1902
// Convert one row of 10 bit I410 YUV (no chroma subsampling) to 8 bit ARGB.
void I410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    src_y += 1;
    src_u += 1;
    src_v += 1;
    rgb_buf += 4;  // Advance 1 pixels.
  }
}
1920
// Convert one row of 10 bit I210 YUV plus a 10 bit alpha plane to ARGB.
// Alpha is scaled from 10 bits to 8 by >> 2 and clamped to 255.
void I210AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);  // 10 bit alpha -> 8 bit.
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = clamp255(src_a[1] >> 2);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    src_a += 2;  // Alpha is per pixel, unlike the subsampled chroma.
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);
  }
}
1948
// Convert one row of 10 bit I410 YUV (no subsampling) plus a 10 bit alpha
// plane to ARGB.  Alpha is scaled from 10 bits to 8 by >> 2 and clamped.
void I410AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);  // 10 bit alpha -> 8 bit.
    src_y += 1;
    src_u += 1;
    src_v += 1;
    src_a += 1;
    rgb_buf += 4;  // Advance 1 pixels.
  }
}
1968
// 12 bit YUV to ARGB
// Convert one row of 12 bit I212 YUV (2x1 subsampled chroma) to 8 bit ARGB
// via YuvPixel12.
void I212ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1995
// Pack one pixel into AR30, a little endian 32 bit word with B in bits 0..9,
// G in 10..19, R in 20..29 and the 2 bit alpha (30..31) forced to 3 (opaque).
// NOTE(review): the "8 bit 10.6" wording below is unclear; the >> 4 plus
// Clamp10 takes a wider fixed-point value down to a clamped 10 bit range --
// confirm against the YuvPixel*_16 producers.
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = b >> 4;  // convert 8 bit 10.6 to 10 bit.
  g = g >> 4;
  r = r >> 4;
  b = Clamp10(b);
  g = Clamp10(g);
  r = Clamp10(r);
  ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
  (*(uint32_t*)rgb_buf) = ar30;
}
2007
// 10 bit YUV to 10 bit AR30
// Convert one row of 10 bit I210 YUV (2x1 subsampled chroma) to packed AR30.
// YuvPixel10_16 produces wide fixed-point B/G/R which StoreAR30 packs.
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2034
// 12 bit YUV to 10 bit AR30
// Convert one row of 12 bit I212 YUV (2x1 subsampled chroma) to packed AR30.
void I212ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Trailing pixel for odd widths.
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2061
// Convert one row of 10 bit I410 YUV (no chroma subsampling) to packed AR30.
void I410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width; ++x) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    src_y += 1;
    src_u += 1;
    src_v += 1;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
2081
// P210 has 10 bits in msb of 16 bit NV12 style layout.
// Convert one row of P210 (16 bit Y plane, interleaved 16 bit UV plane,
// 2x1 subsampled chroma) to 8 bit ARGB via YuvPixel16_8.
void P210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // src_uv[0] is U, src_uv[1] is V; shared by both pixels.
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    dst_argb[3] = 255;  // Opaque alpha.
    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], dst_argb + 4, dst_argb + 5,
                 dst_argb + 6, yuvconstants);
    dst_argb[7] = 255;
    src_y += 2;
    src_uv += 2;
    dst_argb += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    dst_argb[3] = 255;
  }
}
2106
// Convert one row of P410 (16 bit Y plane, interleaved 16 bit UV plane,
// no chroma subsampling) to 8 bit ARGB.
void P410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    dst_argb[3] = 255;  // Opaque alpha.
    src_y += 1;
    src_uv += 2;  // One U/V pair per pixel.
    dst_argb += 4;  // Advance 1 pixels.
  }
}
2122
// Convert one row of P210 (interleaved 16 bit UV, 2x1 subsampled chroma)
// to packed AR30.  YuvPixel16_16 yields wide B/G/R for StoreAR30.
void P210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30 + 4, b, g, r);
    src_y += 2;
    src_uv += 2;
    dst_ar30 += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
  }
}
2146
// Convert one row of P410 (interleaved 16 bit UV, no chroma subsampling)
// to packed AR30.
void P410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width; ++x) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
    src_y += 1;
    src_uv += 2;  // One U/V pair per pixel.
    dst_ar30 += 4;  // Advance 1 pixel.
  }
}
2164
// 8 bit YUV to 10 bit AR30
// Uses the same code as 10 bit YUV, but shifts the 8 bit values up to 10 bits.
// Convert one row of 8 bit I422 YUV (2x1 subsampled chroma) to packed AR30.
// YuvPixel8_16 widens the 8 bit samples so StoreAR30 can pack 10 bit output.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2192
// Convert one row of 8 bit I444 YUV plus an 8 bit alpha plane to ARGB.
// Alpha is copied through unchanged.
void I444AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];  // Pass-through alpha.
    src_y += 1;
    src_u += 1;
    src_v += 1;
    src_a += 1;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
2212
// Convert one row of 8 bit I422 YUV (2x1 subsampled chroma) plus an 8 bit
// alpha plane to ARGB.  Alpha is per pixel and copied through unchanged.
void I422AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    src_u += 1;
    src_v += 1;
    src_a += 2;  // Alpha is full resolution, unlike chroma.
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}
2240
// Convert one row of 8 bit I422 YUV (2x1 subsampled chroma) to RGB24:
// 3 bytes per pixel (B, G, R), no alpha channel.
void I422ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2263
// Convert one row of 8 bit I422 YUV to ARGB4444: a 16 bit word per pixel with
// 4 bits each of B (0..3), G (4..7), R (8..11) and alpha forced to 0xF.
void I422ToARGB4444Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Truncate each 8 bit channel to its top 4 bits.
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    *(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
    *(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
  }
}
2301
// Convert one row of 8 bit I422 YUV to ARGB1555: a 16 bit word per pixel with
// 5 bits each of B (0..4), G (5..9), R (10..14) and the alpha bit forced on.
void I422ToARGB1555Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Truncate each 8 bit channel to its top 5 bits.
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    *(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
    *(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
  }
}
2339
// Convert one row of 8 bit I422 YUV to RGB565: a 16 bit word per pixel with
// 5 bits B (0..4), 6 bits G (5..10) and 5 bits R (11..15), no alpha.
void I422ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Truncate to 5/6/5 bits per channel.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);  // for ubsan
    *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}
2377
// Convert one row of NV12 (Y plane plus interleaved UV plane, U first) to
// ARGB.  Each UV pair is shared by two pixels.
void NV12ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2401
// Convert one row of NV21 (Y plane plus interleaved VU plane, V first) to
// ARGB.  Note src_vu[1] is U and src_vu[0] is V -- swapped vs NV12.
void NV21ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_vu,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2425
// Convert one row of NV12 (interleaved UV, U first) to RGB24 (3 bytes per
// pixel: B, G, R).
void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_uv += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2446
// Convert one row of NV21 (interleaved VU, V first) to RGB24 (3 bytes per
// pixel: B, G, R).  U/V order is swapped relative to NV12.
void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_vu += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2467
// Convert one row of NV12 (interleaved UV, U first) to RGB565 (16 bit word
// per pixel: 5 bits B, 6 bits G, 5 bits R).
void NV12ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    // Truncate to 5/6/5 bits per channel.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);
    *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}
2503
// Convert one row of packed YUY2 (byte order Y0 U Y1 V, 4 bytes per 2 pixels)
// to ARGB.
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Bytes: [0]=Y0 [1]=U [2]=Y1 [3]=V.
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2525
// Convert one row of packed UYVY (byte order U Y0 V Y1, 4 bytes per 2 pixels)
// to ARGB.
void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Bytes: [0]=U [1]=Y0 [2]=V [3]=Y1.
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2547
// Convert one row of 8 bit I422 YUV to RGBA byte order: alpha is stored in
// byte 0 and B/G/R in bytes 1..3 (shifted one byte versus ARGB output).
void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;  // Opaque alpha in the first byte.
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
             rgb_buf + 7, yuvconstants);
    rgb_buf[4] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;
  }
}
2573
// Convert one row of 8 bit luma-only I400 to ARGB using YPixel (Y expansion
// with the given constants); B, G and R receive the same expanded value.
void I400ToARGBRow_C(const uint8_t* src_y,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2592
// Reverse a row of bytes: dst[i] receives src[width - 1 - i].
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src[width - 1 - i];
  }
}
2605
// Reverse a row of interleaved UV pairs, keeping each pair's byte order:
// output pair i is input pair (width - 1 - i).
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t* pair = src_uv + ((width - 1 - i) << 1);
    dst_uv[2 * i + 0] = pair[0];
    dst_uv[2 * i + 1] = pair[1];
  }
}
2616
// Reverse a row of interleaved UV pairs while splitting them into separate
// U and V planes: dst_u[i]/dst_v[i] come from input pair (width - 1 - i).
void MirrorSplitUVRow_C(const uint8_t* src_uv,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t* pair = src_uv + ((width - 1 - i) << 1);
    dst_u[i] = pair[0];
    dst_v[i] = pair[1];
  }
}
2635
// Reverse a row of 4-byte ARGB pixels by copying whole 32 bit words.
// Buffers are accessed as uint32_t, matching the original implementation.
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  const uint32_t* src_pixels = (const uint32_t*)(src);
  uint32_t* dst_pixels = (uint32_t*)(dst);
  for (i = 0; i < width; ++i) {
    dst_pixels[i] = src_pixels[width - 1 - i];
  }
}
2650
// Reverse a row of 3-byte RGB24 pixels, keeping each pixel's B, G, R order.
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t* pixel = src_rgb24 + (width - 1 - i) * 3;
    dst_rgb24[0] = pixel[0];
    dst_rgb24[1] = pixel[1];
    dst_rgb24[2] = pixel[2];
    dst_rgb24 += 3;
  }
}
2665
// Gather one row from a tiled layout: each 16-byte run comes from the start
// of the next tile, src_tile_stride bytes apart; the final partial run
// copies the remaining width & 15 bytes.
void DetileRow_C(const uint8_t* src,
                 ptrdiff_t src_tile_stride,
                 uint8_t* dst,
                 int width) {
  int remaining = width;
  while (remaining >= 16) {
    memcpy(dst, src, 16);
    dst += 16;
    src += src_tile_stride;
    remaining -= 16;
  }
  if (remaining > 0) {
    memcpy(dst, src, remaining);
  }
}
2680
// Gather one interleaved-UV row from 16-byte tiles (8 UV pairs per tile,
// tiles src_tile_stride bytes apart) and split it into U and V planes.
void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int t;
  int i;
  const int whole_tiles = width / 16;
  for (t = 0; t < whole_tiles; ++t) {
    // 8 UV pairs per full tile.
    for (i = 0; i < 8; ++i) {
      dst_u[i] = src_uv[2 * i + 0];
      dst_v[i] = src_uv[2 * i + 1];
    }
    dst_u += 8;
    dst_v += 8;
    src_uv += src_tile_stride;
  }
  // Remaining pairs from a final partial tile.
  for (i = 0; i < (width & 0xF) / 2; ++i) {
    dst_u[i] = src_uv[2 * i + 0];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
2704
// De-interleave a UV row into separate U and V planes:
// dst_u[i] = src_uv[2i], dst_v[i] = src_uv[2i + 1].
void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_uv[2 * i + 0];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
2722
// Interleave separate U and V planes into a packed UV row:
// dst_uv[2i] = src_u[i], dst_uv[2i + 1] = src_v[i].
void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = src_u[i];
    dst_uv[2 * i + 1] = src_v[i];
  }
}
2740
// De-interleave a packed 3-byte RGB row into three planes.  The first byte
// of each pixel goes to dst_r, the second to dst_g, the third to dst_b.
void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_r[i] = src_rgb[3 * i + 0];
    dst_g[i] = src_rgb[3 * i + 1];
    dst_b[i] = src_rgb[3 * i + 2];
  }
}
2754
// Interleave three planes into a packed 3-byte RGB row: src_r supplies the
// first byte of each pixel, src_g the second, src_b the third.
void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[3 * i + 0] = src_r[i];
    dst_rgb[3 * i + 1] = src_g[i];
    dst_rgb[3 * i + 2] = src_b[i];
  }
}
2768
// De-interleave a packed 4-byte ARGB row (memory order B, G, R, A) into
// four planes.
void SplitARGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    uint8_t* dst_a,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_b[i] = src_argb[4 * i + 0];
    dst_g[i] = src_argb[4 * i + 1];
    dst_r[i] = src_argb[4 * i + 2];
    dst_a[i] = src_argb[4 * i + 3];
  }
}
2784
// Interleave four planes into a packed 4-byte ARGB row (memory order
// B, G, R, A).
void MergeARGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    const uint8_t* src_a,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[4 * i + 0] = src_b[i];
    dst_argb[4 * i + 1] = src_g[i];
    dst_argb[4 * i + 2] = src_r[i];
    dst_argb[4 * i + 3] = src_a[i];
  }
}
2800
// Merge planar 10..16 bit R, G and B (depth significant bits per uint16_t)
// into packed AR30, shifting each channel down to 10 bits and forcing the
// 2 bit alpha to 3 (opaque).
void MergeXR30Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint8_t* dst_ar30,
                    int depth,
                    int width) {
  assert(depth >= 10);
  assert(depth <= 16);
  int x;
  int shift = depth - 10;  // Bits to drop to reach 10 bit channels.
  uint32_t* dst_ar30_32 = (uint32_t*)dst_ar30;
  for (x = 0; x < width; ++x) {
    // clamp1023 limits each shifted channel to the 10 bit range.
    uint32_t r = clamp1023(src_r[x] >> shift);
    uint32_t g = clamp1023(src_g[x] >> shift);
    uint32_t b = clamp1023(src_b[x] >> shift);
    dst_ar30_32[x] = b | (g << 10) | (r << 20) | 0xc0000000;
  }
}
2819
// Merge planar R, G, B and A (depth significant bits per uint16_t) into
// packed 16 bit AR64 (memory order B, G, R, A), scaling each channel up to
// 16 bits by left shift after clamping to the depth's maximum.
void MergeAR64Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    const uint16_t* src_a,
                    uint16_t* dst_ar64,
                    int depth,
                    int width) {
  assert(depth >= 1);
  assert(depth <= 16);
  int x;
  int shift = 16 - depth;  // Scale depth-bit values up to 16 bits.
  int max = (1 << depth) - 1;
  for (x = 0; x < width; ++x) {
    dst_ar64[0] = ClampMax(src_b[x], max) << shift;
    dst_ar64[1] = ClampMax(src_g[x], max) << shift;
    dst_ar64[2] = ClampMax(src_r[x], max) << shift;
    dst_ar64[3] = ClampMax(src_a[x], max) << shift;
    dst_ar64 += 4;
  }
}
2840
// Pack 16-bit planar R, G, B, A (holding `depth` significant bits) into
// 8-bit interleaved ARGB, discarding the low (depth - 8) bits.
void MergeARGB16To8Row_C(const uint16_t* src_r,
                         const uint16_t* src_g,
                         const uint16_t* src_b,
                         const uint16_t* src_a,
                         uint8_t* dst_argb,
                         int depth,
                         int width) {
  int i;
  int shift;
  assert(depth >= 8);
  assert(depth <= 16);
  shift = depth - 8;
  for (i = 0; i < width; ++i) {
    uint8_t* out = dst_argb + i * 4;
    out[0] = clamp255(src_b[i] >> shift);
    out[1] = clamp255(src_g[i] >> shift);
    out[2] = clamp255(src_r[i] >> shift);
    out[3] = clamp255(src_a[i] >> shift);
  }
}
2860
// Pack 16-bit planar R, G, B (holding `depth` significant bits) into
// interleaved AR64 with alpha forced to fully opaque (0xffff).
void MergeXR64Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint16_t* dst_ar64,
                    int depth,
                    int width) {
  int i;
  int shift;
  int max;
  assert(depth >= 1);
  assert(depth <= 16);
  shift = 16 - depth;
  max = (1 << depth) - 1;
  for (i = 0; i < width; ++i) {
    uint16_t* out = dst_ar64 + i * 4;
    out[0] = ClampMax(src_b[i], max) << shift;
    out[1] = ClampMax(src_g[i], max) << shift;
    out[2] = ClampMax(src_r[i], max) << shift;
    out[3] = 0xffff;  // Opaque alpha.
  }
}
2880
// Pack 16-bit planar R, G, B (holding `depth` significant bits) into 8-bit
// interleaved ARGB with alpha forced to 0xff.
void MergeXRGB16To8Row_C(const uint16_t* src_r,
                         const uint16_t* src_g,
                         const uint16_t* src_b,
                         uint8_t* dst_argb,
                         int depth,
                         int width) {
  int i;
  int shift;
  assert(depth >= 8);
  assert(depth <= 16);
  shift = depth - 8;
  for (i = 0; i < width; ++i) {
    uint8_t* out = dst_argb + i * 4;
    out[0] = clamp255(src_b[i] >> shift);
    out[1] = clamp255(src_g[i] >> shift);
    out[2] = clamp255(src_r[i] >> shift);
    out[3] = 0xff;  // Opaque alpha.
  }
}
2899
// Split interleaved ARGB into planar B, G and R rows; the alpha byte is
// intentionally discarded.
void SplitXRGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t* px = src_argb + i * 4;
    dst_b[i] = px[0];
    dst_g[i] = px[1];
    dst_r[i] = px[2];  // px[3] (alpha) is dropped.
  }
}
2913
// Interleave planar B, G, R into ARGB with alpha forced to 255.
void MergeXRGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = dst_argb + i * 4;
    out[0] = src_b[i];
    out[1] = src_g[i];
    out[2] = src_r[i];
    out[3] = 255;  // Opaque alpha.
  }
}
2928
2929 // Convert lsb formats to msb, depending on sample depth.
// Interleave planar U and V (lsb-justified at `depth` bits) into a UV row,
// msb-justifying each sample by shifting left (16 - depth) bits.
void MergeUVRow_16_C(const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint16_t* dst_uv,
                     int depth,
                     int width) {
  int i;
  int shift = 16 - depth;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = src_u[i] << shift;
    dst_uv[2 * i + 1] = src_v[i] << shift;
  }
}
2945
2946 // Convert msb formats to lsb, depending on sample depth.
// Split an interleaved UV row (msb-justified samples) into planar U and V,
// lsb-justifying each sample by shifting right (16 - depth) bits.
void SplitUVRow_16_C(const uint16_t* src_uv,
                     uint16_t* dst_u,
                     uint16_t* dst_v,
                     int depth,
                     int width) {
  int i;
  int shift = 16 - depth;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_uv[2 * i + 0] >> shift;
    dst_v[i] = src_uv[2 * i + 1] >> shift;
  }
}
2962
// Multiply each 16-bit sample by `scale`; the product is truncated to
// 16 bits on store.
void MultiplyRow_16_C(const uint16_t* src_y,
                      uint16_t* dst_y,
                      int scale,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = src_y[i] * scale;
  }
}
2972
// Scale each 16-bit sample by scale/65536 (16.16 fixed-point multiply).
void DivideRow_16_C(const uint16_t* src_y,
                    uint16_t* dst_y,
                    int scale,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (src_y[i] * scale) >> 16;
  }
}
2982
2983 // Use scale to convert lsb formats to msb, depending how many bits there are:
2984 // 32768 = 9 bits
2985 // 16384 = 10 bits
2986 // 4096 = 12 bits
2987 // 256 = 16 bits
2988 // TODO(fbarchard): change scale to bits
2989 #define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
2990
// Convert 16-bit samples (9 to 16 significant bits, selected by `scale`)
// down to 8 bits with clamping; see C16TO8 for the fixed-point math.
void Convert16To8Row_C(const uint16_t* src_y,
                       uint8_t* dst_y,
                       int scale,
                       int width) {
  int i;
  assert(scale >= 256);
  assert(scale <= 32768);

  for (i = 0; i < width; ++i) {
    dst_y[i] = C16TO8(src_y[i], scale);
  }
}
3003
3004 // Use scale to convert lsb formats to msb, depending how many bits there are:
3005 // 1024 = 10 bits
// Widen 8-bit samples to 16-bit; multiplying `scale` by 0x0101 replicates
// the byte so 255 maps to the full-scale value for the target depth
// (e.g. scale 1024 -> 10-bit, 255 -> 1023).
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  int i;
  const int factor = scale * 0x0101;  // Byte replication folded into scale.
  for (i = 0; i < width; ++i) {
    dst_y[i] = (src_y[i] * factor) >> 16;
  }
}
3016
// Copy a row of bytes; thin wrapper over memcpy.
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  memcpy(dst, src, (size_t)count);
}
3020
// Copy a row of 16-bit samples (count is in samples, not bytes).
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  memcpy(dst, src, (size_t)count * sizeof(uint16_t));
}
3024
// Fill a row of bytes with the value v8.
void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  memset(dst, v8, (size_t)width);
}
3028
// Fill a row of ARGB pixels with the 4-byte pattern v32.
// memcpy is used for the store to avoid unaligned-access UB.
void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  int i;
  for (i = 0; i < width; ++i, dst_argb += sizeof(v32)) {
    memcpy(dst_argb, &v32, sizeof(v32));
  }
}
3035
3036 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
// Filter 2 rows of YUY2 chroma (422) down to one row of U and V (420).
// Each 4-byte group (Y0 U Y1 V) covers two pixels; the U and V samples of
// the two rows are averaged with rounding.
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* next = src_yuy2 + src_stride_yuy2;
  int i;
  for (i = 0; i < width; i += 2) {
    const int g = i >> 1;  // Group index: one U/V pair per 2 pixels.
    dst_u[g] = (src_yuy2[4 * g + 1] + next[4 * g + 1] + 1) >> 1;
    dst_v[g] = (src_yuy2[4 * g + 3] + next[4 * g + 3] + 1) >> 1;
  }
}
3052
3053 // Copy row of YUY2 UV's (422) into U and V (422).
// Copy YUY2 chroma (422) into planar U and V rows, no vertical filtering.
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; i += 2) {
    const int g = i >> 1;  // One U/V pair per 2 pixels.
    dst_u[g] = src_yuy2[4 * g + 1];
    dst_v[g] = src_yuy2[4 * g + 3];
  }
}
3068
3069 // Copy row of YUY2 Y's (422) into Y (420/422).
// Extract luma from YUY2 (422): Y samples sit at even byte offsets
// (Y0 at 0, Y1 at 2 in each 4-byte group).
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i + 1 < width; i += 2) {
    dst_y[i] = src_yuy2[2 * i];
    dst_y[i + 1] = src_yuy2[2 * i + 2];
  }
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[2 * (width - 1)];
  }
}
3082
3083 // Filter 2 rows of UYVY UV's (422) into U and V (420).
// Filter 2 rows of UYVY chroma (422) down to one row of U and V (420).
// Each 4-byte group (U Y0 V Y1) covers two pixels; U and V of the two
// rows are averaged with rounding.
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* next = src_uyvy + src_stride_uyvy;
  int i;
  for (i = 0; i < width; i += 2) {
    const int g = i >> 1;
    dst_u[g] = (src_uyvy[4 * g + 0] + next[4 * g + 0] + 1) >> 1;
    dst_v[g] = (src_uyvy[4 * g + 2] + next[4 * g + 2] + 1) >> 1;
  }
}
3099
3100 // Copy row of UYVY UV's (422) into U and V (422).
// Copy UYVY chroma (422) into planar U and V rows, no vertical filtering.
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; i += 2) {
    const int g = i >> 1;
    dst_u[g] = src_uyvy[4 * g + 0];
    dst_v[g] = src_uyvy[4 * g + 2];
  }
}
3115
3116 // Copy row of UYVY Y's (422) into Y (420/422).
// Extract luma from UYVY (422): Y samples sit at odd byte offsets
// (Y0 at 1, Y1 at 3 in each 4-byte group).
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i + 1 < width; i += 2) {
    dst_y[i] = src_uyvy[2 * i + 1];
    dst_y[i + 1] = src_uyvy[2 * i + 3];
  }
  if (width & 1) {
    dst_y[width - 1] = src_uyvy[2 * (width - 1) + 1];
  }
}
3129
3130 #define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
3131
3132 // Blend src_argb over src_argb1 and store to dst_argb.
3133 // dst_argb may be src_argb or src_argb1.
3134 // This code mimics the SSSE3 version for better testability.
// Per-pixel "over" composite: src_argb (foreground) over src_argb1
// (background), destination alpha forced to opaque. dst_argb may alias
// either source row; each pixel's inputs are read before its outputs are
// written, so in-place use is safe.
void ARGBBlendRow_C(const uint8_t* src_argb,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t fore_b = src_argb[0];
    const uint32_t fore_g = src_argb[1];
    const uint32_t fore_r = src_argb[2];
    const uint32_t alpha = src_argb[3];
    const uint32_t back_b = src_argb1[0];
    const uint32_t back_g = src_argb1[1];
    const uint32_t back_r = src_argb1[2];
    dst_argb[0] = BLEND(fore_b, back_b, alpha);
    dst_argb[1] = BLEND(fore_g, back_g, alpha);
    dst_argb[2] = BLEND(fore_r, back_r, alpha);
    dst_argb[3] = 255u;
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
3183 #undef BLEND
3184
// Blend one plane: dst = (a * src0 + (255 - a) * src1 + 255) >> 8.
// Fully parenthesized macro arguments and expansion (CERT PRE01-C /
// PRE02-C) so the macro is precedence-safe in any expression context.
#define UBLEND(f, b, a) ((((a) * (f)) + ((255 - (a)) * (b)) + 255) >> 8)
// Blend rows src0 and src1 into dst using a per-pixel 8-bit alpha plane.
// alpha == 255 selects src0; alpha == 0 selects src1.
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int x;
  // Two pixels per iteration to mirror the SIMD versions.
  for (x = 0; x < width - 1; x += 2) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
    dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  if (width & 1) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
  }
}
#undef UBLEND
3205
#if LIBYUV_ATTENUATE_DUP
// This code mimics the SSSE3 version for better testability: both operands
// are replicated into 16 bits before multiplying. Fully parenthesized
// (CERT PRE01-C / PRE02-C) to be precedence-safe.
#define ATTENUATE(f, a) ((((a) | ((a) << 8)) * ((f) | ((f) << 8))) >> 24)
#else
// Straight 8-bit multiply with rounding.
#define ATTENUATE(f, a) (((f) * (a) + 128) >> 8)
#endif

// Premultiply: scale each pixel's B, G, R by its alpha; alpha is copied
// through unchanged. Processes two pixels per iteration to mirror the
// SIMD unrolling, with a tail for odd widths.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    uint32_t b = src_argb[0];
    uint32_t g = src_argb[1];
    uint32_t r = src_argb[2];
    uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
    b = src_argb[4];
    g = src_argb[5];
    r = src_argb[6];
    a = src_argb[7];
    dst_argb[4] = ATTENUATE(b, a);
    dst_argb[5] = ATTENUATE(g, a);
    dst_argb[6] = ATTENUATE(r, a);
    dst_argb[7] = a;
    src_argb += 8;
    dst_argb += 8;
  }

  if (width & 1) {
    const uint32_t b = src_argb[0];
    const uint32_t g = src_argb[1];
    const uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
  }
}
#undef ATTENUATE
3249
3250 // Divide source RGB by alpha and store to destination.
3251 // b = (b * 255 + (a / 2)) / a;
3252 // g = (g * 255 + (a / 2)) / a;
3253 // r = (r * 255 + (a / 2)) / a;
3254 // Reciprocal method is off by 1 on some values. ie 125
3255 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
// 8.8 fixed-point reciprocal table: the upper 16 bits of each entry hold
// 1.0 (0x0100) and the lower 16 bits hold 65536 / a. Entries 0 and 1 are
// special-cased; entry 255 is exactly 1.0 so opaque pixels pass through
// unchanged. T is fully parenthesized (CERT PRE01-C / PRE02-C).
#define T(a) (0x01000000 + (0x10000 / (a)))
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
3296
3297 #if LIBYUV_UNATTENUATE_DUP
3298 // This code mimics the Intel SIMD version for better testability.
3299 #define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
3300 #else
3301 #define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
3302 #endif
3303
3304 // mimics the Intel SIMD code for exactness.
ARGBUnattenuateRow_C(const uint8_t * src_argb,uint8_t * dst_argb,int width)3305 void ARGBUnattenuateRow_C(const uint8_t* src_argb,
3306 uint8_t* dst_argb,
3307 int width) {
3308 int i;
3309 for (i = 0; i < width; ++i) {
3310 uint32_t b = src_argb[0];
3311 uint32_t g = src_argb[1];
3312 uint32_t r = src_argb[2];
3313 const uint32_t a = src_argb[3];
3314 const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
3315
3316 // Clamping should not be necessary but is free in assembly.
3317 dst_argb[0] = UNATTENUATE(b, ia);
3318 dst_argb[1] = UNATTENUATE(g, ia);
3319 dst_argb[2] = UNATTENUATE(r, ia);
3320 dst_argb[3] = a;
3321 src_argb += 4;
3322 dst_argb += 4;
3323 }
3324 }
3325
// Accumulate per-channel running sums across an ARGB row and add them to
// the previous row's cumulative sums (summed-area table construction).
void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t sum_b = 0;
  int32_t sum_g = 0;
  int32_t sum_r = 0;
  int32_t sum_a = 0;
  int x;
  for (x = 0; x < width; ++x) {
    const int i = x * 4;
    sum_b += row[i + 0];
    sum_g += row[i + 1];
    sum_r += row[i + 2];
    sum_a += row[i + 3];
    cumsum[i + 0] = sum_b + previous_cumsum[i + 0];
    cumsum[i + 1] = sum_g + previous_cumsum[i + 1];
    cumsum[i + 2] = sum_r + previous_cumsum[i + 2];
    cumsum[i + 3] = sum_a + previous_cumsum[i + 3];
  }
}
3343
// Compute box-filter averages from a summed-area table: for each output
// pixel, the four-corner difference (bl[w] + tl[0] - bl[0] - tl[w]) gives
// the box sum, which is scaled by 1/area.
void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  float inv_area;
  int i;
  assert(area != 0);

  inv_area = 1.0f / area;
  for (i = 0; i < count; ++i) {
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      dst[ch] = (uint8_t)((bl[w + ch] + tl[ch] - bl[ch] - tl[w + ch]) * inv_area);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
3365
3366 // Copy pixels from rotated source to destination row with a slope.
3367 LIBYUV_API
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Walk the source along the (du, dv) slope, sampling one 4-byte pixel
  // per destination pixel (nearest-neighbor via float truncation).
  float u = uv_dudv[0];
  float v = uv_dudv[1];
  const float du = uv_dudv[2];
  const float dv = uv_dudv[3];
  for (i = 0; i < width; ++i) {
    const int sx = (int)u;
    const int sy = (int)v;
    // memcpy the 4-byte pixel; behaves like the 32-bit load/store without
    // alignment or aliasing hazards.
    memcpy(dst_argb, src_argb + sy * src_argb_stride + sx * 4, 4);
    dst_argb += 4;
    u += du;
    v += dv;
  }
}
3388
3389 // Blend 2 rows into 1.
// Average two byte rows into one with rounding.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  const uint8_t* second = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = (src_uv[i] + second[i] + 1) >> 1;
  }
}
3399
// Average two 16-bit rows into one with rounding.
static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  const uint16_t* second = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = (src_uv[i] + second[i] + 1) >> 1;
  }
}
3409
// Average two 16-bit rows with rounding, then scale each result down to
// 8 bits via C16TO8.
static void HalfRow_16To8_C(const uint16_t* src_uv,
                            ptrdiff_t src_uv_stride,
                            uint8_t* dst_uv,
                            int scale,
                            int width) {
  const uint16_t* second = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = C16TO8((src_uv[i] + second[i] + 1) >> 1, scale);
  }
}
3420
3421 // C version 2x2 -> 2x1.
// Vertically interpolate two byte rows: dst = lerp(row0, row1, f/256)
// with rounding. Fractions 0 and 128 take dedicated fast paths.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  const int f1 = source_y_fraction;
  const int f0 = 256 - f1;
  const uint8_t* row1 = src_ptr + src_stride;
  int i;
  assert(source_y_fraction >= 0);
  assert(source_y_fraction < 256);

  if (f1 == 0) {
    // Pure copy of the first row.
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (f1 == 128) {
    // Exact half blend.
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (i = 0; i < width; ++i) {
    dst_ptr[i] = (src_ptr[i] * f0 + row1[i] * f1 + 128) >> 8;
  }
}
3450
3451 // C version 2x2 -> 2x1.
// Vertically interpolate two 16-bit rows: dst = lerp(row0, row1, f/256)
// with rounding. Fractions 0 and 128 take dedicated fast paths.
void InterpolateRow_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  const int f1 = source_y_fraction;
  const int f0 = 256 - f1;
  const uint16_t* row1 = src_ptr + src_stride;
  int i;
  assert(source_y_fraction >= 0);
  assert(source_y_fraction < 256);

  if (f1 == 0) {
    // Pure copy of the first row (width is in samples; 2 bytes each).
    memcpy(dst_ptr, src_ptr, width * 2);
    return;
  }
  if (f1 == 128) {
    // Exact half blend.
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (i = 0; i < width; ++i) {
    dst_ptr[i] = (src_ptr[i] * f0 + row1[i] * f1 + 128) >> 8;
  }
}
3480
3481 // C version 2x2 16 bit-> 2x1 8 bit.
3482 // Use scale to convert lsb formats to msb, depending how many bits there are:
3483 // 32768 = 9 bits
3484 // 16384 = 10 bits
3485 // 4096 = 12 bits
3486 // 256 = 16 bits
3487 // TODO(fbarchard): change scale to bits
3488
// Vertically interpolate two 16-bit rows and convert the result to 8 bits
// (see C16TO8 for `scale` semantics). Fractions 0 and 128 take dedicated
// fast paths.
void InterpolateRow_16To8_C(uint8_t* dst_ptr,
                            const uint16_t* src_ptr,
                            ptrdiff_t src_stride,
                            int scale,
                            int width,
                            int source_y_fraction) {
  const int f1 = source_y_fraction;
  const int f0 = 256 - f1;
  const uint16_t* row1 = src_ptr + src_stride;
  int i;
  assert(source_y_fraction >= 0);
  assert(source_y_fraction < 256);

  if (f1 == 0) {
    // No blend: just depth-convert the first row.
    Convert16To8Row_C(src_ptr, dst_ptr, scale, width);
    return;
  }
  if (f1 == 128) {
    // Exact half blend plus depth conversion.
    HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width);
    return;
  }
  for (i = 0; i < width; ++i) {
    dst_ptr[i] =
        C16TO8((src_ptr[i] * f0 + row1[i] * f1 + 128) >> 8, scale);
  }
}
3519
3520 // Use first 4 shuffler values to reorder ARGB channels.
// Reorder ARGB channels using the first 4 shuffler values as source byte
// indices for destination bytes 0..3.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  const int i0 = shuffler[0];
  const int i1 = shuffler[1];
  const int i2 = shuffler[2];
  const int i3 = shuffler[3];
  int x;
  for (x = 0; x < width; ++x) {
    // Gather all four bytes before storing so src and dst may alias
    // (in-place conversion).
    const uint8_t c0 = src_argb[i0];
    const uint8_t c1 = src_argb[i1];
    const uint8_t c2 = src_argb[i2];
    const uint8_t c3 = src_argb[i3];
    dst_argb[0] = c0;
    dst_argb[1] = c1;
    dst_argb[2] = c2;
    dst_argb[3] = c3;
    src_argb += 4;
    dst_argb += 4;
  }
}
3545
// Pack planar I422 into YUY2: each 4-byte group is (Y0, U, Y1, V). The
// final group of an odd-width row stores 0 for the missing second luma.
void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int i;
  for (i = 0; i + 1 < width; i += 2) {
    uint8_t* out = dst_frame + (i / 2) * 4;
    out[0] = src_y[i];
    out[1] = src_u[i / 2];
    out[2] = src_y[i + 1];
    out[3] = src_v[i / 2];
  }
  if (width & 1) {
    uint8_t* out = dst_frame + (width / 2) * 4;
    out[0] = src_y[width - 1];
    out[1] = src_u[width / 2];
    out[2] = 0;  // No second luma sample.
    out[3] = src_v[width / 2];
  }
}
3569
// Pack planar I422 into UYVY: each 4-byte group is (U, Y0, V, Y1). The
// final group of an odd-width row stores 0 for the missing second luma.
void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int i;
  for (i = 0; i + 1 < width; i += 2) {
    uint8_t* out = dst_frame + (i / 2) * 4;
    out[0] = src_u[i / 2];
    out[1] = src_y[i];
    out[2] = src_v[i / 2];
    out[3] = src_y[i + 1];
  }
  if (width & 1) {
    uint8_t* out = dst_frame + (width / 2) * 4;
    out[0] = src_u[width / 2];
    out[1] = src_y[width - 1];
    out[2] = src_v[width / 2];
    out[3] = 0;  // No second luma sample.
  }
}
3593
// Evaluate poly[ch] + poly[ch+4]*v + poly[ch+8]*v^2 + poly[ch+12]*v^3 in
// float, using the same per-channel operation order as the SIMD versions
// so results are bit-identical.
static float PolynomialChannel_C(const float* poly, int ch, float v) {
  const float v2 = v * v;
  const float v3 = v2 * v;
  float result = poly[ch] + poly[ch + 4] * v;
  result += poly[ch + 8] * v2;
  result += poly[ch + 12] * v3;
  return result;
}

// Apply a cubic polynomial (16 float coefficients, 4 per channel) to each
// ARGB channel and clamp the result to a byte.
void ARGBPolynomialRow_C(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] =
          Clamp((int32_t)PolynomialChannel_C(poly, ch, (float)src_argb[ch]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
3633
3634 // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor
3635 // adjust the source integer range to the half float range desired.
3636
3637 // This magic constant is 2^-112. Multiplying by this
3638 // is the same as subtracting 112 from the exponent, which
3639 // is the difference in exponent bias between 32-bit and
3640 // 16-bit floats. Once we've done this subtraction, we can
3641 // simply extract the low bits of the exponent and the high
3642 // bits of the mantissa from our float and we're done.
3643
3644 // Work around GCC 7 punning warning -Wstrict-aliasing
#if defined(__GNUC__)
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
#else
typedef uint32_t uint32_alias_t;
#endif

// Convert 16-bit integer samples to IEEE half floats. 1.9259299444e-34f
// is 2^-112; multiplying by it subtracts the float32/float16 exponent-bias
// difference, so the half value is just bits 13..28 of the scaled float.
void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  int i;
  const float bias_scale = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
    float f = src[i] * bias_scale;
    // Reinterpret the float bits (may_alias typedef avoids strict-aliasing
    // warnings) and keep the low exponent bits plus top 10 mantissa bits.
    dst[i] = (uint16_t)((*(const uint32_alias_t*)&f) >> 13);
  }
}
3662
// Convert bytes to floats, multiplying each by `scale`.
void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = (float)src[i] * scale;
  }
}
3670
// Remap one BGRA pixel through a 2D table: the weighted-luma value
// (masked to a multiple of 256) selects a 256-entry row of `luma`, and
// each color byte indexes within that row. Alpha passes through.
// Reads/writes in the same order as the unrolled original, so in-place
// use behaves identically.
static void ARGBLumaPixel_C(const uint8_t* src,
                            uint8_t* dst,
                            const uint8_t* luma,
                            uint32_t bc,
                            uint32_t gc,
                            uint32_t rc) {
  const uint8_t* table =
      ((src[0] * bc + src[1] * gc + src[2] * rc) & 0x7F00u) + luma;
  dst[0] = table[src[0]];
  dst[1] = table[src[1]];
  dst[2] = table[src[2]];
  dst[3] = src[3];
}

// Apply a luma-indexed color table to a row of ARGB pixels. lumacoeff
// packs the B, G, R luma weights into its low three bytes.
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  const uint32_t bc = lumacoeff & 0xff;
  const uint32_t gc = (lumacoeff >> 8) & 0xff;
  const uint32_t rc = (lumacoeff >> 16) & 0xff;
  int i;
  for (i = 0; i < width; ++i) {
    ARGBLumaPixel_C(src_argb + i * 4, dst_argb + i * 4, luma, bc, gc, rc);
  }
}
3712
// Copy only the alpha byte (byte 3 of every 4-byte pixel) from src to dst,
// leaving B, G, R in dst untouched.
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i * 4 + 3] = src[i * 4 + 3];
  }
}
3725
// Extract the alpha channel of an ARGB row into a planar byte row.
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[i] = src_argb[i * 4 + 3];
  }
}
3738
// Copy a planar luma row into the alpha channel of an ARGB row, leaving
// B, G, R untouched.
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i * 4 + 3] = src[i];
  }
}
3751
3752 // Maximum temporary width for wrappers to process at a time, in pixels.
3753 #define MAXTWIDTH 2048
3754
3755 #if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
3756 defined(HAS_I422TORGB565ROW_SSSE3)
3757 // row_win.cc has asm version, but GCC uses 2 step wrapper.
I422ToRGB565Row_SSSE3(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)3758 void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
3759 const uint8_t* src_u,
3760 const uint8_t* src_v,
3761 uint8_t* dst_rgb565,
3762 const struct YuvConstants* yuvconstants,
3763 int width) {
3764 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3765 while (width > 0) {
3766 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3767 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3768 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3769 src_y += twidth;
3770 src_u += twidth / 2;
3771 src_v += twidth / 2;
3772 dst_rgb565 += twidth * 2;
3773 width -= twidth;
3774 }
3775 }
3776 #endif
3777
3778 #if defined(HAS_I422TOARGB1555ROW_SSSE3)
I422ToARGB1555Row_SSSE3(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb1555,const struct YuvConstants * yuvconstants,int width)3779 void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
3780 const uint8_t* src_u,
3781 const uint8_t* src_v,
3782 uint8_t* dst_argb1555,
3783 const struct YuvConstants* yuvconstants,
3784 int width) {
3785 // Row buffer for intermediate ARGB pixels.
3786 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3787 while (width > 0) {
3788 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3789 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3790 ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
3791 src_y += twidth;
3792 src_u += twidth / 2;
3793 src_v += twidth / 2;
3794 dst_argb1555 += twidth * 2;
3795 width -= twidth;
3796 }
3797 }
3798 #endif
3799
3800 #if defined(HAS_I422TOARGB4444ROW_SSSE3)
I422ToARGB4444Row_SSSE3(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb4444,const struct YuvConstants * yuvconstants,int width)3801 void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
3802 const uint8_t* src_u,
3803 const uint8_t* src_v,
3804 uint8_t* dst_argb4444,
3805 const struct YuvConstants* yuvconstants,
3806 int width) {
3807 // Row buffer for intermediate ARGB pixels.
3808 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3809 while (width > 0) {
3810 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3811 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3812 ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
3813 src_y += twidth;
3814 src_u += twidth / 2;
3815 src_v += twidth / 2;
3816 dst_argb4444 += twidth * 2;
3817 width -= twidth;
3818 }
3819 }
3820 #endif
3821
3822 #if defined(HAS_NV12TORGB565ROW_SSSE3)
NV12ToRGB565Row_SSSE3(const uint8_t * src_y,const uint8_t * src_uv,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)3823 void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
3824 const uint8_t* src_uv,
3825 uint8_t* dst_rgb565,
3826 const struct YuvConstants* yuvconstants,
3827 int width) {
3828 // Row buffer for intermediate ARGB pixels.
3829 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3830 while (width > 0) {
3831 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3832 NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
3833 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3834 src_y += twidth;
3835 src_uv += twidth;
3836 dst_rgb565 += twidth * 2;
3837 width -= twidth;
3838 }
3839 }
3840 #endif
3841
3842 #if defined(HAS_NV12TORGB24ROW_SSSE3)
NV12ToRGB24Row_SSSE3(const uint8_t * src_y,const uint8_t * src_uv,uint8_t * dst_rgb24,const struct YuvConstants * yuvconstants,int width)3843 void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
3844 const uint8_t* src_uv,
3845 uint8_t* dst_rgb24,
3846 const struct YuvConstants* yuvconstants,
3847 int width) {
3848 // Row buffer for intermediate ARGB pixels.
3849 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3850 while (width > 0) {
3851 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3852 NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
3853 ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
3854 src_y += twidth;
3855 src_uv += twidth;
3856 dst_rgb24 += twidth * 3;
3857 width -= twidth;
3858 }
3859 }
3860 #endif
3861
3862 #if defined(HAS_NV21TORGB24ROW_SSSE3)
NV21ToRGB24Row_SSSE3(const uint8_t * src_y,const uint8_t * src_vu,uint8_t * dst_rgb24,const struct YuvConstants * yuvconstants,int width)3863 void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
3864 const uint8_t* src_vu,
3865 uint8_t* dst_rgb24,
3866 const struct YuvConstants* yuvconstants,
3867 int width) {
3868 // Row buffer for intermediate ARGB pixels.
3869 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3870 while (width > 0) {
3871 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3872 NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
3873 ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
3874 src_y += twidth;
3875 src_vu += twidth;
3876 dst_rgb24 += twidth * 3;
3877 width -= twidth;
3878 }
3879 }
3880 #endif
3881
3882 #if defined(HAS_NV12TORGB24ROW_AVX2)
NV12ToRGB24Row_AVX2(const uint8_t * src_y,const uint8_t * src_uv,uint8_t * dst_rgb24,const struct YuvConstants * yuvconstants,int width)3883 void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
3884 const uint8_t* src_uv,
3885 uint8_t* dst_rgb24,
3886 const struct YuvConstants* yuvconstants,
3887 int width) {
3888 // Row buffer for intermediate ARGB pixels.
3889 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3890 while (width > 0) {
3891 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3892 NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
3893 #if defined(HAS_ARGBTORGB24ROW_AVX2)
3894 ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
3895 #else
3896 ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
3897 #endif
3898 src_y += twidth;
3899 src_uv += twidth;
3900 dst_rgb24 += twidth * 3;
3901 width -= twidth;
3902 }
3903 }
3904 #endif
3905
3906 #if defined(HAS_NV21TORGB24ROW_AVX2)
NV21ToRGB24Row_AVX2(const uint8_t * src_y,const uint8_t * src_vu,uint8_t * dst_rgb24,const struct YuvConstants * yuvconstants,int width)3907 void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
3908 const uint8_t* src_vu,
3909 uint8_t* dst_rgb24,
3910 const struct YuvConstants* yuvconstants,
3911 int width) {
3912 // Row buffer for intermediate ARGB pixels.
3913 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3914 while (width > 0) {
3915 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3916 NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
3917 #if defined(HAS_ARGBTORGB24ROW_AVX2)
3918 ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
3919 #else
3920 ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
3921 #endif
3922 src_y += twidth;
3923 src_vu += twidth;
3924 dst_rgb24 += twidth * 3;
3925 width -= twidth;
3926 }
3927 }
3928 #endif
3929
3930 #if defined(HAS_I422TORGB565ROW_AVX2)
I422ToRGB565Row_AVX2(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)3931 void I422ToRGB565Row_AVX2(const uint8_t* src_y,
3932 const uint8_t* src_u,
3933 const uint8_t* src_v,
3934 uint8_t* dst_rgb565,
3935 const struct YuvConstants* yuvconstants,
3936 int width) {
3937 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3938 while (width > 0) {
3939 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3940 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
3941 #if defined(HAS_ARGBTORGB565ROW_AVX2)
3942 ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
3943 #else
3944 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3945 #endif
3946 src_y += twidth;
3947 src_u += twidth / 2;
3948 src_v += twidth / 2;
3949 dst_rgb565 += twidth * 2;
3950 width -= twidth;
3951 }
3952 }
3953 #endif
3954
3955 #if defined(HAS_I422TOARGB1555ROW_AVX2)
I422ToARGB1555Row_AVX2(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb1555,const struct YuvConstants * yuvconstants,int width)3956 void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
3957 const uint8_t* src_u,
3958 const uint8_t* src_v,
3959 uint8_t* dst_argb1555,
3960 const struct YuvConstants* yuvconstants,
3961 int width) {
3962 // Row buffer for intermediate ARGB pixels.
3963 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3964 while (width > 0) {
3965 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3966 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
3967 #if defined(HAS_ARGBTOARGB1555ROW_AVX2)
3968 ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
3969 #else
3970 ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
3971 #endif
3972 src_y += twidth;
3973 src_u += twidth / 2;
3974 src_v += twidth / 2;
3975 dst_argb1555 += twidth * 2;
3976 width -= twidth;
3977 }
3978 }
3979 #endif
3980
3981 #if defined(HAS_I422TOARGB4444ROW_AVX2)
I422ToARGB4444Row_AVX2(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb4444,const struct YuvConstants * yuvconstants,int width)3982 void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
3983 const uint8_t* src_u,
3984 const uint8_t* src_v,
3985 uint8_t* dst_argb4444,
3986 const struct YuvConstants* yuvconstants,
3987 int width) {
3988 // Row buffer for intermediate ARGB pixels.
3989 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3990 while (width > 0) {
3991 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3992 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
3993 #if defined(HAS_ARGBTOARGB4444ROW_AVX2)
3994 ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
3995 #else
3996 ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
3997 #endif
3998 src_y += twidth;
3999 src_u += twidth / 2;
4000 src_v += twidth / 2;
4001 dst_argb4444 += twidth * 2;
4002 width -= twidth;
4003 }
4004 }
4005 #endif
4006
4007 #if defined(HAS_I422TORGB24ROW_AVX2)
I422ToRGB24Row_AVX2(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_rgb24,const struct YuvConstants * yuvconstants,int width)4008 void I422ToRGB24Row_AVX2(const uint8_t* src_y,
4009 const uint8_t* src_u,
4010 const uint8_t* src_v,
4011 uint8_t* dst_rgb24,
4012 const struct YuvConstants* yuvconstants,
4013 int width) {
4014 // Row buffer for intermediate ARGB pixels.
4015 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4016 while (width > 0) {
4017 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4018 I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
4019 #if defined(HAS_ARGBTORGB24ROW_AVX2)
4020 ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
4021 #else
4022 ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4023 #endif
4024 src_y += twidth;
4025 src_u += twidth / 2;
4026 src_v += twidth / 2;
4027 dst_rgb24 += twidth * 3;
4028 width -= twidth;
4029 }
4030 }
4031 #endif
4032
4033 #if defined(HAS_NV12TORGB565ROW_AVX2)
NV12ToRGB565Row_AVX2(const uint8_t * src_y,const uint8_t * src_uv,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)4034 void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
4035 const uint8_t* src_uv,
4036 uint8_t* dst_rgb565,
4037 const struct YuvConstants* yuvconstants,
4038 int width) {
4039 // Row buffer for intermediate ARGB pixels.
4040 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4041 while (width > 0) {
4042 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4043 NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
4044 #if defined(HAS_ARGBTORGB565ROW_AVX2)
4045 ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
4046 #else
4047 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
4048 #endif
4049 src_y += twidth;
4050 src_uv += twidth;
4051 dst_rgb565 += twidth * 2;
4052 width -= twidth;
4053 }
4054 }
4055 #endif
4056
4057 #ifdef HAS_RGB24TOYJROW_AVX2
4058 // Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
RGB24ToYJRow_AVX2(const uint8_t * src_rgb24,uint8_t * dst_yj,int width)4059 void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
4060 // Row buffer for intermediate ARGB pixels.
4061 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4062 while (width > 0) {
4063 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4064 RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
4065 ARGBToYJRow_AVX2(row, dst_yj, twidth);
4066 src_rgb24 += twidth * 3;
4067 dst_yj += twidth;
4068 width -= twidth;
4069 }
4070 }
4071 #endif // HAS_RGB24TOYJROW_AVX2
4072
4073 #ifdef HAS_RAWTOYJROW_AVX2
4074 // Convert 16 RAW pixels (64 bytes) to 16 YJ values.
RAWToYJRow_AVX2(const uint8_t * src_raw,uint8_t * dst_yj,int width)4075 void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
4076 // Row buffer for intermediate ARGB pixels.
4077 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4078 while (width > 0) {
4079 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4080 RAWToARGBRow_SSSE3(src_raw, row, twidth);
4081 ARGBToYJRow_AVX2(row, dst_yj, twidth);
4082 src_raw += twidth * 3;
4083 dst_yj += twidth;
4084 width -= twidth;
4085 }
4086 }
4087 #endif // HAS_RAWTOYJROW_AVX2
4088
4089 #ifdef HAS_RGB24TOYJROW_SSSE3
4090 // Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
RGB24ToYJRow_SSSE3(const uint8_t * src_rgb24,uint8_t * dst_yj,int width)4091 void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
4092 // Row buffer for intermediate ARGB pixels.
4093 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4094 while (width > 0) {
4095 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4096 RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
4097 ARGBToYJRow_SSSE3(row, dst_yj, twidth);
4098 src_rgb24 += twidth * 3;
4099 dst_yj += twidth;
4100 width -= twidth;
4101 }
4102 }
4103 #endif // HAS_RGB24TOYJROW_SSSE3
4104
4105 #ifdef HAS_RAWTOYJROW_SSSE3
4106 // Convert 16 RAW pixels (64 bytes) to 16 YJ values.
RAWToYJRow_SSSE3(const uint8_t * src_raw,uint8_t * dst_yj,int width)4107 void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
4108 // Row buffer for intermediate ARGB pixels.
4109 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4110 while (width > 0) {
4111 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4112 RAWToARGBRow_SSSE3(src_raw, row, twidth);
4113 ARGBToYJRow_SSSE3(row, dst_yj, twidth);
4114 src_raw += twidth * 3;
4115 dst_yj += twidth;
4116 width -= twidth;
4117 }
4118 }
4119 #endif // HAS_RAWTOYJROW_SSSE3
4120
4121 #ifdef HAS_INTERPOLATEROW_16TO8_AVX2
InterpolateRow_16To8_AVX2(uint8_t * dst_ptr,const uint16_t * src_ptr,ptrdiff_t src_stride,int scale,int width,int source_y_fraction)4122 void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
4123 const uint16_t* src_ptr,
4124 ptrdiff_t src_stride,
4125 int scale,
4126 int width,
4127 int source_y_fraction) {
4128 // Row buffer for intermediate 16 bit pixels.
4129 SIMD_ALIGNED(uint16_t row[MAXTWIDTH]);
4130 while (width > 0) {
4131 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4132 InterpolateRow_16_C(row, src_ptr, src_stride, twidth, source_y_fraction);
4133 Convert16To8Row_AVX2(row, dst_ptr, scale, twidth);
4134 src_ptr += twidth;
4135 dst_ptr += twidth;
4136 width -= twidth;
4137 }
4138 }
4139 #endif // HAS_INTERPOLATEROW_16TO8_AVX2
4140
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  // Write src[i] * scale into dst and return the sum of squares of the
  // *unscaled* input samples.
  float sum_sq = 0.f;
  for (int i = 0; i < width; ++i) {
    const float v = src[i];
    dst[i] = v * scale;
    sum_sq += v * v;
  }
  return sum_sq;
}
4151
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  // Write src[i] * scale into dst and return the maximum *unscaled* input
  // sample (0.f if width <= 0 or all samples are non-positive).
  float max_v = 0.f;
  for (int i = 0; i < width; ++i) {
    const float v = src[i];
    dst[i] = v * scale;
    if (v > max_v) {
      max_v = v;
    }
  }
  return max_v;
}
4163
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  // Multiply each of the width float samples by scale.
  for (int i = 0; i < width; ++i) {
    dst[i] = src[i] * scale;
  }
}
4170
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  // Horizontal 5-tap Gaussian filter (1,4,6,4,1)/256 with rounding.
  // Produces one 16-bit output per position; reads src[i]..src[i+4],
  // so src must have 4 extra trailing elements.
  for (int i = 0; i < width; ++i) {
    dst[i] = (src[i] + src[i + 1] * 4 + src[i + 2] * 6 + src[i + 3] * 4 +
              src[i + 4] + 128) >>
             8;
  }
}
4179
4180 // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  // Vertical 5-tap Gaussian filter (1,4,6,4,1). No rounding or shift here;
  // the divide by 256 is applied later by GaussRow_C.
  for (int i = 0; i < width; ++i) {
    dst[i] = src0[i] + src1[i] * 4 + src2[i] * 6 + src3[i] * 4 + src4[i];
  }
}
4193
void GaussRow_F32_C(const float* src, float* dst, int width) {
  // Horizontal 5-tap Gaussian filter (1,4,6,4,1)/256 in float.
  // Reads src[i]..src[i+4], so src must have 4 extra trailing elements.
  for (int i = 0; i < width; ++i) {
    dst[i] = (src[i] + src[i + 1] * 4 + src[i + 2] * 6 + src[i + 3] * 4 +
              src[i + 4]) *
             (1.0f / 256.0f);
  }
}
4202
4203 // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  // Vertical 5-tap Gaussian filter (1,4,6,4,1) in float. No normalization
  // here; the divide by 256 is applied later by GaussRow_F32_C.
  for (int i = 0; i < width; ++i) {
    dst[i] = src0[i] + src1[i] * 4 + src2[i] * 6 + src3[i] * 4 + src4[i];
  }
}
4216
4217 // Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  // Pack planar Y plus interleaved VU (NV21) into 3-byte V,U,Y triples.
  // Each VU pair is shared by two consecutive Y samples (4:2:0 chroma).
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    const uint8_t v = src_vu[0];
    const uint8_t u = src_vu[1];
    dst_yuv24[0] = v;
    dst_yuv24[1] = u;
    dst_yuv24[2] = src_y[0];
    dst_yuv24[3] = v;
    dst_yuv24[4] = u;
    dst_yuv24[5] = src_y[1];
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Two 3-byte pixels per iteration.
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];
    dst_yuv24[1] = src_vu[1];
    dst_yuv24[2] = src_y[0];
  }
}
4240
4241 // Filter 2 rows of AYUV UV's (444) into UV (420).
4242 // AYUV is VUYA in memory. UV for NV12 is UV order in memory.
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Box-filter 2x2 blocks of AYUV (VUYA byte order) chroma down to one
  // U,V pair per block, with rounding. An odd final column averages only
  // vertically across the two rows.
  const uint8_t* row0 = src_ayuv;
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    dst_uv[0] = (row0[1] + row0[5] + row1[1] + row1[5] + 2) >> 2;  // U
    dst_uv[1] = (row0[0] + row0[4] + row1[0] + row1[4] + 2) >> 2;  // V
    row0 += 8;
    row1 += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (row0[1] + row1[1] + 1) >> 1;
    dst_uv[1] = (row0[0] + row1[0] + 1) >> 1;
  }
}
4264
4265 // Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Box-filter 2x2 blocks of AYUV (VUYA byte order) chroma down to one
  // V,U pair per block, with rounding. An odd final column averages only
  // vertically across the two rows.
  const uint8_t* row0 = src_ayuv;
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    dst_vu[0] = (row0[0] + row0[4] + row1[0] + row1[4] + 2) >> 2;  // V
    dst_vu[1] = (row0[1] + row0[5] + row1[1] + row1[5] + 2) >> 2;  // U
    row0 += 8;
    row1 += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    dst_vu[0] = (row0[0] + row1[0] + 1) >> 1;
    dst_vu[1] = (row0[1] + row1[1] + 1) >> 1;
  }
}
4287
4288 // Copy row of AYUV Y's into Y
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  // AYUV pixels are stored V,U,Y,A in memory; copy the Y byte (offset 2)
  // of each 4-byte pixel into a planar luma buffer.
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[4 * x + 2];
  }
}
4297
4298 // Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  // Swap the byte order of each UV pair (NV12 <-> NV21). Both bytes are
  // read into locals before writing, so src and dst may alias (in-place).
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t u = src_uv[2 * x];
    const uint8_t v = src_uv[2 * x + 1];
    dst_vu[2 * x] = v;
    dst_vu[2 * x + 1] = u;
  }
}
4310
void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  // 2x2 box-filter separate U and V planes down to one interleaved UV pair
  // per block, with rounding. An odd final column averages only vertically.
  int x;
  for (x = 0; x + 1 < width; x += 2) {
    const int u_sum =
        src_u[0] + src_u[1] + src_u[src_stride_u] + src_u[src_stride_u + 1];
    const int v_sum =
        src_v[0] + src_v[1] + src_v[src_stride_v] + src_v[src_stride_v + 1];
    dst_uv[0] = (u_sum + 2) >> 2;
    dst_uv[1] = (v_sum + 2) >> 2;
    src_u += 2;
    src_v += 2;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_u[0] + src_u[src_stride_u] + 1) >> 1;
    dst_uv[1] = (src_v[0] + src_v[src_stride_v] + 1) >> 1;
  }
}
4334
4335 #ifdef __cplusplus
4336 } // extern "C"
4337 } // namespace libyuv
4338 #endif
4339