1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12
13 #include <assert.h>
14 #include <string.h> // For memcpy and memset.
15
16 #include "libyuv/basic_types.h"
17 #include "libyuv/convert_argb.h" // For kYuvI601Constants
18
19 #ifdef __cplusplus
20 namespace libyuv {
21 extern "C" {
22 #endif
23
24 // This macro controls YUV to RGB using unsigned math to extend range of
25 // YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
26 // LIBYUV_UNLIMITED_DATA
27
28 // Macros to enable unlimited data for each colorspace
29 // LIBYUV_UNLIMITED_BT601
30 // LIBYUV_UNLIMITED_BT709
31 // LIBYUV_UNLIMITED_BT2020
32
33 // The following macro from row_win makes the C code match the row_win code,
34 // which is 7 bit fixed point for ARGBToI420:
35 #if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
36 defined(_MSC_VER) && !defined(__clang__) && \
37 (defined(_M_IX86) || defined(_M_X64))
38 #define LIBYUV_RGB7 1
39 #endif
40
41 #if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
42 defined(__i386__) || defined(_M_IX86))
43 #define LIBYUV_ARGBTOUV_PAVGB 1
44 #define LIBYUV_RGBTOU_TRUNCATE 1
45 #define LIBYUV_ATTENUATE_DUP 1
46 #endif
47 #if defined(LIBYUV_BIT_EXACT)
48 #define LIBYUV_UNATTENUATE_DUP 1
49 #endif
50
// llvm x86 is poor at ternary operator, so use branchless min/max.

#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
// Clamp v to a minimum of 0. -(v >= 0) is all ones when v is non-negative
// (so the AND passes v through) and 0 when v is negative.
static __inline int32_t clamp0(int32_t v) {
  return -(v >= 0) & v;
}
// Clamp v to a maximum of 255. Negative inputs are NOT preserved: their low
// 8 bits leak through, so callers clamp0 first (see Clamp below).
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}

// Clamp v to a maximum of 1023 (10 bit range); same negative-input caveat as
// clamp255.
static __inline int32_t clamp1023(int32_t v) {
  return (-(v >= 1023) | v) & 1023;
}

// clamp to max. The OR/AND trick assumes max has the form 2^n - 1 (all low
// bits set), so "v & max" is v for in-range values.
static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (-(v >= max) | v) & max;
}

// Branchless absolute value: m is all ones when v is negative, and
// (v + m) ^ m is then the two's complement negation of v.
static __inline uint32_t Abs(int32_t v) {
  int m = -(v < 0);
  return (v + m) ^ m;
}
#else  // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return (v < 0) ? 0 : v;
}

static __inline int32_t clamp255(int32_t v) {
  return (v > 255) ? 255 : v;
}

static __inline int32_t clamp1023(int32_t v) {
  return (v > 1023) ? 1023 : v;
}

static __inline int32_t ClampMax(int32_t v, int32_t max) {
  return (v > max) ? max : v;
}

static __inline uint32_t Abs(int32_t v) {
  return (v < 0) ? -v : v;
}
#endif  // USE_BRANCHLESS
// Clamp val to the 8 bit range [0, 255].
static __inline uint32_t Clamp(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp255(v));
}

// Clamp val to the 10 bit range [0, 1023].
static __inline uint32_t Clamp10(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp1023(v));
}
106
// Little Endian
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WRITEWORD(p, v) *(uint32_t*)(p) = v
#else
// Fallback for big-endian (or unknown-endian) targets: write the 32 bit
// value byte by byte in little-endian order so packed pixel output matches
// the little-endian fast path above.
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
120
// Expand 24 bit RGB24 (B, G, R in memory) to 32 bit ARGB, appending an
// opaque alpha byte to each pixel.
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = src_rgb24[0];  // B
    dst_argb[1] = src_rgb24[1];  // G
    dst_argb[2] = src_rgb24[2];  // R
    dst_argb[3] = 255u;          // A: fully opaque
    src_rgb24 += 3;
    dst_argb += 4;
  }
}
135
// Convert RAW (R, G, B in memory) to ARGB (B, G, R, A in memory): swap the
// red and blue bytes and append an opaque alpha.
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[0] = src_raw[2];  // B
    dst_argb[1] = src_raw[1];  // G
    dst_argb[2] = src_raw[0];  // R
    dst_argb[3] = 255u;        // opaque alpha
    src_raw += 3;
    dst_argb += 4;
  }
}
150
// Convert RAW (R, G, B in memory) to RGBA (A, B, G, R in memory), placing
// an opaque alpha in the leading byte.
void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgba[0] = 255u;        // opaque alpha first
    dst_rgba[1] = src_raw[2];  // B
    dst_rgba[2] = src_raw[1];  // G
    dst_rgba[3] = src_raw[0];  // R
    src_raw += 3;
    dst_rgba += 4;
  }
}
165
// Convert RAW (R, G, B in memory) to RGB24 (B, G, R in memory) by swapping
// the first and third byte of every 3 byte pixel.
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb24[0] = src_raw[2];  // B
    dst_rgb24[1] = src_raw[1];  // G
    dst_rgb24[2] = src_raw[0];  // R
    src_raw += 3;
    dst_rgb24 += 3;
  }
}
179
// Unpack 16 bit 5:6:5 RGB565 pixels into 8 bit ARGB with opaque alpha.
// Channels are widened by replicating their high bits into the low bits.
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t b5 = src_rgb565[0] & 0x1f;
    uint8_t g6 =
        (uint8_t)((src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r5 = src_rgb565[1] >> 3;
    dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8_t)((g6 << 2) | (g6 >> 4));
    dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = 255u;
    src_rgb565 += 2;
    dst_argb += 4;
  }
}
196
// Unpack 16 bit 1:5:5:5 ARGB1555 pixels into 8 bit ARGB. Color channels
// are widened by bit replication; the 1 bit alpha maps to 0 or 255.
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t b5 = src_argb1555[0] & 0x1f;
    uint8_t g5 =
        (uint8_t)((src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
    uint8_t r5 = (src_argb1555[1] & 0x7c) >> 2;
    dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8_t)((g5 << 3) | (g5 >> 2));
    dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
    // Alpha bit set -> 255, clear -> 0 (same as negating the 0/1 bit).
    dst_argb[3] = (src_argb1555[1] & 0x80) ? 255u : 0u;
    src_argb1555 += 2;
    dst_argb += 4;
  }
}
214
// Unpack 16 bit 4:4:4:4 ARGB4444 pixels into 8 bit ARGB by duplicating each
// nibble into both halves of the output byte (n -> n * 0x11).
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t lo = src_argb4444[0];
    uint8_t hi = src_argb4444[1];
    dst_argb[0] = (uint8_t)((lo & 0x0f) * 0x11);  // B
    dst_argb[1] = (uint8_t)((lo >> 4) * 0x11);    // G
    dst_argb[2] = (uint8_t)((hi & 0x0f) * 0x11);  // R
    dst_argb[3] = (uint8_t)((hi >> 4) * 0x11);    // A
    src_argb4444 += 2;
    dst_argb += 4;
  }
}
232
// Convert AR30 (2 bit A, 10 bit R/G/B; B in the low bits) to 8 bit ARGB.
// The top 8 of each 10 bit channel are kept; alpha replicates 2 bits to 8.
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;   // top 8 of 10 bit B
    uint32_t g = (ar30 >> 12) & 0xff;  // top 8 of 10 bit G
    uint32_t r = (ar30 >> 22) & 0xff;  // top 8 of 10 bit R
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t argb = b | (g << 8) | (r << 16) | (a << 24);
    // Store via memcpy for symmetry with the load: dst_argb may be
    // unaligned, and a direct store through a cast uint32_t* is undefined
    // behavior on targets that require alignment.
    memcpy(dst_argb, &argb, sizeof argb);
    dst_argb += 4;
    src_ar30 += 4;
  }
}
247
// Convert AR30 (2 bit A, 10 bit R/G/B; B in the low bits) to 8 bit ABGR,
// i.e. the same unpack as AR30ToARGB but with R and B output swapped.
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;   // top 8 of 10 bit B
    uint32_t g = (ar30 >> 12) & 0xff;  // top 8 of 10 bit G
    uint32_t r = (ar30 >> 22) & 0xff;  // top 8 of 10 bit R
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    uint32_t abgr = r | (g << 8) | (b << 16) | (a << 24);
    // memcpy store avoids the undefined behavior of an unaligned
    // *(uint32_t*) write through the byte pointer.
    memcpy(dst_abgr, &abgr, sizeof abgr);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}
262
// Swap the 10 bit R and B channels of AR30 to produce AB30; the G and A
// bits (mask 0xc00ffc00) stay in place.
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;  // G and A bits, unchanged
    uint32_t r = (ar30 >> 20) & 0x3ff;
    uint32_t ab30 = r | ga | (b << 20);
    // memcpy store avoids the undefined behavior of an unaligned
    // *(uint32_t*) write through the byte pointer.
    memcpy(dst_ab30, &ab30, sizeof ab30);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}
276
// Drop the alpha byte of each ARGB pixel, producing RGB24 (B, G, R order
// is preserved).
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[0] = src_argb[0];  // B
    dst_rgb[1] = src_argb[1];  // G
    dst_rgb[2] = src_argb[2];  // R
    src_argb += 4;
    dst_rgb += 3;
  }
}
290
// Convert ARGB to RAW (R, G, B in memory): drop alpha and swap the red and
// blue bytes.
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[0] = src_argb[2];  // R
    dst_rgb[1] = src_argb[1];  // G
    dst_rgb[2] = src_argb[0];  // B
    src_argb += 4;
    dst_rgb += 3;
  }
}
304
// Pack 8 bit ARGB pixels into 16 bit 5:6:5 RGB565, two pixels per loop.
// Output bytes are written explicitly in little-endian order, which matches
// both the former WRITEWORD fast path (a uint32_t store on little-endian
// hosts) and its byte-wise big-endian fallback, while avoiding unaligned
// stores through cast pointers.
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Compute in uint32_t: with int arithmetic, r1 << 27 (r1 up to 31) can
    // exceed INT_MAX, which is signed-overflow undefined behavior.
    uint32_t b0 = src_argb[0] >> 3;
    uint32_t g0 = src_argb[1] >> 2;
    uint32_t r0 = src_argb[2] >> 3;
    uint32_t b1 = src_argb[4] >> 3;
    uint32_t g1 = src_argb[5] >> 2;
    uint32_t r1 = src_argb[6] >> 3;
    uint32_t v = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                 (r1 << 27);
    dst_rgb[0] = (uint8_t)(v & 0xff);
    dst_rgb[1] = (uint8_t)((v >> 8) & 0xff);
    dst_rgb[2] = (uint8_t)((v >> 16) & 0xff);
    dst_rgb[3] = (uint8_t)((v >> 24) & 0xff);
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    // Trailing odd pixel: one 16 bit value, stored little-endian.
    uint32_t b0 = src_argb[0] >> 3;
    uint32_t g0 = src_argb[1] >> 2;
    uint32_t r0 = src_argb[2] >> 3;
    uint32_t v = b0 | (g0 << 5) | (r0 << 11);
    dst_rgb[0] = (uint8_t)(v & 0xff);
    dst_rgb[1] = (uint8_t)((v >> 8) & 0xff);
  }
}
326
327 // dither4 is a row of 4 values from 4x4 dither matrix.
328 // The 4x4 matrix contains values to increase RGB. When converting to
329 // fewer bits (565) this provides an ordered dither.
330 // The order in the 4x4 matrix in first byte is upper left.
331 // The 4 values are passed as an int, then referenced as an array, so
// endian will not affect the order of the original matrix. But the dither4
// will contain the first pixel in the lower byte for little endian
// or in the upper byte for big endian.
// Pack ARGB into RGB565 while adding a per-column dither value (from the
// 4 entry dither4 matrix row) to each channel before quantizing. Values
// are clamped to 255 after the dither is added.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             const uint32_t dither4,
                             int width) {
  const unsigned char* dither = (const unsigned char*)(&dither4);
  int x;
  for (x = 0; x < width - 1; x += 2) {
    int d0 = dither[x & 3];
    int d1 = dither[(x + 1) & 3];
    uint16_t p0 = (uint16_t)((clamp255(src_argb[0] + d0) >> 3) |
                             ((clamp255(src_argb[1] + d0) >> 2) << 5) |
                             ((clamp255(src_argb[2] + d0) >> 3) << 11));
    uint16_t p1 = (uint16_t)((clamp255(src_argb[4] + d1) >> 3) |
                             ((clamp255(src_argb[5] + d1) >> 2) << 5) |
                             ((clamp255(src_argb[6] + d1) >> 3) << 11));
    *(uint16_t*)(dst_rgb + 0) = p0;
    *(uint16_t*)(dst_rgb + 2) = p1;
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    int d0 = dither[(width - 1) & 3];
    *(uint16_t*)(dst_rgb) =
        (uint16_t)((clamp255(src_argb[0] + d0) >> 3) |
                   ((clamp255(src_argb[1] + d0) >> 2) << 5) |
                   ((clamp255(src_argb[2] + d0) >> 3) << 11));
  }
}
362
// Pack 8 bit ARGB into 16 bit 1:5:5:5 ARGB1555, two pixels per iteration
// with a trailing odd-pixel case.
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint16_t p0 = (uint16_t)((src_argb[0] >> 3) | ((src_argb[1] >> 3) << 5) |
                             ((src_argb[2] >> 3) << 10) |
                             ((src_argb[3] >> 7) << 15));
    uint16_t p1 = (uint16_t)((src_argb[4] >> 3) | ((src_argb[5] >> 3) << 5) |
                             ((src_argb[6] >> 3) << 10) |
                             ((src_argb[7] >> 7) << 15));
    *(uint16_t*)(dst_rgb + 0) = p0;
    *(uint16_t*)(dst_rgb + 2) = p1;
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    *(uint16_t*)(dst_rgb) =
        (uint16_t)((src_argb[0] >> 3) | ((src_argb[1] >> 3) << 5) |
                   ((src_argb[2] >> 3) << 10) | ((src_argb[3] >> 7) << 15));
  }
}
387
// Pack 8 bit ARGB into 16 bit 4:4:4:4 ARGB4444, keeping the top nibble of
// each channel; two pixels per iteration with a trailing odd-pixel case.
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint16_t p0 = (uint16_t)((src_argb[0] >> 4) | ((src_argb[1] >> 4) << 4) |
                             ((src_argb[2] >> 4) << 8) |
                             ((src_argb[3] >> 4) << 12));
    uint16_t p1 = (uint16_t)((src_argb[4] >> 4) | ((src_argb[5] >> 4) << 4) |
                             ((src_argb[6] >> 4) << 8) |
                             ((src_argb[7] >> 4) << 12));
    *(uint16_t*)(dst_rgb + 0) = p0;
    *(uint16_t*)(dst_rgb + 2) = p1;
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    *(uint16_t*)(dst_rgb) =
        (uint16_t)((src_argb[0] >> 4) | ((src_argb[1] >> 4) << 4) |
                   ((src_argb[2] >> 4) << 8) | ((src_argb[3] >> 4) << 12));
  }
}
412
// Pack 8 bit ABGR into AR30: each 8 bit channel is widened to 10 bits by
// replicating its top 2 bits; byte 0 lands in bits 20..29, byte 1 in bits
// 10..19, byte 2 in bits 0..9, and alpha keeps its top 2 bits.
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    uint32_t ar30 = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
    // memcpy store avoids the undefined behavior of an unaligned
    // *(uint32_t*) write through the byte pointer.
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}
425
// Pack 8 bit ARGB into AR30: each 8 bit channel is widened to 10 bits by
// replicating its top 2 bits; byte 0 lands in bits 0..9, byte 1 in bits
// 10..19, byte 2 in bits 20..29, and alpha keeps its top 2 bits.
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    uint32_t ar30 = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    // memcpy store avoids the undefined behavior of an unaligned
    // *(uint32_t*) write through the byte pointer.
    memcpy(dst_ar30, &ar30, sizeof ar30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
438
// Widen 8 bit ARGB to 16 bit AR64 by duplicating each byte into both
// halves of the 16 bit channel (v * 0x0101).
void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_ar64[c] = (uint16_t)(src_argb[c] * 0x0101);
    }
    dst_ar64 += 4;
    src_argb += 4;
  }
}
450
// Widen 8 bit ARGB to 16 bit AB64: duplicate each byte into a 16 bit
// channel (v * 0x0101) and swap the red and blue channels on output.
void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint16_t b = (uint16_t)(src_argb[0] * 0x0101);
    uint16_t g = (uint16_t)(src_argb[1] * 0x0101);
    uint16_t r = (uint16_t)(src_argb[2] * 0x0101);
    uint16_t a = (uint16_t)(src_argb[3] * 0x0101);
    dst_ab64[0] = r;
    dst_ab64[1] = g;
    dst_ab64[2] = b;
    dst_ab64[3] = a;
    dst_ab64 += 4;
    src_argb += 4;
  }
}
462
// Narrow 16 bit AR64 to 8 bit ARGB by keeping the high byte of each
// channel.
void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (uint8_t)(src_ar64[c] >> 8);
    }
    dst_argb += 4;
    src_ar64 += 4;
  }
}
474
// Narrow 16 bit AB64 to 8 bit ARGB: keep the high byte of each channel and
// swap the red and blue channels on output.
void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = (uint8_t)(src_ab64[0] >> 8);
    uint8_t g = (uint8_t)(src_ab64[1] >> 8);
    uint8_t b = (uint8_t)(src_ab64[2] >> 8);
    uint8_t a = (uint8_t)(src_ab64[3] >> 8);
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    dst_argb += 4;
    src_ab64 += 4;
  }
}
486
487 // TODO(fbarchard): Make shuffle compatible with SIMD versions
// Reorder the four 16 bit channels of AR64 pixels according to shuffler.
// shuffler holds byte offsets (as for the 8 bit shuffle rows); dividing by
// 2 converts them to uint16_t element indices.
void AR64ShuffleRow_C(const uint8_t* src_ar64,
                      uint8_t* dst_ar64,
                      const uint8_t* shuffler,
                      int width) {
  const uint16_t* src = (const uint16_t*)src_ar64;
  uint16_t* dst = (uint16_t*)dst_ar64;
  const int idx0 = shuffler[0] / 2;
  const int idx1 = shuffler[2] / 2;
  const int idx2 = shuffler[4] / 2;
  const int idx3 = shuffler[6] / 2;
  int x;
  for (x = 0; x < width / 2; ++x) {
    // Read all four lanes before writing so in-place shuffles are safe.
    uint16_t v0 = src[idx0];
    uint16_t v1 = src[idx1];
    uint16_t v2 = src[idx2];
    uint16_t v3 = src[idx3];
    dst[0] = v0;
    dst[1] = v1;
    dst[2] = v2;
    dst[3] = v3;
    src += 4;
    dst += 4;
  }
}
514
#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
}
#else
// 8 bit
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
//         0x7e80) >> 8;

// 0x1080 = (16 << 8) + 0x80: the limited-range +16 offset plus rounding.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}
#endif
531
// Average two values with rounding.
#define AVGB(a, b) (((a) + (b) + 1) >> 1)

// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
#ifdef LIBYUV_RGBTOU_TRUNCATE
// 0x8000 = 128 << 8: center on 128 with no rounding term (truncating, to
// match the x86 SIMD path).
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
}
#else
// TODO(fbarchard): Add rounding to x86 SIMD and use this
// 0x8080 = (128 << 8) + 0x80: center on 128 plus rounding.
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
}
#endif

// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
// Variants that take 2x-scale inputs (sums of two pixels, see the ARM-style
// subsample path below); coefficients are halved to compensate.
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
  return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
  return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
}
#endif
561
// ARGBToY_C and ARGBToUV_C
// Intel version mimic SSE/AVX which does 2 pavgb
// MAKEROWY(NAME, R, G, B, BPP) expands to NAME##ToYRow_C and
// NAME##ToUVRow_C. R/G/B are the byte offsets of each channel within a
// pixel and BPP is bytes per pixel. The UV row averages a 2x2 block of
// pixels (two columns by the two rows src_rgb and src_rgb + stride) per
// output U/V sample, with a trailing 2x1 case for odd widths.
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                 \
    for (x = 0; x < width; ++x) {                                          \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                      \
      dst_y += 1;                                                          \
    }                                                                      \
  }                                                                        \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
    int x;                                                                 \
    for (x = 0; x < width - 1; x += 2) {                                   \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                     \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));        \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                     \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));        \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                     \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));        \
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
      src_rgb += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                 \
      dst_u += 1;                                                          \
      dst_v += 1;                                                          \
    }                                                                      \
    if (width & 1) {                                                       \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                          \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                          \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                          \
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
    }                                                                      \
  }
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                 \
    for (x = 0; x < width; ++x) {                                          \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                      \
      dst_y += 1;                                                          \
    }                                                                      \
  }                                                                        \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
    int x;                                                                 \
    for (x = 0; x < width - 1; x += 2) {                                   \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +         \
                     src_rgb1[B + BPP] + 1) >>                             \
                    1;                                                     \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +         \
                     src_rgb1[G + BPP] + 1) >>                             \
                    1;                                                     \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +         \
                     src_rgb1[R + BPP] + 1) >>                             \
                    1;                                                     \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                     \
      src_rgb += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                 \
      dst_u += 1;                                                          \
      dst_v += 1;                                                          \
    }                                                                      \
    if (width & 1) {                                                       \
      uint16_t ab = src_rgb[B] + src_rgb1[B];                              \
      uint16_t ag = src_rgb[G] + src_rgb1[G];                              \
      uint16_t ar = src_rgb[R] + src_rgb1[R];                              \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                     \
    }                                                                      \
  }
#endif

// Instantiate Y and UV row converters for each pixel layout:
// (name, R offset, G offset, B offset, bytes per pixel).
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
649
// JPeg uses a variation on BT.601-1 full range
// y = 0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
// v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPeg 7 bit Y (deprecated)
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPeg 8 bit Y:
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPeg 8 bit U:
// b 0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPeg 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r 0.50000 * 255 = 127.5 = 127

#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
#else
// 8 bit full-range JPeg Y; +128 is the rounding term (no +16 offset).
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#endif

#if defined(LIBYUV_ARGBTOUV_PAVGB)
// Full-range JPeg U/V; 0x8080 = (128 << 8) + 0x80 centers on 128 and rounds.
static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
// Variants taking 2x-scale inputs (sums of two pixels); coefficients are
// halved to compensate.
static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif
702
// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimic SSE/AVX which does 2 pavgb
// MAKEROWYJ is the full-range (JPeg) counterpart of MAKEROWY: same 2x2
// subsampling structure, but using the RGBToYJ/UJ/VJ coefficients.
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                  \
    for (x = 0; x < width; ++x) {                                           \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                       \
      dst_y += 1;                                                           \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
    int x;                                                                  \
    for (x = 0; x < width - 1; x += 2) {                                    \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                      \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));         \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                      \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));         \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                      \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));         \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
      src_rgb += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                  \
      dst_u += 1;                                                           \
      dst_v += 1;                                                           \
    }                                                                       \
    if (width & 1) {                                                        \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                           \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                           \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                           \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
    }                                                                       \
  }
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                  \
    for (x = 0; x < width; ++x) {                                           \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                       \
      dst_y += 1;                                                           \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
    int x;                                                                  \
    for (x = 0; x < width - 1; x += 2) {                                    \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +          \
                     src_rgb1[B + BPP] + 1) >>                              \
                    1;                                                      \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +          \
                     src_rgb1[G + BPP] + 1) >>                              \
                    1;                                                      \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +          \
                     src_rgb1[R + BPP] + 1) >>                              \
                    1;                                                      \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                     \
      src_rgb += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                  \
      dst_u += 1;                                                           \
      dst_v += 1;                                                           \
    }                                                                       \
    if (width & 1) {                                                        \
      uint16_t ab = (src_rgb[B] + src_rgb1[B]);                             \
      uint16_t ag = (src_rgb[G] + src_rgb1[G]);                             \
      uint16_t ar = (src_rgb[R] + src_rgb1[R]);                             \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                     \
    }                                                                       \
  }

#endif

// Instantiate JPeg-range Y and UV row converters:
// (name, R offset, G offset, B offset, bytes per pixel).
MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ
789
// Convert a row of RGB565 pixels to limited-range Y: unpack 5:6:5, widen
// each channel to 8 bits by bit replication, then apply RGBToY.
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t b5 = src_rgb565[0] & 0x1f;
    uint8_t g6 =
        (uint8_t)((src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r5 = src_rgb565[1] >> 3;
    dst_y[0] = RGBToY((uint8_t)((r5 << 3) | (r5 >> 2)),
                      (uint8_t)((g6 << 2) | (g6 >> 4)),
                      (uint8_t)((b5 << 3) | (b5 >> 2)));
    src_rgb565 += 2;
    dst_y += 1;
  }
}
804
// Convert a row of ARGB1555 pixels to limited-range Y: unpack the 5 bit
// channels (alpha ignored), widen by bit replication, then apply RGBToY.
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t b5 = src_argb1555[0] & 0x1f;
    uint8_t g5 =
        (uint8_t)((src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
    uint8_t r5 = (src_argb1555[1] & 0x7c) >> 2;
    dst_y[0] = RGBToY((uint8_t)((r5 << 3) | (r5 >> 2)),
                      (uint8_t)((g5 << 3) | (g5 >> 2)),
                      (uint8_t)((b5 << 3) | (b5 >> 2)));
    src_argb1555 += 2;
    dst_y += 1;
  }
}
819
// Convert a row of ARGB4444 pixels to limited-range Y: unpack the 4 bit
// channels (alpha ignored), duplicate each nibble, then apply RGBToY.
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t b4 = src_argb4444[0] & 0x0f;
    uint8_t g4 = src_argb4444[0] >> 4;
    uint8_t r4 = src_argb4444[1] & 0x0f;
    dst_y[0] = RGBToY((uint8_t)((r4 << 4) | r4), (uint8_t)((g4 << 4) | g4),
                      (uint8_t)((b4 << 4) | b4));
    src_argb4444 += 2;
    dst_y += 1;
  }
}
834
// Subsample a 2x2 block of RGB565 pixels (two columns by the rows at
// src_rgb565 and src_rgb565 + src_stride_rgb565) to one U and one V sample.
// Odd widths average the last column's 2x1 block.
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the 2x2 block of 5:6:5 pixels.
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b1 = src_rgb565[2] & 0x1f;
    uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
    uint8_t r1 = src_rgb565[3] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;
    uint8_t b3 = next_rgb565[2] & 0x1f;
    uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
    uint8_t r3 = next_rgb565[3] >> 3;

    // Widen each channel to 8 bits by replicating its high bits.
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 2) | (g1 >> 4);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 2) | (g3 >> 4);
    r3 = (r3 << 3) | (r3 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    // Pairwise rounding averages (x86 pavgb style), then full-scale U/V.
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Sum / 2 (ARM style) with 2x-scale coefficients in RGB2xToU/V.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Last column of an odd-width row: average only the vertical pair.
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
918
// Subsamples two rows of ARGB1555 to one row of U and V: each 2x2 block of
// pixels produces one U and one V sample. src_stride_argb1555 is the byte
// offset from the first row to the second. width is in pixels; an odd
// final column is averaged vertically only (2x1).
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the 5 bit B, G and R fields of the four pixels. Green spans
    // both bytes of each little-endian 16 bit pixel; bit 15 is alpha and
    // is ignored.
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b1 = src_argb1555[2] & 0x1f;
    uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b3 = next_argb1555[2] & 0x1f;
    uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;

    // Expand each 5 bit channel to 8 bits by replicating the top bits.
    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 3) | (g1 >> 2);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 3) | (g3 >> 2);
    r3 = (r3 << 3) | (r3 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    // Rounded pairwise averages, matching the x86 PAVGB-based SIMD path.
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Rounded sum of the four samples, left at 2x scale for RGB2xToU/V.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb1555 += 4;  // Advance 2 pixels (2 bytes each).
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Odd final column: average the 2x1 column only.
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
1002
// Subsamples two rows of ARGB4444 to one row of U and V: each 2x2 block of
// pixels produces one U and one V sample. src_stride_argb4444 is the byte
// offset from the first row to the second. An odd final column is averaged
// vertically only (2x1).
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Unpack the 4 bit B, G and R nibbles of the four pixels; the alpha
    // nibble is ignored.
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b1 = src_argb4444[2] & 0x0f;
    uint8_t g1 = src_argb4444[2] >> 4;
    uint8_t r1 = src_argb4444[3] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b3 = next_argb4444[2] & 0x0f;
    uint8_t g3 = next_argb4444[2] >> 4;
    uint8_t r3 = next_argb4444[3] & 0x0f;

    // Expand each nibble to 8 bits by duplication (0xN -> 0xNN).
    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b1 = (b1 << 4) | b1;
    g1 = (g1 << 4) | g1;
    r1 = (r1 << 4) | r1;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;
    b3 = (b3 << 4) | b3;
    g3 = (g3 << 4) | g3;
    r3 = (r3 << 4) | r3;

#if LIBYUV_ARGBTOUV_PAVGB
    // Rounded pairwise averages, matching the x86 PAVGB-based SIMD path.
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    // Rounded sum of the four samples, left at 2x scale for RGB2xToU/V.
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb4444 += 4;  // Advance 2 pixels (2 bytes each).
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    // Odd final column: average the 2x1 column only.
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;

    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}
1086
// Computes one U and one V sample per ARGB pixel (4:4:4, no subsampling).
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t blue = src_argb[i * 4 + 0];
    const uint8_t green = src_argb[i * 4 + 1];
    const uint8_t red = src_argb[i * 4 + 2];
    dst_u[i] = RGBToU(red, green, blue);
    dst_v[i] = RGBToV(red, green, blue);
  }
}
1103
// Replaces each pixel's B, G and R with the grey value from RGBToYJ,
// copying alpha through unchanged.
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t* in = src_argb + i * 4;
    uint8_t* out = dst_argb + i * 4;
    const uint8_t grey = RGBToYJ(in[2], in[1], in[0]);
    out[0] = grey;
    out[1] = grey;
    out[2] = grey;
    out[3] = in[3];
  }
}
1114
// Convert a row of image to Sepia tone, in place. Alpha is untouched.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* px = dst_argb + i * 4;
    // Read all three channels before any write, since this is in place.
    const int blue = px[0];
    const int green = px[1];
    const int red = px[2];
    // 7 bit fixed point weights. The blue sum cannot exceed 255
    // (255 * (17 + 68 + 35) >> 7 == 239), so it is stored unclamped.
    px[0] = (uint8_t)((blue * 17 + green * 68 + red * 35) >> 7);
    px[1] = (uint8_t)clamp255((blue * 22 + green * 88 + red * 45) >> 7);
    px[2] = (uint8_t)clamp255((blue * 24 + green * 98 + red * 50) >> 7);
  }
}
1132
// Apply color matrix to a row of image. Matrix is signed 8 bit with 6
// fractional bits; each output channel is a clamped dot product of the
// input BGRA vector with one matrix row.
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          const int8_t* matrix_argb,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int in_b = src_argb[0];
    const int in_g = src_argb[1];
    const int in_r = src_argb[2];
    const int in_a = src_argb[3];
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      const int8_t* row = matrix_argb + ch * 4;
      const int sum =
          (in_b * row[0] + in_g * row[1] + in_r * row[2] + in_a * row[3]) >> 6;
      dst_argb[ch] = Clamp(sum);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
1165
// Apply color table to a row of image, in place. table_argb holds four
// interleaved 256-entry lookup tables, one per channel (B, G, R, A).
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* px = dst_argb + i * 4;
    px[0] = table_argb[px[0] * 4 + 0];
    px[1] = table_argb[px[1] * 4 + 1];
    px[2] = table_argb[px[2] * 4 + 2];
    px[3] = table_argb[px[3] * 4 + 3];
  }
}
1183
// Apply color table to a row of image, in place. Same layout as
// ARGBColorTableRow_C but the alpha channel is left unchanged.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* px = dst_argb + i * 4;
    px[0] = table_argb[px[0] * 4 + 0];
    px[1] = table_argb[px[1] * 4 + 1];
    px[2] = table_argb[px[2] * 4 + 2];
  }
}
1199
// Quantizes B, G and R of each pixel in place: the channel is scaled by a
// 16.16 fixed point factor, snapped to an interval and offset. Alpha is
// untouched.
void ARGBQuantizeRow_C(uint8_t* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int ch;
    for (ch = 0; ch < 3; ++ch) {
      const int v = dst_argb[ch];
      dst_argb[ch] =
          (uint8_t)((v * scale >> 16) * interval_size + interval_offset);
    }
    dst_argb += 4;
  }
}
1216
// Scales each channel of src_argb by the matching channel of 'value'
// (B in bits 0-7, G in 8-15, R in 16-23, A in 24-31). Both operands are
// widened to 16 bits by byte replication (v * 0x101 == v | (v << 8)) and
// the top byte of the 32 bit product (>> 24) is the result.
void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = (value & 0xff) * 0x101;
  const uint32_t g_scale = ((value >> 8) & 0xff) * 0x101;
  const uint32_t r_scale = ((value >> 16) & 0xff) * 0x101;
  const uint32_t a_scale = (value >> 24) * 0x101;

  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = src_argb[0] * 0x101u;
    const uint32_t g = src_argb[1] * 0x101u;
    const uint32_t r = src_argb[2] * 0x101u;
    const uint32_t a = src_argb[3] * 0x101u;
    dst_argb[0] = (uint8_t)((b * b_scale) >> 24);
    dst_argb[1] = (uint8_t)((g * g_scale) >> 24);
    dst_argb[2] = (uint8_t)((r * r_scale) >> 24);
    dst_argb[3] = (uint8_t)((a * a_scale) >> 24);
    src_argb += 4;
    dst_argb += 4;
  }
}
1245
// Multiplies two ARGB rows channel-wise: the first operand is replicated
// to 16 bits (v * 0x101 == v | (v << 8)) and the product's top word
// (>> 16) is kept, approximating src * src1 / 255.
void ARGBMultiplyRow_C(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      const uint32_t wide = src_argb[ch] * 0x101u;
      const uint32_t scale = src_argb1[ch];
      dst_argb[ch] = (uint8_t)((wide * scale) >> 16);
    }
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
1274
// Adds two ARGB rows channel-wise, clamping each sum to at most 255.
void ARGBAddRow_C(const uint8_t* src_argb,
                  const uint8_t* src_argb1,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (uint8_t)clamp255(src_argb[ch] + src_argb1[ch]);
    }
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
1301
// Subtracts the second ARGB row from the first channel-wise, clamping
// each difference to at least 0.
void ARGBSubtractRow_C(const uint8_t* src_argb,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int ch;
    for (ch = 0; ch < 4; ++ch) {
      dst_argb[ch] = (uint8_t)clamp0(src_argb[ch] - src_argb1[ch]);
    }
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
1328
// Sobel functions which mimics SSSE3.
// Horizontal gradient: |d0 + 2*d1 + d2| where each d is the difference
// between column i and column i+2 of one of the three input rows.
// Reads i+2 of each row, so callers must provide 2 extra columns.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int top = src_y0[i] - src_y0[i + 2];
    const int mid = src_y1[i] - src_y1[i + 2];
    const int bot = src_y2[i] - src_y2[i + 2];
    dst_sobelx[i] = (uint8_t)clamp255(Abs(top + 2 * mid + bot));
  }
}
1350
// Vertical gradient: |d0 + 2*d1 + d2| where each d is the difference
// between the two input rows at columns i, i+1 and i+2.
// Reads i+2 of each row, so callers must provide 2 extra columns.
void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int d0 = src_y0[i + 0] - src_y1[i + 0];
    const int d1 = src_y0[i + 1] - src_y1[i + 1];
    const int d2 = src_y0[i + 2] - src_y1[i + 2];
    dst_sobely[i] = (uint8_t)clamp255(Abs(d0 + 2 * d1 + d2));
  }
}
1370
// Combines X and Y gradient magnitudes (clamped sum) into a grey ARGB
// pixel with opaque alpha.
void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t mag = (uint8_t)clamp255(src_sobelx[i] + src_sobely[i]);
    dst_argb[i * 4 + 0] = mag;
    dst_argb[i * 4 + 1] = mag;
    dst_argb[i * 4 + 2] = mag;
    dst_argb[i * 4 + 3] = (uint8_t)(255u);
  }
}
1387
// Combines X and Y gradient magnitudes (clamped sum) into a single plane.
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint8_t)clamp255(src_sobelx[i] + src_sobely[i]);
  }
}
1400
// Interleaves the gradients into ARGB: B = Y gradient, G = clamped sum,
// R = X gradient, A = opaque.
void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int gx = src_sobelx[i];
    const int gy = src_sobely[i];
    dst_argb[i * 4 + 0] = (uint8_t)(gy);
    dst_argb[i * 4 + 1] = (uint8_t)clamp255(gx + gy);
    dst_argb[i * 4 + 2] = (uint8_t)(gx);
    dst_argb[i * 4 + 3] = (uint8_t)(255u);
  }
}
1417
// Expands a grey (J400) row to ARGB: B = G = R = Y, alpha opaque.
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t y = src_y[i];
    dst_argb[i * 4 + 0] = y;
    dst_argb[i * 4 + 1] = y;
    dst_argb[i * 4 + 2] = y;
    dst_argb[i * 4 + 3] = 255u;
  }
}
1429
// Macros to create SIMD specific yuv to rgb conversion constants.

// clang-format off

#if defined(__aarch64__) || defined(__arm__)
// Bias values include subtract 128 from U and V, bias from Y and rounding.
// For B and R bias is negative. For G bias is positive.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                             \
  {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},                    \
   {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
    0, 0}}
#else
// x86 layout: each coefficient is replicated across interleaved byte pairs
// so the tables can be loaded directly into SIMD registers.
#define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                     \
  {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,          \
    UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},         \
   {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,  \
    UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
   {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,          \
    0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},         \
   {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
   {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
#endif

// clang-format on

// Emits a constant set and its chroma-swapped counterpart:
// kYuv<name>Constants and kYvu<name>Constants (U/V coefficients swapped).
#define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR)            \
  const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR);                   \
  const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
      YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);

// TODO(fbarchard): Generate SIMD structures from float matrix.

// BT.601 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.596
// G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
// B = (Y - 16) * 1.164 + U * 2.018
// KR = 0.299; KB = 0.114

// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
#define UB 129 /* round(2.018 * 64) */
#else
#define UB 128 /* max(128, round(2.018 * 64)) */
#endif
#define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */
#define VR 102 /* round(1.596 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.601 full range YUV to RGB reference (aka JPEG)
// * R = Y + V * 1.40200
// * G = Y - U * 0.34414 - V * 0.71414
// * B = Y + U * 1.77200
// KR = 0.299; KB = 0.114

// U and V contributions to R,G,B.
#define UB 113 /* round(1.77200 * 64) */
#define UG 22 /* round(0.34414 * 64) */
#define VG 46 /* round(0.71414 * 64) */
#define VR 90 /* round(1.40200 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */

MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.709 limited range YUV to RGB reference
// R = (Y - 16) * 1.164 + V * 1.793
// G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
// B = (Y - 16) * 1.164 + U * 2.112
// KR = 0.2126, KB = 0.0722

// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
#define UB 135 /* round(2.112 * 64) */
#else
#define UB 128 /* max(128, round(2.112 * 64)) */
#endif
#define UG 14 /* round(0.213 * 64) */
#define VG 34 /* round(0.533 * 64) */
#define VR 115 /* round(1.793 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.709 full range YUV to RGB reference
// R = Y + V * 1.5748
// G = Y - U * 0.18732 - V * 0.46812
// B = Y + U * 1.8556
// KR = 0.2126, KB = 0.0722

// U and V contributions to R,G,B.
#define UB 119 /* round(1.8556 * 64) */
#define UG 12 /* round(0.18732 * 64) */
#define VG 30 /* round(0.46812 * 64) */
#define VR 101 /* round(1.5748 * 64) */

// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */

MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.2020 limited range YUV to RGB reference
// R = (Y - 16) * 1.164384 + V * 1.67867
// G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
// B = (Y - 16) * 1.164384 + U * 2.14177
// KR = 0.2627; KB = 0.0593

// U and V contributions to R,G,B.
#if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
#define UB 137 /* round(2.142 * 64) */
#else
#define UB 128 /* max(128, round(2.142 * 64)) */
#endif
#define UG 12 /* round(0.187326 * 64) */
#define VG 42 /* round(0.65042 * 64) */
#define VR 107 /* round(1.67867 * 64) */

// Y contribution to R,G,B. Scale and bias.
#define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */

MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// BT.2020 full range YUV to RGB reference
// R = Y + V * 1.474600
// G = Y - U * 0.164553 - V * 0.571353
// B = Y + U * 1.881400
// KR = 0.2627; KB = 0.0593

#define UB 120 /* round(1.881400 * 64) */
#define UG 11 /* round(0.164553 * 64) */
#define VG 37 /* round(0.571353 * 64) */
#define VR 94 /* round(1.474600 * 64) */

// Y contribution to R,G,B. Scale and bias. (same as jpeg)
#define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
#define YB 32 /* 64 / 2 */

MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)

#undef YG
#undef YB
#undef UB
#undef UG
#undef VG
#undef VR

// NOTE(review): BB, BG and BR are not defined anywhere above; these
// #undefs appear to be leftovers (harmless: undef of an undefined macro).
#undef BB
#undef BG
#undef BR

#undef MAKEYUVCONSTANTS
1629
#if defined(__aarch64__) || defined(__arm__)
// Pulls the ARM-layout coefficients and biases (see YUVCONSTANTSBODY)
// into locals for CALC_RGB16.
#define LOAD_YUV_CONSTANTS           \
  int ub = yuvconstants->kUVCoeff[0]; \
  int vr = yuvconstants->kUVCoeff[1]; \
  int ug = yuvconstants->kUVCoeff[2]; \
  int vg = yuvconstants->kUVCoeff[3]; \
  int yg = yuvconstants->kRGBCoeffBias[0]; \
  int bb = yuvconstants->kRGBCoeffBias[1]; \
  int bg = yuvconstants->kRGBCoeffBias[2]; \
  int br = yuvconstants->kRGBCoeffBias[3]

// Expects y32, u, v in scope; produces b16/g16/r16 with 6 fractional
// bits. The 128 chroma bias is folded into the bb/bg/br bias terms.
#define CALC_RGB16                         \
  int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
  int b16 = y1 + (u * ub) - bb;            \
  int g16 = y1 + bg - (u * ug + v * vg);   \
  int r16 = y1 + (v * vr) - br
#else
// Pulls the x86-layout coefficients into locals for CALC_RGB16.
#define LOAD_YUV_CONSTANTS          \
  int ub = yuvconstants->kUVToB[0]; \
  int ug = yuvconstants->kUVToG[0]; \
  int vg = yuvconstants->kUVToG[1]; \
  int vr = yuvconstants->kUVToR[1]; \
  int yg = yuvconstants->kYToRgb[0]; \
  int yb = yuvconstants->kYBiasToRgb[0]

// Expects y32, u, v in scope; subtracts 128 from u and v via 8 bit
// wraparound (int8_t cast then -= 0x80) and produces b16/g16/r16 with
// 6 fractional bits.
#define CALC_RGB16                                \
  int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
  int8_t ui = (int8_t)u;                          \
  int8_t vi = (int8_t)v;                          \
  ui -= 0x80;                                     \
  vi -= 0x80;                                     \
  int b16 = y1 + (ui * ub);                       \
  int g16 = y1 - (ui * ug + vi * vg);             \
  int r16 = y1 + (vi * vr)
#endif
1665
// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and writes clamped 8 bit B, G and R. (The 16 bit
// intermediate from CALC_RGB16 carries 6 fractional bits, dropped by the
// >> 6 below.)
static __inline void YuvPixel(uint8_t y,
                              uint8_t u,
                              uint8_t v,
                              uint8_t* b,
                              uint8_t* g,
                              uint8_t* r,
                              const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Replicate y into 16 bits (y * 0x0101) before the fixed point scale.
  uint32_t y32 = y * 0x0101;
  CALC_RGB16;
  *b = Clamp((int32_t)(b16) >> 6);
  *g = Clamp((int32_t)(g16) >> 6);
  *r = Clamp((int32_t)(r16) >> 6);
}
1682
// Reads 8 bit YUV and leaves result as 16 bit (6 fractional bits),
// without clamping.
static __inline void YuvPixel8_16(uint8_t y,
                                  uint8_t u,
                                  uint8_t v,
                                  int* b,
                                  int* g,
                                  int* r,
                                  const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Replicate y into 16 bits (y * 0x0101) before the fixed point scale.
  uint32_t y32 = y * 0x0101;
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1698
// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit, without clamping.
static __inline void YuvPixel10_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Shift 10 bit luma up to the 16 bit range; reduce chroma to 8 bits,
  // clamping in case the input exceeds 10 bits.
  uint32_t y32 = y << 6;
  u = clamp255(u >> 2);
  v = clamp255(v >> 2);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1717
// C reference code that mimics the YUV 16 bit assembly.
// Reads 12 bit YUV and leaves result as 16 bit, without clamping.
static __inline void YuvPixel12_16(int16_t y,
                                   int16_t u,
                                   int16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Shift 12 bit luma up to the 16 bit range; reduce chroma to 8 bits,
  // clamping in case the input exceeds 12 bits.
  uint32_t y32 = y << 4;
  u = clamp255(u >> 4);
  v = clamp255(v >> 4);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1736
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int tmp_b, tmp_g, tmp_r;
  YuvPixel10_16(y, u, v, &tmp_b, &tmp_g, &tmp_r, yuvconstants);
  // Drop the 6 fractional bits and clamp each channel to [0, 255].
  *b = Clamp(tmp_b >> 6);
  *g = Clamp(tmp_g >> 6);
  *r = Clamp(tmp_r >> 6);
}
1754
// C reference code that mimics the YUV 12 bit assembly.
// Reads 12 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel12(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int tmp_b, tmp_g, tmp_r;
  YuvPixel12_16(y, u, v, &tmp_b, &tmp_g, &tmp_r, yuvconstants);
  // Drop the 6 fractional bits and clamp each channel to [0, 255].
  *b = Clamp(tmp_b >> 6);
  *g = Clamp(tmp_g >> 6);
  *r = Clamp(tmp_r >> 6);
}
1772
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 8 bit.
static __inline void YuvPixel16_8(uint16_t y,
                                  uint16_t u,
                                  uint16_t v,
                                  uint8_t* b,
                                  uint8_t* g,
                                  uint8_t* r,
                                  const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Luma is already 16 bit; reduce chroma to 8 bits (clamped).
  uint32_t y32 = y;
  u = clamp255(u >> 8);
  v = clamp255(v >> 8);
  CALC_RGB16;
  *b = Clamp((int32_t)(b16) >> 6);
  *g = Clamp((int32_t)(g16) >> 6);
  *r = Clamp((int32_t)(r16) >> 6);
}
1791
// C reference code that mimics the YUV 16 bit assembly.
// Reads 16 bit YUV and leaves result as 16 bit, without clamping.
static __inline void YuvPixel16_16(uint16_t y,
                                   uint16_t u,
                                   uint16_t v,
                                   int* b,
                                   int* g,
                                   int* r,
                                   const struct YuvConstants* yuvconstants) {
  LOAD_YUV_CONSTANTS;
  // Luma is already 16 bit; reduce chroma to 8 bits (clamped).
  uint32_t y32 = y;
  u = clamp255(u >> 8);
  v = clamp255(v >> 8);
  CALC_RGB16;
  *b = b16;
  *g = g16;
  *r = r16;
}
1810
1811 // C reference code that mimics the YUV assembly.
1812 // Reads 8 bit YUV and leaves result as 8 bit.
YPixel(uint8_t y,uint8_t * b,uint8_t * g,uint8_t * r,const struct YuvConstants * yuvconstants)1813 static __inline void YPixel(uint8_t y,
1814 uint8_t* b,
1815 uint8_t* g,
1816 uint8_t* r,
1817 const struct YuvConstants* yuvconstants) {
1818 #if defined(__aarch64__) || defined(__arm__)
1819 int yg = yuvconstants->kRGBCoeffBias[0];
1820 int ygb = yuvconstants->kRGBCoeffBias[4];
1821 #else
1822 int ygb = yuvconstants->kYBiasToRgb[0];
1823 int yg = yuvconstants->kYToRgb[0];
1824 #endif
1825 uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1826 *b = Clamp(((int32_t)(y1) + ygb) >> 6);
1827 *g = Clamp(((int32_t)(y1) + ygb) >> 6);
1828 *r = Clamp(((int32_t)(y1) + ygb) >> 6);
1829 }
1830
// Converts one row of 4:4:4 YUV to ARGB; alpha is set to opaque.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = rgb_buf + i * 4;
    YuvPixel(src_y[i], src_u[i], src_v[i], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = 255;
  }
}
1848
// Also used for 420
// Converts one row of 4:2:2 YUV to ARGB; each U/V sample is shared by a
// pair of pixels. Alpha is set to opaque.
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i + 2 <= width; i += 2) {
    uint8_t* out = rgb_buf + i * 4;
    YuvPixel(src_y[i], src_u[0], src_v[0], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = 255;
    YuvPixel(src_y[i + 1], src_u[0], src_v[0], out + 4, out + 5, out + 6,
             yuvconstants);
    out[7] = 255;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    // Odd final pixel uses the last chroma sample.
    uint8_t* out = rgb_buf + (width - 1) * 4;
    YuvPixel(src_y[width - 1], src_u[0], src_v[0], out + 0, out + 1, out + 2,
             yuvconstants);
    out[3] = 255;
  }
}
1875
// 10 bit YUV to ARGB
// Converts one row of 10 bit 4:2:2 YUV to 8 bit ARGB; each U/V sample is
// shared by a pair of pixels. Alpha is set to opaque.
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int i;
  for (i = 0; i + 2 <= width; i += 2) {
    uint8_t* out = rgb_buf + i * 4;
    YuvPixel10(src_y[i], src_u[0], src_v[0], out + 0, out + 1, out + 2,
               yuvconstants);
    out[3] = 255;
    YuvPixel10(src_y[i + 1], src_u[0], src_v[0], out + 4, out + 5, out + 6,
               yuvconstants);
    out[7] = 255;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    // Odd final pixel uses the last chroma sample.
    uint8_t* out = rgb_buf + (width - 1) * 4;
    YuvPixel10(src_y[width - 1], src_u[0], src_v[0], out + 0, out + 1, out + 2,
               yuvconstants);
    out[3] = 255;
  }
}
1902
// Convert one row of 10 bit I410 YUV (full resolution U/V) to 8 bit ARGB.
void I410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;  // Opaque alpha.
    src_y += 1;
    src_u += 1;
    src_v += 1;
    rgb_buf += 4;  // Advance 1 pixels.
  }
}
1920
// Convert one row of 10 bit I210 YUV plus a 10 bit alpha plane to ARGB.
// Alpha is reduced from 10 to 8 bits (>> 2) and clamped to 255.
void I210AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);  // 10 bit alpha -> 8 bit.
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = clamp255(src_a[1] >> 2);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);
  }
}
1948
// Convert one row of 10 bit I410 YUV (full resolution U/V) plus a 10 bit
// alpha plane to ARGB. Alpha is reduced to 8 bits (>> 2) and clamped.
void I410AlphaToARGBRow_C(const uint16_t* src_y,
                          const uint16_t* src_u,
                          const uint16_t* src_v,
                          const uint16_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = clamp255(src_a[0] >> 2);  // 10 bit alpha -> 8 bit.
    src_y += 1;
    src_u += 1;
    src_v += 1;
    src_a += 1;
    rgb_buf += 4;  // Advance 1 pixels.
  }
}
1968
1969 // 12 bit YUV to ARGB
// Convert one row of 12 bit I212 YUV to 8 bit ARGB. Two Y samples share one
// U/V pair; alpha is forced opaque.
void I212ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1995
// Pack 16 bit fixed-point B/G/R into one little-endian AR30 pixel
// (2 bit alpha, 10 bit R/G/B). Alpha bits are forced opaque (0xc0000000).
// Inputs come from the YuvPixel*_16 helpers.
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = b >> 4;  // convert 8 bit 10.6 to 10 bit.
  g = g >> 4;
  r = r >> 4;
  b = Clamp10(b);  // Clamp each channel to [0, 1023].
  g = Clamp10(g);
  r = Clamp10(r);
  ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
  (*(uint32_t*)rgb_buf) = ar30;
}
2007
2008 // 10 bit YUV to 10 bit AR30
// Convert one row of 10 bit I210 YUV to 10 bit AR30.
// Two Y samples share one U/V pair.
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2034
2035 // 12 bit YUV to 10 bit AR30
// Convert one row of 12 bit I212 YUV to 10 bit AR30.
// Two Y samples share one U/V pair.
void I212ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2061
// Convert one row of 10 bit I410 YUV (full resolution U/V) to 10 bit AR30.
void I410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width; ++x) {
    YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    src_y += 1;
    src_u += 1;
    src_v += 1;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
2081
2082 // P210 has 10 bits in msb of 16 bit NV12 style layout.
// Convert one row of P210 (10 bits in msb of 16 bit, interleaved UV plane,
// NV12 style layout) to 8 bit ARGB. Two Y samples share one UV pair.
void P210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    dst_argb[3] = 255;
    YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], dst_argb + 4, dst_argb + 5,
                 dst_argb + 6, yuvconstants);
    dst_argb[7] = 255;
    src_y += 2;
    src_uv += 2;
    dst_argb += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    dst_argb[3] = 255;
  }
}
2106
// Convert one row of P410 (10 bits in msb of 16 bit, full resolution
// interleaved UV plane) to 8 bit ARGB.
void P410ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_argb,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
                 dst_argb + 2, yuvconstants);
    dst_argb[3] = 255;  // Opaque alpha.
    src_y += 1;
    src_uv += 2;
    dst_argb += 4;  // Advance 1 pixels.
  }
}
2122
// Convert one row of P210 (10 bits in msb of 16 bit, interleaved UV) to
// 10 bit AR30. Two Y samples share one UV pair.
void P210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
    YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30 + 4, b, g, r);
    src_y += 2;
    src_uv += 2;
    dst_ar30 += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
  }
}
2146
// Convert one row of P410 (10 bits in msb of 16 bit, full resolution
// interleaved UV) to 10 bit AR30.
void P410ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_uv,
                     uint8_t* dst_ar30,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width; ++x) {
    YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
    StoreAR30(dst_ar30, b, g, r);
    src_y += 1;
    src_uv += 2;
    dst_ar30 += 4;  // Advance 1 pixel.
  }
}
2164
2165 // 8 bit YUV to 10 bit AR30
2166 // Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits.
// Convert one row of 8 bit I422 YUV to 10 bit AR30, using the 16 bit
// intermediate YuvPixel8_16 path. Two Y samples share one U/V pair.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}
2192
// Convert one row of 8 bit I444 YUV plus an 8 bit alpha plane to ARGB.
// Alpha bytes are copied through unchanged.
void I444AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];  // Pass alpha through.
    src_y += 1;
    src_u += 1;
    src_v += 1;
    src_a += 1;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
2212
// Convert one row of 8 bit I422 YUV plus an 8 bit alpha plane to ARGB.
// Two Y/alpha samples share one U/V pair; alpha is copied through.
void I422AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    src_u += 1;
    src_v += 1;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}
2240
// Convert one row of 8 bit I422 YUV to packed 24 bit RGB (3 bytes/pixel,
// B G R byte order, no alpha). Two Y samples share one U/V pair.
void I422ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2263
// Convert one row of 8 bit I422 YUV to packed ARGB4444 (16 bits/pixel).
// Each 8 bit channel is truncated to 4 bits; alpha nibble is forced to 0xf.
void I422ToARGB4444Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 4;  // Keep top 4 bits of each channel.
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    *(uint16_t*)(dst_argb4444 + 0) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
    *(uint16_t*)(dst_argb4444 + 2) = b1 | (g1 << 4) | (r1 << 8) | 0xf000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
  }
}
2301
// Convert one row of 8 bit I422 YUV to packed ARGB1555 (16 bits/pixel).
// Each channel is truncated to 5 bits; the alpha bit is forced to 1.
void I422ToARGB1555Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;  // Keep top 5 bits of each channel.
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    *(uint16_t*)(dst_argb1555 + 0) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
    *(uint16_t*)(dst_argb1555 + 2) = b1 | (g1 << 5) | (r1 << 10) | 0x8000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
  }
}
2339
// Convert one row of 8 bit I422 YUV to packed RGB565 (16 bits/pixel,
// 5 bit B, 6 bit G, 5 bit R). Two Y samples share one U/V pair.
void I422ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;  // 5 bit blue.
    g0 = g0 >> 2;  // 6 bit green.
    r0 = r0 >> 3;  // 5 bit red.
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);  // for ubsan
    *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}
2377
// Convert one row of NV12 (Y plane + interleaved UV plane, U first) to ARGB.
// Two Y samples share one UV pair; alpha is forced opaque.
void NV12ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2401
// Convert one row of NV21 (Y plane + interleaved VU plane, V first) to ARGB.
// Note the swapped chroma order: src_vu[1] is U, src_vu[0] is V.
void NV21ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_vu,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2425
// Convert one row of NV12 (interleaved UV, U first) to packed 24 bit RGB
// (3 bytes/pixel, no alpha). Two Y samples share one UV pair.
void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_uv += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2446
// Convert one row of NV21 (interleaved VU, V first) to packed 24 bit RGB.
// Note the swapped chroma order: src_vu[1] is U, src_vu[0] is V.
void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_vu += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
2467
// Convert one row of NV12 (interleaved UV, U first) to packed RGB565
// (5 bit B, 6 bit G, 5 bit R). Two Y samples share one UV pair.
void NV12ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;  // 5 bit blue.
    g0 = g0 >> 2;  // 6 bit green.
    r0 = r0 >> 3;  // 5 bit red.
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    *(uint16_t*)(dst_rgb565 + 0) = b0 | (g0 << 5) | (r0 << 11);
    *(uint16_t*)(dst_rgb565 + 2) = b1 | (g1 << 5) | (r1 << 11);
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}
2503
// Convert one row of packed YUY2 (byte order Y0 U Y1 V) to ARGB.
// Two pixels per 4 source bytes share one U/V pair; alpha forced opaque.
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2525
// Convert one row of packed UYVY (byte order U Y0 V Y1) to ARGB.
// Two pixels per 4 source bytes share one U/V pair; alpha forced opaque.
void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2547
// Convert one row of 8 bit I422 YUV to RGBA byte layout: alpha is stored in
// byte 0 and B/G/R in bytes 1..3 of each 4 byte pixel. Alpha forced opaque.
void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;  // Alpha leads each pixel in this layout.
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
             rgb_buf + 7, yuvconstants);
    rgb_buf[4] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;
  }
}
2573
// Convert one row of 8 bit I400 (luma only, no chroma) to gray ARGB using
// the YPixel helper. Alpha is forced opaque.
void I400ToARGBRow_C(const uint8_t* src_y,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {  // Odd width tail pixel.
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
2592
// Reverse a row of bytes: dst[i] = src[width - 1 - i].
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint8_t* s = src + width - 1;
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = *s--;
  }
}
2605
// Reverse a row of interleaved UV pairs, keeping each pair's byte order.
void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
  const uint8_t* s = src_uv + 2 * (width - 1);
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = s[0];
    dst_uv[2 * i + 1] = s[1];
    s -= 2;
  }
}
2616
// Reverse a row of interleaved UV pairs while splitting them into separate
// U and V planes: dst_u[i]/dst_v[i] come from the (width-1-i)th pair.
void MirrorSplitUVRow_C(const uint8_t* src_uv,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  const uint8_t* s = src_uv + 2 * (width - 1);
  int i;
  for (i = 0; i < width; ++i) {
    dst_u[i] = s[0];
    dst_v[i] = s[1];
    s -= 2;
  }
}
2635
// Reverse a row of 4 byte ARGB pixels, copying each pixel as one 32 bit word.
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint32_t* s = (const uint32_t*)(src) + width - 1;
  uint32_t* d = (uint32_t*)(dst);
  int i;
  for (i = 0; i < width; ++i) {
    d[i] = *s--;
  }
}
2650
// Reverse a row of 3 byte RGB24 pixels, keeping each pixel's B,G,R order.
void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
  const uint8_t* s = src_rgb24 + 3 * (width - 1);
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb24[0] = s[0];
    dst_rgb24[1] = s[1];
    dst_rgb24[2] = s[2];
    dst_rgb24 += 3;
    s -= 3;
  }
}
2665
// Deinterleave a row of UV pairs into separate U and V planes.
void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_uv[2 * i + 0];
    dst_v[i] = src_uv[2 * i + 1];
  }
}
2683
// Interleave separate U and V planes into a row of UV pairs.
void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = src_u[i];
    dst_uv[2 * i + 1] = src_v[i];
  }
}
2701
// Gather one row from a tiled buffer: copy 16 bytes per tile, stepping the
// source by src_tile_stride between tiles; a final partial tile covers the
// remaining width & 15 bytes.
void DetileRow_C(const uint8_t* src,
                 ptrdiff_t src_tile_stride,
                 uint8_t* dst,
                 int width) {
  int x;
  for (x = 0; x + 16 <= width; x += 16) {
    memcpy(dst + x, src, 16);
    src += src_tile_stride;
  }
  if (width & 15) {
    memcpy(dst + x, src, width & 15);
  }
}
2716
// Gather one UV row from a tiled buffer and split it into U and V planes:
// each 16 byte tile holds 8 UV pairs; the tail handles width & 15 bytes.
void DetileSplitUVRow_C(const uint8_t* src_uv,
                        ptrdiff_t src_tile_stride,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int x;
  for (x = 0; x + 16 <= width; x += 16) {
    SplitUVRow_C(src_uv, dst_u + x / 2, dst_v + x / 2, 8);
    src_uv += src_tile_stride;
  }
  if (width & 15) {
    SplitUVRow_C(src_uv, dst_u + x / 2, dst_v + x / 2, ((width & 15) + 1) / 2);
  }
}
2733
// Deinterleave packed 3 byte pixels into three planes (first byte to dst_r,
// second to dst_g, third to dst_b).
void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_r[i] = src_rgb[3 * i + 0];
    dst_g[i] = src_rgb[3 * i + 1];
    dst_b[i] = src_rgb[3 * i + 2];
  }
}
2747
// Interleave three planes into packed 3 byte pixels (src_r to byte 0,
// src_g to byte 1, src_b to byte 2).
void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_rgb[3 * i + 0] = src_r[i];
    dst_rgb[3 * i + 1] = src_g[i];
    dst_rgb[3 * i + 2] = src_b[i];
  }
}
2761
// Deinterleave packed ARGB (byte order B,G,R,A) into four planes.
void SplitARGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    uint8_t* dst_a,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_b[i] = src_argb[4 * i + 0];
    dst_g[i] = src_argb[4 * i + 1];
    dst_r[i] = src_argb[4 * i + 2];
    dst_a[i] = src_argb[4 * i + 3];
  }
}
2777
// Interleave four planes into packed ARGB (byte order B,G,R,A).
void MergeARGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    const uint8_t* src_a,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_argb[4 * i + 0] = src_b[i];
    dst_argb[4 * i + 1] = src_g[i];
    dst_argb[4 * i + 2] = src_r[i];
    dst_argb[4 * i + 3] = src_a[i];
  }
}
2793
// Merge planar 16 bit R/G/B (depth bits each, 10..16) into little-endian
// AR30 (2.10.10.10) pixels. Channels are shifted down to 10 bits, clamped
// to 1023, and the 2 alpha bits are forced opaque (0xc0000000).
void MergeXR30Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint8_t* dst_ar30,
                    int depth,
                    int width) {
  assert(depth >= 10);
  assert(depth <= 16);
  int x;
  int shift = depth - 10;  // Bits to discard to reach 10 bit channels.
  uint32_t* dst_ar30_32 = (uint32_t*)dst_ar30;
  for (x = 0; x < width; ++x) {
    uint32_t r = clamp1023(src_r[x] >> shift);
    uint32_t g = clamp1023(src_g[x] >> shift);
    uint32_t b = clamp1023(src_b[x] >> shift);
    dst_ar30_32[x] = b | (g << 10) | (r << 20) | 0xc0000000;
  }
}
2812
// Merge planar 16 bit R/G/B/A (depth bits each, 1..16) into packed AR64
// (4 x 16 bit per pixel, order B,G,R,A). Values are clamped to the depth's
// max then scaled up to 16 bits by left shift.
void MergeAR64Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    const uint16_t* src_a,
                    uint16_t* dst_ar64,
                    int depth,
                    int width) {
  assert(depth >= 1);
  assert(depth <= 16);
  int x;
  int shift = 16 - depth;       // Scale depth-bit values up to 16 bits.
  int max = (1 << depth) - 1;   // Largest legal input value for this depth.
  for (x = 0; x < width; ++x) {
    dst_ar64[0] = ClampMax(src_b[x], max) << shift;
    dst_ar64[1] = ClampMax(src_g[x], max) << shift;
    dst_ar64[2] = ClampMax(src_r[x], max) << shift;
    dst_ar64[3] = ClampMax(src_a[x], max) << shift;
    dst_ar64 += 4;
  }
}
2833
// Interleave planar 16 bit R, G, B, A into packed 8 bit ARGB, shifting
// samples down from the given depth and clamping to 255.
void MergeARGB16To8Row_C(const uint16_t* src_r,
                         const uint16_t* src_g,
                         const uint16_t* src_b,
                         const uint16_t* src_a,
                         uint8_t* dst_argb,
                         int depth,
                         int width) {
  assert(depth >= 8);
  assert(depth <= 16);
  int i;
  const int shift = depth - 8;
  for (i = 0; i < width; ++i) {
    uint8_t* out = dst_argb + i * 4;
    out[0] = clamp255(src_b[i] >> shift);
    out[1] = clamp255(src_g[i] >> shift);
    out[2] = clamp255(src_r[i] >> shift);
    out[3] = clamp255(src_a[i] >> shift);
  }
}
2853
// Interleave planar 16 bit R, G, B into packed AR64 with opaque alpha,
// clamping to the given bit depth and shifting lsb data up to msb.
void MergeXR64Row_C(const uint16_t* src_r,
                    const uint16_t* src_g,
                    const uint16_t* src_b,
                    uint16_t* dst_ar64,
                    int depth,
                    int width) {
  assert(depth >= 1);
  assert(depth <= 16);
  int i;
  const int shift = 16 - depth;
  const int max = (1 << depth) - 1;
  for (i = 0; i < width; ++i) {
    uint16_t* out = dst_ar64 + i * 4;
    out[0] = ClampMax(src_b[i], max) << shift;
    out[1] = ClampMax(src_g[i], max) << shift;
    out[2] = ClampMax(src_r[i], max) << shift;
    out[3] = 0xffff;  // alpha forced fully opaque.
  }
}
2873
// Interleave planar 16 bit R, G, B into packed 8 bit ARGB with opaque
// alpha, shifting down from the given depth and clamping to 255.
void MergeXRGB16To8Row_C(const uint16_t* src_r,
                         const uint16_t* src_g,
                         const uint16_t* src_b,
                         uint8_t* dst_argb,
                         int depth,
                         int width) {
  assert(depth >= 8);
  assert(depth <= 16);
  int i;
  const int shift = depth - 8;
  for (i = 0; i < width; ++i) {
    uint8_t* out = dst_argb + i * 4;
    out[0] = clamp255(src_b[i] >> shift);
    out[1] = clamp255(src_g[i] >> shift);
    out[2] = clamp255(src_r[i] >> shift);
    out[3] = 0xff;  // alpha forced fully opaque.
  }
}
2892
// Split packed ARGB into planar R, G and B rows; alpha is discarded.
void SplitXRGBRow_C(const uint8_t* src_argb,
                    uint8_t* dst_r,
                    uint8_t* dst_g,
                    uint8_t* dst_b,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8_t* px = src_argb + i * 4;
    dst_b[i] = px[0];
    dst_g[i] = px[1];
    dst_r[i] = px[2];
  }
}
2906
// Interleave planar R, G, B into packed ARGB with opaque (255) alpha.
void MergeXRGBRow_C(const uint8_t* src_r,
                    const uint8_t* src_g,
                    const uint8_t* src_b,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint8_t* out = dst_argb + i * 4;
    out[0] = src_b[i];
    out[1] = src_g[i];
    out[2] = src_r[i];
    out[3] = 255;
  }
}
2921
2922 // Convert lsb formats to msb, depending on sample depth.
// Interleave planar 16 bit U and V, shifting lsb data up to msb position
// for the given sample depth.
void MergeUVRow_16_C(const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint16_t* dst_uv,
                     int depth,
                     int width) {
  const int shift = 16 - depth;
  assert(depth >= 8);
  assert(depth <= 16);
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[2 * i + 0] = (uint16_t)(src_u[i] << shift);
    dst_uv[2 * i + 1] = (uint16_t)(src_v[i] << shift);
  }
}
2938
2939 // Convert msb formats to lsb, depending on sample depth.
// Split interleaved 16 bit UV, shifting msb data down to lsb position
// for the given sample depth.
void SplitUVRow_16_C(const uint16_t* src_uv,
                     uint16_t* dst_u,
                     uint16_t* dst_v,
                     int depth,
                     int width) {
  const int shift = 16 - depth;
  int i;
  assert(depth >= 8);
  assert(depth <= 16);
  for (i = 0; i < width; ++i) {
    dst_u[i] = src_uv[2 * i + 0] >> shift;
    dst_v[i] = src_uv[2 * i + 1] >> shift;
  }
}
2955
// Multiply each 16 bit sample by scale (result truncated to 16 bits).
void MultiplyRow_16_C(const uint16_t* src_y,
                      uint16_t* dst_y,
                      int scale,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint16_t)(src_y[i] * scale);
  }
}
2965
// Scale each 16 bit sample by scale/65536 (fixed point multiply).
void DivideRow_16_C(const uint16_t* src_y,
                    uint16_t* dst_y,
                    int scale,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint16_t)((src_y[i] * scale) >> 16);
  }
}
2975
2976 // Use scale to convert lsb formats to msb, depending how many bits there are:
2977 // 32768 = 9 bits
2978 // 16384 = 10 bits
2979 // 4096 = 12 bits
2980 // 256 = 16 bits
2981 // TODO(fbarchard): change scale to bits
2982 #define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
2983
// Convert a row of 16 bit samples to 8 bit using the C16TO8 fixed point
// scale (256 = 16 bit input, 32768 = 9 bit input).
void Convert16To8Row_C(const uint16_t* src_y,
                       uint8_t* dst_y,
                       int scale,
                       int width) {
  int i;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (i = 0; i < width; ++i) {
    dst_y[i] = C16TO8(src_y[i], scale);
  }
}
2996
2997 // Use scale to convert lsb formats to msb, depending how many bits there are:
2998 // 1024 = 10 bits
// Widen 8 bit samples to 16 bit range; scale selects the output depth
// (e.g. 1024 maps 255 to 1023 for 10 bit output).
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  // Multiplying by 0x0101 replicates the byte into both halves before
  // the fixed point scale and shift.
  const int replicated_scale = scale * 0x0101;
  int i;
  for (i = 0; i < width; ++i) {
    dst_y[i] = (uint16_t)((src_y[i] * replicated_scale) >> 16);
  }
}
3009
// Copy a row of count bytes.
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  memcpy(dst, src, (size_t)count);
}
3013
// Copy a row of count 16 bit samples.
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  memcpy(dst, src, (size_t)count * sizeof(uint16_t));
}
3017
// Fill a row of width bytes with the value v8.
void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  memset(dst, v8, (size_t)width);
}
3021
// Fill a row with width copies of the 4 byte pixel value v32.
void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // memcpy avoids alignment and aliasing issues of a uint32_t store.
    memcpy(dst_argb, &v32, sizeof(v32));
    dst_argb += sizeof(v32);
  }
}
3028
3029 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
// Average the U and V samples of 2 rows of YUY2 (422) into one row of
// U and V (420). Each 4 byte group is Y0 U Y1 V.
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* next_row = src_yuy2 + src_stride_yuy2;
  int i;
  for (i = 0; i < width; i += 2) {
    *dst_u++ = (uint8_t)((src_yuy2[1] + next_row[1] + 1) >> 1);
    *dst_v++ = (uint8_t)((src_yuy2[3] + next_row[3] + 1) >> 1);
    src_yuy2 += 4;
    next_row += 4;
  }
}
3045
3046 // Copy row of YUY2 UV's (422) into U and V (422).
// Copy the U and V samples of one row of YUY2 (422) into U and V planes.
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; i += 2) {
    *dst_u++ = src_yuy2[1];
    *dst_v++ = src_yuy2[3];
    src_yuy2 += 4;
  }
}
3061
3062 // Copy row of YUY2 Y's (422) into Y (420/422).
// Extract the Y samples from a row of YUY2 (even bytes hold luma).
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i + 1 < width; i += 2) {
    dst_y[i] = src_yuy2[0];
    dst_y[i + 1] = src_yuy2[2];
    src_yuy2 += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[0];  // final odd pixel.
  }
}
3075
3076 // Filter 2 rows of UYVY UV's (422) into U and V (420).
// Average the U and V samples of 2 rows of UYVY (422) into one row of
// U and V (420). Each 4 byte group is U Y0 V Y1.
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  const uint8_t* next_row = src_uyvy + src_stride_uyvy;
  int i;
  for (i = 0; i < width; i += 2) {
    *dst_u++ = (uint8_t)((src_uyvy[0] + next_row[0] + 1) >> 1);
    *dst_v++ = (uint8_t)((src_uyvy[2] + next_row[2] + 1) >> 1);
    src_uyvy += 4;
    next_row += 4;
  }
}
3092
3093 // Copy row of UYVY UV's (422) into U and V (422).
// Copy the U and V samples of one row of UYVY (422) into U and V planes.
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; i += 2) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
  }
}
3108
3109 // Copy row of UYVY Y's (422) into Y (420/422).
// Extract the Y samples from a row of UYVY (odd bytes hold luma).
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  int i;
  for (i = 0; i + 1 < width; i += 2) {
    dst_y[i] = src_uyvy[1];
    dst_y[i + 1] = src_uyvy[3];
    src_uyvy += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_uyvy[1];  // final odd pixel.
  }
}
3122
3123 #define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
3124
3125 // Blend src_argb over src_argb1 and store to dst_argb.
3126 // dst_argb may be src_argb or src_argb1.
3127 // This code mimics the SSSE3 version for better testability.
// Alpha blend a row of ARGB pixels: src_argb (foreground) over src_argb1
// (background), storing to dst_argb. dst_argb may alias either source.
// Output alpha is forced opaque, matching the SSSE3 implementation.
void ARGBBlendRow_C(const uint8_t* src_argb,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t a = src_argb[3];  // foreground alpha drives the blend.
    const uint32_t fb = src_argb[0];
    const uint32_t fg = src_argb[1];
    const uint32_t fr = src_argb[2];
    const uint32_t bb = src_argb1[0];
    const uint32_t bg = src_argb1[1];
    const uint32_t br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;
    src_argb += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
3176 #undef BLEND
3177
3178 #define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
// Blend two planes with a per-pixel alpha plane:
// dst = (alpha * src0 + (255 - alpha) * src1 + 255) >> 8
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int a = alpha[i];
    dst[i] = (uint8_t)((a * src0[i] + (255 - a) * src1[i] + 255) >> 8);
  }
}
3197 #undef UBLEND
3198
3199 #if LIBYUV_ATTENUATE_DUP
3200 // This code mimics the SSSE3 version for better testability.
3201 #define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
3202 #else
3203 #define ATTENUATE(f, a) (f * a + 128) >> 8
3204 #endif
3205
3206 // Multiply source RGB by alpha and store to destination.
// Multiply the B, G and R channels of each ARGB pixel by its alpha
// (premultiply); alpha is copied through unchanged.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = src_argb[0];
    const uint32_t g = src_argb[1];
    const uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = (uint8_t)a;
    src_argb += 4;
    dst_argb += 4;
  }
}
3241 #undef ATTENUATE
3242
3243 // Divide source RGB by alpha and store to destination.
3244 // b = (b * 255 + (a / 2)) / a;
3245 // g = (g * 255 + (a / 2)) / a;
3246 // r = (r * 255 + (a / 2)) / a;
3247 // Reciprocal method is off by 1 on some values. ie 125
3248 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
// T(a) packs two 8.8 fixed point values: the high 16 bits hold 1.0
// (0x0100) and the low 16 bits hold 65536 / a (the reciprocal of alpha).
#define T(a) 0x01000000 + (0x10000 / a)
// Entry 0 (alpha 0) uses reciprocal 0 to avoid a divide by zero, entry 1
// saturates the low half to 0xffff, and the final entry is hard coded to
// 0x0100 (65536 / 256) rather than T(0xff).
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
3289
3290 #if LIBYUV_UNATTENUATE_DUP
3291 // This code mimics the Intel SIMD version for better testability.
3292 #define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
3293 #else
3294 #define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
3295 #endif
3296
3297 // mimics the Intel SIMD code for exactness.
ARGBUnattenuateRow_C(const uint8_t * src_argb,uint8_t * dst_argb,int width)3298 void ARGBUnattenuateRow_C(const uint8_t* src_argb,
3299 uint8_t* dst_argb,
3300 int width) {
3301 int i;
3302 for (i = 0; i < width; ++i) {
3303 uint32_t b = src_argb[0];
3304 uint32_t g = src_argb[1];
3305 uint32_t r = src_argb[2];
3306 const uint32_t a = src_argb[3];
3307 const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
3308
3309 // Clamping should not be necessary but is free in assembly.
3310 dst_argb[0] = UNATTENUATE(b, ia);
3311 dst_argb[1] = UNATTENUATE(g, ia);
3312 dst_argb[2] = UNATTENUATE(r, ia);
3313 dst_argb[3] = a;
3314 src_argb += 4;
3315 dst_argb += 4;
3316 }
3317 }
3318
// Accumulate per-channel running sums of an ARGB row, adding the previous
// row's cumulative sums to build a 2D summed-area table row.
void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t sum_b = 0;
  int32_t sum_g = 0;
  int32_t sum_r = 0;
  int32_t sum_a = 0;
  int i;
  for (i = 0; i < width; ++i) {
    sum_b += row[i * 4 + 0];
    sum_g += row[i * 4 + 1];
    sum_r += row[i * 4 + 2];
    sum_a += row[i * 4 + 3];
    cumsum[i * 4 + 0] = sum_b + previous_cumsum[i * 4 + 0];
    cumsum[i * 4 + 1] = sum_g + previous_cumsum[i * 4 + 1];
    cumsum[i * 4 + 2] = sum_r + previous_cumsum[i * 4 + 2];
    cumsum[i * 4 + 3] = sum_a + previous_cumsum[i * 4 + 3];
  }
}
3336
// Compute box-filter averages from summed-area table corners:
// tl = top-left row, bl = bottom-left row, w = box width in int32 units,
// area = pixel count of the box.
void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  float ooa;
  int i;
  assert(area != 0);

  ooa = 1.0f / area;  // reciprocal avoids a divide per channel.
  for (i = 0; i < count; ++i) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8_t)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) * ooa);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
3358
3359 // Copy pixels from rotated source to destination row with a slope.
3360 LIBYUV_API
// Copy pixels from a rotated/scaled source into a destination row.
// uv_dudv holds {u, v, du/dx, dv/dx}: the starting source coordinate and
// its per-destination-pixel increments.
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Current source coordinate, stepped by the slope each pixel.
  float u = uv_dudv[0];
  float v = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(u);
    int y = (int)(v);
    // memcpy copies the 4 byte pixel without the unaligned-access /
    // strict-aliasing UB of casting byte pointers to uint32_t*.
    memcpy(dst_argb, src_argb + y * src_argb_stride + x * 4, 4);
    dst_argb += 4;
    u += uv_dudv[2];
    v += uv_dudv[3];
  }
}
3381
3382 // Blend 2 rows into 1.
// Average 2 rows of bytes into 1 with rounding.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  const uint8_t* src_next = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = (uint8_t)((src_uv[i] + src_next[i] + 1) >> 1);
  }
}
3392
// Average 2 rows of 16 bit samples into 1 with rounding.
static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  const uint16_t* src_next = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = (uint16_t)((src_uv[i] + src_next[i] + 1) >> 1);
  }
}
3402
// Average 2 rows of 16 bit samples into 1 row of 8 bit samples, using
// the C16TO8 fixed point scale for the depth conversion.
static void HalfRow_16To8_C(const uint16_t* src_uv,
                            ptrdiff_t src_uv_stride,
                            uint8_t* dst_uv,
                            int scale,
                            int width) {
  const uint16_t* src_next = src_uv + src_uv_stride;
  int i;
  for (i = 0; i < width; ++i) {
    dst_uv[i] = C16TO8((src_uv[i] + src_next[i] + 1) >> 1, scale);
  }
}
3413
3414 // C version 2x2 -> 2x1.
// Vertically interpolate between 2 rows of bytes with an 8 bit fraction
// (0 = all first row, 128 = average, etc.); 2x2 -> 2x1.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  const int f1 = source_y_fraction;
  const int f0 = 256 - f1;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int i;
  assert(source_y_fraction >= 0);
  assert(source_y_fraction < 256);

  // Fast paths: pure copy and exact half blend.
  if (f1 == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (f1 == 128) {
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (i = 0; i < width; ++i) {
    dst_ptr[i] = (uint8_t)((src_ptr[i] * f0 + src_ptr1[i] * f1 + 128) >> 8);
  }
}
3443
3444 // C version 2x2 -> 2x1.
// Vertically interpolate between 2 rows of 16 bit samples with an 8 bit
// fraction; 2x2 -> 2x1.
void InterpolateRow_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  const int f1 = source_y_fraction;
  const int f0 = 256 - f1;
  const uint16_t* src_ptr1 = src_ptr + src_stride;
  int i;
  assert(source_y_fraction >= 0);
  assert(source_y_fraction < 256);

  // Fast paths: pure copy and exact half blend.
  if (f1 == 0) {
    memcpy(dst_ptr, src_ptr, width * 2);
    return;
  }
  if (f1 == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (i = 0; i < width; ++i) {
    dst_ptr[i] = (uint16_t)((src_ptr[i] * f0 + src_ptr1[i] * f1 + 128) >> 8);
  }
}
3473
3474 // C version 2x2 16 bit-> 2x1 8 bit.
3475 // Use scale to convert lsb formats to msb, depending how many bits there are:
3476 // 32768 = 9 bits
3477 // 16384 = 10 bits
3478 // 4096 = 12 bits
3479 // 256 = 16 bits
3480 // TODO(fbarchard): change scale to bits
3481
// Vertically interpolate 2 rows of 16 bit samples into 1 row of 8 bit
// samples; scale selects the input depth as for Convert16To8Row_C.
void InterpolateRow_16To8_C(uint8_t* dst_ptr,
                            const uint16_t* src_ptr,
                            ptrdiff_t src_stride,
                            int scale,
                            int width,
                            int source_y_fraction) {
  const int f1 = source_y_fraction;
  const int f0 = 256 - f1;
  const uint16_t* src_ptr1 = src_ptr + src_stride;
  int i;
  assert(source_y_fraction >= 0);
  assert(source_y_fraction < 256);

  // Fast paths: depth conversion only, and exact half blend.
  if (source_y_fraction == 0) {
    Convert16To8Row_C(src_ptr, dst_ptr, scale, width);
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width);
    return;
  }
  for (i = 0; i < width; ++i) {
    dst_ptr[i] =
        C16TO8((src_ptr[i] * f0 + src_ptr1[i] * f1 + 128) >> 8, scale);
  }
}
3512
3513 // Use first 4 shuffler values to reorder ARGB channels.
// Reorder the 4 channels of each ARGB pixel using the first 4 shuffler
// values as source indices.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  const int i0 = shuffler[0];
  const int i1 = shuffler[1];
  const int i2 = shuffler[2];
  const int i3 = shuffler[3];
  int i;
  for (i = 0; i < width; ++i) {
    // Read all 4 channels before writing to support in-place conversion.
    const uint8_t c0 = src_argb[i0];
    const uint8_t c1 = src_argb[i1];
    const uint8_t c2 = src_argb[i2];
    const uint8_t c3 = src_argb[i3];
    dst_argb[0] = c0;
    dst_argb[1] = c1;
    dst_argb[2] = c2;
    dst_argb[3] = c3;
    src_argb += 4;
    dst_argb += 4;
  }
}
3538
// Pack I422 planes into YUY2 (Y0 U Y1 V per 4 bytes). An odd final pixel
// gets 0 for its missing second luma sample.
void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int i;
  for (i = 0; i + 1 < width; i += 2) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[1];
    dst_frame[3] = src_v[0];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;  // no second luma sample for the last column.
    dst_frame[3] = src_v[0];
  }
}
3562
// Pack I422 planes into UYVY (U Y0 V Y1 per 4 bytes). An odd final pixel
// gets 0 for its missing second luma sample.
void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int i;
  for (i = 0; i + 1 < width; i += 2) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;  // no second luma sample for the last column.
  }
}
3586
// Apply a per-channel cubic polynomial to each ARGB pixel:
//   out = c0 + c1*v + c2*v^2 + c3*v^3
// poly holds 16 floats grouped by power, 4 channels (B,G,R,A) per group:
// poly[0..3] constant, poly[4..7] linear, poly[8..11] quadratic,
// poly[12..15] cubic. Results pass through Clamp() (defined earlier in
// this file; presumably clamps to [0, 255] — the uint8_t output range).
// NOTE: the float accumulation order below is deliberate; do not reorder.
void ARGBPolynomialRow_C(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float b = (float)(src_argb[0]);
    float g = (float)(src_argb[1]);
    float r = (float)(src_argb[2]);
    float a = (float)(src_argb[3]);
    float b2 = b * b;
    float g2 = g * g;
    float r2 = r * r;
    float a2 = a * a;
    // Constant plus linear term.
    float db = poly[0] + poly[4] * b;
    float dg = poly[1] + poly[5] * g;
    float dr = poly[2] + poly[6] * r;
    float da = poly[3] + poly[7] * a;
    float b3 = b2 * b;
    float g3 = g2 * g;
    float r3 = r2 * r;
    float a3 = a2 * a;
    // Quadratic term.
    db += poly[8] * b2;
    dg += poly[9] * g2;
    dr += poly[10] * r2;
    da += poly[11] * a2;
    // Cubic term.
    db += poly[12] * b3;
    dg += poly[13] * g3;
    dr += poly[14] * r3;
    da += poly[15] * a3;

    dst_argb[0] = Clamp((int32_t)(db));
    dst_argb[1] = Clamp((int32_t)(dg));
    dst_argb[2] = Clamp((int32_t)(dr));
    dst_argb[3] = Clamp((int32_t)(da));
    src_argb += 4;
    dst_argb += 4;
  }
}
3626
3627 // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor
3628 // adjust the source integer range to the half float range desired.
3629
3630 // This magic constant is 2^-112. Multiplying by this
3631 // is the same as subtracting 112 from the exponent, which
3632 // is the difference in exponent bias between 32-bit and
3633 // 16-bit floats. Once we've done this subtraction, we can
3634 // simply extract the low bits of the exponent and the high
3635 // bits of the mantissa from our float and we're done.
3636
3637 // Work around GCC 7 punning warning -Wstrict-aliasing
3638 #if defined(__GNUC__)
3639 typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
3640 #else
3641 typedef uint32_t uint32_alias_t;
3642 #endif
3643
// Convert 16 bit integer samples to half floats. The magic constant
// 1.9259299444e-34f is exactly 2^-112: multiplying by it subtracts 112
// from the float exponent, the bias difference between 32 bit and 16 bit
// floats, so the half float is simply the float bits shifted down 13.
// scale adjusts the source integer range (e.g. 1.0/1023 for 10 bit data).
void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  int i;
  float mult = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
    float value = src[i] * mult;
    uint32_t bits;
    // memcpy is a well-defined type pun on every compiler, unlike the
    // pointer-cast pun (UB without GCC's may_alias extension).
    memcpy(&bits, &value, sizeof(bits));
    dst[i] = (uint16_t)(bits >> 13);
  }
}
3655
// Convert a row of bytes to floats, multiplying each by scale.
void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src[i] * scale;
  }
}
3663
// Remap each ARGB pixel through a luminance-selected color table.
// The weighted luminance (coefficients packed B,G,R into lumacoeff)
// selects a 256-entry row of luma; B, G and R index into that row.
// Alpha passes through unchanged.
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  const uint32_t bc = lumacoeff & 0xff;
  const uint32_t gc = (lumacoeff >> 8) & 0xff;
  const uint32_t rc = (lumacoeff >> 16) & 0xff;
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t lum =
        src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc;
    // Mask keeps a 256-aligned row index within the 32 KiB table.
    const uint8_t* table = luma + (lum & 0x7F00u);
    dst_argb[0] = table[src_argb[0]];
    dst_argb[1] = table[src_argb[1]];
    dst_argb[2] = table[src_argb[2]];
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
3705
// Copy only the alpha channel of each ARGB pixel; B, G, R untouched.
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i * 4 + 3] = src[i * 4 + 3];
  }
}
3718
// Extract the alpha channel of each ARGB pixel into a plane.
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[i] = src_argb[i * 4 + 3];
  }
}
3731
// Copy a plane of Y samples into the alpha channel of an ARGB row.
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i * 4 + 3] = src[i];
  }
}
3744
3745 // Maximum temporary width for wrappers to process at a time, in pixels.
3746 #define MAXTWIDTH 2048
3747
3748 #if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
3749 defined(HAS_I422TORGB565ROW_SSSE3)
3750 // row_win.cc has asm version, but GCC uses 2 step wrapper.
I422ToRGB565Row_SSSE3(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_rgb565,const struct YuvConstants * yuvconstants,int width)3751 void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
3752 const uint8_t* src_u,
3753 const uint8_t* src_v,
3754 uint8_t* dst_rgb565,
3755 const struct YuvConstants* yuvconstants,
3756 int width) {
3757 SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
3758 while (width > 0) {
3759 int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
3760 I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
3761 ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
3762 src_y += twidth;
3763 src_u += twidth / 2;
3764 src_v += twidth / 2;
3765 dst_rgb565 += twidth * 2;
3766 width -= twidth;
3767 }
3768 }
3769 #endif
3770
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
// Two-step I422 -> ARGB1555: YUV to ARGB, then pack to 1555, in
// MAXTWIDTH-pixel chunks.
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, argb_buf, yuvconstants, n);
    ARGBToARGB1555Row_SSE2(argb_buf, dst_argb1555, n);
    width -= n;
    src_y += n;
    src_u += n / 2;  // U/V are half width (422 subsampling).
    src_v += n / 2;
    dst_argb1555 += n * 2;  // 2 bytes per ARGB1555 pixel.
  }
}
#endif
3792
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
// Two-step I422 -> ARGB4444: YUV to ARGB, then pack to 4444, in
// MAXTWIDTH-pixel chunks.
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, argb_buf, yuvconstants, n);
    ARGBToARGB4444Row_SSE2(argb_buf, dst_argb4444, n);
    width -= n;
    src_y += n;
    src_u += n / 2;  // U/V are half width (422 subsampling).
    src_v += n / 2;
    dst_argb4444 += n * 2;  // 2 bytes per ARGB4444 pixel.
  }
}
#endif
3814
#if defined(HAS_NV12TORGB565ROW_SSSE3)
// Two-step NV12 -> RGB565: YUV to ARGB, then pack to RGB565, in
// MAXTWIDTH-pixel chunks.
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_uv,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    NV12ToARGBRow_SSSE3(src_y, src_uv, argb_buf, yuvconstants, n);
    ARGBToRGB565Row_SSE2(argb_buf, dst_rgb565, n);
    width -= n;
    src_y += n;
    src_uv += n;  // Interleaved UV: n bytes cover n/2 UV pairs.
    dst_rgb565 += n * 2;  // 2 bytes per RGB565 pixel.
  }
}
#endif
3834
#if defined(HAS_NV12TORGB24ROW_SSSE3)
// Two-step NV12 -> RGB24: YUV to ARGB, then repack to RGB24, in
// MAXTWIDTH-pixel chunks.
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    NV12ToARGBRow_SSSE3(src_y, src_uv, argb_buf, yuvconstants, n);
    ARGBToRGB24Row_SSSE3(argb_buf, dst_rgb24, n);
    width -= n;
    src_y += n;
    src_uv += n;  // Interleaved UV: n bytes cover n/2 UV pairs.
    dst_rgb24 += n * 3;  // 3 bytes per RGB24 pixel.
  }
}
#endif
3854
#if defined(HAS_NV21TORGB24ROW_SSSE3)
// Two-step NV21 -> RGB24: YUV to ARGB, then repack to RGB24, in
// MAXTWIDTH-pixel chunks.
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    NV21ToARGBRow_SSSE3(src_y, src_vu, argb_buf, yuvconstants, n);
    ARGBToRGB24Row_SSSE3(argb_buf, dst_rgb24, n);
    width -= n;
    src_y += n;
    src_vu += n;  // Interleaved VU: n bytes cover n/2 VU pairs.
    dst_rgb24 += n * 3;  // 3 bytes per RGB24 pixel.
  }
}
#endif
3874
#if defined(HAS_NV12TORGB24ROW_AVX2)
// Two-step NV12 -> RGB24: AVX2 YUV->ARGB, then repack ARGB->RGB24 in
// MAXTWIDTH-pixel chunks. Falls back to the SSSE3 repack when the AVX2
// ARGBToRGB24 row is not available in this build.
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;  // Interleaved UV plane: twidth bytes per chunk.
    dst_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
    width -= twidth;
  }
}
#endif
3898
#if defined(HAS_NV21TORGB24ROW_AVX2)
// Two-step NV21 -> RGB24: AVX2 YUV->ARGB, then repack ARGB->RGB24 in
// MAXTWIDTH-pixel chunks. Falls back to the SSSE3 repack when the AVX2
// ARGBToRGB24 row is not available in this build.
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_vu += twidth;  // Interleaved VU plane: twidth bytes per chunk.
    dst_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
    width -= twidth;
  }
}
#endif
3922
#if defined(HAS_I422TORGB565ROW_AVX2)
// Two-step I422 -> RGB565: AVX2 YUV->ARGB, then pack to RGB565 in
// MAXTWIDTH-pixel chunks. Falls back to the SSE2 pack when the AVX2
// ARGBToRGB565 row is not available in this build.
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // U/V planes are half width (422 subsampling).
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;  // 2 bytes per RGB565 pixel.
    width -= twidth;
  }
}
#endif
3947
#if defined(HAS_I422TOARGB1555ROW_AVX2)
// Two-step I422 -> ARGB1555: AVX2 YUV->ARGB, then pack to 1555 in
// MAXTWIDTH-pixel chunks. Falls back to the SSE2 pack when the AVX2
// ARGBToARGB1555 row is not available in this build.
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // U/V planes are half width (422 subsampling).
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;  // 2 bytes per ARGB1555 pixel.
    width -= twidth;
  }
}
#endif
3973
#if defined(HAS_I422TOARGB4444ROW_AVX2)
// Two-step I422 -> ARGB4444: AVX2 YUV->ARGB, then pack to 4444 in
// MAXTWIDTH-pixel chunks. Falls back to the SSE2 pack when the AVX2
// ARGBToARGB4444 row is not available in this build.
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // U/V planes are half width (422 subsampling).
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;  // 2 bytes per ARGB4444 pixel.
    width -= twidth;
  }
}
#endif
3999
#if defined(HAS_I422TORGB24ROW_AVX2)
// Two-step I422 -> RGB24: AVX2 YUV->ARGB, then repack to RGB24 in
// MAXTWIDTH-pixel chunks. Falls back to the SSSE3 repack when the AVX2
// ARGBToRGB24 row is not available in this build.
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;  // U/V planes are half width (422 subsampling).
    src_v += twidth / 2;
    dst_rgb24 += twidth * 3;  // 3 bytes per RGB24 pixel.
    width -= twidth;
  }
}
#endif
4025
#if defined(HAS_NV12TORGB565ROW_AVX2)
// Two-step NV12 -> RGB565: AVX2 YUV->ARGB, then pack to RGB565 in
// MAXTWIDTH-pixel chunks. Falls back to the SSE2 pack when the AVX2
// ARGBToRGB565 row is not available in this build.
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;  // Interleaved UV plane: twidth bytes per chunk.
    dst_rgb565 += twidth * 2;  // 2 bytes per RGB565 pixel.
    width -= twidth;
  }
}
#endif
4049
#ifdef HAS_RGB24TOYJROW_AVX2
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
// Two-step wrapper: expand RGB24 to ARGB with SSSE3, then compute JPEG
// range luma with AVX2, MAXTWIDTH pixels per chunk.
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    RGB24ToARGBRow_SSSE3(src_rgb24, argb_buf, n);
    ARGBToYJRow_AVX2(argb_buf, dst_yj, n);
    width -= n;
    src_rgb24 += n * 3;  // 3 bytes per RGB24 pixel.
    dst_yj += n;
  }
}
#endif  // HAS_RGB24TOYJROW_AVX2
4065
#ifdef HAS_RAWTOYJROW_AVX2
// Convert 16 RAW pixels (64 bytes) to 16 YJ values.
// Two-step wrapper: expand RAW to ARGB with SSSE3, then compute JPEG
// range luma with AVX2, MAXTWIDTH pixels per chunk.
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    RAWToARGBRow_SSSE3(src_raw, argb_buf, n);
    ARGBToYJRow_AVX2(argb_buf, dst_yj, n);
    width -= n;
    src_raw += n * 3;  // 3 bytes per RAW pixel.
    dst_yj += n;
  }
}
#endif  // HAS_RAWTOYJROW_AVX2
4081
#ifdef HAS_RGB24TOYJROW_SSSE3
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
// Two-step wrapper: expand RGB24 to ARGB, then compute JPEG range luma,
// both with SSSE3, MAXTWIDTH pixels per chunk.
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    RGB24ToARGBRow_SSSE3(src_rgb24, argb_buf, n);
    ARGBToYJRow_SSSE3(argb_buf, dst_yj, n);
    width -= n;
    src_rgb24 += n * 3;  // 3 bytes per RGB24 pixel.
    dst_yj += n;
  }
}
#endif  // HAS_RGB24TOYJROW_SSSE3
4097
#ifdef HAS_RAWTOYJROW_SSSE3
// Convert 16 RAW pixels (64 bytes) to 16 YJ values.
// Two-step wrapper: expand RAW to ARGB, then compute JPEG range luma,
// both with SSSE3, MAXTWIDTH pixels per chunk.
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Intermediate ARGB buffer.
  SIMD_ALIGNED(uint8_t argb_buf[MAXTWIDTH * 4]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    RAWToARGBRow_SSSE3(src_raw, argb_buf, n);
    ARGBToYJRow_SSSE3(argb_buf, dst_yj, n);
    width -= n;
    src_raw += n * 3;  // 3 bytes per RAW pixel.
    dst_yj += n;
  }
}
#endif  // HAS_RAWTOYJROW_SSSE3
4113
#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
// Interpolate between two 16-bit rows, then narrow the result to 8 bits
// with the given scale, MAXTWIDTH pixels per chunk.
void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
                               const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               int scale,
                               int width,
                               int source_y_fraction) {
  // Intermediate 16-bit row buffer.
  SIMD_ALIGNED(uint16_t tmp_row[MAXTWIDTH]);
  while (width > 0) {
    int n = width;
    if (n > MAXTWIDTH) {
      n = MAXTWIDTH;
    }
    InterpolateRow_16_C(tmp_row, src_ptr, src_stride, n, source_y_fraction);
    Convert16To8Row_AVX2(tmp_row, dst_ptr, scale, n);
    width -= n;
    src_ptr += n;
    dst_ptr += n;
  }
}
#endif  // HAS_INTERPOLATEROW_16TO8_AVX2
4133
// Scale each float sample by 'scale' into dst; return the sum of squares
// of the unscaled input samples.
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  float sum_sq = 0.f;
  for (i = 0; i < width; ++i) {
    const float v = src[i];
    sum_sq += v * v;
    dst[i] = v * scale;
  }
  return sum_sq;
}
4144
// Scale each float sample by 'scale' into dst; return the maximum of the
// unscaled input samples (never less than 0).
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  float max_v = 0.f;
  for (i = 0; i < width; ++i) {
    const float v = src[i];
    dst[i] = v * scale;
    if (v > max_v) {
      max_v = v;
    }
  }
  return max_v;
}
4156
// Multiply each float sample by 'scale' into dst.
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src[i] * scale;
  }
}
4163
// Horizontal 1-4-6-4-1 Gaussian filter over 5 taps per output, with
// rounding; >>8 folds in the 256 total weight of the separable filter.
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t sum =
        src[i] + src[i + 1] * 4 + src[i + 2] * 6 + src[i + 3] * 4 + src[i + 4];
    dst[i] = (uint16_t)((sum + 128) >> 8);
  }
}
4172
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src0[i] + src1[i] * 4 + src2[i] * 6 + src3[i] * 4 + src4[i];
  }
}
4186
// Float horizontal 1-4-6-4-1 Gaussian filter; normalized by the 256
// total weight of the separable filter.
void GaussRow_F32_C(const float* src, float* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const float sum =
        src[i] + src[i + 1] * 4 + src[i + 2] * 6 + src[i + 3] * 4 + src[i + 4];
    dst[i] = sum * (1.0f / 256.0f);
  }
}
4195
// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst[i] = src0[i] + src1[i] * 4 + src2[i] * 6 + src3[i] * 4 + src4[i];
  }
}
4209
// Convert biplanar NV21 to packed YUV24
// Output is 3 bytes per pixel in V,U,Y order; one VU pair is shared by
// two Y samples.
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    const uint8_t v = src_vu[0];
    const uint8_t u = src_vu[1];
    dst_yuv24[0] = v;
    dst_yuv24[1] = u;
    dst_yuv24[2] = src_y[0];
    dst_yuv24[3] = v;
    dst_yuv24[4] = u;
    dst_yuv24[5] = src_y[1];
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  // Odd width: emit the final single pixel.
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];
    dst_yuv24[1] = src_vu[1];
    dst_yuv24[2] = src_y[0];
  }
}
4233
// Filter 2 rows of AYUV UV's (444) into UV (420).
// AYUV is VUYA in memory. UV for NV12 is UV order in memory.
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  const uint8_t* row0 = src_ayuv;
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;
  int x;
  // Average each 2x2 block of U and of V with rounding.
  for (x = 0; x + 2 <= width; x += 2) {
    dst_uv[0] = (uint8_t)((row0[1] + row0[5] + row1[1] + row1[5] + 2) >> 2);
    dst_uv[1] = (uint8_t)((row0[0] + row0[4] + row1[0] + row1[4] + 2) >> 2);
    row0 += 8;
    row1 += 8;
    dst_uv += 2;
  }
  // Odd width: average the last column vertically only.
  if (width & 1) {
    dst_uv[0] = (uint8_t)((row0[1] + row1[1] + 1) >> 1);
    dst_uv[1] = (uint8_t)((row0[0] + row1[0] + 1) >> 1);
  }
}
4257
// Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  const uint8_t* row0 = src_ayuv;
  const uint8_t* row1 = src_ayuv + src_stride_ayuv;
  int x;
  // Average each 2x2 block of V and of U with rounding.
  for (x = 0; x + 2 <= width; x += 2) {
    dst_vu[0] = (uint8_t)((row0[0] + row0[4] + row1[0] + row1[4] + 2) >> 2);
    dst_vu[1] = (uint8_t)((row0[1] + row0[5] + row1[1] + row1[5] + 2) >> 2);
    row0 += 8;
    row1 += 8;
    dst_vu += 2;
  }
  // Odd width: average the last column vertically only.
  if (width & 1) {
    dst_vu[0] = (uint8_t)((row0[0] + row1[0] + 1) >> 1);
    dst_vu[1] = (uint8_t)((row0[1] + row1[1] + 1) >> 1);
  }
}
4280
// Copy row of AYUV Y's into Y
// AYUV is VUYA in memory, so Y is byte 2 of each pixel.
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[x * 4 + 2];
  }
}
4290
// Convert UV plane of NV12 to VU of NV21.
// Reads both bytes of each pair before writing, so an in-place swap
// (src_uv == dst_vu) remains correct.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t u = src_uv[2 * x + 0];
    const uint8_t v = src_uv[2 * x + 1];
    dst_vu[2 * x + 0] = v;
    dst_vu[2 * x + 1] = u;
  }
}
4303
// Downsample separate U and V planes 2x2 (with rounding) and merge them
// into one interleaved UV row.
void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  const uint8_t* u0 = src_u;
  const uint8_t* u1 = src_u + src_stride_u;
  const uint8_t* v0 = src_v;
  const uint8_t* v1 = src_v + src_stride_v;
  int x;
  for (x = 0; x + 2 <= width; x += 2) {
    dst_uv[0] = (uint8_t)((u0[0] + u0[1] + u1[0] + u1[1] + 2) >> 2);
    dst_uv[1] = (uint8_t)((v0[0] + v0[1] + v1[0] + v1[1] + 2) >> 2);
    u0 += 2;
    u1 += 2;
    v0 += 2;
    v1 += 2;
    dst_uv += 2;
  }
  // Odd width: average the last column vertically only.
  if (width & 1) {
    dst_uv[0] = (uint8_t)((u0[0] + u1[0] + 1) >> 1);
    dst_uv[1] = (uint8_t)((v0[0] + v1[0] + 1) >> 1);
  }
}
4327
4328 #ifdef __cplusplus
4329 } // extern "C"
4330 } // namespace libyuv
4331 #endif
4332