1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12
13 #include <string.h> // For memcpy and memset.
14
15 #include "libyuv/basic_types.h"
16
17 #ifdef __cplusplus
18 namespace libyuv {
19 extern "C" {
20 #endif
21
22 // llvm x86 is poor at ternary operator, so use branchless min/max.
23
24 #define USE_BRANCHLESS 1
25 #if USE_BRANCHLESS
// Clamps the lower bound: returns 0 when v is negative, otherwise v.
// Branchless: -(v >= 0) is an all-ones mask for non-negative v and zero for
// negative v. Unlike negating v and shifting, this has no signed overflow
// at INT32_MIN and does not depend on how negative values are shifted.
static __inline int32 clamp0(int32 v) {
  return -(int32)(v >= 0) & v;
}
29
// Clamps the upper bound: returns 255 when v exceeds 255 and v unchanged for
// 0..255. Negative inputs are not clamped here (the result is v & 255), so
// callers that can produce negatives apply clamp0() first.
// Branchless: -(v >= 256) is an all-ones mask for values above 255. Unlike
// computing 255 - v and shifting, this cannot overflow for very negative v
// and does not depend on how negative values are shifted.
static __inline int32 clamp255(int32 v) {
  return (-(int32)(v >= 256) | v) & 255;
}
33
// Clamps val to 0..255 by applying clamp0() and then clamp255().
static __inline uint32 Clamp(int32 val) {
  const int non_negative = clamp0(val);
  const int limited = clamp255(non_negative);
  return (uint32)limited;
}
38
// Returns the absolute value of v as an unsigned value.
// Branchless: mask is all ones for negative v, so (v + mask) ^ mask computes
// ~(v - 1) == -v; for non-negative v the mask is zero and v is returned.
// The arithmetic is done on unsigned values so that INT32_MIN is handled
// without signed overflow (the result is 0x80000000).
static __inline uint32 Abs(int32 v) {
  const uint32 mask = 0u - (uint32)(v < 0);
  return ((uint32)v + mask) ^ mask;
}
43 #else // USE_BRANCHLESS
// Clamps the lower bound: negative values become 0, others are unchanged.
static __inline int32 clamp0(int32 v) {
  if (v < 0) {
    return 0;
  }
  return v;
}
47
// Clamps the upper bound: values above 255 become 255. Values at or below
// 255, including negative ones, are returned unchanged.
static __inline int32 clamp255(int32 v) {
  if (v > 255) {
    return 255;
  }
  return v;
}
51
// Clamps val to 0..255: the lower bound is applied first, then the upper.
static __inline uint32 Clamp(int32 val) {
  return (uint32)(clamp255(clamp0(val)));
}
56
// Returns the absolute value of v as an unsigned value.
// The negation is performed on the unsigned representation so that
// INT32_MIN does not overflow (the result is 0x80000000).
static __inline uint32 Abs(int32 v) {
  const uint32 magnitude = (uint32)v;
  return (v < 0) ? (0u - magnitude) : magnitude;
}
60 #endif // USE_BRANCHLESS
61
// WRITEWORD stores the 32 bit value v into the 4 bytes at p. p does not need
// to be 4 byte aligned.
#ifdef LIBYUV_LITTLE_ENDIAN
// Stores the native representation of v. NOTE(review): LIBYUV_LITTLE_ENDIAN
// is presumably only defined for little-endian targets, where this matches
// the byte order written by the portable version below - confirm.
// memcpy replaces a *(uint32*)(p) = v store, which was an unaligned,
// type-punned access and an unparenthesized statement-like macro.
static __inline void WRITEWORD(uint8* p, uint32 v) {
  memcpy(p, &v, sizeof(v));
}
#else
// Portable version: writes v least significant byte first.
static __inline void WRITEWORD(uint8* p, uint32 v) {
  p[0] = (uint8)(v & 255);
  p[1] = (uint8)((v >> 8) & 255);
  p[2] = (uint8)((v >> 16) & 255);
  p[3] = (uint8)((v >> 24) & 255);
}
#endif
72
// Converts a row of 3 byte RGB24 pixels to 4 byte ARGB pixels. The three
// source bytes of each pixel are copied in order to the first three
// destination bytes and the fourth destination byte (alpha) is set to 255.
void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_rgb24[0];
    const uint8 green = src_rgb24[1];
    const uint8 red = src_rgb24[2];
    src_rgb24 += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
87
// Converts a row of 3 byte RAW pixels (R, G, B byte order) to 4 byte ARGB
// pixels (B, G, R, A byte order) with alpha set to 255.
void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 red = src_raw[0];
    const uint8 green = src_raw[1];
    const uint8 blue = src_raw[2];
    src_raw += 3;
    dst_argb[0] = blue;
    dst_argb[1] = green;
    dst_argb[2] = red;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
102
// Converts a row of RAW pixels to RGB24 by swapping the first and third
// byte of every 3 byte pixel.
void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 red = src_raw[0];
    const uint8 green = src_raw[1];
    const uint8 blue = src_raw[2];
    src_raw += 3;
    dst_rgb24[0] = blue;
    dst_rgb24[1] = green;
    dst_rgb24[2] = red;
    dst_rgb24 += 3;
  }
}
116
// Converts a row of RGB565 pixels (2 bytes each, low byte first) to ARGB.
// Blue is bits 0..4, green bits 5..10 and red bits 11..15 of the 16 bit
// pixel. Each channel is widened to 8 bits by replicating its top bits into
// the low bits; alpha is set to 255.
void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int pixel = src_rgb565[0] | (src_rgb565[1] << 8);
    const int b5 = pixel & 0x1f;
    const int g6 = (pixel >> 5) & 0x3f;
    const int r5 = pixel >> 11;
    src_rgb565 += 2;
    dst_argb[0] = (uint8)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8)((g6 << 2) | (g6 >> 4));
    dst_argb[2] = (uint8)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
131
// Converts a row of ARGB1555 pixels (2 bytes each, low byte first) to ARGB.
// Blue is bits 0..4, green bits 5..9, red bits 10..14 and alpha bit 15 of
// the 16 bit pixel. The 5 bit channels are widened to 8 bits by replicating
// their top bits; the alpha bit becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8* src_argb1555,
                         uint8* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int pixel = src_argb1555[0] | (src_argb1555[1] << 8);
    const int b5 = pixel & 0x1f;
    const int g5 = (pixel >> 5) & 0x1f;
    const int r5 = (pixel >> 10) & 0x1f;
    src_argb1555 += 2;
    dst_argb[0] = (uint8)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8)((g5 << 3) | (g5 >> 2));
    dst_argb[2] = (uint8)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = (pixel & 0x8000) ? 255u : 0u;
    dst_argb += 4;
  }
}
149
// Converts a row of ARGB4444 pixels (2 bytes each) to ARGB. The first byte
// holds blue (low nibble) and green (high nibble), the second byte holds
// red (low nibble) and alpha (high nibble). Each nibble n is widened to
// 8 bits as (n << 4) | n, which equals n * 17.
void ARGB4444ToARGBRow_C(const uint8* src_argb4444,
                         uint8* dst_argb,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int bg = src_argb4444[0];
    const int ra = src_argb4444[1];
    src_argb4444 += 2;
    dst_argb[0] = (uint8)((bg & 0x0f) * 0x11);
    dst_argb[1] = (uint8)((bg >> 4) * 0x11);
    dst_argb[2] = (uint8)((ra & 0x0f) * 0x11);
    dst_argb[3] = (uint8)((ra >> 4) * 0x11);
    dst_argb += 4;
  }
}
167
// Converts a row of 4 byte ARGB pixels to 3 byte RGB24 pixels by copying
// the first three bytes of each pixel and dropping the fourth (alpha).
void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = blue;
    dst_rgb[1] = green;
    dst_rgb[2] = red;
    dst_rgb += 3;
  }
}
181
// Converts a row of 4 byte ARGB pixels (B, G, R, A byte order) to 3 byte RAW
// pixels (R, G, B byte order). Alpha is dropped.
void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    dst_rgb[0] = red;
    dst_rgb[1] = green;
    dst_rgb[2] = blue;
    dst_rgb += 3;
  }
}
195
// Converts a row of ARGB pixels to RGB565 by truncating blue and red to
// 5 bits and green to 6 bits. Alpha is dropped.
// Each 16 bit pixel (blue in bits 0..4, green in bits 5..10, red in bits
// 11..15) is stored low byte first. Writing the bytes individually keeps
// the output independent of host endianness and of dst_rgb alignment, and
// doing the packing in unsigned arithmetic avoids shifting into the sign
// bit of an int. Nothing is written when width is zero or negative.
void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 3;
    const uint32 g = src_argb[1] >> 2;
    const uint32 r = src_argb[2] >> 3;
    const uint32 rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8)(rgb565 & 255);
    dst_rgb[1] = (uint8)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
217
// Converts a row of ARGB pixels to RGB565 with an ordered dither.
// dither4 is a row of 4 values from a 4x4 dither matrix. The matrix
// contains values that are added to B, G and R (saturating at 255) before
// the channels are truncated to fewer bits (565).
// The first byte of the 4x4 matrix is the upper left value.
// The 4 values are passed as an int and then referenced as an array of
// bytes, so endianness does not affect the order of the original matrix:
// pixel x uses byte (x & 3) in memory order. Seen as an integer, dither4
// therefore contains the value for the first pixel in the lower byte on
// little endian hosts and in the upper byte on big endian hosts.
// Each 16 bit output pixel is stored low byte first. Writing the bytes
// individually keeps the output independent of host endianness and of
// dst_rgb alignment, and packing in unsigned arithmetic avoids shifting
// into the sign bit of an int. Nothing is written when width is zero or
// negative.
void ARGBToRGB565DitherRow_C(const uint8* src_argb,
                             uint8* dst_rgb,
                             const uint32 dither4,
                             int width) {
  const unsigned char* dither = (const unsigned char*)(&dither4);
  int x;
  for (x = 0; x < width; ++x) {
    const int d = dither[x & 3];
    const uint32 b = (uint32)clamp255(src_argb[0] + d) >> 3;
    const uint32 g = (uint32)clamp255(src_argb[1] + d) >> 2;
    const uint32 r = (uint32)clamp255(src_argb[2] + d) >> 3;
    const uint32 rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8)(rgb565 & 255);
    dst_rgb[1] = (uint8)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
253
// Converts a row of ARGB pixels to ARGB1555 by truncating blue, green and
// red to 5 bits and alpha to its top bit.
// Each 16 bit pixel (blue in bits 0..4, green in bits 5..9, red in bits
// 10..14, alpha in bit 15) is stored low byte first. Writing the bytes
// individually keeps the output independent of host endianness and of
// dst_rgb alignment, and packing in unsigned arithmetic avoids shifting
// into the sign bit of an int. Nothing is written when width is zero or
// negative.
void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 3;
    const uint32 g = src_argb[1] >> 3;
    const uint32 r = src_argb[2] >> 3;
    const uint32 a = src_argb[3] >> 7;
    const uint32 argb1555 = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = (uint8)(argb1555 & 255);
    dst_rgb[1] = (uint8)(argb1555 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
278
// Converts a row of ARGB pixels to ARGB4444 by truncating every channel to
// its top 4 bits.
// Each 16 bit pixel (blue in bits 0..3, green in bits 4..7, red in bits
// 8..11, alpha in bits 12..15) is stored low byte first. Writing the bytes
// individually keeps the output independent of host endianness and of
// dst_rgb alignment, and packing in unsigned arithmetic avoids shifting
// into the sign bit of an int. Nothing is written when width is zero or
// negative.
void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32 b = src_argb[0] >> 4;
    const uint32 g = src_argb[1] >> 4;
    const uint32 r = src_argb[2] >> 4;
    const uint32 a = src_argb[3] >> 4;
    const uint32 argb4444 = b | (g << 4) | (r << 8) | (a << 12);
    dst_rgb[0] = (uint8)(argb4444 & 255);
    dst_rgb[1] = (uint8)(argb4444 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
303
// Computes Y from 8 bit R, G and B using 8 bit fixed point weights
// (66, 129, 25). The 0x1080 term adds the offset of 16 plus 0.5 for
// rounding, so the result ranges from 16 (black) to 235 (white).
static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return (weighted + 0x1080) >> 8;
}
307
// Computes U from 8 bit R, G and B using 8 bit fixed point weights
// (-38, -74, 112). The 0x8080 term adds the center value of 128 plus 0.5
// for rounding.
static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * b - 74 * g - 38 * r;
  return (weighted + 0x8080) >> 8;
}
// Computes V from 8 bit R, G and B using 8 bit fixed point weights
// (112, -94, -18). The 0x8080 term adds the center value of 128 plus 0.5
// for rounding.
static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return (weighted + 0x8080) >> 8;
}
314
// Generates NAME##ToYRow_C and NAME##ToUVRow_C for a packed RGB format.
// R, G and B are the byte offsets of the channels inside one pixel and BPP
// is the number of bytes per pixel.
// The Y function converts every pixel with RGBToY. The UV function reads
// two rows (the second one src_stride_rgb bytes after the first), averages
// each 2x2 block of pixels per channel (truncating) and converts the
// average with RGBToU and RGBToV. For an odd width the last output is
// computed from the final column of 2 pixels.
#define MAKEROWY(NAME, R, G, B, BPP)                                         \
  void NAME##ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {     \
    int i;                                                                   \
    for (i = 0; i < width; ++i) {                                            \
      *dst_y++ = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                      \
    }                                                                        \
  }                                                                          \
  void NAME##ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,            \
                       uint8* dst_u, uint8* dst_v, int width) {              \
    const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                       \
    int i;                                                                   \
    for (i = 0; i < width - 1; i += 2) {                                     \
      const int sum_b = src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] +      \
                        src_rgb1[B + BPP];                                   \
      const int sum_g = src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] +      \
                        src_rgb1[G + BPP];                                   \
      const int sum_r = src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] +      \
                        src_rgb1[R + BPP];                                   \
      const uint8 avg_b = (uint8)(sum_b >> 2);                               \
      const uint8 avg_g = (uint8)(sum_g >> 2);                               \
      const uint8 avg_r = (uint8)(sum_r >> 2);                               \
      *dst_u++ = RGBToU(avg_r, avg_g, avg_b);                                \
      *dst_v++ = RGBToV(avg_r, avg_g, avg_b);                                \
      src_rgb0 += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                   \
    }                                                                        \
    if (width & 1) {                                                         \
      const uint8 avg_b = (uint8)((src_rgb0[B] + src_rgb1[B]) >> 1);         \
      const uint8 avg_g = (uint8)((src_rgb0[G] + src_rgb1[G]) >> 1);         \
      const uint8 avg_r = (uint8)((src_rgb0[R] + src_rgb1[R]) >> 1);         \
      dst_u[0] = RGBToU(avg_r, avg_g, avg_b);                                \
      dst_v[0] = RGBToV(avg_r, avg_g, avg_b);                                \
    }                                                                        \
  }

MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
362
363 // JPeg uses a variation on BT.601-1 full range
364 // y = 0.29900 * r + 0.58700 * g + 0.11400 * b
365 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
366 // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center
367 // BT.601 Mpeg range uses:
368 // b 0.1016 * 255 = 25.908 = 25
369 // g 0.5078 * 255 = 129.489 = 129
370 // r 0.2578 * 255 = 65.739 = 66
371 // JPeg 8 bit Y (not used):
372 // b 0.11400 * 256 = 29.184 = 29
373 // g 0.58700 * 256 = 150.272 = 150
374 // r 0.29900 * 256 = 76.544 = 77
375 // JPeg 7 bit Y:
376 // b 0.11400 * 128 = 14.592 = 15
377 // g 0.58700 * 128 = 75.136 = 75
378 // r 0.29900 * 128 = 38.272 = 38
379 // JPeg 8 bit U:
380 // b 0.50000 * 255 = 127.5 = 127
381 // g -0.33126 * 255 = -84.4713 = -84
382 // r -0.16874 * 255 = -43.0287 = -43
383 // JPeg 8 bit V:
384 // b -0.08131 * 255 = -20.73405 = -20
385 // g -0.41869 * 255 = -106.76595 = -107
386 // r 0.50000 * 255 = 127.5 = 127
387
// Computes full range (JPEG) Y from 8 bit R, G and B using 7 bit fixed
// point weights (38, 75, 15). The weights sum to 128 and 64 is added for
// rounding, so the result ranges from 0 (black) to 255 (white).
static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 38 * r + 75 * g + 15 * b;
  return (weighted + 64) >> 7;
}
391
// Computes full range (JPEG) U from 8 bit R, G and B using 8 bit fixed
// point weights (-43, -84, 127). The 0x8080 term adds the center value of
// 128 plus 0.5 for rounding.
static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// Computes full range (JPEG) V from 8 bit R, G and B using 8 bit fixed
// point weights (127, -107, -20). The 0x8080 term adds the center value of
// 128 plus 0.5 for rounding.
static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}
398
// Average of two values, rounded up.
#define AVGB(a, b) (((a) + (b) + 1) >> 1)

// Generates NAME##ToYJRow_C and NAME##ToUVJRow_C for a packed RGB format.
// R, G and B are the byte offsets of the channels inside one pixel and BPP
// is the number of bytes per pixel.
// The YJ function converts every pixel with RGBToYJ. The UVJ function reads
// two rows (the second one src_stride_rgb bytes after the first). For each
// 2x2 block it first averages the two pixels of each column with AVGB, then
// averages the two column results with AVGB, and converts the result with
// RGBToUJ and RGBToVJ. For an odd width the last output is computed from
// the final column of 2 pixels.
#define MAKEROWYJ(NAME, R, G, B, BPP)                                        \
  void NAME##ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {    \
    int i;                                                                   \
    for (i = 0; i < width; ++i) {                                            \
      *dst_y++ = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);          \
      src_argb0 += BPP;                                                      \
    }                                                                        \
  }                                                                          \
  void NAME##ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,           \
                        uint8* dst_u, uint8* dst_v, int width) {             \
    const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                       \
    int i;                                                                   \
    for (i = 0; i < width - 1; i += 2) {                                     \
      const int b_col0 = AVGB(src_rgb0[B], src_rgb1[B]);                     \
      const int b_col1 = AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]);         \
      const int g_col0 = AVGB(src_rgb0[G], src_rgb1[G]);                     \
      const int g_col1 = AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]);         \
      const int r_col0 = AVGB(src_rgb0[R], src_rgb1[R]);                     \
      const int r_col1 = AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]);         \
      const uint8 avg_b = (uint8)AVGB(b_col0, b_col1);                       \
      const uint8 avg_g = (uint8)AVGB(g_col0, g_col1);                       \
      const uint8 avg_r = (uint8)AVGB(r_col0, r_col1);                       \
      *dst_u++ = RGBToUJ(avg_r, avg_g, avg_b);                               \
      *dst_v++ = RGBToVJ(avg_r, avg_g, avg_b);                               \
      src_rgb0 += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                   \
    }                                                                        \
    if (width & 1) {                                                         \
      const uint8 avg_b = (uint8)AVGB(src_rgb0[B], src_rgb1[B]);             \
      const uint8 avg_g = (uint8)AVGB(src_rgb0[G], src_rgb1[G]);             \
      const uint8 avg_r = (uint8)AVGB(src_rgb0[R], src_rgb1[R]);             \
      dst_u[0] = RGBToUJ(avg_r, avg_g, avg_b);                               \
      dst_v[0] = RGBToVJ(avg_r, avg_g, avg_b);                               \
    }                                                                        \
  }

MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ
440
// Converts a row of RGB565 pixels (2 bytes each, low byte first) to Y.
// Blue is bits 0..4, green bits 5..10 and red bits 11..15 of the 16 bit
// pixel. Each channel is widened to 8 bits by replicating its top bits
// before RGBToY is applied.
void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int pixel = src_rgb565[0] | (src_rgb565[1] << 8);
    const int b5 = pixel & 0x1f;
    const int g6 = (pixel >> 5) & 0x3f;
    const int r5 = pixel >> 11;
    const uint8 b8 = (uint8)((b5 << 3) | (b5 >> 2));
    const uint8 g8 = (uint8)((g6 << 2) | (g6 >> 4));
    const uint8 r8 = (uint8)((r5 << 3) | (r5 >> 2));
    *dst_y++ = RGBToY(r8, g8, b8);
    src_rgb565 += 2;
  }
}
455
// Converts a row of ARGB1555 pixels (2 bytes each, low byte first) to Y.
// Blue is bits 0..4, green bits 5..9 and red bits 10..14 of the 16 bit
// pixel; the alpha bit is ignored. Each channel is widened to 8 bits by
// replicating its top bits before RGBToY is applied.
void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int pixel = src_argb1555[0] | (src_argb1555[1] << 8);
    const int b5 = pixel & 0x1f;
    const int g5 = (pixel >> 5) & 0x1f;
    const int r5 = (pixel >> 10) & 0x1f;
    const uint8 b8 = (uint8)((b5 << 3) | (b5 >> 2));
    const uint8 g8 = (uint8)((g5 << 3) | (g5 >> 2));
    const uint8 r8 = (uint8)((r5 << 3) | (r5 >> 2));
    *dst_y++ = RGBToY(r8, g8, b8);
    src_argb1555 += 2;
  }
}
470
// Converts a row of ARGB4444 pixels (2 bytes each) to Y. The first byte
// holds blue (low nibble) and green (high nibble); the low nibble of the
// second byte holds red and its high nibble (alpha) is ignored. Each nibble
// n is widened to 8 bits as (n << 4) | n, which equals n * 17, before
// RGBToY is applied.
void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8 b8 = (uint8)((src_argb4444[0] & 0x0f) * 0x11);
    const uint8 g8 = (uint8)((src_argb4444[0] >> 4) * 0x11);
    const uint8 r8 = (uint8)((src_argb4444[1] & 0x0f) * 0x11);
    *dst_y++ = RGBToY(r8, g8, b8);
    src_argb4444 += 2;
  }
}
485
// Computes one row of 2x2 subsampled U and V from two rows of RGB565.
// src_rgb565 is the first row; the second row starts src_stride_rgb565
// bytes after it. For each 2x2 block the channels of the four pixels are
// summed, the sums are widened to 8 bits and converted with RGBToU and
// RGBToV. For an odd width the last output is computed from the final
// column of 2 pixels.
void RGB565ToUVRow_C(const uint8* src_rgb565,
                     int src_stride_rgb565,
                     uint8* dst_u,
                     uint8* dst_v,
                     int width) {
  const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // Reassemble the four 16 bit pixels (low byte first) of the 2x2 block.
    const int p0 = src_rgb565[0] | (src_rgb565[1] << 8);
    const int p1 = src_rgb565[2] | (src_rgb565[3] << 8);
    const int p2 = next_rgb565[0] | (next_rgb565[1] << 8);
    const int p3 = next_rgb565[2] | (next_rgb565[3] << 8);
    // 565 * 4 = 787: summing four values adds 2 bits to every channel.
    const int sum_b = (p0 & 0x1f) + (p1 & 0x1f) + (p2 & 0x1f) + (p3 & 0x1f);
    const int sum_g = ((p0 >> 5) & 0x3f) + ((p1 >> 5) & 0x3f) +
                      ((p2 >> 5) & 0x3f) + ((p3 >> 5) & 0x3f);
    const int sum_r = (p0 >> 11) + (p1 >> 11) + (p2 >> 11) + (p3 >> 11);
    // 787 -> 888: green already has 8 bits.
    const uint8 b = (uint8)((sum_b << 1) | (sum_b >> 6));
    const uint8 g = (uint8)sum_g;
    const uint8 r = (uint8)((sum_r << 1) | (sum_r >> 6));
    *dst_u++ = RGBToU(r, g, b);
    *dst_v++ = RGBToV(r, g, b);
    src_rgb565 += 4;
    next_rgb565 += 4;
  }
  if (width & 1) {
    const int p0 = src_rgb565[0] | (src_rgb565[1] << 8);
    const int p2 = next_rgb565[0] | (next_rgb565[1] << 8);
    // 565 * 2 = 676: summing two values adds 1 bit to every channel.
    const int sum_b = (p0 & 0x1f) + (p2 & 0x1f);
    const int sum_g = ((p0 >> 5) & 0x3f) + ((p2 >> 5) & 0x3f);
    const int sum_r = (p0 >> 11) + (p2 >> 11);
    // 676 -> 888.
    const uint8 b = (uint8)((sum_b << 2) | (sum_b >> 4));
    const uint8 g = (uint8)((sum_g << 1) | (sum_g >> 6));
    const uint8 r = (uint8)((sum_r << 2) | (sum_r >> 4));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
535
// Computes one row of 2x2 subsampled U and V from two rows of ARGB1555.
// src_argb1555 is the first row; the second row starts src_stride_argb1555
// bytes after it. In each 2 byte pixel blue is the low 5 bits of byte 0,
// green is the top 3 bits of byte 0 plus the low 2 bits of byte 1, red is
// bits 2..6 of byte 1 and alpha (ignored here) is bit 7 of byte 1.
// For each 2x2 block the channels of the four pixels are summed, the sums
// are widened to 8 bits and converted with RGBToU and RGBToV. For an odd
// width the last output is computed from the final column of 2 pixels.
void ARGB1555ToUVRow_C(const uint8* src_argb1555,
                       int src_stride_argb1555,
                       uint8* dst_u,
                       uint8* dst_v,
                       int width) {
  const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b1 = src_argb1555[2] & 0x1f;
    uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b3 = next_argb1555[2] & 0x1f;
    uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
    uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    uint8 g = (g0 + g1 + g2 + g3);
    uint8 r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 777 -> 888.
    g = (g << 1) | (g >> 6);
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8 b0 = src_argb1555[0] & 0x1f;
    uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8 b2 = next_argb1555[0] & 0x1f;
    uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    // Mask off the alpha bit: red is bits 2..6, exactly as for r0 above.
    // (A plain ">> 3" is the RGB565 layout and would mix alpha into red.)
    uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8 b = (b0 + b2);  // 555 * 2 = 666.
    uint8 g = (g0 + g2);
    uint8 r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
586
// Computes one row of 2x2 subsampled U and V from two rows of ARGB4444.
// src_argb4444 is the first row; the second row starts src_stride_argb4444
// bytes after it. In each 2 byte pixel the first byte holds blue (low
// nibble) and green (high nibble) and the low nibble of the second byte
// holds red; alpha is ignored. For each 2x2 block the channels of the four
// pixels are summed, the sums are widened to 8 bits and converted with
// RGBToU and RGBToV. For an odd width the last output is computed from the
// final column of 2 pixels.
void ARGB4444ToUVRow_C(const uint8* src_argb4444,
                       int src_stride_argb4444,
                       uint8* dst_u,
                       uint8* dst_v,
                       int width) {
  const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    // 444 * 4 = 666: summing four values adds 2 bits to every channel.
    const int sum_b = (src_argb4444[0] & 0x0f) + (src_argb4444[2] & 0x0f) +
                      (next_argb4444[0] & 0x0f) + (next_argb4444[2] & 0x0f);
    const int sum_g = (src_argb4444[0] >> 4) + (src_argb4444[2] >> 4) +
                      (next_argb4444[0] >> 4) + (next_argb4444[2] >> 4);
    const int sum_r = (src_argb4444[1] & 0x0f) + (src_argb4444[3] & 0x0f) +
                      (next_argb4444[1] & 0x0f) + (next_argb4444[3] & 0x0f);
    // 666 -> 888.
    const uint8 b = (uint8)((sum_b << 2) | (sum_b >> 4));
    const uint8 g = (uint8)((sum_g << 2) | (sum_g >> 4));
    const uint8 r = (uint8)((sum_r << 2) | (sum_r >> 4));
    *dst_u++ = RGBToU(r, g, b);
    *dst_v++ = RGBToV(r, g, b);
    src_argb4444 += 4;
    next_argb4444 += 4;
  }
  if (width & 1) {
    // 444 * 2 = 555: summing two values adds 1 bit to every channel.
    const int sum_b = (src_argb4444[0] & 0x0f) + (next_argb4444[0] & 0x0f);
    const int sum_g = (src_argb4444[0] >> 4) + (next_argb4444[0] >> 4);
    const int sum_r = (src_argb4444[1] & 0x0f) + (next_argb4444[1] & 0x0f);
    // 555 -> 888.
    const uint8 b = (uint8)((sum_b << 3) | (sum_b >> 2));
    const uint8 g = (uint8)((sum_g << 3) | (sum_g >> 2));
    const uint8 r = (uint8)((sum_r << 3) | (sum_r >> 2));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
637
// Converts a row of ARGB pixels to one U and one V sample per pixel (no
// subsampling) using RGBToU and RGBToV. Alpha is ignored.
void ARGBToUV444Row_C(const uint8* src_argb,
                      uint8* dst_u,
                      uint8* dst_v,
                      int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8 blue = src_argb[0];
    const uint8 green = src_argb[1];
    const uint8 red = src_argb[2];
    src_argb += 4;
    *dst_u++ = RGBToU(red, green, blue);
    *dst_v++ = RGBToV(red, green, blue);
  }
}
654
// Converts a row of ARGB pixels to gray: B, G and R are all replaced by the
// RGBToYJ value of the pixel and alpha is copied from the source.
void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint8 gray = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
665
// Converts a row of ARGB pixels to sepia tone in place. Every output
// channel is a weighted sum of the input B, G and R in 7 bit fixed point.
// Alpha is left untouched.
void ARGBSepiaRow_C(uint8* dst_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int sepia_b = (blue * 17 + green * 68 + red * 35) >> 7;
    const int sepia_g = (blue * 22 + green * 88 + red * 45) >> 7;
    const int sepia_r = (blue * 24 + green * 98 + red * 50) >> 7;
    // The blue weights sum to less than 128, so blue cannot exceed 255.
    // Green and red can and are clamped.
    dst_argb[0] = (uint8)sepia_b;
    dst_argb[1] = clamp255(sepia_g);
    dst_argb[2] = clamp255(sepia_r);
    dst_argb += 4;
  }
}
683
// Applies a 4x4 color matrix to a row of ARGB pixels. The matrix is signed
// with 6 fractional bits. Matrix row c (matrix_argb[c * 4] to
// matrix_argb[c * 4 + 3]) holds the weights of the source B, G, R and A for
// output byte c; results are clamped to 0..255.
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8* src_argb,
                          uint8* dst_argb,
                          const int8* matrix_argb,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Read the whole source pixel before writing any output byte.
    const int b = src_argb[0];
    const int g = src_argb[1];
    const int r = src_argb[2];
    const int a = src_argb[3];
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      const int8* weights = matrix_argb + channel * 4;
      const int sum =
          b * weights[0] + g * weights[1] + r * weights[2] + a * weights[3];
      dst_argb[channel] = Clamp(sum >> 6);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
716
// Applies a color table to a row of ARGB pixels in place. The table has 4
// interleaved bytes per entry: byte c of a pixel with value v is replaced
// by table_argb[v * 4 + c].
void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      const int value = dst_argb[channel];
      dst_argb[channel] = table_argb[value * 4 + channel];
    }
    dst_argb += 4;
  }
}
732
// Applies a color table to the B, G and R bytes of a row of ARGB pixels in
// place. The table has 4 interleaved bytes per entry: byte c of a pixel
// with value v is replaced by table_argb[v * 4 + c]. Alpha is left
// untouched.
void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int channel;
    for (channel = 0; channel < 3; ++channel) {
      const int value = dst_argb[channel];
      dst_argb[channel] = table_argb[value * 4 + channel];
    }
    dst_argb += 4;
  }
}
746
// Quantizes the B, G and R bytes of a row of ARGB pixels in place. Each
// value v becomes ((v * scale) >> 16) * interval_size + interval_offset,
// i.e. scale is a 16 bit fixed point factor that selects the interval.
// Alpha is left untouched.
void ARGBQuantizeRow_C(uint8* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int channel;
    for (channel = 0; channel < 3; ++channel) {
      const int value = dst_argb[channel];
      const int interval = (value * scale) >> 16;
      dst_argb[channel] = interval * interval_size + interval_offset;
    }
    dst_argb += 4;
  }
}
763
// Shades a row of ARGB pixels: every channel is multiplied by the matching
// byte of value (blue in the low byte, alpha in the high byte).
// Both the scale byte and the pixel byte are first replicated into 16 bits
// (v | v << 8, which equals v * 0x0101), so the 32 bit product holds the
// result in its top 8 bits.
void ARGBShadeRow_C(const uint8* src_argb,
                    uint8* dst_argb,
                    int width,
                    uint32 value) {
  uint32 scale[4];
  int i;
  scale[0] = (value & 0xff) * 0x0101u;
  scale[1] = ((value >> 8) & 0xff) * 0x0101u;
  scale[2] = ((value >> 16) & 0xff) * 0x0101u;
  scale[3] = (value >> 24) * 0x0101u;

  for (i = 0; i < width; ++i) {
    // Read the whole source pixel before writing any output byte.
    uint32 pixel[4];
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      pixel[channel] = src_argb[channel] * 0x0101u;
    }
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] = (uint8)((scale[channel] * pixel[channel]) >> 24);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
792
// Multiplies two rows of ARGB pixels channel by channel.
// The byte from src_argb0 is replicated into 16 bits (v | v << 8, which
// equals v * 0x0101) and multiplied by the byte from src_argb1; the result
// is bits 16..23 of the product.
void ARGBMultiplyRow_C(const uint8* src_argb0,
                       const uint8* src_argb1,
                       uint8* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Read both source pixels before writing any output byte.
    uint32 product[4];
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      const uint32 pixel = src_argb0[channel] * 0x0101u;
      const uint32 scale = src_argb1[channel];
      product[channel] = scale * pixel;
    }
    for (channel = 0; channel < 4; ++channel) {
      dst_argb[channel] = (uint8)(product[channel] >> 16);
    }
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
821
// Adds two rows of ARGB pixels channel by channel, saturating at 255.
void ARGBAddRow_C(const uint8* src_argb0,
                  const uint8* src_argb1,
                  uint8* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Read both source pixels before writing any output byte.
    const int sum_b = src_argb1[0] + src_argb0[0];
    const int sum_g = src_argb1[1] + src_argb0[1];
    const int sum_r = src_argb1[2] + src_argb0[2];
    const int sum_a = src_argb1[3] + src_argb0[3];
    dst_argb[0] = clamp255(sum_b);
    dst_argb[1] = clamp255(sum_g);
    dst_argb[2] = clamp255(sum_r);
    dst_argb[3] = clamp255(sum_a);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
848
// Subtracts src_argb1 from src_argb0 channel by channel, saturating at 0.
void ARGBSubtractRow_C(const uint8* src_argb0,
                       const uint8* src_argb1,
                       uint8* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    // Read both source pixels before writing any output byte.
    const int diff_b = src_argb0[0] - src_argb1[0];
    const int diff_g = src_argb0[1] - src_argb1[1];
    const int diff_r = src_argb0[2] - src_argb1[2];
    const int diff_a = src_argb0[3] - src_argb1[3];
    dst_argb[0] = clamp0(diff_b);
    dst_argb[1] = clamp0(diff_g);
    dst_argb[2] = clamp0(diff_r);
    dst_argb[3] = clamp0(diff_a);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
875
// Sobel functions that mimic the SSSE3 versions.

// Computes a row of the horizontal Sobel gradient from three rows of Y.
// For every x the difference between column x and column x + 2 is taken
// in each row, the middle row is weighted by 2, and the absolute value of
// the sum is saturated at 255. Reads width + 2 bytes from every source row.
void SobelXRow_C(const uint8* src_y0,
                 const uint8* src_y1,
                 const uint8* src_y2,
                 uint8* dst_sobelx,
                 int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int top = src_y0[x] - src_y0[x + 2];
    const int middle = src_y1[x] - src_y1[x + 2];
    const int bottom = src_y2[x] - src_y2[x + 2];
    const int magnitude = Abs(top + middle * 2 + bottom);
    dst_sobelx[x] = (uint8)(clamp255(magnitude));
  }
}
897
// Computes a row of the vertical Sobel gradient from two rows of Y.
// For every x the difference between src_y0 and src_y1 is taken at columns
// x, x + 1 and x + 2, the middle column is weighted by 2, and the absolute
// value of the sum is saturated at 255. Reads width + 2 bytes from both
// source rows.
void SobelYRow_C(const uint8* src_y0,
                 const uint8* src_y1,
                 uint8* dst_sobely,
                 int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int left = src_y0[x + 0] - src_y1[x + 0];
    const int center = src_y0[x + 1] - src_y1[x + 1];
    const int right = src_y0[x + 2] - src_y1[x + 2];
    const int magnitude = Abs(left + center * 2 + right);
    dst_sobely[x] = (uint8)(clamp255(magnitude));
  }
}
917
// Combines a row of Sobel X and Sobel Y values into gray ARGB pixels:
// B, G and R are set to the sum of both values saturated at 255 and alpha
// is set to 255.
void SobelRow_C(const uint8* src_sobelx,
                const uint8* src_sobely,
                uint8* dst_argb,
                int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int sum = src_sobelx[x] + src_sobely[x];
    const uint8 gray = (uint8)(clamp255(sum));
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = (uint8)(255u);
    dst_argb += 4;
  }
}
934
// Combines a row of Sobel X and Sobel Y values into a single plane: every
// output byte is the sum of both values saturated at 255.
void SobelToPlaneRow_C(const uint8* src_sobelx,
                       const uint8* src_sobely,
                       uint8* dst_y,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int sum = src_sobelx[x] + src_sobely[x];
    dst_y[x] = (uint8)(clamp255(sum));
  }
}
947
// Combines a row of Sobel X and Sobel Y values into ARGB pixels: blue is
// Sobel Y, red is Sobel X, green is the sum of both saturated at 255 and
// alpha is set to 255.
void SobelXYRow_C(const uint8* src_sobelx,
                  const uint8* src_sobely,
                  uint8* dst_argb,
                  int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int sobel_x = src_sobelx[x];
    const int sobel_y = src_sobely[x];
    const int combined = clamp255(sobel_x + sobel_y);
    dst_argb[0] = (uint8)(sobel_y);
    dst_argb[1] = (uint8)(combined);
    dst_argb[2] = (uint8)(sobel_x);
    dst_argb[3] = (uint8)(255u);
    dst_argb += 4;
  }
}
964
// Expands a row of Y samples to gray ARGB pixels: every Y value is copied
// unchanged to B, G and R and alpha is set to 255.
void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8 gray = *src_y++;
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
976
// TODO(fbarchard): Unify these structures to be platform independent.
// TODO(fbarchard): Generate SIMD structures from float matrix.

// BT.601 YUV to RGB reference
//  R = (Y - 16) * 1.164 - V * -1.596
//  G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
//  B = (Y - 16) * 1.164 - U * -2.018
//
// The macros below hold these coefficients in 6 bit fixed point (scaled by
// 64); YuvPixel() shifts its sums right by 6 to remove the scale. They are
// scratch names only: each one is #undef'd after the tables so that the next
// colour space can define its own values.

// Y contribution to R,G,B.  Scale and bias.
// YG carries an extra 256 * 256 / 257 factor because YuvPixel() multiplies Y
// by 0x0101 (257) and then shifts the product right by 16.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// U and V contributions to R,G,B.
// UB is limited to -128 (the max() in its comment), i.e. -2.0 instead of
// -2.018 -- presumably so it fits a signed 8 bit lane; confirm against the
// struct declaration.
#define UB -128 /* max(-128, round(-2.018 * 64)) */
#define UG 25   /* round(0.391 * 64) */
#define VG 52   /* round(0.813 * 64) */
#define VR -102 /* round(-1.596 * 64) */

// Bias values to subtract 16 from Y and 128 from U and V.
// Each bias is the channel's U/V coefficients times 128 plus YGB, so
// YuvPixel() can work on the raw 0..255 U and V samples.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Row order in each initializer follows the member order of
// struct YuvConstants for the target (declared outside this file --
// NOTE(review): confirm against libyuv/row.h before reordering anything).
// The kYvu* tables hold the same coefficients with the U and V roles
// exchanged and the B and R biases swapped.
#if defined(__aarch64__)  // 64 bit arm
// UB and VR are stored negated (as positive values); YuvPixel() negates them
// again when reading. The Y gain is stored multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)  // 32 bit arm
// Same negation and 0x0101 scaling as above, but the U coefficient occupies
// elements 0..3 and the V coefficient elements 4..7 of each row.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Other targets: coefficients are stored with their sign, as interleaved
// (U, V) pairs, followed by one broadcast row per bias and one for YG.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Release the scratch names for the next colour space.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1060
// JPEG YUV to RGB reference
// *  R = Y - V * -1.40200
// *  G = Y - U * 0.34414 - V * 0.71414
// *  B = Y - U * -1.77200
//
// Coefficients are held in 6 bit fixed point (scaled by 64); YuvPixel()
// shifts its sums right by 6 to remove the scale. The macro names are
// scratch names that are #undef'd again after the tables.

// Y contribution to R,G,B.  Scale and bias.
// The Y gain is 1.0 here and YGB holds only the rounding term (64 / 2).
// YG carries the 256 * 256 / 257 factor because YuvPixel() multiplies Y by
// 0x0101 (257) and then shifts the product right by 16.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32   /* 64 / 2 */

// U and V contributions to R,G,B.
#define UB -113 /* round(-1.77200 * 64) */
#define UG 22   /* round(0.34414 * 64) */
#define VG 46   /* round(0.71414 * 64) */
#define VR -90  /* round(-1.40200 * 64) */

// Bias values to round, and subtract 128 from U and V.
// Each bias is the channel's U/V coefficients times 128 plus YGB.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Row order in each initializer follows the member order of
// struct YuvConstants for the target (declared outside this file --
// NOTE(review): confirm against libyuv/row.h before reordering anything).
// The kYvu* tables hold the same coefficients with the U and V roles
// exchanged and the B and R biases swapped.
#if defined(__aarch64__)
// UB and VR are stored negated (as positive values); YuvPixel() negates them
// again when reading. The Y gain is stored multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)
// Same negation and 0x0101 scaling as above, but the U coefficient occupies
// elements 0..3 and the V coefficient elements 4..7 of each row.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Other targets: coefficients are stored with their sign, as interleaved
// (U, V) pairs, followed by one broadcast row per bias and one for YG.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Release the scratch names for the next colour space.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1141
// BT.709 YUV to RGB reference
//  R = (Y - 16) * 1.164 - V * -1.793
//  G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
//  B = (Y - 16) * 1.164 - U * -2.112
// See also http://www.equasys.de/colorconversion.html
//
// Coefficients are held in 6 bit fixed point (scaled by 64); YuvPixel()
// shifts its sums right by 6 to remove the scale. The macro names are
// scratch names that are #undef'd again after the tables.

// Y contribution to R,G,B.  Scale and bias.
// YG carries the 256 * 256 / 257 factor because YuvPixel() multiplies Y by
// 0x0101 (257) and then shifts the product right by 16.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B.
// UB is limited to -128 (the max() in its comment), which is -2.0 in this
// fixed point format rather than the reference -2.112.
#define UB -128 /* max(-128, round(-2.112 * 64)) */
#define UG 14   /* round(0.213 * 64) */
#define VG 34   /* round(0.533 * 64) */
#define VR -115 /* round(-1.793 * 64) */

// Bias values to round, and subtract 128 from U and V.
// Each bias is the channel's U/V coefficients times 128 plus YGB.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Row order in each initializer follows the member order of
// struct YuvConstants for the target (declared outside this file --
// NOTE(review): confirm against libyuv/row.h before reordering anything).
// The kYvu* tables hold the same coefficients with the U and V roles
// exchanged and the B and R biases swapped.
#if defined(__aarch64__)
// UB and VR are stored negated (as positive values); YuvPixel() negates them
// again when reading. The Y gain is stored multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)
// Same negation and 0x0101 scaling as above, but the U coefficient occupies
// elements 0..3 and the V coefficient elements 4..7 of each row.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Other targets: coefficients are stored with their sign, as interleaved
// (U, V) pairs, followed by one broadcast row per bias and one for YG.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Release the scratch names so later code can reuse them.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1224
1225 // C reference code that mimics the YUV assembly.
YuvPixel(uint8 y,uint8 u,uint8 v,uint8 * b,uint8 * g,uint8 * r,const struct YuvConstants * yuvconstants)1226 static __inline void YuvPixel(uint8 y,
1227 uint8 u,
1228 uint8 v,
1229 uint8* b,
1230 uint8* g,
1231 uint8* r,
1232 const struct YuvConstants* yuvconstants) {
1233 #if defined(__aarch64__)
1234 int ub = -yuvconstants->kUVToRB[0];
1235 int ug = yuvconstants->kUVToG[0];
1236 int vg = yuvconstants->kUVToG[1];
1237 int vr = -yuvconstants->kUVToRB[1];
1238 int bb = yuvconstants->kUVBiasBGR[0];
1239 int bg = yuvconstants->kUVBiasBGR[1];
1240 int br = yuvconstants->kUVBiasBGR[2];
1241 int yg = yuvconstants->kYToRgb[0] / 0x0101;
1242 #elif defined(__arm__)
1243 int ub = -yuvconstants->kUVToRB[0];
1244 int ug = yuvconstants->kUVToG[0];
1245 int vg = yuvconstants->kUVToG[4];
1246 int vr = -yuvconstants->kUVToRB[4];
1247 int bb = yuvconstants->kUVBiasBGR[0];
1248 int bg = yuvconstants->kUVBiasBGR[1];
1249 int br = yuvconstants->kUVBiasBGR[2];
1250 int yg = yuvconstants->kYToRgb[0] / 0x0101;
1251 #else
1252 int ub = yuvconstants->kUVToB[0];
1253 int ug = yuvconstants->kUVToG[0];
1254 int vg = yuvconstants->kUVToG[1];
1255 int vr = yuvconstants->kUVToR[1];
1256 int bb = yuvconstants->kUVBiasB[0];
1257 int bg = yuvconstants->kUVBiasG[0];
1258 int br = yuvconstants->kUVBiasR[0];
1259 int yg = yuvconstants->kYToRgb[0];
1260 #endif
1261
1262 uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16;
1263 *b = Clamp((int32)(-(u * ub) + y1 + bb) >> 6);
1264 *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
1265 *r = Clamp((int32)(-(v * vr) + y1 + br) >> 6);
1266 }
1267
1268 // Y contribution to R,G,B. Scale and bias.
1269 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1270 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1271
1272 // C reference code that mimics the YUV assembly.
YPixel(uint8 y,uint8 * b,uint8 * g,uint8 * r)1273 static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
1274 uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
1275 *b = Clamp((int32)(y1 + YGB) >> 6);
1276 *g = Clamp((int32)(y1 + YGB) >> 6);
1277 *r = Clamp((int32)(y1 + YGB) >> 6);
1278 }
1279
1280 #undef YG
1281 #undef YGB
1282
1283 #if !defined(LIBYUV_DISABLE_NEON) && \
1284 (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
1285 // C mimic assembly.
1286 // TODO(fbarchard): Remove subsampling from Neon.
I444ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,const struct YuvConstants * yuvconstants,int width)1287 void I444ToARGBRow_C(const uint8* src_y,
1288 const uint8* src_u,
1289 const uint8* src_v,
1290 uint8* rgb_buf,
1291 const struct YuvConstants* yuvconstants,
1292 int width) {
1293 int x;
1294 for (x = 0; x < width - 1; x += 2) {
1295 uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
1296 uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
1297 YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
1298 yuvconstants);
1299 rgb_buf[3] = 255;
1300 YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
1301 yuvconstants);
1302 rgb_buf[7] = 255;
1303 src_y += 2;
1304 src_u += 2;
1305 src_v += 2;
1306 rgb_buf += 8; // Advance 2 pixels.
1307 }
1308 if (width & 1) {
1309 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1310 rgb_buf + 2, yuvconstants);
1311 rgb_buf[3] = 255;
1312 }
1313 }
1314 #else
I444ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,const struct YuvConstants * yuvconstants,int width)1315 void I444ToARGBRow_C(const uint8* src_y,
1316 const uint8* src_u,
1317 const uint8* src_v,
1318 uint8* rgb_buf,
1319 const struct YuvConstants* yuvconstants,
1320 int width) {
1321 int x;
1322 for (x = 0; x < width; ++x) {
1323 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1324 rgb_buf + 2, yuvconstants);
1325 rgb_buf[3] = 255;
1326 src_y += 1;
1327 src_u += 1;
1328 src_v += 1;
1329 rgb_buf += 4; // Advance 1 pixel.
1330 }
1331 }
1332 #endif
1333
1334 // Also used for 420
I422ToARGBRow_C(const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * rgb_buf,const struct YuvConstants * yuvconstants,int width)1335 void I422ToARGBRow_C(const uint8* src_y,
1336 const uint8* src_u,
1337 const uint8* src_v,
1338 uint8* rgb_buf,
1339 const struct YuvConstants* yuvconstants,
1340 int width) {
1341 int x;
1342 for (x = 0; x < width - 1; x += 2) {
1343 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1344 rgb_buf + 2, yuvconstants);
1345 rgb_buf[3] = 255;
1346 YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1347 rgb_buf + 6, yuvconstants);
1348 rgb_buf[7] = 255;
1349 src_y += 2;
1350 src_u += 1;
1351 src_v += 1;
1352 rgb_buf += 8; // Advance 2 pixels.
1353 }
1354 if (width & 1) {
1355 YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1356 rgb_buf + 2, yuvconstants);
1357 rgb_buf[3] = 255;
1358 }
1359 }
1360
// Convert a row of I422 plus a separate alpha plane to ARGB. Each U/V sample
// is shared by two adjacent pixels; alpha is copied per pixel from src_a.
void I422AlphaToARGBRow_C(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          const uint8* src_a,
                          uint8* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = src_a[0];
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    ++src_u;
    ++src_v;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}
1388
// Convert a row of I422 to 3 bytes per pixel, stored as B, G, R. Each U/V
// sample is shared by two adjacent pixels.
void I422ToRGB24Row_C(const uint8* src_y,
                      const uint8* src_u,
                      const uint8* src_v,
                      uint8* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[3], &rgb_buf[4],
             &rgb_buf[5], yuvconstants);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 6;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
1411
// Convert a row of I422 to ARGB4444 (16 bits per pixel: B in bits 0..3, G in
// 4..7, R in 8..11, alpha 0xf in 12..15). Each U/V sample is shared by two
// adjacent pixels, which are packed together into one 32 bit word.
// Words are stored with memcpy in native byte order, so dst_argb4444 needs no
// particular alignment and no uint8 buffer is accessed through a wider type.
void I422ToARGB4444Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Keep the top 4 bits of every channel.
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    two_pixels = (uint32)b0 | ((uint32)g0 << 4) | ((uint32)r0 << 8) |
                 ((uint32)b1 << 16) | ((uint32)g1 << 20) |
                 ((uint32)r1 << 24) | 0xf000f000u;
    memcpy(dst_argb4444, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    one_pixel = (uint16)(b0 | (g0 << 4) | (r0 << 8) | 0xf000);
    memcpy(dst_argb4444, &one_pixel, sizeof(one_pixel));
  }
}
1449
// Convert a row of I422 to ARGB1555 (16 bits per pixel: B in bits 0..4, G in
// 5..9, R in 10..14, alpha bit 15 set). Each U/V sample is shared by two
// adjacent pixels, which are packed together into one 32 bit word.
// Words are stored with memcpy in native byte order, so dst_argb1555 needs no
// particular alignment and no uint8 buffer is accessed through a wider type.
void I422ToARGB1555Row_C(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Keep the top 5 bits of every channel.
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    two_pixels = (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 10) |
                 ((uint32)b1 << 16) | ((uint32)g1 << 21) |
                 ((uint32)r1 << 26) | 0x80008000u;
    memcpy(dst_argb1555, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    one_pixel = (uint16)(b0 | (g0 << 5) | (r0 << 10) | 0x8000);
    memcpy(dst_argb1555, &one_pixel, sizeof(one_pixel));
  }
}
1487
// Convert a row of I422 to RGB565 (16 bits per pixel: B in bits 0..4, G in
// 5..10, R in 11..15). Each U/V sample is shared by two adjacent pixels,
// which are packed together into one 32 bit word.
// Packing is done in uint32 so the second pixel's red field can reach bit 31
// without overflowing a signed int. Words are stored with memcpy in native
// byte order, so dst_rgb565 needs no particular alignment.
void I422ToRGB565Row_C(const uint8* src_y,
                       const uint8* src_u,
                       const uint8* src_v,
                       uint8* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Keep 5 bits of blue and red, 6 bits of green.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    two_pixels = (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 11) |
                 ((uint32)b1 << 16) | ((uint32)g1 << 21) |
                 ((uint32)r1 << 27);
    memcpy(dst_rgb565, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    one_pixel = (uint16)(b0 | (g0 << 5) | (r0 << 11));
    memcpy(dst_rgb565, &one_pixel, sizeof(one_pixel));
  }
}
1525
// Convert a row of NV12 (Y plane plus interleaved U,V bytes) to ARGB with
// alpha 255. Each U,V byte pair is shared by two adjacent pixels.
void NV12ToARGBRow_C(const uint8* src_y,
                     const uint8* src_uv,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1549
// Convert a row of NV21 (Y plane plus interleaved V,U bytes) to ARGB with
// alpha 255. Byte 1 of each chroma pair is passed as U and byte 0 as V; each
// pair is shared by two adjacent pixels.
void NV21ToARGBRow_C(const uint8* src_y,
                     const uint8* src_vu,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_vu[1], src_vu[0], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1573
// Convert a row of NV12 (Y plane plus interleaved U,V bytes) to RGB565
// (16 bits per pixel: B in bits 0..4, G in 5..10, R in 11..15). Each U,V
// pair is shared by two adjacent pixels, which are packed together into one
// 32 bit word.
// Packing is done in uint32 so the second pixel's red field can reach bit 31
// without overflowing a signed int. Words are stored with memcpy in native
// byte order, so dst_rgb565 needs no particular alignment.
void NV12ToRGB565Row_C(const uint8* src_y,
                       const uint8* src_uv,
                       uint8* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8 b0;
  uint8 g0;
  uint8 r0;
  uint8 b1;
  uint8 g1;
  uint8 r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32 two_pixels;
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    // Keep 5 bits of blue and red, 6 bits of green.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    two_pixels = (uint32)b0 | ((uint32)g0 << 5) | ((uint32)r0 << 11) |
                 ((uint32)b1 << 16) | ((uint32)g1 << 21) |
                 ((uint32)r1 << 27);
    memcpy(dst_rgb565, &two_pixels, sizeof(two_pixels));
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16 one_pixel;
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    one_pixel = (uint16)(b0 | (g0 << 5) | (r0 << 11));
    memcpy(dst_rgb565, &one_pixel, sizeof(one_pixel));
  }
}
1609
// Convert a row of packed YUY2 to ARGB with alpha 255. Every 4 source bytes
// are read as Y0, U, Y1, V and yield two pixels sharing that U and V.
// For an odd width the last pixel still reads U and V from bytes 1 and 3.
void YUY2ToARGBRow_C(const uint8* src_yuy2,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1631
// Convert a row of packed UYVY to ARGB with alpha 255. Every 4 source bytes
// are read as U, Y0, V, Y1 and yield two pixels sharing that U and V.
// For an odd width the last pixel still reads U and V from bytes 0 and 2.
void UYVYToARGBRow_C(const uint8* src_uyvy,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1653
// Convert a row of I422 to 4 bytes per pixel stored as A, B, G, R (alpha 255
// in byte 0). Each U/V sample is shared by two adjacent pixels.
void I422ToRGBARow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining = width;
  while (remaining > 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[5], &rgb_buf[6],
             &rgb_buf[7], yuvconstants);
    rgb_buf[4] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
  }
}
1679
// Convert a row of I400 (luma only) to ARGB with alpha 255, applying the
// YPixel() scale and bias to every sample.
void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
  int remaining = width;
  while (remaining > 1) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
    YPixel(src_y[1], &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
    remaining -= 2;
  }
  if (width & 1) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
  }
}
1695
// Reverse a row of bytes: dst[x] = src[width - 1 - x].
// The source is walked backwards from its last byte, two bytes per step.
void MirrorRow_C(const uint8* src, uint8* dst, int width) {
  const uint8* reader = src + (width - 1);  // Last source byte.
  uint8* writer = dst;
  int remaining = width;
  while (remaining > 1) {
    writer[0] = reader[0];
    writer[1] = reader[-1];
    reader -= 2;
    writer += 2;
    remaining -= 2;
  }
  if (width & 1) {
    dst[width - 1] = reader[0];
  }
}
1708
// Reverse a row of interleaved U,V byte pairs and split it into planes:
// dst_u[x] and dst_v[x] receive the U and V of source pair (width - 1 - x).
void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  const uint8* reader = src_uv + ((width - 1) << 1);  // Last U,V pair.
  uint8* u_out = dst_u;
  uint8* v_out = dst_v;
  int remaining = width;
  while (remaining > 1) {
    u_out[0] = reader[0];
    u_out[1] = reader[-2];
    v_out[0] = reader[1];
    v_out[1] = reader[-1];
    reader -= 4;
    u_out += 2;
    v_out += 2;
    remaining -= 2;
  }
  if (width & 1) {
    dst_u[width - 1] = reader[0];
    dst_v[width - 1] = reader[1];
  }
}
1724
// Reverse a row of 4 byte pixels: output pixel x is input pixel
// (width - 1 - x); the bytes inside each pixel keep their order.
// Pixels are moved with memcpy through a local word, so neither buffer has to
// be 4 byte aligned and no uint8 buffer is accessed through a wider type.
//   src   - width * 4 source bytes.
//   dst   - receives width * 4 bytes.
//   width - number of pixels.
void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32 pixel;
    memcpy(&pixel, src + (size_t)(width - 1 - x) * 4, sizeof(pixel));
    memcpy(dst + (size_t)x * 4, &pixel, sizeof(pixel));
  }
}
1739
// De-interleave a row of U,V byte pairs into separate planes:
// dst_u[x] = src_uv[2 * x] and dst_v[x] = src_uv[2 * x + 1].
void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
  uint8* u_out = dst_u;
  uint8* v_out = dst_v;
  int remaining = width;
  while (remaining > 1) {
    u_out[0] = src_uv[0];
    u_out[1] = src_uv[2];
    v_out[0] = src_uv[1];
    v_out[1] = src_uv[3];
    src_uv += 4;
    u_out += 2;
    v_out += 2;
    remaining -= 2;
  }
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}
1754
// Interleave separate U and V rows into U,V byte pairs:
// dst_uv[2 * x] = src_u[x] and dst_uv[2 * x + 1] = src_v[x].
void MergeUVRow_C(const uint8* src_u,
                  const uint8* src_v,
                  uint8* dst_uv,
                  int width) {
  const uint8* u_in = src_u;
  const uint8* v_in = src_v;
  int remaining = width;
  while (remaining > 1) {
    dst_uv[0] = u_in[0];
    dst_uv[1] = v_in[0];
    dst_uv[2] = u_in[1];
    dst_uv[3] = v_in[1];
    u_in += 2;
    v_in += 2;
    dst_uv += 4;
    remaining -= 2;
  }
  if (width & 1) {
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}
1772
// Copy count bytes from src to dst with memcpy (the buffers must not
// overlap).
void CopyRow_C(const uint8* src, uint8* dst, int count) {
  const size_t num_bytes = count;
  memcpy(dst, src, num_bytes);
}
1776
// Copy count 16 bit elements from src to dst with memcpy (the buffers must
// not overlap). The byte count is computed in size_t so that a large count
// cannot overflow int arithmetic.
void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
  memcpy(dst, src, (size_t)count * sizeof(*src));
}
1780
// Fill width bytes at dst with the value v8.
void SetRow_C(uint8* dst, uint8 v8, int width) {
  const size_t num_bytes = width;
  memset(dst, v8, num_bytes);
}
1784
// Fill width 4 byte pixels at dst_argb with the 32 bit value v32, written in
// native byte order.
// Each pixel is stored with memcpy, so dst_argb needs no particular alignment
// and the uint8 buffer is not accessed through a wider type.
void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    memcpy(dst_argb, &v32, sizeof(v32));
    dst_argb += sizeof(v32);
  }
}
1792
1793 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
YUY2ToUVRow_C(const uint8 * src_yuy2,int src_stride_yuy2,uint8 * dst_u,uint8 * dst_v,int width)1794 void YUY2ToUVRow_C(const uint8* src_yuy2,
1795 int src_stride_yuy2,
1796 uint8* dst_u,
1797 uint8* dst_v,
1798 int width) {
1799 // Output a row of UV values, filtering 2 rows of YUY2.
1800 int x;
1801 for (x = 0; x < width; x += 2) {
1802 dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
1803 dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
1804 src_yuy2 += 4;
1805 dst_u += 1;
1806 dst_v += 1;
1807 }
1808 }
1809
1810 // Copy row of YUY2 UV's (422) into U and V (422).
YUY2ToUV422Row_C(const uint8 * src_yuy2,uint8 * dst_u,uint8 * dst_v,int width)1811 void YUY2ToUV422Row_C(const uint8* src_yuy2,
1812 uint8* dst_u,
1813 uint8* dst_v,
1814 int width) {
1815 // Output a row of UV values.
1816 int x;
1817 for (x = 0; x < width; x += 2) {
1818 dst_u[0] = src_yuy2[1];
1819 dst_v[0] = src_yuy2[3];
1820 src_yuy2 += 4;
1821 dst_u += 1;
1822 dst_v += 1;
1823 }
1824 }
1825
1826 // Copy row of YUY2 Y's (422) into Y (420/422).
YUY2ToYRow_C(const uint8 * src_yuy2,uint8 * dst_y,int width)1827 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
1828 // Output a row of Y values.
1829 int x;
1830 for (x = 0; x < width - 1; x += 2) {
1831 dst_y[x] = src_yuy2[0];
1832 dst_y[x + 1] = src_yuy2[2];
1833 src_yuy2 += 4;
1834 }
1835 if (width & 1) {
1836 dst_y[width - 1] = src_yuy2[0];
1837 }
1838 }
1839
// Filter 2 rows of UYVY UV's (422) into U and V (420).
// For every 4-byte UYVY group, byte 0 (U) and byte 2 (V) of the row at
// src_uyvy are averaged, rounding up, with the same bytes of the row
// src_stride_uyvy bytes further on.  One group is consumed for every two
// pixels of width, including a trailing odd pixel.
void UYVYToUVRow_C(const uint8* src_uyvy,
                   int src_stride_uyvy,
                   uint8* dst_u,
                   uint8* dst_v,
                   int width) {
  const uint8* top = src_uyvy;
  const uint8* bottom = src_uyvy + src_stride_uyvy;
  int remaining = width;
  while (remaining > 0) {
    *dst_u++ = (top[0] + bottom[0] + 1) >> 1;
    *dst_v++ = (top[2] + bottom[2] + 1) >> 1;
    top += 4;
    bottom += 4;
    remaining -= 2;
  }
}
1856
// Copy row of UYVY UV's (422) into U and V (422).
// Byte 0 of each 4-byte group goes to dst_u and byte 2 goes to dst_v.  One
// group is consumed for every two pixels of width (a trailing odd pixel
// still consumes a whole group).
void UYVYToUV422Row_C(const uint8* src_uyvy,
                      uint8* dst_u,
                      uint8* dst_v,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; remaining -= 2) {
    *dst_u++ = src_uyvy[0];
    *dst_v++ = src_uyvy[2];
    src_uyvy += 4;
  }
}
1872
// Copy row of UYVY Y's (422) into Y (420/422).
// Bytes 1 and 3 of each 4-byte group are the two Y samples.  Whole groups
// are handled two pixels at a time; an odd width takes one more Y from
// byte 1 of the following group.
void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
  const uint8* group = src_uyvy;
  uint8* out = dst_y;
  int pairs_left = width / 2;
  while (pairs_left > 0) {
    out[0] = group[1];
    out[1] = group[3];
    group += 4;
    out += 2;
    --pairs_left;
  }
  if (width & 1) {
    dst_y[width - 1] = group[1];
  }
}
1886
// Blend one colour channel: the foreground value f is added to the
// background value b scaled by (256 - a) / 256.  The sum is saturated at 255
// so that foreground values larger than their alpha cannot wrap around when
// the result is narrowed to 8 bits.
static __inline uint8 BlendChannelSat(uint32 f, uint32 b, uint32 a) {
  const uint32 v = (((256 - a) * b) >> 8) + f;
  return (uint8)(v > 255u ? 255u : v);
}

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// The alpha used for a pixel is byte 3 of the src_argb0 pixel; the alpha of
// src_argb1 is ignored and the destination alpha is always written as 255.
// This code is meant to mimic the SSSE3 version for better testability.
// NOTE(review): the saturation in BlendChannelSat is intended to match a
// saturating add in the SIMD path - confirm against that implementation.
void ARGBBlendRow_C(const uint8* src_argb0,
                    const uint8* src_argb1,
                    uint8* dst_argb,
                    int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Load the whole pixel before storing anything so that in-place use
    // (dst_argb aliasing either source) stays correct.
    const uint32 fb = src_argb0[0];
    const uint32 fg = src_argb0[1];
    const uint32 fr = src_argb0[2];
    const uint32 a = src_argb0[3];
    const uint32 bb = src_argb1[0];
    const uint32 bg = src_argb1[1];
    const uint32 br = src_argb1[2];
    dst_argb[0] = BlendChannelSat(fb, bb, a);
    dst_argb[1] = BlendChannelSat(fg, bg, a);
    dst_argb[2] = BlendChannelSat(fr, br, a);
    dst_argb[3] = 255u;
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
1941
// Blend a single sample: (a * f + (255 - a) * b + 255) >> 8, where f comes
// from src0, b from src1 and a from the alpha plane.
static __inline uint8 BlendPlanePixel(int f, int b, int a) {
  return (uint8)((a * f + (255 - a) * b + 255) >> 8);
}

// Blend two planes sample by sample using a third plane as per-sample alpha.
// Samples are processed in pairs, followed by one extra sample when width is
// odd.
void BlendPlaneRow_C(const uint8* src0,
                     const uint8* src1,
                     const uint8* alpha,
                     uint8* dst,
                     int width) {
  int pairs_left;
  for (pairs_left = width / 2; pairs_left > 0; --pairs_left) {
    dst[0] = BlendPlanePixel(src0[0], src1[0], alpha[0]);
    dst[1] = BlendPlanePixel(src0[1], src1[1], alpha[1]);
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  if (width & 1) {
    dst[0] = BlendPlanePixel(src0[0], src1[0], alpha[0]);
  }
}
1962
// Scale one channel by alpha.  Both 8-bit values are widened to 16 bits by
// repeating the byte (v | v << 8), multiplied, and the top 8 bits of the
// 32-bit product are kept.
static __inline uint32 AttenuateChannel(uint32 f, uint32 a) {
  return ((a | (a << 8)) * (f | (f << 8))) >> 24;
}

// Attenuate one ARGB pixel.  All four source bytes are read before any
// destination byte is written.
static __inline void AttenuatePixel(const uint8* src, uint8* dst) {
  const uint32 b = src[0];
  const uint32 g = src[1];
  const uint32 r = src[2];
  const uint32 a = src[3];
  dst[0] = AttenuateChannel(b, a);
  dst[1] = AttenuateChannel(g, a);
  dst[2] = AttenuateChannel(r, a);
  dst[3] = a;
}

// Multiply source RGB by alpha and store to destination.
// This code mimics the SSSE3 version for better testability.
// Pixels are processed in pairs, followed by one extra pixel when width is
// odd; alpha is copied through unchanged.
void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
  int pairs_left;
  for (pairs_left = width / 2; pairs_left > 0; --pairs_left) {
    AttenuatePixel(src_argb, dst_argb);
    AttenuatePixel(src_argb + 4, dst_argb + 4);
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    AttenuatePixel(src_argb, dst_argb);
  }
}
2002
// Divide source RGB by alpha and store to destination.
// The exact result would be:
//   b = (b * 255 + (a / 2)) / a;
//   g = (g * 255 + (a / 2)) / a;
//   r = (r * 255 + (a / 2)) / a;
// The reciprocal method used instead is off by 1 on some values, e.g. 125.
//
// Inverse table indexed by alpha.  Each entry holds 1.0 in 8.8 fixed point
// (0x0100) in its upper short and the truncated reciprocal 0x10000 / a in
// its lower short.  Three entries are written out by hand rather than
// generated with T():
//   [0]    lower short is 0, so no division by zero is attempted.
//   [1]    lower short is 0xffff, because 0x10000 does not fit in 16 bits.
//   [255]  lower short is 0x0100 rather than 0x10000 / 255 (0x0101).
// T() is only ever expanded with the literal constants below and is
// undefined again right after the table.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32 fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
2049
ARGBUnattenuateRow_C(const uint8 * src_argb,uint8 * dst_argb,int width)2050 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
2051 int i;
2052 for (i = 0; i < width; ++i) {
2053 uint32 b = src_argb[0];
2054 uint32 g = src_argb[1];
2055 uint32 r = src_argb[2];
2056 const uint32 a = src_argb[3];
2057 const uint32 ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
2058 b = (b * ia) >> 8;
2059 g = (g * ia) >> 8;
2060 r = (r * ia) >> 8;
2061 // Clamping should not be necessary but is free in assembly.
2062 dst_argb[0] = clamp255(b);
2063 dst_argb[1] = clamp255(g);
2064 dst_argb[2] = clamp255(r);
2065 dst_argb[3] = a;
2066 src_argb += 4;
2067 dst_argb += 4;
2068 }
2069 }
2070
// Build one row of a 4-channel cumulative sum.  For every pixel x and
// channel c, cumsum[x * 4 + c] is the running sum of row[0..x] for that
// channel plus previous_cumsum[x * 4 + c].
void ComputeCumulativeSumRow_C(const uint8* row,
                               int32* cumsum,
                               const int32* previous_cumsum,
                               int width) {
  int32 running[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    const int base = x * 4;
    int c;
    // Update all four running sums first, then emit the four outputs.
    for (c = 0; c < 4; ++c) {
      running[c] += row[base + c];
    }
    for (c = 0; c < 4; ++c) {
      cumsum[base + c] = running[c] + previous_cumsum[base + c];
    }
  }
}
2088
// Turn cumulative sums into per-channel averages.  For each of `count`
// output pixels and each of the 4 channels, the box sum
//   bl[w + c] + tl[c] - bl[c] - tl[w + c]
// is multiplied by 1.0f / area and truncated to 8 bits.  `w` is an offset in
// int32 elements between the two corners read from each of tl and bl.
void CumulativeSumToAverageRow_C(const int32* tl,
                                 const int32* bl,
                                 int w,
                                 int area,
                                 uint8* dst,
                                 int count) {
  float inv_area = 1.0f / area;
  int remaining;
  for (remaining = count; remaining > 0; --remaining) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) * inv_area);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
2107
// Copy pixels from rotated source to destination row with a slope.
// uv_dudv holds four floats: the starting source coordinate (u, v) in
// [0] and [1], and the per-destination-pixel step (du, dv) in [2] and [3].
// For every destination pixel the current (u, v) is truncated to integers
// and the 4-byte pixel at src_argb + v * src_argb_stride + u * 4 is copied.
// No bounds checking is done on the sampled coordinates.
// Pixels are moved with memcpy through a temporary so that neither pointer
// needs 4-byte alignment and no byte buffer is accessed as a uint32.
LIBYUV_API
void ARGBAffineRow_C(const uint8* src_argb,
                     int src_argb_stride,
                     uint8* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Running source coordinate for the pixel being rendered.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    uint32 pixel;
    memcpy(&pixel, src_argb + y * src_argb_stride + x * 4, sizeof(pixel));
    memcpy(dst_argb, &pixel, sizeof(pixel));
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}
2130
// Blend 2 rows into 1.
// Every output byte is the rounded-up average of the byte at src_uv and the
// byte src_uv_stride bytes after it.
static void HalfRow_C(const uint8* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8* dst_uv,
                      int width) {
  const uint8* upper = src_uv;
  const uint8* lower = src_uv + src_uv_stride;
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst_uv++ = (uint8)((*upper++ + *lower++ + 1) >> 1);
  }
}
2141
// 16-bit variant of the 2-row blend: every output sample is the rounded-up
// average of the sample at src_uv and the sample src_uv_stride elements
// after it.
static void HalfRow_16_C(const uint16* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16* dst_uv,
                         int width) {
  const uint16* upper = src_uv;
  const uint16* lower = src_uv + src_uv_stride;
  int remaining = width;
  while (remaining > 0) {
    *dst_uv++ = (uint16)((*upper++ + *lower++ + 1) >> 1);
    --remaining;
  }
}
2151
// C version 2x2 -> 2x1.
// Blend the row at src_ptr with the row src_stride bytes after it.
// source_y_fraction is the weight of the second row out of 256; the first
// row gets 256 - source_y_fraction.  A fraction of 0 is a plain copy and a
// fraction of 128 is delegated to HalfRow_C.  All other fractions use a
// weighted sum rounded with + 128 before the shift by 8.
void InterpolateRow_C(uint8* dst_ptr,
                      const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  const uint8* row0 = src_ptr;
  const uint8* row1 = src_ptr + src_stride;
  int pairs_left;
  switch (source_y_fraction) {
    case 0:
      memcpy(dst_ptr, src_ptr, width);
      return;
    case 128:
      HalfRow_C(src_ptr, src_stride, dst_ptr, width);
      return;
    default:
      break;
  }
  // Two samples per iteration, then one more if width is odd.
  for (pairs_left = width / 2; pairs_left > 0; --pairs_left) {
    dst_ptr[0] = (row0[0] * weight0 + row1[0] * weight1 + 128) >> 8;
    dst_ptr[1] = (row0[1] * weight0 + row1[1] * weight1 + 128) >> 8;
    row0 += 2;
    row1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] = (row0[0] * weight0 + row1[0] * weight1 + 128) >> 8;
  }
}
2184
// 16-bit version of the 2x2 -> 2x1 row interpolator.
// Blend the row at src_ptr with the row src_stride elements after it.
// source_y_fraction is the weight of the second row out of 256; the first
// row gets 256 - source_y_fraction.  A fraction of 0 is a plain copy and a
// fraction of 128 is delegated to HalfRow_16_C.
// The weighted sum is rounded with + 128 before the shift by 8.  This keeps
// the general path consistent with the 8-bit interpolator and with the
// rounded average used for fraction 128, so the output no longer steps by
// one when the fraction crosses 128.
void InterpolateRow_16_C(uint16* dst_ptr,
                         const uint16* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint16* src_ptr1 = src_ptr + src_stride;
  int x;
  if (source_y_fraction == 0) {
    // Size computed in size_t so a large width cannot overflow int.
    memcpy(dst_ptr, src_ptr, (size_t)width * sizeof(*dst_ptr));
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width; ++x) {
    // Largest sum is 65535 * 256 + 128, which fits comfortably in an int.
    dst_ptr[x] = (uint16)(
        (src_ptr[x] * y0_fraction + src_ptr1[x] * y1_fraction + 128) >> 8);
  }
}
2213
// Use first 4 shuffler values to reorder ARGB channels.
// Output byte k of every pixel is source byte shuffler[k] of the same pixel.
void ARGBShuffleRow_C(const uint8* src_argb,
                      uint8* dst_argb,
                      const uint8* shuffler,
                      int width) {
  int lane[4];
  int k;
  int remaining;
  for (k = 0; k < 4; ++k) {
    lane[k] = shuffler[k];
  }
  for (remaining = width; remaining > 0; --remaining) {
    uint8 pixel[4];
    // Gather first, store second, to support in-place conversion.
    for (k = 0; k < 4; ++k) {
      pixel[k] = src_argb[lane[k]];
    }
    for (k = 0; k < 4; ++k) {
      dst_argb[k] = pixel[k];
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2239
// Pack planar Y, U and V into 4-byte groups ordered Y0 U Y1 V.
// Each group takes two Y samples and one U and one V sample.  When width is
// odd the last group is written with its second Y byte set to 0.
void I422ToYUY2Row_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame,
                     int width) {
  int pairs_left;
  for (pairs_left = width / 2; pairs_left > 0; --pairs_left) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = *src_u++;
    dst_frame[2] = src_y[1];
    dst_frame[3] = *src_v++;
    src_y += 2;
    dst_frame += 4;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;
    dst_frame[3] = src_v[0];
  }
}
2263
// Pack planar Y, U and V into 4-byte groups ordered U Y0 V Y1.
// Each group takes two Y samples and one U and one V sample.  When width is
// odd the last group is written with its second Y byte set to 0.
void I422ToUYVYRow_C(const uint8* src_y,
                     const uint8* src_u,
                     const uint8* src_v,
                     uint8* dst_frame,
                     int width) {
  int pairs_left = width / 2;
  while (pairs_left > 0) {
    dst_frame[0] = *src_u++;
    dst_frame[1] = src_y[0];
    dst_frame[2] = *src_v++;
    dst_frame[3] = src_y[1];
    src_y += 2;
    dst_frame += 4;
    --pairs_left;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;
  }
}
2287
// Apply a cubic polynomial to every channel of every ARGB pixel.
// poly holds 16 floats laid out as four groups of four: for channel c
// (0..3 in byte order) the result is
//   poly[c] + poly[c + 4] * v + poly[c + 8] * v^2 + poly[c + 12] * v^3
// where v is the source byte as a float.  The result is converted to int32
// and passed through Clamp() before being stored.
void ARGBPolynomialRow_C(const uint8* src_argb,
                         uint8* dst_argb,
                         const float* poly,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    uint32 out[4];
    int c;
    // Evaluate all four channels before writing any destination byte.
    for (c = 0; c < 4; ++c) {
      const float v = (float)(src_argb[c]);
      const float v2 = v * v;
      const float v3 = v2 * v;
      float acc = poly[c] + poly[c + 4] * v;
      acc += poly[c + 8] * v2;
      acc += poly[c + 12] * v3;
      out[c] = Clamp((int32)(acc));
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = out[c];
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2327
// Samples are assumed to be unsigned in the low 9, 10 or 12 bits.  The scale
// factor adjusts the source integer range to the half float range desired.

// The magic constant is 2^-112.  Multiplying by it is the same as
// subtracting 112 from the exponent, which is the difference in exponent
// bias between 32-bit and 16-bit floats.  Once that subtraction is done, the
// low bits of the exponent and the high bits of the mantissa can simply be
// extracted from the float: shifting its bit pattern right by 13 leaves them
// in the low 16 bits.  The mantissa is truncated, not rounded.

void HalfFloatRow_C(const uint16* src, uint16* dst, float scale, int width) {
  int i;
  float mult = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
    float value = src[i] * mult;
    uint32_t bits;
    // Read the float's bit pattern with memcpy; dereferencing a uint32_t*
    // that points at a float violates the aliasing rules.
    memcpy(&bits, &value, sizeof(bits));
    dst[i] = (uint16)(bits >> 13);
  }
}
2346
// Remap one ARGB pixel through the luma table.  The weighted sum of B, G and
// R selects a 256-byte row of `luma` (offset masked with 0x7F00); each colour
// byte is then looked up in that row.  Alpha is copied unchanged.
static __inline void LumaColorTablePixel(const uint8* src,
                                         uint8* dst,
                                         const uint8* luma,
                                         uint32 bc,
                                         uint32 gc,
                                         uint32 rc) {
  // Luminance in rows, color values in columns.
  const uint8* table =
      ((src[0] * bc + src[1] * gc + src[2] * rc) & 0x7F00u) + luma;
  dst[0] = table[src[0]];
  dst[1] = table[src[1]];
  dst[2] = table[src[2]];
  dst[3] = src[3];
}

// Apply a luminance-dependent colour table to a row of ARGB pixels.
// lumacoeff packs the per-channel weights: bits 0-7 for B, 8-15 for G and
// 16-23 for R.  Pixels are processed in pairs, followed by one extra pixel
// when width is odd.
void ARGBLumaColorTableRow_C(const uint8* src_argb,
                             uint8* dst_argb,
                             int width,
                             const uint8* luma,
                             uint32 lumacoeff) {
  const uint32 bc = lumacoeff & 0xff;
  const uint32 gc = (lumacoeff >> 8) & 0xff;
  const uint32 rc = (lumacoeff >> 16) & 0xff;
  int pairs_left;
  for (pairs_left = width / 2; pairs_left > 0; --pairs_left) {
    LumaColorTablePixel(src_argb, dst_argb, luma, bc, gc, rc);
    LumaColorTablePixel(src_argb + 4, dst_argb + 4, luma, bc, gc, rc);
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    LumaColorTablePixel(src_argb, dst_argb, luma, bc, gc, rc);
  }
}
2388
// Copy only the alpha byte (byte 3) of each ARGB pixel from src to dst; the
// other three bytes of every destination pixel are left untouched.  Pixels
// are handled in pairs, followed by one extra pixel when width is odd.
void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int pairs_left;
  for (pairs_left = width / 2; pairs_left > 0; --pairs_left) {
    dst[3] = src[3];
    dst[7] = src[7];
    src += 8;
    dst += 8;
  }
  if (width & 1) {
    dst[3] = src[3];
  }
}
2401
// Gather the alpha byte (byte 3) of each ARGB pixel into a packed plane, one
// byte per pixel.  Pixels are handled in pairs, followed by one extra pixel
// when width is odd.
void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) {
  int pairs_left = width / 2;
  while (pairs_left > 0) {
    dst_a[0] = src_argb[3];
    dst_a[1] = src_argb[7];
    src_argb += 8;
    dst_a += 2;
    --pairs_left;
  }
  if (width & 1) {
    dst_a[0] = src_argb[3];
  }
}
2414
// Write one source byte per pixel into the alpha byte (byte 3) of each ARGB
// destination pixel; the other three destination bytes are left untouched.
// Pixels are handled in pairs, followed by one extra pixel when width is
// odd.
void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
  int pairs_left;
  for (pairs_left = width / 2; pairs_left > 0; --pairs_left) {
    dst[3] = src[0];
    dst[7] = src[1];
    src += 2;
    dst += 8;
  }
  if (width & 1) {
    dst[3] = src[0];
  }
}
2427
// Maximum temporary width for wrappers to process at a time, in pixels.
// The two-step wrappers that follow declare a temporary row of
// MAXTWIDTH * 4 bytes and split wider rows into chunks of at most this many
// pixels.
#define MAXTWIDTH 2048
2430
#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
// Each chunk of at most MAXTWIDTH pixels is first written to a temporary row
// by I422ToARGBRow_SSSE3 and then passed to ARGBToRGB565Row_SSE2.  Per chunk
// the Y pointer advances one byte per pixel, U and V advance half that, and
// the destination advances two bytes per pixel.
void I422ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_u,
                           const uint8* src_v,
                           uint8* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
  }
}
#endif
2453
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is written to a
// temporary row by I422ToARGBRow_SSSE3 and then passed to
// ARGBToARGB1555Row_SSE2.  The destination advances two bytes per pixel.
void I422ToARGB1555Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
  }
}
#endif
2475
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is written to a
// temporary row by I422ToARGBRow_SSSE3 and then passed to
// ARGBToARGB4444Row_SSE2.  The destination advances two bytes per pixel.
void I422ToARGB4444Row_SSSE3(const uint8* src_y,
                             const uint8* src_u,
                             const uint8* src_v,
                             uint8* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
  }
}
#endif
2497
#if defined(HAS_NV12TORGB565ROW_SSSE3)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is written to a
// temporary row by NV12ToARGBRow_SSSE3 and then passed to
// ARGBToRGB565Row_SSE2.  Per chunk both src_y and src_uv advance one byte
// per pixel and the destination advances two bytes per pixel.
void NV12ToRGB565Row_SSSE3(const uint8* src_y,
                           const uint8* src_uv,
                           uint8* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
  }
}
#endif
2517
#if defined(HAS_I422TORGB565ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is written to a
// temporary row by I422ToARGBRow_AVX2 and then passed to
// ARGBToRGB565Row_AVX2 when HAS_ARGBTORGB565ROW_AVX2 is defined, otherwise
// to ARGBToRGB565Row_SSE2.  The destination advances two bytes per pixel.
void I422ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_u,
                          const uint8* src_v,
                          uint8* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
  }
}
#endif
2542
#if defined(HAS_I422TOARGB1555ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is written to a
// temporary row by I422ToARGBRow_AVX2 and then passed to
// ARGBToARGB1555Row_AVX2 when HAS_ARGBTOARGB1555ROW_AVX2 is defined,
// otherwise to ARGBToARGB1555Row_SSE2.  The destination advances two bytes
// per pixel.
void I422ToARGB1555Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
  }
}
#endif
2568
#if defined(HAS_I422TOARGB4444ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is written to a
// temporary row by I422ToARGBRow_AVX2 and then passed to
// ARGBToARGB4444Row_AVX2 when HAS_ARGBTOARGB4444ROW_AVX2 is defined,
// otherwise to ARGBToARGB4444Row_SSE2.  The destination advances two bytes
// per pixel.
void I422ToARGB4444Row_AVX2(const uint8* src_y,
                            const uint8* src_u,
                            const uint8* src_v,
                            uint8* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
  }
}
#endif
2594
#if defined(HAS_I422TORGB24ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is written to a
// temporary row by I422ToARGBRow_AVX2 and then passed to
// ARGBToRGB24Row_SSSE3.  The destination advances three bytes per pixel.
void I422ToRGB24Row_AVX2(const uint8* src_y,
                         const uint8* src_u,
                         const uint8* src_v,
                         uint8* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
    // TODO(fbarchard): ARGBToRGB24Row_AVX2
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb24 += twidth * 3;
  }
}
#endif
2617
#if defined(HAS_NV12TORGB565ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is written to a
// temporary row by NV12ToARGBRow_AVX2 and then passed to
// ARGBToRGB565Row_AVX2 when HAS_ARGBTORGB565ROW_AVX2 is defined, otherwise
// to ARGBToRGB565Row_SSE2.  Per chunk both src_y and src_uv advance one byte
// per pixel and the destination advances two bytes per pixel.
void NV12ToRGB565Row_AVX2(const uint8* src_y,
                          const uint8* src_uv,
                          uint8* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
  int twidth;
  for (; width > 0; width -= twidth) {
    twidth = (width < MAXTWIDTH) ? width : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
  }
}
#endif
2641
2642 #ifdef __cplusplus
2643 } // extern "C"
2644 } // namespace libyuv
2645 #endif
2646