/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include <stdio.h>
#include <string.h>  // For memcpy and memset.

#include "libyuv/basic_types.h"
#include "libyuv/convert_argb.h"  // For kYuvI601Constants

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// The following ifdef from row_win makes the C code match the row_win code,
// which is 7 bit fixed point.
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
    (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
#define LIBYUV_RGB7 1
#endif

#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86)
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#endif

// llvm x86 is poor at the ternary operator, so use branchless min/max.

#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return -(v >= 0) & v;
}
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}

static __inline int32_t clamp1023(int32_t v) {
  return (-(v >= 1023) | v) & 1023;
}

static __inline uint32_t Abs(int32_t v) {
  int m = -(v < 0);
  return (v + m) ^ m;
}
#else  // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return (v < 0) ? 0 : v;
}

static __inline int32_t clamp255(int32_t v) {
  return (v > 255) ? 255 : v;
}

static __inline int32_t clamp1023(int32_t v) {
  return (v > 1023) ? 1023 : v;
}

static __inline uint32_t Abs(int32_t v) {
  return (v < 0) ? -v : v;
}
#endif  // USE_BRANCHLESS
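
// Worked examples of the branchless clamps above: clamp255(300) computes
// -(300 >= 255) = -1 (all ones), so (-1 | 300) & 255 = 255, while
// clamp255(200) computes -(0) = 0, so (0 | 200) & 255 = 200. clamp0(-5) is
// -(0) & -5 = 0, and Abs(-7) uses m = -1: (-7 + -1) ^ -1 = -8 ^ -1 = 7.
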
static __inline uint32_t Clamp(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp255(v));
}

static __inline uint32_t Clamp10(int32_t val) {
  int v = clamp0(val);
  return (uint32_t)(clamp1023(v));
}

// Little Endian
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WRITEWORD(p, v) *(uint32_t*)(p) = v
#else
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif

void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb24[0];
    uint8_t g = src_rgb24[1];
    uint8_t r = src_rgb24[2];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_rgb24 += 3;
  }
}

void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_raw += 3;
  }
}

void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_rgba[0] = 255u;
    dst_rgba[1] = b;
    dst_rgba[2] = g;
    dst_rgba[3] = r;
    dst_rgba += 4;
    src_raw += 3;
  }
}

void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_raw[0];
    uint8_t g = src_raw[1];
    uint8_t b = src_raw[2];
    dst_rgb24[0] = b;
    dst_rgb24[1] = g;
    dst_rgb24[2] = r;
    dst_rgb24 += 3;
    src_raw += 3;
  }
}

void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb565[0] & 0x1f;
    uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r = src_rgb565[1] >> 3;
    dst_argb[0] = (b << 3) | (b >> 2);
    dst_argb[1] = (g << 2) | (g >> 4);
    dst_argb[2] = (r << 3) | (r >> 2);
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_rgb565 += 2;
  }
}
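
// Worked example of the bit replication above: (b << 3) | (b >> 2) expands a
// 5-bit channel to 8 bits so 0 maps to 0 and 0x1f maps to
// 0xf8 | 0x07 = 0xff; likewise the 6-bit green (g << 2) | (g >> 4) maps
// 0x3f to 0xfc | 0x03 = 0xff.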

void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb1555[0] & 0x1f;
    uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t a = src_argb1555[1] >> 7;
    dst_argb[0] = (b << 3) | (b >> 2);
    dst_argb[1] = (g << 3) | (g >> 2);
    dst_argb[2] = (r << 3) | (r >> 2);
    dst_argb[3] = -a;
    dst_argb += 4;
    src_argb1555 += 2;
  }
}

void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb4444[0] & 0x0f;
    uint8_t g = src_argb4444[0] >> 4;
    uint8_t r = src_argb4444[1] & 0x0f;
    uint8_t a = src_argb4444[1] >> 4;
    dst_argb[0] = (b << 4) | b;
    dst_argb[1] = (g << 4) | g;
    dst_argb[2] = (r << 4) | r;
    dst_argb[3] = (a << 4) | a;
    dst_argb += 4;
    src_argb4444 += 2;
  }
}

void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    *(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24);
    dst_argb += 4;
    src_ar30 += 4;
  }
}
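
// AR30 is a packed little-endian word: 10-bit B in bits 0..9, G in bits
// 10..19, R in bits 20..29 and 2-bit alpha in bits 30..31; the shifts above
// keep the top 8 bits of each 10-bit channel. The * 0x55 alpha replication
// maps 0 -> 0x00, 1 -> 0x55, 2 -> 0xaa, 3 -> 0xff.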

void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    *(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}

void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;
    uint32_t r = (ar30 >> 20) & 0x3ff;
    *(uint32_t*)(dst_ab30) = r | ga | (b << 20);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}

void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = b;
    dst_rgb[1] = g;
    dst_rgb[2] = r;
    dst_rgb += 3;
    src_argb += 4;
  }
}

void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = r;
    dst_rgb[1] = g;
    dst_rgb[2] = b;
    dst_rgb += 3;
    src_argb += 4;
  }
}

void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t b1 = src_argb[4] >> 3;
    uint8_t g1 = src_argb[5] >> 2;
    uint8_t r1 = src_argb[6] >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
  }
}

// dither4 is a row of 4 values from a 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB. When converting to
// fewer bits (565) this provides an ordered dither.
// The first byte of dither4 corresponds to the upper left of the 4x4 matrix.
// The 4 values are passed as an int, then referenced as an array, so
// endian will not affect the order of the original matrix. But dither4
// will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             const uint32_t dither4,
                             int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    int dither0 = ((const unsigned char*)(&dither4))[x & 3];
    int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
    uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
    uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
    uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
    uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
    uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
    uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
  }
}
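
// For illustration: on a little-endian machine a dither row of {0, 4, 1, 5}
// can be packed as
//   const uint32_t dither4 = 0u | (4u << 8) | (1u << 16) | (5u << 24);
// so ((const unsigned char*)&dither4)[0] == 0 is added to pixel 0,
// [1] == 4 to pixel 1, and so on, repeating every 4 pixels via (x & 3).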

void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 3;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t a0 = src_argb[3] >> 7;
    uint8_t b1 = src_argb[4] >> 3;
    uint8_t g1 = src_argb[5] >> 3;
    uint8_t r1 = src_argb[6] >> 3;
    uint8_t a1 = src_argb[7] >> 7;
    *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
                            (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 3;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t a0 = src_argb[3] >> 7;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
  }
}

void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 4;
    uint8_t g0 = src_argb[1] >> 4;
    uint8_t r0 = src_argb[2] >> 4;
    uint8_t a0 = src_argb[3] >> 4;
    uint8_t b1 = src_argb[4] >> 4;
    uint8_t g1 = src_argb[5] >> 4;
    uint8_t r1 = src_argb[6] >> 4;
    uint8_t a1 = src_argb[7] >> 4;
    *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
                            (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 4;
    uint8_t g0 = src_argb[1] >> 4;
    uint8_t r0 = src_argb[2] >> 4;
    uint8_t a0 = src_argb[3] >> 4;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
  }
}

void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}

void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
    dst_ar30 += 4;
    src_argb += 4;
  }
}

#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
}
#else
// 8 bit
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
// return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
//         0x7e80) >> 8;

static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
}
#endif
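
// Sanity check for the 8-bit RGBToY above: white (255, 255, 255) gives
// (66 + 129 + 25) * 255 + 0x1080 = 56100 + 4224 = 60324, and 60324 >> 8 = 235;
// black gives 0x1080 >> 8 = 16, i.e. the BT.601 studio range 16..235.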

#define AVGB(a, b) (((a) + (b) + 1) >> 1)

#ifdef LIBYUV_RGBTOU_TRUNCATE
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
}
#else
// TODO(fbarchard): Add rounding to SIMD and use this
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
}
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
}
#endif

#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
  return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
  return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
}
#endif

// ARGBToY_C and ARGBToUV_C
// Intel version mimics SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB

#define MAKEROWY(NAME, R, G, B, BPP)                                         \
  void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x;                                                                   \
    for (x = 0; x < width; ++x) {                                            \
      dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                      \
      dst_y += 1;                                                            \
    }                                                                        \
  }                                                                          \
  void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                     \
    int x;                                                                   \
    for (x = 0; x < width - 1; x += 2) {                                     \
      uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                      \
                        AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));         \
      uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                      \
                        AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));         \
      uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                      \
                        AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));         \
      dst_u[0] = RGBToU(ar, ag, ab);                                         \
      dst_v[0] = RGBToV(ar, ag, ab);                                         \
      src_rgb0 += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                   \
      dst_u += 1;                                                            \
      dst_v += 1;                                                            \
    }                                                                        \
    if (width & 1) {                                                         \
      uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]);                           \
      uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]);                           \
      uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]);                           \
      dst_u[0] = RGBToU(ar, ag, ab);                                         \
      dst_v[0] = RGBToV(ar, ag, ab);                                         \
    }                                                                        \
  }
#else
// ARM version does sum / 2 then multiplies by 2x smaller coefficients
#define MAKEROWY(NAME, R, G, B, BPP)                                         \
  void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x;                                                                   \
    for (x = 0; x < width; ++x) {                                            \
      dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                      \
      dst_y += 1;                                                            \
    }                                                                        \
  }                                                                          \
  void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                     \
    int x;                                                                   \
    for (x = 0; x < width - 1; x += 2) {                                     \
      uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] +         \
                     src_rgb1[B + BPP] + 1) >>                               \
                    1;                                                       \
      uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] +         \
                     src_rgb1[G + BPP] + 1) >>                               \
                    1;                                                       \
      uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] +         \
                     src_rgb1[R + BPP] + 1) >>                               \
                    1;                                                       \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                       \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                       \
      src_rgb0 += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                   \
      dst_u += 1;                                                            \
      dst_v += 1;                                                            \
    }                                                                        \
    if (width & 1) {                                                         \
      uint16_t ab = src_rgb0[B] + src_rgb1[B];                               \
      uint16_t ag = src_rgb0[G] + src_rgb1[G];                               \
      uint16_t ar = src_rgb0[R] + src_rgb1[R];                               \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                       \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                       \
    }                                                                        \
  }
#endif

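// MAKEROWY(NAME, R, G, B, BPP): R, G and B are the byte offsets of each
// channel within a pixel and BPP is the pixel stride in bytes, so
// MAKEROWY(ARGB, 2, 1, 0, 4) below generates ARGBToYRow_C and ARGBToUVRow_C
// for little-endian ARGB where blue is byte 0 and red is byte 2.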
MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY

// JPEG uses a variation on BT.601-1 full range
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPEG 7 bit Y (deprecated)
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPEG 8 bit Y:
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPEG 8 bit U:
// b  0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPEG 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r  0.50000 * 255 = 127.5 = 127

#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
#else
// 8 bit
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#endif
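
// Sanity check for the 8-bit RGBToYJ above: white gives
// (77 + 150 + 29) * 255 + 128 = 65280 + 128 = 65408, and 65408 >> 8 = 255;
// black gives 128 >> 8 = 0, i.e. the full range 0..255.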

#if defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif

// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimics SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWYJ(NAME, R, G, B, BPP)                                         \
  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x;                                                                    \
    for (x = 0; x < width; ++x) {                                             \
      dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                       \
      dst_y += 1;                                                             \
    }                                                                         \
  }                                                                           \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                      \
    int x;                                                                    \
    for (x = 0; x < width - 1; x += 2) {                                      \
      uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                       \
                        AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));          \
      uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                       \
                        AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));          \
      uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                       \
                        AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));          \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                         \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                         \
      src_rgb0 += BPP * 2;                                                    \
      src_rgb1 += BPP * 2;                                                    \
      dst_u += 1;                                                             \
      dst_v += 1;                                                             \
    }                                                                         \
    if (width & 1) {                                                          \
      uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]);                            \
      uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]);                            \
      uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]);                            \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                         \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                         \
    }                                                                         \
  }
#else
// ARM version does sum / 2 then multiplies by 2x smaller coefficients
#define MAKEROWYJ(NAME, R, G, B, BPP)                                         \
  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x;                                                                    \
    for (x = 0; x < width; ++x) {                                             \
      dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                       \
      dst_y += 1;                                                             \
    }                                                                         \
  }                                                                           \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                      \
    int x;                                                                    \
    for (x = 0; x < width - 1; x += 2) {                                      \
      uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] +          \
                     src_rgb1[B + BPP] + 1) >>                                \
                    1;                                                        \
      uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] +          \
                     src_rgb1[G + BPP] + 1) >>                                \
                    1;                                                        \
      uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] +          \
                     src_rgb1[R + BPP] + 1) >>                                \
                    1;                                                        \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                       \
      src_rgb0 += BPP * 2;                                                    \
      src_rgb1 += BPP * 2;                                                    \
      dst_u += 1;                                                             \
      dst_v += 1;                                                             \
    }                                                                         \
    if (width & 1) {                                                          \
      uint16_t ab = (src_rgb0[B] + src_rgb1[B]);                              \
      uint16_t ag = (src_rgb0[G] + src_rgb1[G]);                              \
      uint16_t ar = (src_rgb0[R] + src_rgb1[R]);                              \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                       \
    }                                                                         \
  }

#endif

MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ

void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb565[0] & 0x1f;
    uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r = src_rgb565[1] >> 3;
    b = (b << 3) | (b >> 2);
    g = (g << 2) | (g >> 4);
    r = (r << 3) | (r >> 2);
    dst_y[0] = RGBToY(r, g, b);
    src_rgb565 += 2;
    dst_y += 1;
  }
}

void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb1555[0] & 0x1f;
    uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
    b = (b << 3) | (b >> 2);
    g = (g << 3) | (g >> 2);
    r = (r << 3) | (r >> 2);
    dst_y[0] = RGBToY(r, g, b);
    src_argb1555 += 2;
    dst_y += 1;
  }
}

void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb4444[0] & 0x0f;
    uint8_t g = src_argb4444[0] >> 4;
    uint8_t r = src_argb4444[1] & 0x0f;
    b = (b << 4) | b;
    g = (g << 4) | g;
    r = (r << 4) | r;
    dst_y[0] = RGBToY(r, g, b);
    src_argb4444 += 2;
    dst_y += 1;
  }
}

void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b1 = src_rgb565[2] & 0x1f;
    uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
    uint8_t r1 = src_rgb565[3] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;
    uint8_t b3 = next_rgb565[2] & 0x1f;
    uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
    uint8_t r3 = next_rgb565[3] >> 3;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 2) | (g1 >> 4);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 2) | (g3 >> 4);
    r3 = (r3 << 3) | (r3 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_rgb565[0] & 0x1f;
    uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    uint8_t r0 = src_rgb565[1] >> 3;
    uint8_t b2 = next_rgb565[0] & 0x1f;
    uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    uint8_t r2 = next_rgb565[1] >> 3;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 2) | (g0 >> 4);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 2) | (g2 >> 4);
    r2 = (r2 << 3) | (r2 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}

void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b1 = src_argb1555[2] & 0x1f;
    uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b3 = next_argb1555[2] & 0x1f;
    uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b1 = (b1 << 3) | (b1 >> 2);
    g1 = (g1 << 3) | (g1 >> 2);
    r1 = (r1 << 3) | (r1 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);
    b3 = (b3 << 3) | (b3 >> 2);
    g3 = (g3 << 3) | (g3 >> 2);
    r3 = (r3 << 3) | (r3 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;

    b0 = (b0 << 3) | (b0 >> 2);
    g0 = (g0 << 3) | (g0 >> 2);
    r0 = (r0 << 3) | (r0 >> 2);
    b2 = (b2 << 3) | (b2 >> 2);
    g2 = (g2 << 3) | (g2 >> 2);
    r2 = (r2 << 3) | (r2 >> 2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}

void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b1 = src_argb4444[2] & 0x0f;
    uint8_t g1 = src_argb4444[2] >> 4;
    uint8_t r1 = src_argb4444[3] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b3 = next_argb4444[2] & 0x0f;
    uint8_t g3 = next_argb4444[2] >> 4;
    uint8_t r3 = next_argb4444[3] & 0x0f;

    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b1 = (b1 << 4) | b1;
    g1 = (g1 << 4) | g1;
    r1 = (r1 << 4) | r1;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;
    b3 = (b3 << 4) | b3;
    g3 = (g3 << 4) | g3;
    r3 = (r3 << 4) | r3;

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb4444 += 4;
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;

    b0 = (b0 << 4) | b0;
    g0 = (g0 << 4) | g0;
    r0 = (r0 << 4) | r0;
    b2 = (b2 << 4) | b2;
    g2 = (g2 << 4) | g2;
    r2 = (r2 << 4) | r2;

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}

void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t ab = src_argb[0];
    uint8_t ag = src_argb[1];
    uint8_t ar = src_argb[2];
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
    src_argb += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    dst_argb[3] = src_argb[3];
    dst_argb += 4;
    src_argb += 4;
  }
}

// Convert a row of image to Sepia tone.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    int sb = (b * 17 + g * 68 + r * 35) >> 7;
    int sg = (b * 22 + g * 88 + r * 45) >> 7;
    int sr = (b * 24 + g * 98 + r * 50) >> 7;
    // b does not overflow. a is preserved from original.
    dst_argb[0] = sb;
    dst_argb[1] = clamp255(sg);
    dst_argb[2] = clamp255(sr);
    dst_argb += 4;
  }
}

// Apply color matrix to a row of image. Matrix is signed.
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          const int8_t* matrix_argb,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = src_argb[0];
    int g = src_argb[1];
    int r = src_argb[2];
    int a = src_argb[3];
    int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
              a * matrix_argb[3]) >>
             6;
    int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
              a * matrix_argb[7]) >>
             6;
    int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
              a * matrix_argb[11]) >>
             6;
    int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
              a * matrix_argb[15]) >>
             6;
    dst_argb[0] = Clamp(sb);
    dst_argb[1] = Clamp(sg);
    dst_argb[2] = Clamp(sr);
    dst_argb[3] = Clamp(sa);
    src_argb += 4;
    dst_argb += 4;
  }
}
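
// Note on the matrix layout: matrix_argb is 4 rows of 4 signed coefficients
// in units of 1/64 (the >> 6 above), applied to B, G, R, A in that order.
// An identity matrix would be
//   {64, 0, 0, 0, 0, 64, 0, 0, 0, 0, 64, 0, 0, 0, 0, 64}
// and with int8_t coefficients the gain range is -2.0 to just under +2.0.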

// Apply color table to a row of image.
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    int a = dst_argb[3];
    dst_argb[0] = table_argb[b * 4 + 0];
    dst_argb[1] = table_argb[g * 4 + 1];
    dst_argb[2] = table_argb[r * 4 + 2];
    dst_argb[3] = table_argb[a * 4 + 3];
    dst_argb += 4;
  }
}

// Apply color table to a row of image.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    dst_argb[0] = table_argb[b * 4 + 0];
    dst_argb[1] = table_argb[g * 4 + 1];
    dst_argb[2] = table_argb[r * 4 + 2];
    dst_argb += 4;
  }
}

void ARGBQuantizeRow_C(uint8_t* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
    dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
    dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
    dst_argb += 4;
  }
}
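
// Worked example (values chosen for illustration): to posterize to 8 levels
// with interval_size = 32, a caller could pass scale = 65536 / 32 = 2048, so
// a channel value of 200 becomes (200 * 2048 >> 16) * 32 + interval_offset =
// 6 * 32 + interval_offset.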

#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v * f >> 24

void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);

  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
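
// Reference arithmetic for the shade math above: REPEAT8 widens a channel to
// 16 bits (0x80 -> 0x8080), so SHADE multiplies two 16-bit values and keeps
// the top 8 bits of the 32-bit product. For example, shading a 0xff channel
// by value byte 0x80: 0xffff * 0x8080 >> 24 = 128, i.e. roughly half
// intensity.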

#define REPEAT8(v) (v) | ((v) << 8)
#define SHADE(f, v) v * f >> 16

void ARGBMultiplyRow_C(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb0[0]);
    const uint32_t g = REPEAT8(src_argb0[1]);
    const uint32_t r = REPEAT8(src_argb0[2]);
    const uint32_t a = REPEAT8(src_argb0[3]);
    const uint32_t b_scale = src_argb1[0];
    const uint32_t g_scale = src_argb1[1];
    const uint32_t r_scale = src_argb1[2];
    const uint32_t a_scale = src_argb1[3];
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE

#define SHADE(f, v) clamp255(v + f)

void ARGBAddRow_C(const uint8_t* src_argb0,
                  const uint8_t* src_argb1,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_add = src_argb1[0];
    const int g_add = src_argb1[1];
    const int r_add = src_argb1[2];
    const int a_add = src_argb1[3];
    dst_argb[0] = SHADE(b, b_add);
    dst_argb[1] = SHADE(g, g_add);
    dst_argb[2] = SHADE(r, r_add);
    dst_argb[3] = SHADE(a, a_add);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE

#define SHADE(f, v) clamp0(f - v)

void ARGBSubtractRow_C(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int b = src_argb0[0];
    const int g = src_argb0[1];
    const int r = src_argb0[2];
    const int a = src_argb0[3];
    const int b_sub = src_argb1[0];
    const int g_sub = src_argb1[1];
    const int r_sub = src_argb1[2];
    const int a_sub = src_argb1[3];
    dst_argb[0] = SHADE(b, b_sub);
    dst_argb[1] = SHADE(g, g_sub);
    dst_argb[2] = SHADE(r, r_sub);
    dst_argb[3] = SHADE(a, a_sub);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef SHADE

// Sobel functions which mimic SSSE3.
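// SobelX below computes the horizontal gradient
// |(a - a_sub) + 2 * (b - b_sub) + (c - c_sub)| over three rows, where each
// *_sub sample lies two pixels to the right, i.e. the kernel
//   1  0 -1
//   2  0 -2
//   1  0 -1
// SobelY applies the same 1-2-1 weighting across columns to the difference
// of two rows.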
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int a = src_y0[i];
    int b = src_y1[i];
    int c = src_y2[i];
    int a_sub = src_y0[i + 2];
    int b_sub = src_y1[i + 2];
    int c_sub = src_y2[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    dst_sobelx[i] = (uint8_t)(clamp255(sobel));
  }
}

void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int a = src_y0[i + 0];
    int b = src_y0[i + 1];
    int c = src_y0[i + 2];
    int a_sub = src_y1[i + 0];
    int b_sub = src_y1[i + 1];
    int c_sub = src_y1[i + 2];
    int a_diff = a - a_sub;
    int b_diff = b - b_sub;
    int c_diff = c - c_sub;
    int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    dst_sobely[i] = (uint8_t)(clamp255(sobel));
  }
}

void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int r = src_sobelx[i];
    int b = src_sobely[i];
    int s = clamp255(r + b);
    dst_argb[0] = (uint8_t)(s);
    dst_argb[1] = (uint8_t)(s);
    dst_argb[2] = (uint8_t)(s);
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;
  }
}

void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int r = src_sobelx[i];
    int b = src_sobely[i];
    int s = clamp255(r + b);
    dst_y[i] = (uint8_t)(s);
  }
}

void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    int r = src_sobelx[i];
    int b = src_sobely[i];
    int g = clamp255(r + b);
    dst_argb[0] = (uint8_t)(b);
    dst_argb[1] = (uint8_t)(g);
    dst_argb[2] = (uint8_t)(r);
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;
  }
}

void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  // Copy a Y to RGB.
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t y = src_y[0];
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    dst_argb[3] = 255u;
    dst_argb += 4;
    ++src_y;
  }
}

// TODO(fbarchard): Unify these structures to be platform independent.
// TODO(fbarchard): Generate SIMD structures from float matrix.

// BT.601 YUV to RGB reference
//  R = (Y - 16) * 1.164              - V * -1.596
//  G = (Y - 16) * 1.164 - U *  0.391 - V *  0.813
//  B = (Y - 16) * 1.164 - U * -2.018

// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.018 * 64)) */
#define UG 25 /* round(0.391 * 64) */
#define VG 52 /* round(0.813 * 64) */
#define VR -102 /* round(-1.596 * 64) */

// Bias values to subtract 16 from Y and 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)
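
// For reference with the values above: BB = UB * 128 + YGB folds the U = 128
// bias into a constant, e.g. BB = -128 * 128 + -1160 = -17544, so at U = 128
// the U contribution cancels and only the Y bias YGB remains.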

#if defined(__aarch64__)  // 64 bit arm
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)  // 32 bit arm
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB}};
#endif

#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// JPEG YUV to RGB reference
// *  R = Y               - V * -1.40200
// *  G = Y - U * 0.34414 - V *  0.71414
// *  B = Y - U * -1.77200

// Y contribution to R,G,B. Scale and bias.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32 /* 64 / 2 */

// U and V contributions to R,G,B.
#define UB -113 /* round(-1.77200 * 64) */
#define UG 22 /* round(0.34414 * 64) */
#define VG 46 /* round(0.71414 * 64) */
#define VR -90 /* round(-1.40200 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB}};
#endif

#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// BT.709 YUV to RGB reference
//  R = (Y - 16) * 1.164              - V * -1.793
//  G = (Y - 16) * 1.164 - U *  0.213 - V *  0.533
//  B = (Y - 16) * 1.164 - U * -2.112
// See also http://www.equasys.de/colorconversion.html

// Y contribution to R,G,B. Scale and bias.
#define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.112 * 64)) */
#define UG 14 /* round(0.213 * 64) */
#define VG 34 /* round(0.533 * 64) */
#define VR -115 /* round(-1.793 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB}};
#endif

#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1589
// BT.2020 YUV to RGB reference
//  R = (Y - 16) * 1.164384 - V * -1.67867
//  G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
//  B = (Y - 16) * 1.164384 - U * -2.14177

// Y contribution to R,G,B. Scale and bias.
#define YG 19003  /* round(1.164384 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
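// Note (illustrative arithmetic): the row code computes
// y1 = (y * 0x0101 * YG) >> 16. The 0x0101 factor replicates the byte
// (a multiply by 257), which the / 257 in YG cancels, so
// y1 ~= y * 1.164384 * 64; e.g. y = 235 gives
// (235 * 257 * 19003) >> 16 = 17512 ~= 235 * 74.52.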

// TODO(fbarchard): Improve accuracy; the B channel is off by 7%.
// U and V contributions to R,G,B.
#define UB -128 /* max(-128, round(-2.142 * 64)) */
#define UG 12   /* round(0.187326 * 64) */
#define VG 42   /* round(0.65042 * 64) */
#define VR -107 /* round(-1.67867 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

#if defined(__aarch64__)
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#elif defined(__arm__)
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, YGB, 0, 0, 0, 0},
    {0x0101 * YG, YG, 0, 0}};
#else
const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB}};
const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
    {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
     YGB}};
#endif

#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG

// C reference code that mimics the YUV assembly.
// Reads 8 bit YUV and clamps the result to 8 bit.
static __inline void YuvPixel(uint8_t y,
                              uint8_t u,
                              uint8_t v,
                              uint8_t* b,
                              uint8_t* g,
                              uint8_t* r,
                              const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif

  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
  *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6);
  *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6);
  *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6);
}
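
#if 0
// Illustrative usage sketch (not part of the library): convert one BT.601
// gray pixel. kYuvI601Constants is declared in libyuv/convert_argb.h.
static void YuvPixelExample(void) {
  uint8_t b;
  uint8_t g;
  uint8_t r;
  YuvPixel(128, 128, 128, &b, &g, &r, &kYuvI601Constants);
  // With no chroma (u = v = 128), all channels come out near
  // (128 - 16) * 1.164 = 130.
  printf("b=%d g=%d r=%d\n", b, g, r);
}
#endif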

// Reads 8 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel8_16(uint8_t y,
                                  uint8_t u,
                                  uint8_t v,
                                  int* b,
                                  int* g,
                                  int* r,
                                  const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif

  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
  *b = (int)(-(u * ub) + y1 + bb);
  *g = (int)(-(u * ug + v * vg) + y1 + bg);
  *r = (int)(-(v * vr) + y1 + br);
}

// C reference code that mimics the YUV 16 bit assembly.
// Reads 10 bit YUV and leaves result as 16 bit.
static __inline void YuvPixel16(int16_t y,
                                int16_t u,
                                int16_t v,
                                int* b,
                                int* g,
                                int* r,
                                const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = -yuvconstants->kUVToRB[1];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#elif defined(__arm__)
  int ub = -yuvconstants->kUVToRB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[4];
  int vr = -yuvconstants->kUVToRB[4];
  int bb = yuvconstants->kUVBiasBGR[0];
  int bg = yuvconstants->kUVBiasBGR[1];
  int br = yuvconstants->kUVBiasBGR[2];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ub = yuvconstants->kUVToB[0];
  int ug = yuvconstants->kUVToG[0];
  int vg = yuvconstants->kUVToG[1];
  int vr = yuvconstants->kUVToR[1];
  int bb = yuvconstants->kUVBiasB[0];
  int bg = yuvconstants->kUVBiasG[0];
  int br = yuvconstants->kUVBiasR[0];
  int yg = yuvconstants->kYToRgb[0];
#endif

  uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
  u = clamp255(u >> 2);
  v = clamp255(v >> 2);
  *b = (int)(-(u * ub) + y1 + bb);
  *g = (int)(-(u * ug + v * vg) + y1 + bg);
  *r = (int)(-(v * vr) + y1 + br);
}

// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB.
static __inline void YuvPixel10(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int b16;
  int g16;
  int r16;
  YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants);
  *b = Clamp(b16 >> 6);
  *g = Clamp(g16 >> 6);
  *r = Clamp(r16 >> 6);
}

// C reference code that mimics the YUV assembly.
// Reads 8 bit Y, ignores UV, and clamps the result to 8 bit.
static __inline void YPixel(uint8_t y,
                            uint8_t* b,
                            uint8_t* g,
                            uint8_t* r,
                            const struct YuvConstants* yuvconstants) {
#if defined(__aarch64__) || defined(__arm__)
  int ygb = yuvconstants->kUVBiasBGR[3];
  int yg = yuvconstants->kYToRgb[1];
#else
  int ygb = yuvconstants->kYBiasToRgb[0];
  int yg = yuvconstants->kYToRgb[0];
#endif
  uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
  *b = Clamp(((int32_t)(y1) + ygb) >> 6);
  *g = Clamp(((int32_t)(y1) + ygb) >> 6);
  *r = Clamp(((int32_t)(y1) + ygb) >> 6);
}

#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
    uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
             yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
             yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 2;
    src_v += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
#else
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    src_y += 1;
    src_u += 1;
    src_v += 1;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
#endif

// Also used for 420
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

// 10 bit YUV to ARGB
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
               rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

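// AR30 layout (informational): little endian, 2 bit alpha (always 3) in the
// top bits, then 10 bit R, 10 bit G, 10 bit B. e.g. opaque 10 bit white
// packs to 0x3ff | (0x3ff << 10) | (0x3ff << 20) | 0xc0000000 = 0xffffffff.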
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = b >> 4;  // convert 8.6 fixed point to 10 bit.
  g = g >> 4;
  r = r >> 4;
  b = Clamp10(b);
  g = Clamp10(g);
  r = Clamp10(r);
  ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
  (*(uint32_t*)rgb_buf) = ar30;
}

// 10 bit YUV to 10 bit AR30
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}

// 8 bit YUV to 10 bit AR30
// Uses the same code as 10 bit YUV; the 8 bit values end up scaled to 10
// bits in the output.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  int b;
  int g;
  int r;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf + 4, b, g, r);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
    StoreAR30(rgb_buf, b, g, r);
  }
}

void I422AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    src_u += 1;
    src_v += 1;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}

void I422ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}

void I422ToARGB4444Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
                                 (g1 << 20) | (r1 << 24) | 0xf000f000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
  }
}

void I422ToARGB1555Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) |
                                 (g1 << 21) | (r1 << 26) | 0x80008000;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
  }
}

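// RGB565 layout (informational): 5 bit B, 6 bit G, 5 bit R per pixel, two
// pixels per 32 bit store; e.g. pure green (b = 0, g = 255, r = 0) packs to
// (255 >> 2) << 5 = 0x07e0.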
void I422ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    *(uint32_t*)(dst_rgb565) =
        b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}

void NV12ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void NV21ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_vu,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_uv += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}

void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    src_vu += 2;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}

void NV12ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    *(uint32_t*)(dst_rgb565) =
        b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}

void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
             rgb_buf + 7, yuvconstants);
    rgb_buf[4] = 255;
    src_y += 2;
    src_u += 1;
    src_v += 1;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
             rgb_buf + 3, yuvconstants);
    rgb_buf[0] = 255;
  }
}

void I400ToARGBRow_C(const uint8_t* src_y,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}

void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  src += width - 1;
  for (x = 0; x < width - 1; x += 2) {
    dst[x] = src[0];
    dst[x + 1] = src[-1];
    src -= 2;
  }
  if (width & 1) {
    dst[width - 1] = src[0];
  }
}

void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
  int x;
  src_uv += (width - 1) << 1;
  for (x = 0; x < width; ++x) {
    dst_uv[0] = src_uv[0];
    dst_uv[1] = src_uv[1];
    src_uv -= 2;
    dst_uv += 2;
  }
}

void MirrorSplitUVRow_C(const uint8_t* src_uv,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  int x;
  src_uv += (width - 1) << 1;
  for (x = 0; x < width - 1; x += 2) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[-2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[-2 + 1];
    src_uv -= 4;
  }
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}

void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int x;
  const uint32_t* src32 = (const uint32_t*)(src);
  uint32_t* dst32 = (uint32_t*)(dst);
  src32 += width - 1;
  for (x = 0; x < width - 1; x += 2) {
    dst32[x] = src32[0];
    dst32[x + 1] = src32[-1];
    src32 -= 2;
  }
  if (width & 1) {
    dst32[width - 1] = src32[0];
  }
}

void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
  int x;
  src_rgb24 += width * 3 - 3;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb24[0];
    uint8_t g = src_rgb24[1];
    uint8_t r = src_rgb24[2];
    dst_rgb24[0] = b;
    dst_rgb24[1] = g;
    dst_rgb24[2] = r;
    src_rgb24 -= 3;
    dst_rgb24 += 3;
  }
}

void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_u[x] = src_uv[0];
    dst_u[x + 1] = src_uv[2];
    dst_v[x] = src_uv[1];
    dst_v[x + 1] = src_uv[3];
    src_uv += 4;
  }
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}

void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = src_u[x];
    dst_uv[1] = src_v[x];
    dst_uv[2] = src_u[x + 1];
    dst_uv[3] = src_v[x + 1];
    dst_uv += 4;
  }
  if (width & 1) {
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}

void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_r[x] = src_rgb[0];
    dst_g[x] = src_rgb[1];
    dst_b[x] = src_rgb[2];
    src_rgb += 3;
  }
}

void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_rgb[0] = src_r[x];
    dst_rgb[1] = src_g[x];
    dst_rgb[2] = src_b[x];
    dst_rgb += 3;
  }
}

// Use scale to convert lsb formats to msb, depending on how many bits there
// are:
// 128 = 9 bits
// 64 = 10 bits
// 16 = 12 bits
// 1 = 16 bits
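// e.g. (illustrative) 10 bit planes use scale 64: 1023 * 64 = 65472 moves
// the 10 significant bits to the top of the 16 bit result.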
void MergeUVRow_16_C(const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint16_t* dst_uv,
                     int scale,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = src_u[x] * scale;
    dst_uv[1] = src_v[x] * scale;
    dst_uv[2] = src_u[x + 1] * scale;
    dst_uv[3] = src_v[x + 1] * scale;
    dst_uv += 4;
  }
  if (width & 1) {
    dst_uv[0] = src_u[width - 1] * scale;
    dst_uv[1] = src_v[width - 1] * scale;
  }
}

void MultiplyRow_16_C(const uint16_t* src_y,
                      uint16_t* dst_y,
                      int scale,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_y[x] * scale;
  }
}

// Use scale to convert lsb formats to msb, depending on how many bits there
// are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
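// e.g. (illustrative) 10 bit input uses scale 16384:
// (1023 * 16384) >> 16 = 255.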
void Convert16To8Row_C(const uint16_t* src_y,
                       uint8_t* dst_y,
                       int scale,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = clamp255((src_y[x] * scale) >> 16);
  }
}

// Use scale to convert lsb formats to msb, depending on how many bits there
// are:
// 1024 = 10 bits
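// e.g. (illustrative) scale 1024 becomes 1024 * 0x0101 = 263168, and
// (255 * 263168) >> 16 = 1023.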
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  int x;
  scale *= 0x0101;  // replicates the byte.
  for (x = 0; x < width; ++x) {
    dst_y[x] = (src_y[x] * scale) >> 16;
  }
}

void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  memcpy(dst, src, count);
}

void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  memcpy(dst, src, count * 2);
}

void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  memset(dst, v8, width);
}

void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    memcpy(dst_argb + x * sizeof v32, &v32, sizeof v32);
  }
}

// Filter 2 rows of YUY2 UV's (422) into U and V (420).
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  // Output a row of UV values, filtering 2 rows of YUY2.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
    dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
    src_yuy2 += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of YUY2 UV's (422) into U and V (422).
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = src_yuy2[1];
    dst_v[0] = src_yuy2[3];
    src_yuy2 += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of YUY2 Y's (422) into Y (420/422).
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_y[x] = src_yuy2[0];
    dst_y[x + 1] = src_yuy2[2];
    src_yuy2 += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[0];
  }
}

// Filter 2 rows of UYVY UV's (422) into U and V (420).
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
    dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
    src_uyvy += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of UYVY UV's (422) into U and V (422).
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  // Output a row of UV values.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_u[0] = src_uyvy[0];
    dst_v[0] = src_uyvy[2];
    src_uyvy += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

// Copy row of UYVY Y's (422) into Y (420/422).
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_y[x] = src_uyvy[1];
    dst_y[x + 1] = src_uyvy[3];
    src_uyvy += 4;
  }
  if (width & 1) {
    dst_y[width - 1] = src_uyvy[1];
  }
}

#define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code mimics the SSSE3 version for better testability.
void ARGBBlendRow_C(const uint8_t* src_argb0,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32_t fb = src_argb0[0];
    uint32_t fg = src_argb0[1];
    uint32_t fr = src_argb0[2];
    uint32_t a = src_argb0[3];
    uint32_t bb = src_argb1[0];
    uint32_t bg = src_argb1[1];
    uint32_t br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;

    fb = src_argb0[4 + 0];
    fg = src_argb0[4 + 1];
    fr = src_argb0[4 + 2];
    a = src_argb0[4 + 3];
    bb = src_argb1[4 + 0];
    bg = src_argb1[4 + 1];
    br = src_argb1[4 + 2];
    dst_argb[4 + 0] = BLEND(fb, bb, a);
    dst_argb[4 + 1] = BLEND(fg, bg, a);
    dst_argb[4 + 2] = BLEND(fr, br, a);
    dst_argb[4 + 3] = 255u;
    src_argb0 += 8;
    src_argb1 += 8;
    dst_argb += 8;
  }

  if (width & 1) {
    uint32_t fb = src_argb0[0];
    uint32_t fg = src_argb0[1];
    uint32_t fr = src_argb0[2];
    uint32_t a = src_argb0[3];
    uint32_t bb = src_argb1[0];
    uint32_t bg = src_argb1[1];
    uint32_t br = src_argb1[2];
    dst_argb[0] = BLEND(fb, bb, a);
    dst_argb[1] = BLEND(fg, bg, a);
    dst_argb[2] = BLEND(fr, br, a);
    dst_argb[3] = 255u;
  }
}
#undef BLEND

#define UBLEND(f, b, a) (((a) * (f) + (255 - (a)) * (b) + 255) >> 8)
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
    dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  if (width & 1) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
  }
}
#undef UBLEND

#if defined(__aarch64__) || defined(__arm__)
#define ATTENUATE(f, a) (((f) * (a) + 128) >> 8)
#else
// This code mimics the SSSE3 version for better testability.
#define ATTENUATE(f, a) ((((a) | ((a) << 8)) * ((f) | ((f) << 8))) >> 24)
#endif
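// Note (illustrative arithmetic): replicating the 8 bit values to 16 bits
// before the multiply makes a = 255 act as exactly 1.0, e.g. f = 128,
// a = 255: (0xffff * 0x8080) >> 24 = 0x80 = 128.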

// Multiply source RGB by alpha and store to destination.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    uint32_t b = src_argb[0];
    uint32_t g = src_argb[1];
    uint32_t r = src_argb[2];
    uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
    b = src_argb[4];
    g = src_argb[5];
    r = src_argb[6];
    a = src_argb[7];
    dst_argb[4] = ATTENUATE(b, a);
    dst_argb[5] = ATTENUATE(g, a);
    dst_argb[6] = ATTENUATE(r, a);
    dst_argb[7] = a;
    src_argb += 8;
    dst_argb += 8;
  }

  if (width & 1) {
    const uint32_t b = src_argb[0];
    const uint32_t g = src_argb[1];
    const uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
  }
}
#undef ATTENUATE

// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
#define T(a) (0x01000000 + (0x10000 / (a)))
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07),    T(0x08),    T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e),    T(0x0f),    T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15),    T(0x16),    T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c),    T(0x1d),    T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23),    T(0x24),    T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a),    T(0x2b),    T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31),    T(0x32),    T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38),    T(0x39),    T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f),    T(0x40),    T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46),    T(0x47),    T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d),    T(0x4e),    T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54),    T(0x55),    T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b),    T(0x5c),    T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62),    T(0x63),    T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69),    T(0x6a),    T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70),    T(0x71),    T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77),    T(0x78),    T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e),    T(0x7f),    T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85),    T(0x86),    T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c),    T(0x8d),    T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93),    T(0x94),    T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a),    T(0x9b),    T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1),    T(0xa2),    T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8),    T(0xa9),    T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf),    T(0xb0),    T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6),    T(0xb7),    T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd),    T(0xbe),    T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4),    T(0xc5),    T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb),    T(0xcc),    T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2),    T(0xd3),    T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9),    T(0xda),    T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0),    T(0xe1),    T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7),    T(0xe8),    T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee),    T(0xef),    T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5),    T(0xf6),    T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc),    T(0xfd),    T(0xfe), 0x01000100};
#undef T
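// e.g. (illustrative) fixed_invtbl8[128] = 0x01000000 + 65536 / 128 =
// 0x01000200: the low half ia = 0x200 is 2.0 in 8.8 fixed point, so
// (b * ia) >> 8 doubles b, approximating b * 255 / 128.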

void ARGBUnattenuateRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint32_t b = src_argb[0];
    uint32_t g = src_argb[1];
    uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
    b = (b * ia) >> 8;
    g = (g * ia) >> 8;
    r = (r * ia) >> 8;
    // Clamping should not be necessary but is free in assembly.
    dst_argb[0] = clamp255(b);
    dst_argb[1] = clamp255(g);
    dst_argb[2] = clamp255(r);
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}

void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t row_sum[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    row_sum[0] += row[x * 4 + 0];
    row_sum[1] += row[x * 4 + 1];
    row_sum[2] += row[x * 4 + 2];
    row_sum[3] += row[x * 4 + 3];
    cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
    cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
    cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
    cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
  }
}

void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  float ooa = 1.0f / area;
  int i;
  for (i = 0; i < count; ++i) {
    dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
    dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
    dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
    dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
    dst += 4;
    tl += 4;
    bl += 4;
  }
}

// Copy pixels from rotated source to destination row with a slope.
LIBYUV_API
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Render a row of pixels from source into a buffer.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    *(uint32_t*)(dst_argb) =
        *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}

// Blend 2 rows into 1.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

// C version 2x2 -> 2x1.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (y1_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (y1_fraction == 128) {
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
    dst_ptr[1] =
        (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
  }
}

void InterpolateRow_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint16_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (source_y_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width * 2);
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
  }
}

// Use first 4 shuffler values to reorder ARGB channels.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  int index0 = shuffler[0];
  int index1 = shuffler[1];
  int index2 = shuffler[2];
  int index3 = shuffler[3];
  // Shuffle a row of ARGB.
  int x;
  for (x = 0; x < width; ++x) {
    // To support in-place conversion.
    uint8_t b = src_argb[index0];
    uint8_t g = src_argb[index1];
    uint8_t r = src_argb[index2];
    uint8_t a = src_argb[index3];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}

void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[1];
    dst_frame[3] = src_v[0];
    dst_frame += 4;
    src_y += 2;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;
    dst_frame[3] = src_v[0];
  }
}

void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;
  }
}

void ARGBPolynomialRow_C(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float b = (float)(src_argb[0]);
    float g = (float)(src_argb[1]);
    float r = (float)(src_argb[2]);
    float a = (float)(src_argb[3]);
    float b2 = b * b;
    float g2 = g * g;
    float r2 = r * r;
    float a2 = a * a;
    float db = poly[0] + poly[4] * b;
    float dg = poly[1] + poly[5] * g;
    float dr = poly[2] + poly[6] * r;
    float da = poly[3] + poly[7] * a;
    float b3 = b2 * b;
    float g3 = g2 * g;
    float r3 = r2 * r;
    float a3 = a2 * a;
    db += poly[8] * b2;
    dg += poly[9] * g2;
    dr += poly[10] * r2;
    da += poly[11] * a2;
    db += poly[12] * b3;
    dg += poly[13] * g3;
    dr += poly[14] * r3;
    da += poly[15] * a3;

    dst_argb[0] = Clamp((int32_t)(db));
    dst_argb[1] = Clamp((int32_t)(dg));
    dst_argb[2] = Clamp((int32_t)(dr));
    dst_argb[3] = Clamp((int32_t)(da));
    src_argb += 4;
    dst_argb += 4;
  }
}

// Samples assumed to be unsigned in low 9, 10 or 12 bits. The scale factor
// adjusts the source integer range to the desired half float range.

// This magic constant is 2^-112. Multiplying by this
// is the same as subtracting 112 from the exponent, which
// is the difference in exponent bias between 32-bit and
// 16-bit floats. Once we've done this subtraction, we can
// simply extract the low bits of the exponent and the high
// bits of the mantissa from our float and we're done.
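// e.g. (illustrative) value = 1.0f * 2^-112 has float bits
// (127 - 112) << 23 = 0x07800000; >> 13 gives 0x3c00, which is 1.0 in IEEE
// half float.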

// Work around GCC 7 punning warning -Wstrict-aliasing
#if defined(__GNUC__)
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
#else
typedef uint32_t uint32_alias_t;
#endif

void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  int i;
  float mult = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
    float value = src[i] * mult;
    dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13);
  }
}
3186
ByteToFloatRow_C(const uint8_t * src,float * dst,float scale,int width)3187 void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
3188 int i;
3189 for (i = 0; i < width; ++i) {
3190 float value = src[i] * scale;
3191 dst[i] = value;
3192 }
3193 }
3194
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  uint32_t bc = lumacoeff & 0xff;
  uint32_t gc = (lumacoeff >> 8) & 0xff;
  uint32_t rc = (lumacoeff >> 16) & 0xff;

  int i;
  for (i = 0; i < width - 1; i += 2) {
    // Luminance in rows, color values in columns.
    const uint8_t* luma0 =
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
        luma;
    const uint8_t* luma1;
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
    luma1 =
        ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) +
        luma;
    dst_argb[4] = luma1[src_argb[4]];
    dst_argb[5] = luma1[src_argb[5]];
    dst_argb[6] = luma1[src_argb[6]];
    dst_argb[7] = src_argb[7];
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    // Luminance in rows, color values in columns.
    const uint8_t* luma0 =
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
        luma;
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
}

void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst[3] = src[3];
    dst[7] = src[7];
    dst += 8;
    src += 8;
  }
  if (width & 1) {
    dst[3] = src[3];
  }
}

void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst_a[0] = src_argb[3];
    dst_a[1] = src_argb[7];
    dst_a += 2;
    src_argb += 8;
  }
  if (width & 1) {
    dst_a[0] = src_argb[3];
  }
}

void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst[3] = src[0];
    dst[7] = src[1];
    dst += 8;
    src += 2;
  }
  if (width & 1) {
    dst[3] = src[0];
  }
}

// Maximum temporary width for wrappers to process at a time, in pixels.
#define MAXTWIDTH 2048
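
// The two-step wrappers below share a pattern: convert up to MAXTWIDTH pixels
// per pass into an aligned temporary ARGB row (MAXTWIDTH * 4 bytes), then
// pack that row into the destination format with an existing row function.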

#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has an asm version, but GCC uses this 2-step wrapper.
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_SSSE3)
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_SSSE3)
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_SSSE3)
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_uv,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB24ROW_SSSE3)
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_uv += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV21TORGB24ROW_SSSE3)
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB24ROW_AVX2)
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV21TORGB24ROW_AVX2)
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TORGB565ROW_AVX2)
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_AVX2)
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_AVX2)
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TORGB24ROW_AVX2)
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#ifdef HAS_RGB24TOYJROW_AVX2
// Convert 16 RGB24 pixels (48 bytes) to 16 YJ values.
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
    src_rgb24 += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_AVX2

#ifdef HAS_RAWTOYJROW_AVX2
// Convert 16 RAW pixels (48 bytes) to 16 YJ values.
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
    src_raw += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RAWTOYJROW_AVX2

#ifdef HAS_RGB24TOYJROW_SSSE3
// Convert 16 RGB24 pixels (48 bytes) to 16 YJ values.
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
    src_rgb24 += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_SSSE3

#ifdef HAS_RAWTOYJROW_SSSE3
// Convert 16 RAW pixels (48 bytes) to 16 YJ values.
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
    src_raw += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RAWTOYJROW_SSSE3

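// Scale an array of float samples; as a by-product, ScaleSumSamples_C returns
// the sum of squares of the unscaled input and ScaleMaxSamples_C its maximum.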
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float fsum = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    fsum += v * v;
    *dst++ = v * scale;
  }
  return fsum;
}

float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float fmax = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    float vs = v * scale;
    fmax = (v > fmax) ? v : fmax;
    *dst++ = vs;
  }
  return fmax;
}

void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src++ * scale;
  }
}

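// Filter 5 samples with 1, 4, 6, 4, 1 coefficients to produce 1 sample; the
// + 128 and >> 8 round and remove the combined 16 * 16 = 256 gain of the
// column and row passes.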
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ =
        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8;
    ++src;
  }
}

// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}

void GaussRow_F32_C(const float* src, float* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) *
             (1.0f / 256.0f);
    ++src;
  }
}

// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}

// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
    dst_yuv24[3] = src_vu[0];  // V
    dst_yuv24[4] = src_vu[1];  // U
    dst_yuv24[5] = src_y[1];   // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
  }
}

// Filter 2 rows of AYUV UV's (444) into UV (420).
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Output a row of UV values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    src_ayuv += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 1] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 0] + 2) >>
                2;
  }
}

// Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Output a row of VU values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    src_ayuv += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 0] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 1] + 2) >>
                2;
  }
}

// Copy row of AYUV Y's into Y
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[2];  // v,u,y,a
    src_ayuv += 4;
  }
}

// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[0];
    uint8_t v = src_uv[1];
    dst_vu[0] = v;
    dst_vu[1] = u;
    src_uv += 2;
    dst_vu += 2;
  }
}

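// Merge separate U and V planes into interleaved UV, 2x2 box-filtering each
// plane so the output is half width and half height; a trailing odd column
// is averaged vertically only.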
void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_u[0] + src_u[1] + src_u[src_stride_u] +
                 src_u[src_stride_u + 1] + 2) >>
                2;
    dst_uv[1] = (src_v[0] + src_v[1] + src_v[src_stride_v] +
                 src_v[src_stride_v + 1] + 2) >>
                2;
    src_u += 2;
    src_v += 2;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_u[0] + src_u[src_stride_u] + 1) >> 1;
    dst_uv[1] = (src_v[0] + src_v[src_stride_v] + 1) >> 1;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif