• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/row.h"
12 
13 #include <stdio.h>
14 #include <string.h>  // For memcpy and memset.
15 
16 #include "libyuv/basic_types.h"
17 #include "libyuv/convert_argb.h"  // For kYuvI601Constants
18 
19 #ifdef __cplusplus
20 namespace libyuv {
21 extern "C" {
22 #endif
23 
24 // The following ifdef from row_win makes the C code match the row_win code,
25 // which is 7 bit fixed point.
26 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \
27     (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__)))
28 #define LIBYUV_RGB7 1
29 #endif
30 
31 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
32     defined(_M_IX86)
33 #define LIBYUV_ARGBTOUV_PAVGB 1
34 #define LIBYUV_RGBTOU_TRUNCATE 1
35 #endif
36 
37 // llvm x86 is poor at ternary operator, so use branchless min/max.
38 
39 #define USE_BRANCHLESS 1
40 #if USE_BRANCHLESS
clamp0(int32_t v)41 static __inline int32_t clamp0(int32_t v) {
42   return -(v >= 0) & v;
43 }
44 // TODO(fbarchard): make clamp255 preserve negative values.
clamp255(int32_t v)45 static __inline int32_t clamp255(int32_t v) {
46   return (-(v >= 255) | v) & 255;
47 }
48 
clamp1023(int32_t v)49 static __inline int32_t clamp1023(int32_t v) {
50   return (-(v >= 1023) | v) & 1023;
51 }
52 
Abs(int32_t v)53 static __inline uint32_t Abs(int32_t v) {
54   int m = -(v < 0);
55   return (v + m) ^ m;
56 }
57 #else   // USE_BRANCHLESS
58 static __inline int32_t clamp0(int32_t v) {
59   return (v < 0) ? 0 : v;
60 }
61 
62 static __inline int32_t clamp255(int32_t v) {
63   return (v > 255) ? 255 : v;
64 }
65 
66 static __inline int32_t clamp1023(int32_t v) {
67   return (v > 1023) ? 1023 : v;
68 }
69 
70 static __inline uint32_t Abs(int32_t v) {
71   return (v < 0) ? -v : v;
72 }
73 #endif  // USE_BRANCHLESS
Clamp(int32_t val)74 static __inline uint32_t Clamp(int32_t val) {
75   int v = clamp0(val);
76   return (uint32_t)(clamp255(v));
77 }
78 
Clamp10(int32_t val)79 static __inline uint32_t Clamp10(int32_t val) {
80   int v = clamp0(val);
81   return (uint32_t)(clamp1023(v));
82 }
83 
84 // Little Endian
85 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
86     defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
87     (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
88 #define WRITEWORD(p, v) *(uint32_t*)(p) = v
89 #else
WRITEWORD(uint8_t * p,uint32_t v)90 static inline void WRITEWORD(uint8_t* p, uint32_t v) {
91   p[0] = (uint8_t)(v & 255);
92   p[1] = (uint8_t)((v >> 8) & 255);
93   p[2] = (uint8_t)((v >> 16) & 255);
94   p[3] = (uint8_t)((v >> 24) & 255);
95 }
96 #endif
97 
RGB24ToARGBRow_C(const uint8_t * src_rgb24,uint8_t * dst_argb,int width)98 void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
99   int x;
100   for (x = 0; x < width; ++x) {
101     uint8_t b = src_rgb24[0];
102     uint8_t g = src_rgb24[1];
103     uint8_t r = src_rgb24[2];
104     dst_argb[0] = b;
105     dst_argb[1] = g;
106     dst_argb[2] = r;
107     dst_argb[3] = 255u;
108     dst_argb += 4;
109     src_rgb24 += 3;
110   }
111 }
112 
RAWToARGBRow_C(const uint8_t * src_raw,uint8_t * dst_argb,int width)113 void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
114   int x;
115   for (x = 0; x < width; ++x) {
116     uint8_t r = src_raw[0];
117     uint8_t g = src_raw[1];
118     uint8_t b = src_raw[2];
119     dst_argb[0] = b;
120     dst_argb[1] = g;
121     dst_argb[2] = r;
122     dst_argb[3] = 255u;
123     dst_argb += 4;
124     src_raw += 3;
125   }
126 }
127 
RAWToRGBARow_C(const uint8_t * src_raw,uint8_t * dst_rgba,int width)128 void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
129   int x;
130   for (x = 0; x < width; ++x) {
131     uint8_t r = src_raw[0];
132     uint8_t g = src_raw[1];
133     uint8_t b = src_raw[2];
134     dst_rgba[0] = 255u;
135     dst_rgba[1] = b;
136     dst_rgba[2] = g;
137     dst_rgba[3] = r;
138     dst_rgba += 4;
139     src_raw += 3;
140   }
141 }
142 
RAWToRGB24Row_C(const uint8_t * src_raw,uint8_t * dst_rgb24,int width)143 void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
144   int x;
145   for (x = 0; x < width; ++x) {
146     uint8_t r = src_raw[0];
147     uint8_t g = src_raw[1];
148     uint8_t b = src_raw[2];
149     dst_rgb24[0] = b;
150     dst_rgb24[1] = g;
151     dst_rgb24[2] = r;
152     dst_rgb24 += 3;
153     src_raw += 3;
154   }
155 }
156 
RGB565ToARGBRow_C(const uint8_t * src_rgb565,uint8_t * dst_argb,int width)157 void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
158                        uint8_t* dst_argb,
159                        int width) {
160   int x;
161   for (x = 0; x < width; ++x) {
162     uint8_t b = src_rgb565[0] & 0x1f;
163     uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
164     uint8_t r = src_rgb565[1] >> 3;
165     dst_argb[0] = (b << 3) | (b >> 2);
166     dst_argb[1] = (g << 2) | (g >> 4);
167     dst_argb[2] = (r << 3) | (r >> 2);
168     dst_argb[3] = 255u;
169     dst_argb += 4;
170     src_rgb565 += 2;
171   }
172 }
173 
ARGB1555ToARGBRow_C(const uint8_t * src_argb1555,uint8_t * dst_argb,int width)174 void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
175                          uint8_t* dst_argb,
176                          int width) {
177   int x;
178   for (x = 0; x < width; ++x) {
179     uint8_t b = src_argb1555[0] & 0x1f;
180     uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
181     uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
182     uint8_t a = src_argb1555[1] >> 7;
183     dst_argb[0] = (b << 3) | (b >> 2);
184     dst_argb[1] = (g << 3) | (g >> 2);
185     dst_argb[2] = (r << 3) | (r >> 2);
186     dst_argb[3] = -a;
187     dst_argb += 4;
188     src_argb1555 += 2;
189   }
190 }
191 
ARGB4444ToARGBRow_C(const uint8_t * src_argb4444,uint8_t * dst_argb,int width)192 void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
193                          uint8_t* dst_argb,
194                          int width) {
195   int x;
196   for (x = 0; x < width; ++x) {
197     uint8_t b = src_argb4444[0] & 0x0f;
198     uint8_t g = src_argb4444[0] >> 4;
199     uint8_t r = src_argb4444[1] & 0x0f;
200     uint8_t a = src_argb4444[1] >> 4;
201     dst_argb[0] = (b << 4) | b;
202     dst_argb[1] = (g << 4) | g;
203     dst_argb[2] = (r << 4) | r;
204     dst_argb[3] = (a << 4) | a;
205     dst_argb += 4;
206     src_argb4444 += 2;
207   }
208 }
209 
AR30ToARGBRow_C(const uint8_t * src_ar30,uint8_t * dst_argb,int width)210 void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
211   int x;
212   for (x = 0; x < width; ++x) {
213     uint32_t ar30;
214     memcpy(&ar30, src_ar30, sizeof ar30);
215     uint32_t b = (ar30 >> 2) & 0xff;
216     uint32_t g = (ar30 >> 12) & 0xff;
217     uint32_t r = (ar30 >> 22) & 0xff;
218     uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
219     *(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24);
220     dst_argb += 4;
221     src_ar30 += 4;
222   }
223 }
224 
AR30ToABGRRow_C(const uint8_t * src_ar30,uint8_t * dst_abgr,int width)225 void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
226   int x;
227   for (x = 0; x < width; ++x) {
228     uint32_t ar30;
229     memcpy(&ar30, src_ar30, sizeof ar30);
230     uint32_t b = (ar30 >> 2) & 0xff;
231     uint32_t g = (ar30 >> 12) & 0xff;
232     uint32_t r = (ar30 >> 22) & 0xff;
233     uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
234     *(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24);
235     dst_abgr += 4;
236     src_ar30 += 4;
237   }
238 }
239 
AR30ToAB30Row_C(const uint8_t * src_ar30,uint8_t * dst_ab30,int width)240 void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
241   int x;
242   for (x = 0; x < width; ++x) {
243     uint32_t ar30;
244     memcpy(&ar30, src_ar30, sizeof ar30);
245     uint32_t b = ar30 & 0x3ff;
246     uint32_t ga = ar30 & 0xc00ffc00;
247     uint32_t r = (ar30 >> 20) & 0x3ff;
248     *(uint32_t*)(dst_ab30) = r | ga | (b << 20);
249     dst_ab30 += 4;
250     src_ar30 += 4;
251   }
252 }
253 
ARGBToRGB24Row_C(const uint8_t * src_argb,uint8_t * dst_rgb,int width)254 void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
255   int x;
256   for (x = 0; x < width; ++x) {
257     uint8_t b = src_argb[0];
258     uint8_t g = src_argb[1];
259     uint8_t r = src_argb[2];
260     dst_rgb[0] = b;
261     dst_rgb[1] = g;
262     dst_rgb[2] = r;
263     dst_rgb += 3;
264     src_argb += 4;
265   }
266 }
267 
ARGBToRAWRow_C(const uint8_t * src_argb,uint8_t * dst_rgb,int width)268 void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
269   int x;
270   for (x = 0; x < width; ++x) {
271     uint8_t b = src_argb[0];
272     uint8_t g = src_argb[1];
273     uint8_t r = src_argb[2];
274     dst_rgb[0] = r;
275     dst_rgb[1] = g;
276     dst_rgb[2] = b;
277     dst_rgb += 3;
278     src_argb += 4;
279   }
280 }
281 
ARGBToRGB565Row_C(const uint8_t * src_argb,uint8_t * dst_rgb,int width)282 void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
283   int x;
284   for (x = 0; x < width - 1; x += 2) {
285     uint8_t b0 = src_argb[0] >> 3;
286     uint8_t g0 = src_argb[1] >> 2;
287     uint8_t r0 = src_argb[2] >> 3;
288     uint8_t b1 = src_argb[4] >> 3;
289     uint8_t g1 = src_argb[5] >> 2;
290     uint8_t r1 = src_argb[6] >> 3;
291     WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
292                            (r1 << 27));
293     dst_rgb += 4;
294     src_argb += 8;
295   }
296   if (width & 1) {
297     uint8_t b0 = src_argb[0] >> 3;
298     uint8_t g0 = src_argb[1] >> 2;
299     uint8_t r0 = src_argb[2] >> 3;
300     *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
301   }
302 }
303 
304 // dither4 is a row of 4 values from 4x4 dither matrix.
305 // The 4x4 matrix contains values to increase RGB.  When converting to
306 // fewer bits (565) this provides an ordered dither.
307 // The order in the 4x4 matrix in first byte is upper left.
308 // The 4 values are passed as an int, then referenced as an array, so
309 // endian will not affect order of the original matrix.  But the dither4
310 // will containing the first pixel in the lower byte for little endian
311 // or the upper byte for big endian.
ARGBToRGB565DitherRow_C(const uint8_t * src_argb,uint8_t * dst_rgb,const uint32_t dither4,int width)312 void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
313                              uint8_t* dst_rgb,
314                              const uint32_t dither4,
315                              int width) {
316   int x;
317   for (x = 0; x < width - 1; x += 2) {
318     int dither0 = ((const unsigned char*)(&dither4))[x & 3];
319     int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
320     uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
321     uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
322     uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
323     uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3;
324     uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2;
325     uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3;
326     WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
327                            (r1 << 27));
328     dst_rgb += 4;
329     src_argb += 8;
330   }
331   if (width & 1) {
332     int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
333     uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3;
334     uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2;
335     uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3;
336     *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
337   }
338 }
339 
ARGBToARGB1555Row_C(const uint8_t * src_argb,uint8_t * dst_rgb,int width)340 void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
341   int x;
342   for (x = 0; x < width - 1; x += 2) {
343     uint8_t b0 = src_argb[0] >> 3;
344     uint8_t g0 = src_argb[1] >> 3;
345     uint8_t r0 = src_argb[2] >> 3;
346     uint8_t a0 = src_argb[3] >> 7;
347     uint8_t b1 = src_argb[4] >> 3;
348     uint8_t g1 = src_argb[5] >> 3;
349     uint8_t r1 = src_argb[6] >> 3;
350     uint8_t a1 = src_argb[7] >> 7;
351     *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
352                             (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
353     dst_rgb += 4;
354     src_argb += 8;
355   }
356   if (width & 1) {
357     uint8_t b0 = src_argb[0] >> 3;
358     uint8_t g0 = src_argb[1] >> 3;
359     uint8_t r0 = src_argb[2] >> 3;
360     uint8_t a0 = src_argb[3] >> 7;
361     *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
362   }
363 }
364 
ARGBToARGB4444Row_C(const uint8_t * src_argb,uint8_t * dst_rgb,int width)365 void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
366   int x;
367   for (x = 0; x < width - 1; x += 2) {
368     uint8_t b0 = src_argb[0] >> 4;
369     uint8_t g0 = src_argb[1] >> 4;
370     uint8_t r0 = src_argb[2] >> 4;
371     uint8_t a0 = src_argb[3] >> 4;
372     uint8_t b1 = src_argb[4] >> 4;
373     uint8_t g1 = src_argb[5] >> 4;
374     uint8_t r1 = src_argb[6] >> 4;
375     uint8_t a1 = src_argb[7] >> 4;
376     *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
377                             (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
378     dst_rgb += 4;
379     src_argb += 8;
380   }
381   if (width & 1) {
382     uint8_t b0 = src_argb[0] >> 4;
383     uint8_t g0 = src_argb[1] >> 4;
384     uint8_t r0 = src_argb[2] >> 4;
385     uint8_t a0 = src_argb[3] >> 4;
386     *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
387   }
388 }
389 
ABGRToAR30Row_C(const uint8_t * src_abgr,uint8_t * dst_ar30,int width)390 void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
391   int x;
392   for (x = 0; x < width; ++x) {
393     uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
394     uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
395     uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
396     uint32_t a0 = (src_abgr[3] >> 6);
397     *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30);
398     dst_ar30 += 4;
399     src_abgr += 4;
400   }
401 }
402 
ARGBToAR30Row_C(const uint8_t * src_argb,uint8_t * dst_ar30,int width)403 void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
404   int x;
405   for (x = 0; x < width; ++x) {
406     uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
407     uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
408     uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
409     uint32_t a0 = (src_argb[3] >> 6);
410     *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30);
411     dst_ar30 += 4;
412     src_argb += 4;
413   }
414 }
415 
416 #ifdef LIBYUV_RGB7
417 // Old 7 bit math for compatibility on unsupported platforms.
RGBToY(uint8_t r,uint8_t g,uint8_t b)418 static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
419   return ((33 * r + 65 * g + 13 * b) >> 7) + 16;
420 }
421 #else
422 // 8 bit
423 // Intel SSE/AVX uses the following equivalent formula
424 // 0x7e80 = (66 + 129 + 25) * -128 + 0x1000 (for +16) and 0x0080 for round.
425 //  return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
426 //  0x7e80) >> 8;
427 
RGBToY(uint8_t r,uint8_t g,uint8_t b)428 static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
429   return (66 * r + 129 * g + 25 * b + 0x1080) >> 8;
430 }
431 #endif
432 
433 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
434 
435 #ifdef LIBYUV_RGBTOU_TRUNCATE
RGBToU(uint8_t r,uint8_t g,uint8_t b)436 static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
437   return (112 * b - 74 * g - 38 * r + 0x8000) >> 8;
438 }
RGBToV(uint8_t r,uint8_t g,uint8_t b)439 static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
440   return (112 * r - 94 * g - 18 * b + 0x8000) >> 8;
441 }
442 #else
443 // TODO(fbarchard): Add rounding to SIMD and use this
RGBToU(uint8_t r,uint8_t g,uint8_t b)444 static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
445   return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
446 }
RGBToV(uint8_t r,uint8_t g,uint8_t b)447 static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
448   return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
449 }
450 #endif
451 
452 #if !defined(LIBYUV_ARGBTOUV_PAVGB)
RGB2xToU(uint16_t r,uint16_t g,uint16_t b)453 static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
454   return ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8;
455 }
RGB2xToV(uint16_t r,uint16_t g,uint16_t b)456 static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
457   return ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8;
458 }
459 #endif
460 
461 // ARGBToY_C and ARGBToUV_C
462 // Intel version mimic SSE/AVX which does 2 pavgb
463 #if LIBYUV_ARGBTOUV_PAVGB
464 
465 #define MAKEROWY(NAME, R, G, B, BPP)                                         \
466   void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
467     int x;                                                                   \
468     for (x = 0; x < width; ++x) {                                            \
469       dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);           \
470       src_argb0 += BPP;                                                      \
471       dst_y += 1;                                                            \
472     }                                                                        \
473   }                                                                          \
474   void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
475                        uint8_t* dst_u, uint8_t* dst_v, int width) {          \
476     const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                     \
477     int x;                                                                   \
478     for (x = 0; x < width - 1; x += 2) {                                     \
479       uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                      \
480                         AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));         \
481       uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                      \
482                         AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));         \
483       uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                      \
484                         AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));         \
485       dst_u[0] = RGBToU(ar, ag, ab);                                         \
486       dst_v[0] = RGBToV(ar, ag, ab);                                         \
487       src_rgb0 += BPP * 2;                                                   \
488       src_rgb1 += BPP * 2;                                                   \
489       dst_u += 1;                                                            \
490       dst_v += 1;                                                            \
491     }                                                                        \
492     if (width & 1) {                                                         \
493       uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]);                           \
494       uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]);                           \
495       uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]);                           \
496       dst_u[0] = RGBToU(ar, ag, ab);                                         \
497       dst_v[0] = RGBToV(ar, ag, ab);                                         \
498     }                                                                        \
499   }
500 #else
501 // ARM version does sum / 2 then multiply by 2x smaller coefficients
502 #define MAKEROWY(NAME, R, G, B, BPP)                                         \
503   void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
504     int x;                                                                   \
505     for (x = 0; x < width; ++x) {                                            \
506       dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);           \
507       src_argb0 += BPP;                                                      \
508       dst_y += 1;                                                            \
509     }                                                                        \
510   }                                                                          \
511   void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
512                        uint8_t* dst_u, uint8_t* dst_v, int width) {          \
513     const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                     \
514     int x;                                                                   \
515     for (x = 0; x < width - 1; x += 2) {                                     \
516       uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] +         \
517                      src_rgb1[B + BPP] + 1) >>                               \
518                     1;                                                       \
519       uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] +         \
520                      src_rgb1[G + BPP] + 1) >>                               \
521                     1;                                                       \
522       uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] +         \
523                      src_rgb1[R + BPP] + 1) >>                               \
524                     1;                                                       \
525       dst_u[0] = RGB2xToU(ar, ag, ab);                                       \
526       dst_v[0] = RGB2xToV(ar, ag, ab);                                       \
527       src_rgb0 += BPP * 2;                                                   \
528       src_rgb1 += BPP * 2;                                                   \
529       dst_u += 1;                                                            \
530       dst_v += 1;                                                            \
531     }                                                                        \
532     if (width & 1) {                                                         \
533       uint16_t ab = src_rgb0[B] + src_rgb1[B];                               \
534       uint16_t ag = src_rgb0[G] + src_rgb1[G];                               \
535       uint16_t ar = src_rgb0[R] + src_rgb1[R];                               \
536       dst_u[0] = RGB2xToU(ar, ag, ab);                                       \
537       dst_v[0] = RGB2xToV(ar, ag, ab);                                       \
538     }                                                                        \
539   }
540 #endif
541 
542 MAKEROWY(ARGB, 2, 1, 0, 4)
543 MAKEROWY(BGRA, 1, 2, 3, 4)
544 MAKEROWY(ABGR, 0, 1, 2, 4)
545 MAKEROWY(RGBA, 3, 2, 1, 4)
546 MAKEROWY(RGB24, 2, 1, 0, 3)
547 MAKEROWY(RAW, 0, 1, 2, 3)
548 #undef MAKEROWY
549 
550 // JPeg uses a variation on BT.601-1 full range
551 // y =  0.29900 * r + 0.58700 * g + 0.11400 * b
552 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
553 // v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
554 // BT.601 Mpeg range uses:
555 // b 0.1016 * 255 = 25.908 = 25
556 // g 0.5078 * 255 = 129.489 = 129
557 // r 0.2578 * 255 = 65.739 = 66
558 // JPeg 7 bit Y (deprecated)
559 // b 0.11400 * 128 = 14.592 = 15
560 // g 0.58700 * 128 = 75.136 = 75
561 // r 0.29900 * 128 = 38.272 = 38
562 // JPeg 8 bit Y:
563 // b 0.11400 * 256 = 29.184 = 29
564 // g 0.58700 * 256 = 150.272 = 150
565 // r 0.29900 * 256 = 76.544 = 77
566 // JPeg 8 bit U:
567 // b  0.50000 * 255 = 127.5 = 127
568 // g -0.33126 * 255 = -84.4713 = -84
569 // r -0.16874 * 255 = -43.0287 = -43
570 // JPeg 8 bit V:
571 // b -0.08131 * 255 = -20.73405 = -20
572 // g -0.41869 * 255 = -106.76595 = -107
573 // r  0.50000 * 255 = 127.5 = 127
574 
575 #ifdef LIBYUV_RGB7
576 // Old 7 bit math for compatibility on unsupported platforms.
RGBToYJ(uint8_t r,uint8_t g,uint8_t b)577 static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
578   return (38 * r + 75 * g + 15 * b + 64) >> 7;
579 }
580 #else
581 // 8 bit
582 static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
583   return (77 * r + 150 * g + 29 * b + 128) >> 8;
584 }
585 #endif
586 
587 #if defined(LIBYUV_ARGBTOUV_PAVGB)
RGBToUJ(uint8_t r,uint8_t g,uint8_t b)588 static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
589   return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
590 }
RGBToVJ(uint8_t r,uint8_t g,uint8_t b)591 static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
592   return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
593 }
594 #else
RGB2xToUJ(uint16_t r,uint16_t g,uint16_t b)595 static __inline int RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
596   return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
597 }
RGB2xToVJ(uint16_t r,uint16_t g,uint16_t b)598 static __inline int RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
599   return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
600 }
601 #endif
602 
603 // ARGBToYJ_C and ARGBToUVJ_C
604 // Intel version mimic SSE/AVX which does 2 pavgb
605 #if LIBYUV_ARGBTOUV_PAVGB
606 #define MAKEROWYJ(NAME, R, G, B, BPP)                                         \
607   void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
608     int x;                                                                    \
609     for (x = 0; x < width; ++x) {                                             \
610       dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);           \
611       src_argb0 += BPP;                                                       \
612       dst_y += 1;                                                             \
613     }                                                                         \
614   }                                                                           \
615   void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
616                         uint8_t* dst_u, uint8_t* dst_v, int width) {          \
617     const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                      \
618     int x;                                                                    \
619     for (x = 0; x < width - 1; x += 2) {                                      \
620       uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                       \
621                         AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));          \
622       uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                       \
623                         AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));          \
624       uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                       \
625                         AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));          \
626       dst_u[0] = RGBToUJ(ar, ag, ab);                                         \
627       dst_v[0] = RGBToVJ(ar, ag, ab);                                         \
628       src_rgb0 += BPP * 2;                                                    \
629       src_rgb1 += BPP * 2;                                                    \
630       dst_u += 1;                                                             \
631       dst_v += 1;                                                             \
632     }                                                                         \
633     if (width & 1) {                                                          \
634       uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]);                            \
635       uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]);                            \
636       uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]);                            \
637       dst_u[0] = RGBToUJ(ar, ag, ab);                                         \
638       dst_v[0] = RGBToVJ(ar, ag, ab);                                         \
639     }                                                                         \
640   }
641 #else
642 // ARM version does sum / 2 then multiply by 2x smaller coefficients
643 #define MAKEROWYJ(NAME, R, G, B, BPP)                                         \
644   void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
645     int x;                                                                    \
646     for (x = 0; x < width; ++x) {                                             \
647       dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);           \
648       src_argb0 += BPP;                                                       \
649       dst_y += 1;                                                             \
650     }                                                                         \
651   }                                                                           \
652   void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
653                         uint8_t* dst_u, uint8_t* dst_v, int width) {          \
654     const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb;                      \
655     int x;                                                                    \
656     for (x = 0; x < width - 1; x += 2) {                                      \
657       uint16_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] +          \
658                      src_rgb1[B + BPP] + 1) >>                                \
659                     1;                                                        \
660       uint16_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] +          \
661                      src_rgb1[G + BPP] + 1) >>                                \
662                     1;                                                        \
663       uint16_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] +          \
664                      src_rgb1[R + BPP] + 1) >>                                \
665                     1;                                                        \
666       dst_u[0] = RGB2xToUJ(ar, ag, ab);                                       \
667       dst_v[0] = RGB2xToVJ(ar, ag, ab);                                       \
668       src_rgb0 += BPP * 2;                                                    \
669       src_rgb1 += BPP * 2;                                                    \
670       dst_u += 1;                                                             \
671       dst_v += 1;                                                             \
672     }                                                                         \
673     if (width & 1) {                                                          \
674       uint16_t ab = (src_rgb0[B] + src_rgb1[B]);                              \
675       uint16_t ag = (src_rgb0[G] + src_rgb1[G]);                              \
676       uint16_t ar = (src_rgb0[R] + src_rgb1[R]);                              \
677       dst_u[0] = RGB2xToUJ(ar, ag, ab);                                       \
678       dst_v[0] = RGB2xToVJ(ar, ag, ab);                                       \
679     }                                                                         \
680   }
681 
682 #endif
683 
684 MAKEROWYJ(ARGB, 2, 1, 0, 4)
685 MAKEROWYJ(RGBA, 3, 2, 1, 4)
686 MAKEROWYJ(RGB24, 2, 1, 0, 3)
687 MAKEROWYJ(RAW, 0, 1, 2, 3)
688 #undef MAKEROWYJ
689 
RGB565ToYRow_C(const uint8_t * src_rgb565,uint8_t * dst_y,int width)690 void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
691   int x;
692   for (x = 0; x < width; ++x) {
693     uint8_t b = src_rgb565[0] & 0x1f;
694     uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
695     uint8_t r = src_rgb565[1] >> 3;
696     b = (b << 3) | (b >> 2);
697     g = (g << 2) | (g >> 4);
698     r = (r << 3) | (r >> 2);
699     dst_y[0] = RGBToY(r, g, b);
700     src_rgb565 += 2;
701     dst_y += 1;
702   }
703 }
704 
ARGB1555ToYRow_C(const uint8_t * src_argb1555,uint8_t * dst_y,int width)705 void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
706   int x;
707   for (x = 0; x < width; ++x) {
708     uint8_t b = src_argb1555[0] & 0x1f;
709     uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
710     uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
711     b = (b << 3) | (b >> 2);
712     g = (g << 3) | (g >> 2);
713     r = (r << 3) | (r >> 2);
714     dst_y[0] = RGBToY(r, g, b);
715     src_argb1555 += 2;
716     dst_y += 1;
717   }
718 }
719 
ARGB4444ToYRow_C(const uint8_t * src_argb4444,uint8_t * dst_y,int width)720 void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
721   int x;
722   for (x = 0; x < width; ++x) {
723     uint8_t b = src_argb4444[0] & 0x0f;
724     uint8_t g = src_argb4444[0] >> 4;
725     uint8_t r = src_argb4444[1] & 0x0f;
726     b = (b << 4) | b;
727     g = (g << 4) | g;
728     r = (r << 4) | r;
729     dst_y[0] = RGBToY(r, g, b);
730     src_argb4444 += 2;
731     dst_y += 1;
732   }
733 }
734 
RGB565ToUVRow_C(const uint8_t * src_rgb565,int src_stride_rgb565,uint8_t * dst_u,uint8_t * dst_v,int width)735 void RGB565ToUVRow_C(const uint8_t* src_rgb565,
736                      int src_stride_rgb565,
737                      uint8_t* dst_u,
738                      uint8_t* dst_v,
739                      int width) {
740   const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
741   int x;
742   for (x = 0; x < width - 1; x += 2) {
743     uint8_t b0 = src_rgb565[0] & 0x1f;
744     uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
745     uint8_t r0 = src_rgb565[1] >> 3;
746     uint8_t b1 = src_rgb565[2] & 0x1f;
747     uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
748     uint8_t r1 = src_rgb565[3] >> 3;
749     uint8_t b2 = next_rgb565[0] & 0x1f;
750     uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
751     uint8_t r2 = next_rgb565[1] >> 3;
752     uint8_t b3 = next_rgb565[2] & 0x1f;
753     uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
754     uint8_t r3 = next_rgb565[3] >> 3;
755 
756     b0 = (b0 << 3) | (b0 >> 2);
757     g0 = (g0 << 2) | (g0 >> 4);
758     r0 = (r0 << 3) | (r0 >> 2);
759     b1 = (b1 << 3) | (b1 >> 2);
760     g1 = (g1 << 2) | (g1 >> 4);
761     r1 = (r1 << 3) | (r1 >> 2);
762     b2 = (b2 << 3) | (b2 >> 2);
763     g2 = (g2 << 2) | (g2 >> 4);
764     r2 = (r2 << 3) | (r2 >> 2);
765     b3 = (b3 << 3) | (b3 >> 2);
766     g3 = (g3 << 2) | (g3 >> 4);
767     r3 = (r3 << 3) | (r3 >> 2);
768 
769 #if LIBYUV_ARGBTOUV_PAVGB
770     uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
771     uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
772     uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
773     dst_u[0] = RGBToU(ar, ag, ab);
774     dst_v[0] = RGBToV(ar, ag, ab);
775 #else
776     uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
777     uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
778     uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
779     dst_u[0] = RGB2xToU(r, g, b);
780     dst_v[0] = RGB2xToV(r, g, b);
781 #endif
782 
783     src_rgb565 += 4;
784     next_rgb565 += 4;
785     dst_u += 1;
786     dst_v += 1;
787   }
788   if (width & 1) {
789     uint8_t b0 = src_rgb565[0] & 0x1f;
790     uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
791     uint8_t r0 = src_rgb565[1] >> 3;
792     uint8_t b2 = next_rgb565[0] & 0x1f;
793     uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
794     uint8_t r2 = next_rgb565[1] >> 3;
795 
796     b0 = (b0 << 3) | (b0 >> 2);
797     g0 = (g0 << 2) | (g0 >> 4);
798     r0 = (r0 << 3) | (r0 >> 2);
799     b2 = (b2 << 3) | (b2 >> 2);
800     g2 = (g2 << 2) | (g2 >> 4);
801     r2 = (r2 << 3) | (r2 >> 2);
802 
803 #if LIBYUV_ARGBTOUV_PAVGB
804     uint8_t ab = AVGB(b0, b2);
805     uint8_t ag = AVGB(g0, g2);
806     uint8_t ar = AVGB(r0, r2);
807     dst_u[0] = RGBToU(ar, ag, ab);
808     dst_v[0] = RGBToV(ar, ag, ab);
809 #else
810     uint16_t b = b0 + b2;
811     uint16_t g = g0 + g2;
812     uint16_t r = r0 + r2;
813     dst_u[0] = RGB2xToU(r, g, b);
814     dst_v[0] = RGB2xToV(r, g, b);
815 #endif
816   }
817 }
818 
ARGB1555ToUVRow_C(const uint8_t * src_argb1555,int src_stride_argb1555,uint8_t * dst_u,uint8_t * dst_v,int width)819 void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
820                        int src_stride_argb1555,
821                        uint8_t* dst_u,
822                        uint8_t* dst_v,
823                        int width) {
824   const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
825   int x;
826   for (x = 0; x < width - 1; x += 2) {
827     uint8_t b0 = src_argb1555[0] & 0x1f;
828     uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
829     uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
830     uint8_t b1 = src_argb1555[2] & 0x1f;
831     uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
832     uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
833     uint8_t b2 = next_argb1555[0] & 0x1f;
834     uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
835     uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
836     uint8_t b3 = next_argb1555[2] & 0x1f;
837     uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
838     uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
839 
840     b0 = (b0 << 3) | (b0 >> 2);
841     g0 = (g0 << 3) | (g0 >> 2);
842     r0 = (r0 << 3) | (r0 >> 2);
843     b1 = (b1 << 3) | (b1 >> 2);
844     g1 = (g1 << 3) | (g1 >> 2);
845     r1 = (r1 << 3) | (r1 >> 2);
846     b2 = (b2 << 3) | (b2 >> 2);
847     g2 = (g2 << 3) | (g2 >> 2);
848     r2 = (r2 << 3) | (r2 >> 2);
849     b3 = (b3 << 3) | (b3 >> 2);
850     g3 = (g3 << 3) | (g3 >> 2);
851     r3 = (r3 << 3) | (r3 >> 2);
852 
853 #if LIBYUV_ARGBTOUV_PAVGB
854     uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
855     uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
856     uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
857     dst_u[0] = RGBToU(ar, ag, ab);
858     dst_v[0] = RGBToV(ar, ag, ab);
859 #else
860     uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
861     uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
862     uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
863     dst_u[0] = RGB2xToU(r, g, b);
864     dst_v[0] = RGB2xToV(r, g, b);
865 #endif
866 
867     src_argb1555 += 4;
868     next_argb1555 += 4;
869     dst_u += 1;
870     dst_v += 1;
871   }
872   if (width & 1) {
873     uint8_t b0 = src_argb1555[0] & 0x1f;
874     uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
875     uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
876     uint8_t b2 = next_argb1555[0] & 0x1f;
877     uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
878     uint8_t r2 = next_argb1555[1] >> 3;
879 
880     b0 = (b0 << 3) | (b0 >> 2);
881     g0 = (g0 << 3) | (g0 >> 2);
882     r0 = (r0 << 3) | (r0 >> 2);
883     b2 = (b2 << 3) | (b2 >> 2);
884     g2 = (g2 << 3) | (g2 >> 2);
885     r2 = (r2 << 3) | (r2 >> 2);
886 
887 #if LIBYUV_ARGBTOUV_PAVGB
888     uint8_t ab = AVGB(b0, b2);
889     uint8_t ag = AVGB(g0, g2);
890     uint8_t ar = AVGB(r0, r2);
891     dst_u[0] = RGBToU(ar, ag, ab);
892     dst_v[0] = RGBToV(ar, ag, ab);
893 #else
894     uint16_t b = b0 + b2;
895     uint16_t g = g0 + g2;
896     uint16_t r = r0 + r2;
897     dst_u[0] = RGB2xToU(r, g, b);
898     dst_v[0] = RGB2xToV(r, g, b);
899 #endif
900   }
901 }
902 
ARGB4444ToUVRow_C(const uint8_t * src_argb4444,int src_stride_argb4444,uint8_t * dst_u,uint8_t * dst_v,int width)903 void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
904                        int src_stride_argb4444,
905                        uint8_t* dst_u,
906                        uint8_t* dst_v,
907                        int width) {
908   const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
909   int x;
910   for (x = 0; x < width - 1; x += 2) {
911     uint8_t b0 = src_argb4444[0] & 0x0f;
912     uint8_t g0 = src_argb4444[0] >> 4;
913     uint8_t r0 = src_argb4444[1] & 0x0f;
914     uint8_t b1 = src_argb4444[2] & 0x0f;
915     uint8_t g1 = src_argb4444[2] >> 4;
916     uint8_t r1 = src_argb4444[3] & 0x0f;
917     uint8_t b2 = next_argb4444[0] & 0x0f;
918     uint8_t g2 = next_argb4444[0] >> 4;
919     uint8_t r2 = next_argb4444[1] & 0x0f;
920     uint8_t b3 = next_argb4444[2] & 0x0f;
921     uint8_t g3 = next_argb4444[2] >> 4;
922     uint8_t r3 = next_argb4444[3] & 0x0f;
923 
924     b0 = (b0 << 4) | b0;
925     g0 = (g0 << 4) | g0;
926     r0 = (r0 << 4) | r0;
927     b1 = (b1 << 4) | b1;
928     g1 = (g1 << 4) | g1;
929     r1 = (r1 << 4) | r1;
930     b2 = (b2 << 4) | b2;
931     g2 = (g2 << 4) | g2;
932     r2 = (r2 << 4) | r2;
933     b3 = (b3 << 4) | b3;
934     g3 = (g3 << 4) | g3;
935     r3 = (r3 << 4) | r3;
936 
937 #if LIBYUV_ARGBTOUV_PAVGB
938     uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
939     uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
940     uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
941     dst_u[0] = RGBToU(ar, ag, ab);
942     dst_v[0] = RGBToV(ar, ag, ab);
943 #else
944     uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
945     uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
946     uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
947     dst_u[0] = RGB2xToU(r, g, b);
948     dst_v[0] = RGB2xToV(r, g, b);
949 #endif
950 
951     src_argb4444 += 4;
952     next_argb4444 += 4;
953     dst_u += 1;
954     dst_v += 1;
955   }
956   if (width & 1) {
957     uint8_t b0 = src_argb4444[0] & 0x0f;
958     uint8_t g0 = src_argb4444[0] >> 4;
959     uint8_t r0 = src_argb4444[1] & 0x0f;
960     uint8_t b2 = next_argb4444[0] & 0x0f;
961     uint8_t g2 = next_argb4444[0] >> 4;
962     uint8_t r2 = next_argb4444[1] & 0x0f;
963 
964     b0 = (b0 << 4) | b0;
965     g0 = (g0 << 4) | g0;
966     r0 = (r0 << 4) | r0;
967     b2 = (b2 << 4) | b2;
968     g2 = (g2 << 4) | g2;
969     r2 = (r2 << 4) | r2;
970 
971 #if LIBYUV_ARGBTOUV_PAVGB
972     uint8_t ab = AVGB(b0, b2);
973     uint8_t ag = AVGB(g0, g2);
974     uint8_t ar = AVGB(r0, r2);
975     dst_u[0] = RGBToU(ar, ag, ab);
976     dst_v[0] = RGBToV(ar, ag, ab);
977 #else
978     uint16_t b = b0 + b2;
979     uint16_t g = g0 + g2;
980     uint16_t r = r0 + r2;
981     dst_u[0] = RGB2xToU(r, g, b);
982     dst_v[0] = RGB2xToV(r, g, b);
983 #endif
984   }
985 }
986 
ARGBToUV444Row_C(const uint8_t * src_argb,uint8_t * dst_u,uint8_t * dst_v,int width)987 void ARGBToUV444Row_C(const uint8_t* src_argb,
988                       uint8_t* dst_u,
989                       uint8_t* dst_v,
990                       int width) {
991   int x;
992   for (x = 0; x < width; ++x) {
993     uint8_t ab = src_argb[0];
994     uint8_t ag = src_argb[1];
995     uint8_t ar = src_argb[2];
996     dst_u[0] = RGBToU(ar, ag, ab);
997     dst_v[0] = RGBToV(ar, ag, ab);
998     src_argb += 4;
999     dst_u += 1;
1000     dst_v += 1;
1001   }
1002 }
1003 
ARGBGrayRow_C(const uint8_t * src_argb,uint8_t * dst_argb,int width)1004 void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
1005   int x;
1006   for (x = 0; x < width; ++x) {
1007     uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
1008     dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
1009     dst_argb[3] = src_argb[3];
1010     dst_argb += 4;
1011     src_argb += 4;
1012   }
1013 }
1014 
1015 // Convert a row of image to Sepia tone.
ARGBSepiaRow_C(uint8_t * dst_argb,int width)1016 void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
1017   int x;
1018   for (x = 0; x < width; ++x) {
1019     int b = dst_argb[0];
1020     int g = dst_argb[1];
1021     int r = dst_argb[2];
1022     int sb = (b * 17 + g * 68 + r * 35) >> 7;
1023     int sg = (b * 22 + g * 88 + r * 45) >> 7;
1024     int sr = (b * 24 + g * 98 + r * 50) >> 7;
1025     // b does not over flow. a is preserved from original.
1026     dst_argb[0] = sb;
1027     dst_argb[1] = clamp255(sg);
1028     dst_argb[2] = clamp255(sr);
1029     dst_argb += 4;
1030   }
1031 }
1032 
1033 // Apply color matrix to a row of image. Matrix is signed.
1034 // TODO(fbarchard): Consider adding rounding (+32).
ARGBColorMatrixRow_C(const uint8_t * src_argb,uint8_t * dst_argb,const int8_t * matrix_argb,int width)1035 void ARGBColorMatrixRow_C(const uint8_t* src_argb,
1036                           uint8_t* dst_argb,
1037                           const int8_t* matrix_argb,
1038                           int width) {
1039   int x;
1040   for (x = 0; x < width; ++x) {
1041     int b = src_argb[0];
1042     int g = src_argb[1];
1043     int r = src_argb[2];
1044     int a = src_argb[3];
1045     int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
1046               a * matrix_argb[3]) >>
1047              6;
1048     int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
1049               a * matrix_argb[7]) >>
1050              6;
1051     int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
1052               a * matrix_argb[11]) >>
1053              6;
1054     int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
1055               a * matrix_argb[15]) >>
1056              6;
1057     dst_argb[0] = Clamp(sb);
1058     dst_argb[1] = Clamp(sg);
1059     dst_argb[2] = Clamp(sr);
1060     dst_argb[3] = Clamp(sa);
1061     src_argb += 4;
1062     dst_argb += 4;
1063   }
1064 }
1065 
1066 // Apply color table to a row of image.
ARGBColorTableRow_C(uint8_t * dst_argb,const uint8_t * table_argb,int width)1067 void ARGBColorTableRow_C(uint8_t* dst_argb,
1068                          const uint8_t* table_argb,
1069                          int width) {
1070   int x;
1071   for (x = 0; x < width; ++x) {
1072     int b = dst_argb[0];
1073     int g = dst_argb[1];
1074     int r = dst_argb[2];
1075     int a = dst_argb[3];
1076     dst_argb[0] = table_argb[b * 4 + 0];
1077     dst_argb[1] = table_argb[g * 4 + 1];
1078     dst_argb[2] = table_argb[r * 4 + 2];
1079     dst_argb[3] = table_argb[a * 4 + 3];
1080     dst_argb += 4;
1081   }
1082 }
1083 
1084 // Apply color table to a row of image.
RGBColorTableRow_C(uint8_t * dst_argb,const uint8_t * table_argb,int width)1085 void RGBColorTableRow_C(uint8_t* dst_argb,
1086                         const uint8_t* table_argb,
1087                         int width) {
1088   int x;
1089   for (x = 0; x < width; ++x) {
1090     int b = dst_argb[0];
1091     int g = dst_argb[1];
1092     int r = dst_argb[2];
1093     dst_argb[0] = table_argb[b * 4 + 0];
1094     dst_argb[1] = table_argb[g * 4 + 1];
1095     dst_argb[2] = table_argb[r * 4 + 2];
1096     dst_argb += 4;
1097   }
1098 }
1099 
ARGBQuantizeRow_C(uint8_t * dst_argb,int scale,int interval_size,int interval_offset,int width)1100 void ARGBQuantizeRow_C(uint8_t* dst_argb,
1101                        int scale,
1102                        int interval_size,
1103                        int interval_offset,
1104                        int width) {
1105   int x;
1106   for (x = 0; x < width; ++x) {
1107     int b = dst_argb[0];
1108     int g = dst_argb[1];
1109     int r = dst_argb[2];
1110     dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
1111     dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
1112     dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
1113     dst_argb += 4;
1114   }
1115 }
1116 
1117 #define REPEAT8(v) (v) | ((v) << 8)
1118 #define SHADE(f, v) v* f >> 24
1119 
ARGBShadeRow_C(const uint8_t * src_argb,uint8_t * dst_argb,int width,uint32_t value)1120 void ARGBShadeRow_C(const uint8_t* src_argb,
1121                     uint8_t* dst_argb,
1122                     int width,
1123                     uint32_t value) {
1124   const uint32_t b_scale = REPEAT8(value & 0xff);
1125   const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
1126   const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
1127   const uint32_t a_scale = REPEAT8(value >> 24);
1128 
1129   int i;
1130   for (i = 0; i < width; ++i) {
1131     const uint32_t b = REPEAT8(src_argb[0]);
1132     const uint32_t g = REPEAT8(src_argb[1]);
1133     const uint32_t r = REPEAT8(src_argb[2]);
1134     const uint32_t a = REPEAT8(src_argb[3]);
1135     dst_argb[0] = SHADE(b, b_scale);
1136     dst_argb[1] = SHADE(g, g_scale);
1137     dst_argb[2] = SHADE(r, r_scale);
1138     dst_argb[3] = SHADE(a, a_scale);
1139     src_argb += 4;
1140     dst_argb += 4;
1141   }
1142 }
1143 #undef REPEAT8
1144 #undef SHADE
1145 
1146 #define REPEAT8(v) (v) | ((v) << 8)
1147 #define SHADE(f, v) v* f >> 16
1148 
ARGBMultiplyRow_C(const uint8_t * src_argb0,const uint8_t * src_argb1,uint8_t * dst_argb,int width)1149 void ARGBMultiplyRow_C(const uint8_t* src_argb0,
1150                        const uint8_t* src_argb1,
1151                        uint8_t* dst_argb,
1152                        int width) {
1153   int i;
1154   for (i = 0; i < width; ++i) {
1155     const uint32_t b = REPEAT8(src_argb0[0]);
1156     const uint32_t g = REPEAT8(src_argb0[1]);
1157     const uint32_t r = REPEAT8(src_argb0[2]);
1158     const uint32_t a = REPEAT8(src_argb0[3]);
1159     const uint32_t b_scale = src_argb1[0];
1160     const uint32_t g_scale = src_argb1[1];
1161     const uint32_t r_scale = src_argb1[2];
1162     const uint32_t a_scale = src_argb1[3];
1163     dst_argb[0] = SHADE(b, b_scale);
1164     dst_argb[1] = SHADE(g, g_scale);
1165     dst_argb[2] = SHADE(r, r_scale);
1166     dst_argb[3] = SHADE(a, a_scale);
1167     src_argb0 += 4;
1168     src_argb1 += 4;
1169     dst_argb += 4;
1170   }
1171 }
1172 #undef REPEAT8
1173 #undef SHADE
1174 
1175 #define SHADE(f, v) clamp255(v + f)
1176 
ARGBAddRow_C(const uint8_t * src_argb0,const uint8_t * src_argb1,uint8_t * dst_argb,int width)1177 void ARGBAddRow_C(const uint8_t* src_argb0,
1178                   const uint8_t* src_argb1,
1179                   uint8_t* dst_argb,
1180                   int width) {
1181   int i;
1182   for (i = 0; i < width; ++i) {
1183     const int b = src_argb0[0];
1184     const int g = src_argb0[1];
1185     const int r = src_argb0[2];
1186     const int a = src_argb0[3];
1187     const int b_add = src_argb1[0];
1188     const int g_add = src_argb1[1];
1189     const int r_add = src_argb1[2];
1190     const int a_add = src_argb1[3];
1191     dst_argb[0] = SHADE(b, b_add);
1192     dst_argb[1] = SHADE(g, g_add);
1193     dst_argb[2] = SHADE(r, r_add);
1194     dst_argb[3] = SHADE(a, a_add);
1195     src_argb0 += 4;
1196     src_argb1 += 4;
1197     dst_argb += 4;
1198   }
1199 }
1200 #undef SHADE
1201 
1202 #define SHADE(f, v) clamp0(f - v)
1203 
ARGBSubtractRow_C(const uint8_t * src_argb0,const uint8_t * src_argb1,uint8_t * dst_argb,int width)1204 void ARGBSubtractRow_C(const uint8_t* src_argb0,
1205                        const uint8_t* src_argb1,
1206                        uint8_t* dst_argb,
1207                        int width) {
1208   int i;
1209   for (i = 0; i < width; ++i) {
1210     const int b = src_argb0[0];
1211     const int g = src_argb0[1];
1212     const int r = src_argb0[2];
1213     const int a = src_argb0[3];
1214     const int b_sub = src_argb1[0];
1215     const int g_sub = src_argb1[1];
1216     const int r_sub = src_argb1[2];
1217     const int a_sub = src_argb1[3];
1218     dst_argb[0] = SHADE(b, b_sub);
1219     dst_argb[1] = SHADE(g, g_sub);
1220     dst_argb[2] = SHADE(r, r_sub);
1221     dst_argb[3] = SHADE(a, a_sub);
1222     src_argb0 += 4;
1223     src_argb1 += 4;
1224     dst_argb += 4;
1225   }
1226 }
1227 #undef SHADE
1228 
1229 // Sobel functions which mimics SSSE3.
SobelXRow_C(const uint8_t * src_y0,const uint8_t * src_y1,const uint8_t * src_y2,uint8_t * dst_sobelx,int width)1230 void SobelXRow_C(const uint8_t* src_y0,
1231                  const uint8_t* src_y1,
1232                  const uint8_t* src_y2,
1233                  uint8_t* dst_sobelx,
1234                  int width) {
1235   int i;
1236   for (i = 0; i < width; ++i) {
1237     int a = src_y0[i];
1238     int b = src_y1[i];
1239     int c = src_y2[i];
1240     int a_sub = src_y0[i + 2];
1241     int b_sub = src_y1[i + 2];
1242     int c_sub = src_y2[i + 2];
1243     int a_diff = a - a_sub;
1244     int b_diff = b - b_sub;
1245     int c_diff = c - c_sub;
1246     int sobel = Abs(a_diff + b_diff * 2 + c_diff);
1247     dst_sobelx[i] = (uint8_t)(clamp255(sobel));
1248   }
1249 }
1250 
SobelYRow_C(const uint8_t * src_y0,const uint8_t * src_y1,uint8_t * dst_sobely,int width)1251 void SobelYRow_C(const uint8_t* src_y0,
1252                  const uint8_t* src_y1,
1253                  uint8_t* dst_sobely,
1254                  int width) {
1255   int i;
1256   for (i = 0; i < width; ++i) {
1257     int a = src_y0[i + 0];
1258     int b = src_y0[i + 1];
1259     int c = src_y0[i + 2];
1260     int a_sub = src_y1[i + 0];
1261     int b_sub = src_y1[i + 1];
1262     int c_sub = src_y1[i + 2];
1263     int a_diff = a - a_sub;
1264     int b_diff = b - b_sub;
1265     int c_diff = c - c_sub;
1266     int sobel = Abs(a_diff + b_diff * 2 + c_diff);
1267     dst_sobely[i] = (uint8_t)(clamp255(sobel));
1268   }
1269 }
1270 
SobelRow_C(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_argb,int width)1271 void SobelRow_C(const uint8_t* src_sobelx,
1272                 const uint8_t* src_sobely,
1273                 uint8_t* dst_argb,
1274                 int width) {
1275   int i;
1276   for (i = 0; i < width; ++i) {
1277     int r = src_sobelx[i];
1278     int b = src_sobely[i];
1279     int s = clamp255(r + b);
1280     dst_argb[0] = (uint8_t)(s);
1281     dst_argb[1] = (uint8_t)(s);
1282     dst_argb[2] = (uint8_t)(s);
1283     dst_argb[3] = (uint8_t)(255u);
1284     dst_argb += 4;
1285   }
1286 }
1287 
SobelToPlaneRow_C(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_y,int width)1288 void SobelToPlaneRow_C(const uint8_t* src_sobelx,
1289                        const uint8_t* src_sobely,
1290                        uint8_t* dst_y,
1291                        int width) {
1292   int i;
1293   for (i = 0; i < width; ++i) {
1294     int r = src_sobelx[i];
1295     int b = src_sobely[i];
1296     int s = clamp255(r + b);
1297     dst_y[i] = (uint8_t)(s);
1298   }
1299 }
1300 
SobelXYRow_C(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_argb,int width)1301 void SobelXYRow_C(const uint8_t* src_sobelx,
1302                   const uint8_t* src_sobely,
1303                   uint8_t* dst_argb,
1304                   int width) {
1305   int i;
1306   for (i = 0; i < width; ++i) {
1307     int r = src_sobelx[i];
1308     int b = src_sobely[i];
1309     int g = clamp255(r + b);
1310     dst_argb[0] = (uint8_t)(b);
1311     dst_argb[1] = (uint8_t)(g);
1312     dst_argb[2] = (uint8_t)(r);
1313     dst_argb[3] = (uint8_t)(255u);
1314     dst_argb += 4;
1315   }
1316 }
1317 
J400ToARGBRow_C(const uint8_t * src_y,uint8_t * dst_argb,int width)1318 void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
1319   // Copy a Y to RGB.
1320   int x;
1321   for (x = 0; x < width; ++x) {
1322     uint8_t y = src_y[0];
1323     dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
1324     dst_argb[3] = 255u;
1325     dst_argb += 4;
1326     ++src_y;
1327   }
1328 }
1329 
1330 // TODO(fbarchard): Unify these structures to be platform independent.
1331 // TODO(fbarchard): Generate SIMD structures from float matrix.
1332 
1333 // BT.601 YUV to RGB reference
1334 //  R = (Y - 16) * 1.164              - V * -1.596
1335 //  G = (Y - 16) * 1.164 - U *  0.391 - V *  0.813
1336 //  B = (Y - 16) * 1.164 - U * -2.018
1337 
1338 // Y contribution to R,G,B.  Scale and bias.
1339 #define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
1340 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1341 
1342 // U and V contributions to R,G,B.
1343 #define UB -128 /* max(-128, round(-2.018 * 64)) */
1344 #define UG 25   /* round(0.391 * 64) */
1345 #define VG 52   /* round(0.813 * 64) */
1346 #define VR -102 /* round(-1.596 * 64) */
1347 
1348 // Bias values to subtract 16 from Y and 128 from U and V.
1349 #define BB (UB * 128 + YGB)
1350 #define BG (UG * 128 + VG * 128 + YGB)
1351 #define BR (VR * 128 + YGB)
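// Illustrative check of the fixed point scheme above (not part of the build):
// the coefficients are scaled by 64 and YuvPixel() later in this file shifts
// the accumulated sum right by 6.  For limited-range peak white, Y = 235 and
// U = V = 128:
//   y1 = (235 * 0x0101 * YG) >> 16 = 17506
//   B  = Clamp((-(128 * UB) + y1 + BB) >> 6)
//      = Clamp((16384 + 17506 - 17544) >> 6) = 255
// so BT.601 white maps to full-range 255 as expected.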
1352 
1353 #if defined(__aarch64__)  // 64 bit arm
1354 const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
1355     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1356     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1357     {UG, VG, UG, VG, UG, VG, UG, VG},
1358     {UG, VG, UG, VG, UG, VG, UG, VG},
1359     {BB, BG, BR, YGB, 0, 0, 0, 0},
1360     {0x0101 * YG, YG, 0, 0}};
1361 const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
1362     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1363     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1364     {VG, UG, VG, UG, VG, UG, VG, UG},
1365     {VG, UG, VG, UG, VG, UG, VG, UG},
1366     {BR, BG, BB, YGB, 0, 0, 0, 0},
1367     {0x0101 * YG, YG, 0, 0}};
1368 #elif defined(__arm__)  // 32 bit arm
1369 const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
1370     {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
1371     {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
1372     {BB, BG, BR, YGB, 0, 0, 0, 0},
1373     {0x0101 * YG, YG, 0, 0}};
1374 const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
1375     {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
1376     {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
1377     {BR, BG, BB, YGB, 0, 0, 0, 0},
1378     {0x0101 * YG, YG, 0, 0}};
1379 #else
1380 const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
1381     {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
1382      UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
1383     {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
1384      UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
1385     {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
1386      0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
1387     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1388     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1389     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1390     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
1391     {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
1392      YGB}};
1393 const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
1394     {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
1395      VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
1396     {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
1397      VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
1398     {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
1399      0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
1400     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1401     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1402     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1403     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
1404     {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
1405      YGB}};
1406 #endif
1407 
1408 #undef BB
1409 #undef BG
1410 #undef BR
1411 #undef YGB
1412 #undef UB
1413 #undef UG
1414 #undef VG
1415 #undef VR
1416 #undef YG
1417 
1418 // JPEG YUV to RGB reference
1419 // *  R = Y                - V * -1.40200
1420 // *  G = Y - U *  0.34414 - V *  0.71414
1421 // *  B = Y - U * -1.77200
1422 
1423 // Y contribution to R,G,B.  Scale and bias.
1424 #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
1425 #define YGB 32   /* 64 / 2 */
1426 
1427 // U and V contributions to R,G,B.
1428 #define UB -113 /* round(-1.77200 * 64) */
1429 #define UG 22   /* round(0.34414 * 64) */
1430 #define VG 46   /* round(0.71414  * 64) */
1431 #define VR -90  /* round(-1.40200 * 64) */
1432 
1433 // Bias values to round, and subtract 128 from U and V.
1434 #define BB (UB * 128 + YGB)
1435 #define BG (UG * 128 + VG * 128 + YGB)
1436 #define BR (VR * 128 + YGB)
1437 
1438 #if defined(__aarch64__)
1439 const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
1440     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1441     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1442     {UG, VG, UG, VG, UG, VG, UG, VG},
1443     {UG, VG, UG, VG, UG, VG, UG, VG},
1444     {BB, BG, BR, YGB, 0, 0, 0, 0},
1445     {0x0101 * YG, YG, 0, 0}};
1446 const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
1447     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1448     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1449     {VG, UG, VG, UG, VG, UG, VG, UG},
1450     {VG, UG, VG, UG, VG, UG, VG, UG},
1451     {BR, BG, BB, YGB, 0, 0, 0, 0},
1452     {0x0101 * YG, YG, 0, 0}};
1453 #elif defined(__arm__)
1454 const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
1455     {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
1456     {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
1457     {BB, BG, BR, YGB, 0, 0, 0, 0},
1458     {0x0101 * YG, YG, 0, 0}};
1459 const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
1460     {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
1461     {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
1462     {BR, BG, BB, YGB, 0, 0, 0, 0},
1463     {0x0101 * YG, YG, 0, 0}};
1464 #else
1465 const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
1466     {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
1467      UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
1468     {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
1469      UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
1470     {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
1471      0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
1472     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1473     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1474     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1475     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
1476     {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
1477      YGB}};
1478 const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
1479     {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
1480      VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
1481     {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
1482      VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
1483     {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
1484      0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
1485     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1486     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1487     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1488     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
1489     {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
1490      YGB}};
1491 #endif
1492 
1493 #undef BB
1494 #undef BG
1495 #undef BR
1496 #undef YGB
1497 #undef UB
1498 #undef UG
1499 #undef VG
1500 #undef VR
1501 #undef YG
1502 
1503 // BT.709 YUV to RGB reference
1504 //  R = (Y - 16) * 1.164              - V * -1.793
1505 //  G = (Y - 16) * 1.164 - U *  0.213 - V *  0.533
1506 //  B = (Y - 16) * 1.164 - U * -2.112
1507 // See also http://www.equasys.de/colorconversion.html
1508 
1509 // Y contribution to R,G,B.  Scale and bias.
1510 #define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
1511 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1512 
1513 // TODO(fbarchard): Find way to express 2.112 instead of 2.0.
1514 // U and V contributions to R,G,B.
1515 #define UB -128 /* max(-128, round(-2.112 * 64)) */
1516 #define UG 14   /* round(0.213 * 64) */
1517 #define VG 34   /* round(0.533  * 64) */
1518 #define VR -115 /* round(-1.793 * 64) */
1519 
1520 // Bias values to round, and subtract 128 from U and V.
1521 #define BB (UB * 128 + YGB)
1522 #define BG (UG * 128 + VG * 128 + YGB)
1523 #define BR (VR * 128 + YGB)
1524 
1525 #if defined(__aarch64__)
1526 const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
1527     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1528     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1529     {UG, VG, UG, VG, UG, VG, UG, VG},
1530     {UG, VG, UG, VG, UG, VG, UG, VG},
1531     {BB, BG, BR, YGB, 0, 0, 0, 0},
1532     {0x0101 * YG, YG, 0, 0}};
1533 const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
1534     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1535     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1536     {VG, UG, VG, UG, VG, UG, VG, UG},
1537     {VG, UG, VG, UG, VG, UG, VG, UG},
1538     {BR, BG, BB, YGB, 0, 0, 0, 0},
1539     {0x0101 * YG, YG, 0, 0}};
1540 #elif defined(__arm__)
1541 const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
1542     {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
1543     {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
1544     {BB, BG, BR, YGB, 0, 0, 0, 0},
1545     {0x0101 * YG, YG, 0, 0}};
1546 const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
1547     {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
1548     {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
1549     {BR, BG, BB, YGB, 0, 0, 0, 0},
1550     {0x0101 * YG, YG, 0, 0}};
1551 #else
1552 const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
1553     {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
1554      UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
1555     {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
1556      UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
1557     {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
1558      0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
1559     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1560     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1561     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1562     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
1563     {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
1564      YGB}};
1565 const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
1566     {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
1567      VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
1568     {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
1569      VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
1570     {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
1571      0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
1572     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1573     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1574     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1575     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
1576     {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
1577      YGB}};
1578 #endif
1579 
1580 #undef BB
1581 #undef BG
1582 #undef BR
1583 #undef YGB
1584 #undef UB
1585 #undef UG
1586 #undef VG
1587 #undef VR
1588 #undef YG
1589 
1590 // BT.2020 YUV to RGB reference
1591 //  R = (Y - 16) * 1.164384                - V * -1.67867
1592 //  G = (Y - 16) * 1.164384 - U * 0.187326 - V *  0.65042
1593 //  B = (Y - 16) * 1.164384 - U * -2.14177
1594 
1595 // Y contribution to R,G,B.  Scale and bias.
1596 #define YG 19003  /* round(1.164384 * 64 * 256 * 256 / 257) */
1597 #define YGB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
1598 
1599 // TODO(fbarchard): Improve accuracy; the B channel is off by 7%.
1600 // U and V contributions to R,G,B.
1601 #define UB -128 /* max(-128, round(-2.142 * 64)) */
1602 #define UG 12   /* round(0.187326 * 64) */
1603 #define VG 42   /* round(0.65042 * 64) */
1604 #define VR -107 /* round(-1.67867 * 64) */
1605 
1606 // Bias values to round, and subtract 128 from U and V.
1607 #define BB (UB * 128 + YGB)
1608 #define BG (UG * 128 + VG * 128 + YGB)
1609 #define BR (VR * 128 + YGB)
1610 
1611 #if defined(__aarch64__)
1612 const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
1613     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1614     {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
1615     {UG, VG, UG, VG, UG, VG, UG, VG},
1616     {UG, VG, UG, VG, UG, VG, UG, VG},
1617     {BB, BG, BR, YGB, 0, 0, 0, 0},
1618     {0x0101 * YG, YG, 0, 0}};
1619 const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
1620     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1621     {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
1622     {VG, UG, VG, UG, VG, UG, VG, UG},
1623     {VG, UG, VG, UG, VG, UG, VG, UG},
1624     {BR, BG, BB, YGB, 0, 0, 0, 0},
1625     {0x0101 * YG, YG, 0, 0}};
1626 #elif defined(__arm__)
1627 const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
1628     {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
1629     {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
1630     {BB, BG, BR, YGB, 0, 0, 0, 0},
1631     {0x0101 * YG, YG, 0, 0}};
1632 const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
1633     {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
1634     {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
1635     {BR, BG, BB, YGB, 0, 0, 0, 0},
1636     {0x0101 * YG, YG, 0, 0}};
1637 #else
1638 const struct YuvConstants SIMD_ALIGNED(kYuv2020Constants) = {
1639     {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
1640      UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
1641     {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
1642      UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
1643     {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
1644      0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
1645     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1646     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1647     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1648     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
1649     {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
1650      YGB}};
1651 const struct YuvConstants SIMD_ALIGNED(kYvu2020Constants) = {
1652     {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
1653      VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
1654     {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
1655      VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
1656     {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
1657      0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
1658     {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
1659     {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
1660     {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
1661     {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG},
1662     {YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB, YGB,
1663      YGB}};
1664 #endif
1665 
1666 #undef BB
1667 #undef BG
1668 #undef BR
1669 #undef YGB
1670 #undef UB
1671 #undef UG
1672 #undef VG
1673 #undef VR
1674 #undef YG
1675 
1676 // C reference code that mimics the YUV assembly.
1677 // Reads 8 bit YUV and converts to 8 bit RGB, clamped to [0, 255].
1678 static __inline void YuvPixel(uint8_t y,
1679                               uint8_t u,
1680                               uint8_t v,
1681                               uint8_t* b,
1682                               uint8_t* g,
1683                               uint8_t* r,
1684                               const struct YuvConstants* yuvconstants) {
1685 #if defined(__aarch64__)
1686   int ub = -yuvconstants->kUVToRB[0];
1687   int ug = yuvconstants->kUVToG[0];
1688   int vg = yuvconstants->kUVToG[1];
1689   int vr = -yuvconstants->kUVToRB[1];
1690   int bb = yuvconstants->kUVBiasBGR[0];
1691   int bg = yuvconstants->kUVBiasBGR[1];
1692   int br = yuvconstants->kUVBiasBGR[2];
1693   int yg = yuvconstants->kYToRgb[1];
1694 #elif defined(__arm__)
1695   int ub = -yuvconstants->kUVToRB[0];
1696   int ug = yuvconstants->kUVToG[0];
1697   int vg = yuvconstants->kUVToG[4];
1698   int vr = -yuvconstants->kUVToRB[4];
1699   int bb = yuvconstants->kUVBiasBGR[0];
1700   int bg = yuvconstants->kUVBiasBGR[1];
1701   int br = yuvconstants->kUVBiasBGR[2];
1702   int yg = yuvconstants->kYToRgb[1];
1703 #else
1704   int ub = yuvconstants->kUVToB[0];
1705   int ug = yuvconstants->kUVToG[0];
1706   int vg = yuvconstants->kUVToG[1];
1707   int vr = yuvconstants->kUVToR[1];
1708   int bb = yuvconstants->kUVBiasB[0];
1709   int bg = yuvconstants->kUVBiasG[0];
1710   int br = yuvconstants->kUVBiasR[0];
1711   int yg = yuvconstants->kYToRgb[0];
1712 #endif
1713 
1714   uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1715   *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6);
1716   *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6);
1717   *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6);
1718 }
1719 
1720 // Reads 8 bit YUV and leaves result as 16 bit.
1721 static __inline void YuvPixel8_16(uint8_t y,
1722                                   uint8_t u,
1723                                   uint8_t v,
1724                                   int* b,
1725                                   int* g,
1726                                   int* r,
1727                                   const struct YuvConstants* yuvconstants) {
1728 #if defined(__aarch64__)
1729   int ub = -yuvconstants->kUVToRB[0];
1730   int ug = yuvconstants->kUVToG[0];
1731   int vg = yuvconstants->kUVToG[1];
1732   int vr = -yuvconstants->kUVToRB[1];
1733   int bb = yuvconstants->kUVBiasBGR[0];
1734   int bg = yuvconstants->kUVBiasBGR[1];
1735   int br = yuvconstants->kUVBiasBGR[2];
1736   int yg = yuvconstants->kYToRgb[1];
1737 #elif defined(__arm__)
1738   int ub = -yuvconstants->kUVToRB[0];
1739   int ug = yuvconstants->kUVToG[0];
1740   int vg = yuvconstants->kUVToG[4];
1741   int vr = -yuvconstants->kUVToRB[4];
1742   int bb = yuvconstants->kUVBiasBGR[0];
1743   int bg = yuvconstants->kUVBiasBGR[1];
1744   int br = yuvconstants->kUVBiasBGR[2];
1745   int yg = yuvconstants->kYToRgb[1];
1746 #else
1747   int ub = yuvconstants->kUVToB[0];
1748   int ug = yuvconstants->kUVToG[0];
1749   int vg = yuvconstants->kUVToG[1];
1750   int vr = yuvconstants->kUVToR[1];
1751   int bb = yuvconstants->kUVBiasB[0];
1752   int bg = yuvconstants->kUVBiasG[0];
1753   int br = yuvconstants->kUVBiasR[0];
1754   int yg = yuvconstants->kYToRgb[0];
1755 #endif
1756 
1757   uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1758   *b = (int)(-(u * ub) + y1 + bb);
1759   *g = (int)(-(u * ug + v * vg) + y1 + bg);
1760   *r = (int)(-(v * vr) + y1 + br);
1761 }
1762 
1763 // C reference code that mimics the YUV 16 bit assembly.
1764 // Reads 10 bit YUV and leaves result as 16 bit.
1765 static __inline void YuvPixel16(int16_t y,
1766                                 int16_t u,
1767                                 int16_t v,
1768                                 int* b,
1769                                 int* g,
1770                                 int* r,
1771                                 const struct YuvConstants* yuvconstants) {
1772 #if defined(__aarch64__)
1773   int ub = -yuvconstants->kUVToRB[0];
1774   int ug = yuvconstants->kUVToG[0];
1775   int vg = yuvconstants->kUVToG[1];
1776   int vr = -yuvconstants->kUVToRB[1];
1777   int bb = yuvconstants->kUVBiasBGR[0];
1778   int bg = yuvconstants->kUVBiasBGR[1];
1779   int br = yuvconstants->kUVBiasBGR[2];
1780   int yg = yuvconstants->kYToRgb[1];
1781 #elif defined(__arm__)
1782   int ub = -yuvconstants->kUVToRB[0];
1783   int ug = yuvconstants->kUVToG[0];
1784   int vg = yuvconstants->kUVToG[4];
1785   int vr = -yuvconstants->kUVToRB[4];
1786   int bb = yuvconstants->kUVBiasBGR[0];
1787   int bg = yuvconstants->kUVBiasBGR[1];
1788   int br = yuvconstants->kUVBiasBGR[2];
1789   int yg = yuvconstants->kYToRgb[1];
1790 #else
1791   int ub = yuvconstants->kUVToB[0];
1792   int ug = yuvconstants->kUVToG[0];
1793   int vg = yuvconstants->kUVToG[1];
1794   int vr = yuvconstants->kUVToR[1];
1795   int bb = yuvconstants->kUVBiasB[0];
1796   int bg = yuvconstants->kUVBiasG[0];
1797   int br = yuvconstants->kUVBiasR[0];
1798   int yg = yuvconstants->kYToRgb[0];
1799 #endif
1800 
1801   uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
1802   u = clamp255(u >> 2);
1803   v = clamp255(v >> 2);
1804   *b = (int)(-(u * ub) + y1 + bb);
1805   *g = (int)(-(u * ug + v * vg) + y1 + bg);
1806   *r = (int)(-(v * vr) + y1 + br);
1807 }
1808 
1809 // C reference code that mimics the YUV 10 bit assembly.
1810 // Reads 10 bit YUV and clamps down to 8 bit RGB.
1811 static __inline void YuvPixel10(uint16_t y,
1812                                 uint16_t u,
1813                                 uint16_t v,
1814                                 uint8_t* b,
1815                                 uint8_t* g,
1816                                 uint8_t* r,
1817                                 const struct YuvConstants* yuvconstants) {
1818   int b16;
1819   int g16;
1820   int r16;
1821   YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants);
1822   *b = Clamp(b16 >> 6);
1823   *g = Clamp(g16 >> 6);
1824   *r = Clamp(r16 >> 6);
1825 }
1826 
1827 // C reference code that mimics the YUV assembly.
1828 // Reads 8 bit Y and converts to 8 bit gray RGB (B = G = R), clamped.
1829 static __inline void YPixel(uint8_t y,
1830                             uint8_t* b,
1831                             uint8_t* g,
1832                             uint8_t* r,
1833                             const struct YuvConstants* yuvconstants) {
1834 #if defined(__aarch64__) || defined(__arm__)
1835   int ygb = yuvconstants->kUVBiasBGR[3];
1836   int yg = yuvconstants->kYToRgb[1];
1837 #else
1838   int ygb = yuvconstants->kYBiasToRgb[0];
1839   int yg = yuvconstants->kYToRgb[0];
1840 #endif
1841   uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1842   *b = Clamp(((int32_t)(y1) + ygb) >> 6);
1843   *g = Clamp(((int32_t)(y1) + ygb) >> 6);
1844   *r = Clamp(((int32_t)(y1) + ygb) >> 6);
1845 }
1846 
1847 #if !defined(LIBYUV_DISABLE_NEON) && \
1848     (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
1849 // C code that mimics the Neon assembly, which subsamples UV.
1850 // TODO(fbarchard): Remove subsampling from Neon.
1851 void I444ToARGBRow_C(const uint8_t* src_y,
1852                      const uint8_t* src_u,
1853                      const uint8_t* src_v,
1854                      uint8_t* rgb_buf,
1855                      const struct YuvConstants* yuvconstants,
1856                      int width) {
1857   int x;
1858   for (x = 0; x < width - 1; x += 2) {
1859     uint8_t u = (src_u[0] + src_u[1] + 1) >> 1;
1860     uint8_t v = (src_v[0] + src_v[1] + 1) >> 1;
1861     YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
1862              yuvconstants);
1863     rgb_buf[3] = 255;
1864     YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
1865              yuvconstants);
1866     rgb_buf[7] = 255;
1867     src_y += 2;
1868     src_u += 2;
1869     src_v += 2;
1870     rgb_buf += 8;  // Advance 2 pixels.
1871   }
1872   if (width & 1) {
1873     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1874              rgb_buf + 2, yuvconstants);
1875     rgb_buf[3] = 255;
1876   }
1877 }
1878 #else
1879 void I444ToARGBRow_C(const uint8_t* src_y,
1880                      const uint8_t* src_u,
1881                      const uint8_t* src_v,
1882                      uint8_t* rgb_buf,
1883                      const struct YuvConstants* yuvconstants,
1884                      int width) {
1885   int x;
1886   for (x = 0; x < width; ++x) {
1887     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1888              rgb_buf + 2, yuvconstants);
1889     rgb_buf[3] = 255;
1890     src_y += 1;
1891     src_u += 1;
1892     src_v += 1;
1893     rgb_buf += 4;  // Advance 1 pixel.
1894   }
1895 }
1896 #endif
1897 
1898 // Also used for 420
1899 void I422ToARGBRow_C(const uint8_t* src_y,
1900                      const uint8_t* src_u,
1901                      const uint8_t* src_v,
1902                      uint8_t* rgb_buf,
1903                      const struct YuvConstants* yuvconstants,
1904                      int width) {
1905   int x;
1906   for (x = 0; x < width - 1; x += 2) {
1907     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1908              rgb_buf + 2, yuvconstants);
1909     rgb_buf[3] = 255;
1910     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1911              rgb_buf + 6, yuvconstants);
1912     rgb_buf[7] = 255;
1913     src_y += 2;
1914     src_u += 1;
1915     src_v += 1;
1916     rgb_buf += 8;  // Advance 2 pixels.
1917   }
1918   if (width & 1) {
1919     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1920              rgb_buf + 2, yuvconstants);
1921     rgb_buf[3] = 255;
1922   }
1923 }
1924 
1925 // 10 bit YUV to ARGB
1926 void I210ToARGBRow_C(const uint16_t* src_y,
1927                      const uint16_t* src_u,
1928                      const uint16_t* src_v,
1929                      uint8_t* rgb_buf,
1930                      const struct YuvConstants* yuvconstants,
1931                      int width) {
1932   int x;
1933   for (x = 0; x < width - 1; x += 2) {
1934     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1935                rgb_buf + 2, yuvconstants);
1936     rgb_buf[3] = 255;
1937     YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
1938                rgb_buf + 6, yuvconstants);
1939     rgb_buf[7] = 255;
1940     src_y += 2;
1941     src_u += 1;
1942     src_v += 1;
1943     rgb_buf += 8;  // Advance 2 pixels.
1944   }
1945   if (width & 1) {
1946     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1947                rgb_buf + 2, yuvconstants);
1948     rgb_buf[3] = 255;
1949   }
1950 }
1951 
1952 static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
1953   uint32_t ar30;
1954   b = b >> 4;  // convert 10.6 to 10 bit.
1955   g = g >> 4;
1956   r = r >> 4;
1957   b = Clamp10(b);
1958   g = Clamp10(g);
1959   r = Clamp10(r);
1960   ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
1961   (*(uint32_t*)rgb_buf) = ar30;
1962 }
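// Illustrative inverse of StoreAR30() above (hypothetical helper, not used by
// libyuv): unpacks a little-endian AR30 word back into 10 bit channels plus
// the 2 bit alpha that the 0xc0000000 mask forces to opaque.
static __inline void UnpackAR30_Example(uint32_t ar30,
                                        int* b,
                                        int* g,
                                        int* r,
                                        int* a) {
  *b = (int)(ar30 & 0x3ff);          // bits 0..9
  *g = (int)((ar30 >> 10) & 0x3ff);  // bits 10..19
  *r = (int)((ar30 >> 20) & 0x3ff);  // bits 20..29
  *a = (int)(ar30 >> 30);            // bits 30..31; 3 = opaque
}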
1963 
1964 // 10 bit YUV to 10 bit AR30
1965 void I210ToAR30Row_C(const uint16_t* src_y,
1966                      const uint16_t* src_u,
1967                      const uint16_t* src_v,
1968                      uint8_t* rgb_buf,
1969                      const struct YuvConstants* yuvconstants,
1970                      int width) {
1971   int x;
1972   int b;
1973   int g;
1974   int r;
1975   for (x = 0; x < width - 1; x += 2) {
1976     YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
1977     StoreAR30(rgb_buf, b, g, r);
1978     YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
1979     StoreAR30(rgb_buf + 4, b, g, r);
1980     src_y += 2;
1981     src_u += 1;
1982     src_v += 1;
1983     rgb_buf += 8;  // Advance 2 pixels.
1984   }
1985   if (width & 1) {
1986     YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
1987     StoreAR30(rgb_buf, b, g, r);
1988   }
1989 }
1990 
1991 // 8 bit YUV to 10 bit AR30
1992 // Uses the same code path as 10 bit YUV, but shifts the 8 bit values up to 10 bits.
1993 void I422ToAR30Row_C(const uint8_t* src_y,
1994                      const uint8_t* src_u,
1995                      const uint8_t* src_v,
1996                      uint8_t* rgb_buf,
1997                      const struct YuvConstants* yuvconstants,
1998                      int width) {
1999   int x;
2000   int b;
2001   int g;
2002   int r;
2003   for (x = 0; x < width - 1; x += 2) {
2004     YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2005     StoreAR30(rgb_buf, b, g, r);
2006     YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2007     StoreAR30(rgb_buf + 4, b, g, r);
2008     src_y += 2;
2009     src_u += 1;
2010     src_v += 1;
2011     rgb_buf += 8;  // Advance 2 pixels.
2012   }
2013   if (width & 1) {
2014     YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2015     StoreAR30(rgb_buf, b, g, r);
2016   }
2017 }
2018 
2019 void I422AlphaToARGBRow_C(const uint8_t* src_y,
2020                           const uint8_t* src_u,
2021                           const uint8_t* src_v,
2022                           const uint8_t* src_a,
2023                           uint8_t* rgb_buf,
2024                           const struct YuvConstants* yuvconstants,
2025                           int width) {
2026   int x;
2027   for (x = 0; x < width - 1; x += 2) {
2028     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2029              rgb_buf + 2, yuvconstants);
2030     rgb_buf[3] = src_a[0];
2031     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2032              rgb_buf + 6, yuvconstants);
2033     rgb_buf[7] = src_a[1];
2034     src_y += 2;
2035     src_u += 1;
2036     src_v += 1;
2037     src_a += 2;
2038     rgb_buf += 8;  // Advance 2 pixels.
2039   }
2040   if (width & 1) {
2041     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2042              rgb_buf + 2, yuvconstants);
2043     rgb_buf[3] = src_a[0];
2044   }
2045 }
2046 
2047 void I422ToRGB24Row_C(const uint8_t* src_y,
2048                       const uint8_t* src_u,
2049                       const uint8_t* src_v,
2050                       uint8_t* rgb_buf,
2051                       const struct YuvConstants* yuvconstants,
2052                       int width) {
2053   int x;
2054   for (x = 0; x < width - 1; x += 2) {
2055     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2056              rgb_buf + 2, yuvconstants);
2057     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
2058              rgb_buf + 5, yuvconstants);
2059     src_y += 2;
2060     src_u += 1;
2061     src_v += 1;
2062     rgb_buf += 6;  // Advance 2 pixels.
2063   }
2064   if (width & 1) {
2065     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2066              rgb_buf + 2, yuvconstants);
2067   }
2068 }
2069 
2070 void I422ToARGB4444Row_C(const uint8_t* src_y,
2071                          const uint8_t* src_u,
2072                          const uint8_t* src_v,
2073                          uint8_t* dst_argb4444,
2074                          const struct YuvConstants* yuvconstants,
2075                          int width) {
2076   uint8_t b0;
2077   uint8_t g0;
2078   uint8_t r0;
2079   uint8_t b1;
2080   uint8_t g1;
2081   uint8_t r1;
2082   int x;
2083   for (x = 0; x < width - 1; x += 2) {
2084     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2085     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2086     b0 = b0 >> 4;
2087     g0 = g0 >> 4;
2088     r0 = r0 >> 4;
2089     b1 = b1 >> 4;
2090     g1 = g1 >> 4;
2091     r1 = r1 >> 4;
2092     *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) |
2093                                  (g1 << 20) | (r1 << 24) | 0xf000f000;
2094     src_y += 2;
2095     src_u += 1;
2096     src_v += 1;
2097     dst_argb4444 += 4;  // Advance 2 pixels.
2098   }
2099   if (width & 1) {
2100     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2101     b0 = b0 >> 4;
2102     g0 = g0 >> 4;
2103     r0 = r0 >> 4;
2104     *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000;
2105   }
2106 }
2107 
2108 void I422ToARGB1555Row_C(const uint8_t* src_y,
2109                          const uint8_t* src_u,
2110                          const uint8_t* src_v,
2111                          uint8_t* dst_argb1555,
2112                          const struct YuvConstants* yuvconstants,
2113                          int width) {
2114   uint8_t b0;
2115   uint8_t g0;
2116   uint8_t r0;
2117   uint8_t b1;
2118   uint8_t g1;
2119   uint8_t r1;
2120   int x;
2121   for (x = 0; x < width - 1; x += 2) {
2122     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2123     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2124     b0 = b0 >> 3;
2125     g0 = g0 >> 3;
2126     r0 = r0 >> 3;
2127     b1 = b1 >> 3;
2128     g1 = g1 >> 3;
2129     r1 = r1 >> 3;
2130     *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) |
2131                                  (g1 << 21) | (r1 << 26) | 0x80008000;
2132     src_y += 2;
2133     src_u += 1;
2134     src_v += 1;
2135     dst_argb1555 += 4;  // Advance 2 pixels.
2136   }
2137   if (width & 1) {
2138     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2139     b0 = b0 >> 3;
2140     g0 = g0 >> 3;
2141     r0 = r0 >> 3;
2142     *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000;
2143   }
2144 }
2145 
2146 void I422ToRGB565Row_C(const uint8_t* src_y,
2147                        const uint8_t* src_u,
2148                        const uint8_t* src_v,
2149                        uint8_t* dst_rgb565,
2150                        const struct YuvConstants* yuvconstants,
2151                        int width) {
2152   uint8_t b0;
2153   uint8_t g0;
2154   uint8_t r0;
2155   uint8_t b1;
2156   uint8_t g1;
2157   uint8_t r1;
2158   int x;
2159   for (x = 0; x < width - 1; x += 2) {
2160     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2161     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2162     b0 = b0 >> 3;
2163     g0 = g0 >> 2;
2164     r0 = r0 >> 3;
2165     b1 = b1 >> 3;
2166     g1 = g1 >> 2;
2167     r1 = r1 >> 3;
2168     *(uint32_t*)(dst_rgb565) =
2169         b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
2170     src_y += 2;
2171     src_u += 1;
2172     src_v += 1;
2173     dst_rgb565 += 4;  // Advance 2 pixels.
2174   }
2175   if (width & 1) {
2176     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2177     b0 = b0 >> 3;
2178     g0 = g0 >> 2;
2179     r0 = r0 >> 3;
2180     *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
2181   }
2182 }
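// Worked example of the RGB565 packing above (illustrative): for a white pixel
// b = g = r = 255, the shifts give b0 = 31, g0 = 63, r0 = 31, and
// 31 | (63 << 5) | (31 << 11) = 0xffff, i.e. all 16 bits set.  Green keeps the
// extra bit of precision in the 5:6:5 split.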
2183 
2184 void NV12ToARGBRow_C(const uint8_t* src_y,
2185                      const uint8_t* src_uv,
2186                      uint8_t* rgb_buf,
2187                      const struct YuvConstants* yuvconstants,
2188                      int width) {
2189   int x;
2190   for (x = 0; x < width - 1; x += 2) {
2191     YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2192              rgb_buf + 2, yuvconstants);
2193     rgb_buf[3] = 255;
2194     YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
2195              rgb_buf + 6, yuvconstants);
2196     rgb_buf[7] = 255;
2197     src_y += 2;
2198     src_uv += 2;
2199     rgb_buf += 8;  // Advance 2 pixels.
2200   }
2201   if (width & 1) {
2202     YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2203              rgb_buf + 2, yuvconstants);
2204     rgb_buf[3] = 255;
2205   }
2206 }
2207 
2208 void NV21ToARGBRow_C(const uint8_t* src_y,
2209                      const uint8_t* src_vu,
2210                      uint8_t* rgb_buf,
2211                      const struct YuvConstants* yuvconstants,
2212                      int width) {
2213   int x;
2214   for (x = 0; x < width - 1; x += 2) {
2215     YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2216              rgb_buf + 2, yuvconstants);
2217     rgb_buf[3] = 255;
2218     YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
2219              rgb_buf + 6, yuvconstants);
2220     rgb_buf[7] = 255;
2221     src_y += 2;
2222     src_vu += 2;
2223     rgb_buf += 8;  // Advance 2 pixels.
2224   }
2225   if (width & 1) {
2226     YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2227              rgb_buf + 2, yuvconstants);
2228     rgb_buf[3] = 255;
2229   }
2230 }
2231 
2232 void NV12ToRGB24Row_C(const uint8_t* src_y,
2233                       const uint8_t* src_uv,
2234                       uint8_t* rgb_buf,
2235                       const struct YuvConstants* yuvconstants,
2236                       int width) {
2237   int x;
2238   for (x = 0; x < width - 1; x += 2) {
2239     YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2240              rgb_buf + 2, yuvconstants);
2241     YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
2242              rgb_buf + 5, yuvconstants);
2243     src_y += 2;
2244     src_uv += 2;
2245     rgb_buf += 6;  // Advance 2 pixels.
2246   }
2247   if (width & 1) {
2248     YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2249              rgb_buf + 2, yuvconstants);
2250   }
2251 }
2252 
2253 void NV21ToRGB24Row_C(const uint8_t* src_y,
2254                       const uint8_t* src_vu,
2255                       uint8_t* rgb_buf,
2256                       const struct YuvConstants* yuvconstants,
2257                       int width) {
2258   int x;
2259   for (x = 0; x < width - 1; x += 2) {
2260     YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2261              rgb_buf + 2, yuvconstants);
2262     YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
2263              rgb_buf + 5, yuvconstants);
2264     src_y += 2;
2265     src_vu += 2;
2266     rgb_buf += 6;  // Advance 2 pixels.
2267   }
2268   if (width & 1) {
2269     YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2270              rgb_buf + 2, yuvconstants);
2271   }
2272 }
2273 
2274 void NV12ToRGB565Row_C(const uint8_t* src_y,
2275                        const uint8_t* src_uv,
2276                        uint8_t* dst_rgb565,
2277                        const struct YuvConstants* yuvconstants,
2278                        int width) {
2279   uint8_t b0;
2280   uint8_t g0;
2281   uint8_t r0;
2282   uint8_t b1;
2283   uint8_t g1;
2284   uint8_t r1;
2285   int x;
2286   for (x = 0; x < width - 1; x += 2) {
2287     YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2288     YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
2289     b0 = b0 >> 3;
2290     g0 = g0 >> 2;
2291     r0 = r0 >> 3;
2292     b1 = b1 >> 3;
2293     g1 = g1 >> 2;
2294     r1 = r1 >> 3;
2295     *(uint32_t*)(dst_rgb565) =
2296         b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27);
2297     src_y += 2;
2298     src_uv += 2;
2299     dst_rgb565 += 4;  // Advance 2 pixels.
2300   }
2301   if (width & 1) {
2302     YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2303     b0 = b0 >> 3;
2304     g0 = g0 >> 2;
2305     r0 = r0 >> 3;
2306     *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
2307   }
2308 }
2309 
2310 void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
2311                      uint8_t* rgb_buf,
2312                      const struct YuvConstants* yuvconstants,
2313                      int width) {
2314   int x;
2315   for (x = 0; x < width - 1; x += 2) {
2316     YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
2317              rgb_buf + 2, yuvconstants);
2318     rgb_buf[3] = 255;
2319     YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
2320              rgb_buf + 6, yuvconstants);
2321     rgb_buf[7] = 255;
2322     src_yuy2 += 4;
2323     rgb_buf += 8;  // Advance 2 pixels.
2324   }
2325   if (width & 1) {
2326     YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
2327              rgb_buf + 2, yuvconstants);
2328     rgb_buf[3] = 255;
2329   }
2330 }
2331 
2332 void UYVYToARGBRow_C(const uint8_t* src_uyvy,
2333                      uint8_t* rgb_buf,
2334                      const struct YuvConstants* yuvconstants,
2335                      int width) {
2336   int x;
2337   for (x = 0; x < width - 1; x += 2) {
2338     YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
2339              rgb_buf + 2, yuvconstants);
2340     rgb_buf[3] = 255;
2341     YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
2342              rgb_buf + 6, yuvconstants);
2343     rgb_buf[7] = 255;
2344     src_uyvy += 4;
2345     rgb_buf += 8;  // Advance 2 pixels.
2346   }
2347   if (width & 1) {
2348     YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
2349              rgb_buf + 2, yuvconstants);
2350     rgb_buf[3] = 255;
2351   }
2352 }
2353 
2354 void I422ToRGBARow_C(const uint8_t* src_y,
2355                      const uint8_t* src_u,
2356                      const uint8_t* src_v,
2357                      uint8_t* rgb_buf,
2358                      const struct YuvConstants* yuvconstants,
2359                      int width) {
2360   int x;
2361   for (x = 0; x < width - 1; x += 2) {
2362     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
2363              rgb_buf + 3, yuvconstants);
2364     rgb_buf[0] = 255;
2365     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
2366              rgb_buf + 7, yuvconstants);
2367     rgb_buf[4] = 255;
2368     src_y += 2;
2369     src_u += 1;
2370     src_v += 1;
2371     rgb_buf += 8;  // Advance 2 pixels.
2372   }
2373   if (width & 1) {
2374     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
2375              rgb_buf + 3, yuvconstants);
2376     rgb_buf[0] = 255;
2377   }
2378 }
2379 
2380 void I400ToARGBRow_C(const uint8_t* src_y,
2381                      uint8_t* rgb_buf,
2382                      const struct YuvConstants* yuvconstants,
2383                      int width) {
2384   int x;
2385   for (x = 0; x < width - 1; x += 2) {
2386     YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
2387     rgb_buf[3] = 255;
2388     YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
2389     rgb_buf[7] = 255;
2390     src_y += 2;
2391     rgb_buf += 8;  // Advance 2 pixels.
2392   }
2393   if (width & 1) {
2394     YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
2395     rgb_buf[3] = 255;
2396   }
2397 }
2398 
2399 void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
2400   int x;
2401   src += width - 1;
2402   for (x = 0; x < width - 1; x += 2) {
2403     dst[x] = src[0];
2404     dst[x + 1] = src[-1];
2405     src -= 2;
2406   }
2407   if (width & 1) {
2408     dst[width - 1] = src[0];
2409   }
2410 }
2411 
2412 void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
2413   int x;
2414   src_uv += (width - 1) << 1;
2415   for (x = 0; x < width; ++x) {
2416     dst_uv[0] = src_uv[0];
2417     dst_uv[1] = src_uv[1];
2418     src_uv -= 2;
2419     dst_uv += 2;
2420   }
2421 }
2422 
2423 void MirrorSplitUVRow_C(const uint8_t* src_uv,
2424                         uint8_t* dst_u,
2425                         uint8_t* dst_v,
2426                         int width) {
2427   int x;
2428   src_uv += (width - 1) << 1;
2429   for (x = 0; x < width - 1; x += 2) {
2430     dst_u[x] = src_uv[0];
2431     dst_u[x + 1] = src_uv[-2];
2432     dst_v[x] = src_uv[1];
2433     dst_v[x + 1] = src_uv[-2 + 1];
2434     src_uv -= 4;
2435   }
2436   if (width & 1) {
2437     dst_u[width - 1] = src_uv[0];
2438     dst_v[width - 1] = src_uv[1];
2439   }
2440 }
2441 
2442 void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
2443   int x;
2444   const uint32_t* src32 = (const uint32_t*)(src);
2445   uint32_t* dst32 = (uint32_t*)(dst);
2446   src32 += width - 1;
2447   for (x = 0; x < width - 1; x += 2) {
2448     dst32[x] = src32[0];
2449     dst32[x + 1] = src32[-1];
2450     src32 -= 2;
2451   }
2452   if (width & 1) {
2453     dst32[width - 1] = src32[0];
2454   }
2455 }
2456 
2457 void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
2458   int x;
2459   src_rgb24 += width * 3 - 3;
2460   for (x = 0; x < width; ++x) {
2461     uint8_t b = src_rgb24[0];
2462     uint8_t g = src_rgb24[1];
2463     uint8_t r = src_rgb24[2];
2464     dst_rgb24[0] = b;
2465     dst_rgb24[1] = g;
2466     dst_rgb24[2] = r;
2467     src_rgb24 -= 3;
2468     dst_rgb24 += 3;
2469   }
2470 }
2471 
2472 void SplitUVRow_C(const uint8_t* src_uv,
2473                   uint8_t* dst_u,
2474                   uint8_t* dst_v,
2475                   int width) {
2476   int x;
2477   for (x = 0; x < width - 1; x += 2) {
2478     dst_u[x] = src_uv[0];
2479     dst_u[x + 1] = src_uv[2];
2480     dst_v[x] = src_uv[1];
2481     dst_v[x + 1] = src_uv[3];
2482     src_uv += 4;
2483   }
2484   if (width & 1) {
2485     dst_u[width - 1] = src_uv[0];
2486     dst_v[width - 1] = src_uv[1];
2487   }
2488 }
2489 
2490 void MergeUVRow_C(const uint8_t* src_u,
2491                   const uint8_t* src_v,
2492                   uint8_t* dst_uv,
2493                   int width) {
2494   int x;
2495   for (x = 0; x < width - 1; x += 2) {
2496     dst_uv[0] = src_u[x];
2497     dst_uv[1] = src_v[x];
2498     dst_uv[2] = src_u[x + 1];
2499     dst_uv[3] = src_v[x + 1];
2500     dst_uv += 4;
2501   }
2502   if (width & 1) {
2503     dst_uv[0] = src_u[width - 1];
2504     dst_uv[1] = src_v[width - 1];
2505   }
2506 }
2507 
2508 void SplitRGBRow_C(const uint8_t* src_rgb,
2509                    uint8_t* dst_r,
2510                    uint8_t* dst_g,
2511                    uint8_t* dst_b,
2512                    int width) {
2513   int x;
2514   for (x = 0; x < width; ++x) {
2515     dst_r[x] = src_rgb[0];
2516     dst_g[x] = src_rgb[1];
2517     dst_b[x] = src_rgb[2];
2518     src_rgb += 3;
2519   }
2520 }
2521 
2522 void MergeRGBRow_C(const uint8_t* src_r,
2523                    const uint8_t* src_g,
2524                    const uint8_t* src_b,
2525                    uint8_t* dst_rgb,
2526                    int width) {
2527   int x;
2528   for (x = 0; x < width; ++x) {
2529     dst_rgb[0] = src_r[x];
2530     dst_rgb[1] = src_g[x];
2531     dst_rgb[2] = src_b[x];
2532     dst_rgb += 3;
2533   }
2534 }
2535 
2536 // Use scale to convert lsb formats to msb, depending on how many bits there are:
2537 // 128 = 9 bits
2538 // 64 = 10 bits
2539 // 16 = 12 bits
2540 // 1 = 16 bits
2541 void MergeUVRow_16_C(const uint16_t* src_u,
2542                      const uint16_t* src_v,
2543                      uint16_t* dst_uv,
2544                      int scale,
2545                      int width) {
2546   int x;
2547   for (x = 0; x < width - 1; x += 2) {
2548     dst_uv[0] = src_u[x] * scale;
2549     dst_uv[1] = src_v[x] * scale;
2550     dst_uv[2] = src_u[x + 1] * scale;
2551     dst_uv[3] = src_v[x + 1] * scale;
2552     dst_uv += 4;
2553   }
2554   if (width & 1) {
2555     dst_uv[0] = src_u[width - 1] * scale;
2556     dst_uv[1] = src_v[width - 1] * scale;
2557   }
2558 }
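// Example use of MergeUVRow_16_C() above (illustrative, parameters assumed):
// with 10 bit U and V samples stored in the low bits of uint16_t, scale = 64
// shifts each sample into the msbs of the 16 bit output, e.g. 1023 * 64 =
// 0xffc0, which matches the msb-aligned layout used by 10 bit biplanar
// formats such as P010.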
2559 
2560 void MultiplyRow_16_C(const uint16_t* src_y,
2561                       uint16_t* dst_y,
2562                       int scale,
2563                       int width) {
2564   int x;
2565   for (x = 0; x < width; ++x) {
2566     dst_y[x] = src_y[x] * scale;
2567   }
2568 }
2569 
2570 // Use scale to convert lsb formats to msb, depending on how many bits there are:
2571 // 32768 = 9 bits
2572 // 16384 = 10 bits
2573 // 4096 = 12 bits
2574 // 256 = 16 bits
2575 void Convert16To8Row_C(const uint16_t* src_y,
2576                        uint8_t* dst_y,
2577                        int scale,
2578                        int width) {
2579   int x;
2580   for (x = 0; x < width; ++x) {
2581     dst_y[x] = clamp255((src_y[x] * scale) >> 16);
2582   }
2583 }
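// Worked example for Convert16To8Row_C() above (illustrative): for 10 bit
// input use scale = 16384, so a full-scale sample 1023 becomes
// clamp255((1023 * 16384) >> 16) = 255 and 0 stays 0.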
2584 
2585 // Use scale to convert lsb formats to msb, depending on how many bits there are:
2586 // 1024 = 10 bits
2587 void Convert8To16Row_C(const uint8_t* src_y,
2588                        uint16_t* dst_y,
2589                        int scale,
2590                        int width) {
2591   int x;
2592   scale *= 0x0101;  // replicates the byte.
2593   for (x = 0; x < width; ++x) {
2594     dst_y[x] = (src_y[x] * scale) >> 16;
2595   }
2596 }
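// Worked example for Convert8To16Row_C() above (illustrative): for 10 bit
// output use scale = 1024; after the 0x0101 replication the effective
// multiplier is 1024 * 0x0101 = 263168, so 255 becomes
// (255 * 263168) >> 16 = 1023 and 0 stays 0.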
2597 
2598 void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
2599   memcpy(dst, src, count);
2600 }
2601 
2602 void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
2603   memcpy(dst, src, count * 2);
2604 }
2605 
2606 void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
2607   memset(dst, v8, width);
2608 }
2609 
2610 void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
2611   int x;
2612   for (x = 0; x < width; ++x) {
2613     memcpy(dst_argb + x * sizeof v32, &v32, sizeof v32);
2614   }
2615 }
2616 
2617 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
2618 void YUY2ToUVRow_C(const uint8_t* src_yuy2,
2619                    int src_stride_yuy2,
2620                    uint8_t* dst_u,
2621                    uint8_t* dst_v,
2622                    int width) {
2623   // Output a row of UV values, filtering 2 rows of YUY2.
2624   int x;
2625   for (x = 0; x < width; x += 2) {
2626     dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
2627     dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
2628     src_yuy2 += 4;
2629     dst_u += 1;
2630     dst_v += 1;
2631   }
2632 }
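// Memory layout reminder for the packed 422 routines above and below
// (illustrative): YUY2 stores 2 pixels per 4 bytes as [Y0 U Y1 V], so bytes 1
// and 3 hold the shared chroma, while UYVY stores [U Y0 V Y1], with chroma at
// bytes 0 and 2.  The *ToUVRow_C variants average chroma from 2 rows to make
// 420 output; the *ToUV422Row_C variants copy a single row's chroma.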
2633 
2634 // Copy row of YUY2 UV's (422) into U and V (422).
2635 void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
2636                       uint8_t* dst_u,
2637                       uint8_t* dst_v,
2638                       int width) {
2639   // Output a row of UV values.
2640   int x;
2641   for (x = 0; x < width; x += 2) {
2642     dst_u[0] = src_yuy2[1];
2643     dst_v[0] = src_yuy2[3];
2644     src_yuy2 += 4;
2645     dst_u += 1;
2646     dst_v += 1;
2647   }
2648 }
2649 
2650 // Copy row of YUY2 Y's (422) into Y (420/422).
2651 void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
2652   // Output a row of Y values.
2653   int x;
2654   for (x = 0; x < width - 1; x += 2) {
2655     dst_y[x] = src_yuy2[0];
2656     dst_y[x + 1] = src_yuy2[2];
2657     src_yuy2 += 4;
2658   }
2659   if (width & 1) {
2660     dst_y[width - 1] = src_yuy2[0];
2661   }
2662 }
2663 
2664 // Filter 2 rows of UYVY UV's (422) into U and V (420).
2665 void UYVYToUVRow_C(const uint8_t* src_uyvy,
2666                    int src_stride_uyvy,
2667                    uint8_t* dst_u,
2668                    uint8_t* dst_v,
2669                    int width) {
2670   // Output a row of UV values.
2671   int x;
2672   for (x = 0; x < width; x += 2) {
2673     dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
2674     dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
2675     src_uyvy += 4;
2676     dst_u += 1;
2677     dst_v += 1;
2678   }
2679 }
2680 
2681 // Copy row of UYVY UV's (422) into U and V (422).
2682 void UYVYToUV422Row_C(const uint8_t* src_uyvy,
2683                       uint8_t* dst_u,
2684                       uint8_t* dst_v,
2685                       int width) {
2686   // Output a row of UV values.
2687   int x;
2688   for (x = 0; x < width; x += 2) {
2689     dst_u[0] = src_uyvy[0];
2690     dst_v[0] = src_uyvy[2];
2691     src_uyvy += 4;
2692     dst_u += 1;
2693     dst_v += 1;
2694   }
2695 }
2696 
2697 // Copy row of UYVY Y's (422) into Y (420/422).
UYVYToYRow_C(const uint8_t * src_uyvy,uint8_t * dst_y,int width)2698 void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
2699   // Output a row of Y values.
2700   int x;
2701   for (x = 0; x < width - 1; x += 2) {
2702     dst_y[x] = src_uyvy[1];
2703     dst_y[x + 1] = src_uyvy[3];
2704     src_uyvy += 4;
2705   }
2706   if (width & 1) {
2707     dst_y[width - 1] = src_uyvy[1];
2708   }
2709 }
2710 
2711 #define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
2712 
2713 // Blend src_argb0 over src_argb1 and store to dst_argb.
2714 // dst_argb may be src_argb0 or src_argb1.
2715 // This code mimics the SSSE3 version for better testability.
ARGBBlendRow_C(const uint8_t * src_argb0,const uint8_t * src_argb1,uint8_t * dst_argb,int width)2716 void ARGBBlendRow_C(const uint8_t* src_argb0,
2717                     const uint8_t* src_argb1,
2718                     uint8_t* dst_argb,
2719                     int width) {
2720   int x;
2721   for (x = 0; x < width - 1; x += 2) {
2722     uint32_t fb = src_argb0[0];
2723     uint32_t fg = src_argb0[1];
2724     uint32_t fr = src_argb0[2];
2725     uint32_t a = src_argb0[3];
2726     uint32_t bb = src_argb1[0];
2727     uint32_t bg = src_argb1[1];
2728     uint32_t br = src_argb1[2];
2729     dst_argb[0] = BLEND(fb, bb, a);
2730     dst_argb[1] = BLEND(fg, bg, a);
2731     dst_argb[2] = BLEND(fr, br, a);
2732     dst_argb[3] = 255u;
2733 
2734     fb = src_argb0[4 + 0];
2735     fg = src_argb0[4 + 1];
2736     fr = src_argb0[4 + 2];
2737     a = src_argb0[4 + 3];
2738     bb = src_argb1[4 + 0];
2739     bg = src_argb1[4 + 1];
2740     br = src_argb1[4 + 2];
2741     dst_argb[4 + 0] = BLEND(fb, bb, a);
2742     dst_argb[4 + 1] = BLEND(fg, bg, a);
2743     dst_argb[4 + 2] = BLEND(fr, br, a);
2744     dst_argb[4 + 3] = 255u;
2745     src_argb0 += 8;
2746     src_argb1 += 8;
2747     dst_argb += 8;
2748   }
2749 
2750   if (width & 1) {
2751     uint32_t fb = src_argb0[0];
2752     uint32_t fg = src_argb0[1];
2753     uint32_t fr = src_argb0[2];
2754     uint32_t a = src_argb0[3];
2755     uint32_t bb = src_argb1[0];
2756     uint32_t bg = src_argb1[1];
2757     uint32_t br = src_argb1[2];
2758     dst_argb[0] = BLEND(fb, bb, a);
2759     dst_argb[1] = BLEND(fg, bg, a);
2760     dst_argb[2] = BLEND(fr, br, a);
2761     dst_argb[3] = 255u;
2762   }
2763 }
2764 #undef BLEND
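
// Worked example (illustrative only): for a foreground byte f = 100 with
// alpha a = 192 over a background byte b = 50, BLEND computes
// ((256 - 192) * 50 >> 8) + 100 = 12 + 100 = 112, which matches the
// premultiplied "over" operator up to the >> 8 truncation.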

#define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
    dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  if (width & 1) {
    dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
  }
}
#undef UBLEND
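
// Worked example (illustrative only): UBLEND is the unattenuated blend
// a * src0 + (255 - a) * src1, biased by 255 so that alpha = 255 returns
// src0 exactly and alpha = 0 returns src1 exactly. For src0 = 200, src1 = 40,
// alpha = 128: (128 * 200 + 127 * 40 + 255) >> 8 = 120.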

#if defined(__aarch64__) || defined(__arm__)
#define ATTENUATE(f, a) (f * a + 128) >> 8
#else
// This code mimics the SSSE3 version for better testability.
#define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
#endif
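
// Worked example (illustrative only): for f = 128, a = 128 the ARM form gives
// (128 * 128 + 128) >> 8 = 64, and the x86 form gives
// (0x8080 * 0x8080) >> 24 = 1082146816 >> 24 = 64; the replication
// (v | (v << 8)) equals v * 257, which approximates division by 255 rather
// than 256.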

// Multiply source RGB by alpha and store to destination.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    uint32_t b = src_argb[0];
    uint32_t g = src_argb[1];
    uint32_t r = src_argb[2];
    uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
    b = src_argb[4];
    g = src_argb[5];
    r = src_argb[6];
    a = src_argb[7];
    dst_argb[4] = ATTENUATE(b, a);
    dst_argb[5] = ATTENUATE(g, a);
    dst_argb[6] = ATTENUATE(r, a);
    dst_argb[7] = a;
    src_argb += 8;
    dst_argb += 8;
  }

  if (width & 1) {
    const uint32_t b = src_argb[0];
    const uint32_t g = src_argb[1];
    const uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    dst_argb[0] = ATTENUATE(b, a);
    dst_argb[1] = ATTENUATE(g, a);
    dst_argb[2] = ATTENUATE(r, a);
    dst_argb[3] = a;
  }
}
#undef ATTENUATE

// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// The reciprocal method is off by 1 on some values, e.g. 125.
// 8.8 fixed point inverse table with 1.0 in the upper short and 1 / a in the
// lower short.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03),   T(0x04), T(0x05), T(0x06),
    T(0x07),    T(0x08),    T(0x09), T(0x0a),   T(0x0b), T(0x0c), T(0x0d),
    T(0x0e),    T(0x0f),    T(0x10), T(0x11),   T(0x12), T(0x13), T(0x14),
    T(0x15),    T(0x16),    T(0x17), T(0x18),   T(0x19), T(0x1a), T(0x1b),
    T(0x1c),    T(0x1d),    T(0x1e), T(0x1f),   T(0x20), T(0x21), T(0x22),
    T(0x23),    T(0x24),    T(0x25), T(0x26),   T(0x27), T(0x28), T(0x29),
    T(0x2a),    T(0x2b),    T(0x2c), T(0x2d),   T(0x2e), T(0x2f), T(0x30),
    T(0x31),    T(0x32),    T(0x33), T(0x34),   T(0x35), T(0x36), T(0x37),
    T(0x38),    T(0x39),    T(0x3a), T(0x3b),   T(0x3c), T(0x3d), T(0x3e),
    T(0x3f),    T(0x40),    T(0x41), T(0x42),   T(0x43), T(0x44), T(0x45),
    T(0x46),    T(0x47),    T(0x48), T(0x49),   T(0x4a), T(0x4b), T(0x4c),
    T(0x4d),    T(0x4e),    T(0x4f), T(0x50),   T(0x51), T(0x52), T(0x53),
    T(0x54),    T(0x55),    T(0x56), T(0x57),   T(0x58), T(0x59), T(0x5a),
    T(0x5b),    T(0x5c),    T(0x5d), T(0x5e),   T(0x5f), T(0x60), T(0x61),
    T(0x62),    T(0x63),    T(0x64), T(0x65),   T(0x66), T(0x67), T(0x68),
    T(0x69),    T(0x6a),    T(0x6b), T(0x6c),   T(0x6d), T(0x6e), T(0x6f),
    T(0x70),    T(0x71),    T(0x72), T(0x73),   T(0x74), T(0x75), T(0x76),
    T(0x77),    T(0x78),    T(0x79), T(0x7a),   T(0x7b), T(0x7c), T(0x7d),
    T(0x7e),    T(0x7f),    T(0x80), T(0x81),   T(0x82), T(0x83), T(0x84),
    T(0x85),    T(0x86),    T(0x87), T(0x88),   T(0x89), T(0x8a), T(0x8b),
    T(0x8c),    T(0x8d),    T(0x8e), T(0x8f),   T(0x90), T(0x91), T(0x92),
    T(0x93),    T(0x94),    T(0x95), T(0x96),   T(0x97), T(0x98), T(0x99),
    T(0x9a),    T(0x9b),    T(0x9c), T(0x9d),   T(0x9e), T(0x9f), T(0xa0),
    T(0xa1),    T(0xa2),    T(0xa3), T(0xa4),   T(0xa5), T(0xa6), T(0xa7),
    T(0xa8),    T(0xa9),    T(0xaa), T(0xab),   T(0xac), T(0xad), T(0xae),
    T(0xaf),    T(0xb0),    T(0xb1), T(0xb2),   T(0xb3), T(0xb4), T(0xb5),
    T(0xb6),    T(0xb7),    T(0xb8), T(0xb9),   T(0xba), T(0xbb), T(0xbc),
    T(0xbd),    T(0xbe),    T(0xbf), T(0xc0),   T(0xc1), T(0xc2), T(0xc3),
    T(0xc4),    T(0xc5),    T(0xc6), T(0xc7),   T(0xc8), T(0xc9), T(0xca),
    T(0xcb),    T(0xcc),    T(0xcd), T(0xce),   T(0xcf), T(0xd0), T(0xd1),
    T(0xd2),    T(0xd3),    T(0xd4), T(0xd5),   T(0xd6), T(0xd7), T(0xd8),
    T(0xd9),    T(0xda),    T(0xdb), T(0xdc),   T(0xdd), T(0xde), T(0xdf),
    T(0xe0),    T(0xe1),    T(0xe2), T(0xe3),   T(0xe4), T(0xe5), T(0xe6),
    T(0xe7),    T(0xe8),    T(0xe9), T(0xea),   T(0xeb), T(0xec), T(0xed),
    T(0xee),    T(0xef),    T(0xf0), T(0xf1),   T(0xf2), T(0xf3), T(0xf4),
    T(0xf5),    T(0xf6),    T(0xf7), T(0xf8),   T(0xf9), T(0xfa), T(0xfb),
    T(0xfc),    T(0xfd),    T(0xfe), 0x01000100};
#undef T

void ARGBUnattenuateRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          int width) {
  int i;
  for (i = 0; i < width; ++i) {
    uint32_t b = src_argb[0];
    uint32_t g = src_argb[1];
    uint32_t r = src_argb[2];
    const uint32_t a = src_argb[3];
    const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
    b = (b * ia) >> 8;
    g = (g * ia) >> 8;
    r = (r * ia) >> 8;
    // Clamping should not be necessary but is free in assembly.
    dst_argb[0] = clamp255(b);
    dst_argb[1] = clamp255(g);
    dst_argb[2] = clamp255(r);
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}
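
// Worked example (illustrative only): for a = 0x80 the table entry is
// 0x01000000 + 0x10000 / 0x80 = 0x01000200, so ia = 0x0200, i.e. 2.0 in 8.8
// fixed point, and an attenuated value of 60 unattenuates to
// (60 * 0x200) >> 8 = 120 (the exact scale would be 255 / 128, about 1.992).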

void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t row_sum[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    row_sum[0] += row[x * 4 + 0];
    row_sum[1] += row[x * 4 + 1];
    row_sum[2] += row[x * 4 + 2];
    row_sum[3] += row[x * 4 + 3];
    cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
    cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
    cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
    cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
  }
}

void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  float ooa = 1.0f / area;
  int i;
  for (i = 0; i < count; ++i) {
    dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
    dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
    dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
    dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
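
// Illustrative note (not part of the library API): the two routines above
// form a summed-area table. ComputeCumulativeSumRow_C accumulates per-channel
// sums left to right and adds the previous row's cumulative sums, so each
// cumsum entry holds the sum of the rectangle from the origin to that pixel.
// The box average over a w-pixel-wide window is then recovered from four
// corner lookups, bl[w] + tl[0] - bl[0] - tl[w], scaled by 1 / area.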

// Copy pixels from rotated source to destination row with a slope.
LIBYUV_API
void ARGBAffineRow_C(const uint8_t* src_argb,
                     int src_argb_stride,
                     uint8_t* dst_argb,
                     const float* uv_dudv,
                     int width) {
  int i;
  // Render a row of pixels from source into a buffer.
  float uv[2];
  uv[0] = uv_dudv[0];
  uv[1] = uv_dudv[1];
  for (i = 0; i < width; ++i) {
    int x = (int)(uv[0]);
    int y = (int)(uv[1]);
    *(uint32_t*)(dst_argb) =
        *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
    dst_argb += 4;
    uv[0] += uv_dudv[2];
    uv[1] += uv_dudv[3];
  }
}
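
// Illustrative usage (a sketch, not a prescribed calling convention): uv_dudv
// packs the starting source coordinate and the per-pixel step as
// {u0, v0, du, dv}. For example, {0.f, 0.f, 1.f, 0.f} walks along the top
// source row and degenerates to a straight copy, while {0.f, 0.f, 0.f, 1.f}
// samples down the first source column into the destination row.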

// Blend 2 rows into 1.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
  }
}

// C version 2x2 -> 2x1.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (y1_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (y1_fraction == 128) {
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
    dst_ptr[1] =
        (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
  }
}
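
// Worked example (illustrative only): source_y_fraction is the weight of the
// second row in units of 1/256. With source_y_fraction = 64, a pixel of 100
// over a pixel of 200 interpolates to (100 * 192 + 200 * 64 + 128) >> 8 = 125,
// i.e. three quarters of the first row plus one quarter of the second.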

void InterpolateRow_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint16_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (source_y_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width * 2);
    return;
  }
  if (source_y_fraction == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  if (width & 1) {
    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
  }
}

// Use first 4 shuffler values to reorder ARGB channels.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  int index0 = shuffler[0];
  int index1 = shuffler[1];
  int index2 = shuffler[2];
  int index3 = shuffler[3];
  // Shuffle a row of ARGB.
  int x;
  for (x = 0; x < width; ++x) {
    // To support in-place conversion.
    uint8_t b = src_argb[index0];
    uint8_t g = src_argb[index1];
    uint8_t r = src_argb[index2];
    uint8_t a = src_argb[index3];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    src_argb += 4;
    dst_argb += 4;
  }
}
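
// Illustrative usage (hypothetical shuffler value, not a constant defined in
// this file): each shuffler byte selects the source channel written to that
// destination position, so a 4-pixel row could have its B and R channels
// swapped in place with
//   static const uint8_t kSwapBR[4] = {2, 1, 0, 3};
//   ARGBShuffleRow_C(row, row, kSwapBR, 4);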

void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = src_y[1];
    dst_frame[3] = src_v[0];
    dst_frame += 4;
    src_y += 2;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = src_u[0];
    dst_frame[2] = 0;
    dst_frame[3] = src_v[0];
  }
}

void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    src_u += 1;
    src_v += 1;
  }
  if (width & 1) {
    dst_frame[0] = src_u[0];
    dst_frame[1] = src_y[0];
    dst_frame[2] = src_v[0];
    dst_frame[3] = 0;
  }
}

void ARGBPolynomialRow_C(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float b = (float)(src_argb[0]);
    float g = (float)(src_argb[1]);
    float r = (float)(src_argb[2]);
    float a = (float)(src_argb[3]);
    float b2 = b * b;
    float g2 = g * g;
    float r2 = r * r;
    float a2 = a * a;
    float db = poly[0] + poly[4] * b;
    float dg = poly[1] + poly[5] * g;
    float dr = poly[2] + poly[6] * r;
    float da = poly[3] + poly[7] * a;
    float b3 = b2 * b;
    float g3 = g2 * g;
    float r3 = r2 * r;
    float a3 = a2 * a;
    db += poly[8] * b2;
    dg += poly[9] * g2;
    dr += poly[10] * r2;
    da += poly[11] * a2;
    db += poly[12] * b3;
    dg += poly[13] * g3;
    dr += poly[14] * r3;
    da += poly[15] * a3;

    dst_argb[0] = Clamp((int32_t)(db));
    dst_argb[1] = Clamp((int32_t)(dg));
    dst_argb[2] = Clamp((int32_t)(dr));
    dst_argb[3] = Clamp((int32_t)(da));
    src_argb += 4;
    dst_argb += 4;
  }
}

// Samples are assumed to be unsigned, in the low 9, 10 or 12 bits. The scale
// factor adjusts the source integer range to the desired half-float range.

// This magic constant is 2^-112. Multiplying by this
// is the same as subtracting 112 from the exponent, which
// is the difference in exponent bias between 32-bit and
// 16-bit floats. Once we've done this subtraction, we can
// simply extract the low bits of the exponent and the high
// bits of the mantissa from our float and we're done.

// Work around GCC 7 punning warning -Wstrict-aliasing
#if defined(__GNUC__)
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
#else
typedef uint32_t uint32_alias_t;
#endif

void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  int i;
  float mult = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
    float value = src[i] * mult;
    dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13);
  }
}
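
// Worked example (illustrative only): with scale = 1.0f and src[i] = 1, the
// product is 2^-112, whose float bit pattern is (127 - 112) << 23 =
// 0x07800000; shifting right by 13 yields 0x3C00, which is exactly 1.0 as an
// IEEE half float, so the exponent rebias plus the >> 13 replace an explicit
// float-to-half conversion.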

void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float value = src[i] * scale;
    dst[i] = value;
  }
}

void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  uint32_t bc = lumacoeff & 0xff;
  uint32_t gc = (lumacoeff >> 8) & 0xff;
  uint32_t rc = (lumacoeff >> 16) & 0xff;

  int i;
  for (i = 0; i < width - 1; i += 2) {
    // Luminance in rows, color values in columns.
    const uint8_t* luma0 =
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
        luma;
    const uint8_t* luma1;
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
    luma1 =
        ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) +
        luma;
    dst_argb[4] = luma1[src_argb[4]];
    dst_argb[5] = luma1[src_argb[5]];
    dst_argb[6] = luma1[src_argb[6]];
    dst_argb[7] = src_argb[7];
    src_argb += 8;
    dst_argb += 8;
  }
  if (width & 1) {
    // Luminance in rows, color values in columns.
    const uint8_t* luma0 =
        ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
        luma;
    dst_argb[0] = luma0[src_argb[0]];
    dst_argb[1] = luma0[src_argb[1]];
    dst_argb[2] = luma0[src_argb[2]];
    dst_argb[3] = src_argb[3];
  }
}

void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst[3] = src[3];
    dst[7] = src[7];
    dst += 8;
    src += 8;
  }
  if (width & 1) {
    dst[3] = src[3];
  }
}

void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst_a[0] = src_argb[3];
    dst_a[1] = src_argb[7];
    dst_a += 2;
    src_argb += 8;
  }
  if (width & 1) {
    dst_a[0] = src_argb[3];
  }
}

void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int i;
  for (i = 0; i < width - 1; i += 2) {
    dst[3] = src[0];
    dst[7] = src[1];
    dst += 8;
    src += 2;
  }
  if (width & 1) {
    dst[3] = src[0];
  }
}

// Maximum temporary width for wrappers to process at a time, in pixels.
#define MAXTWIDTH 2048
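
// The wrappers below convert to ARGB in MAXTWIDTH-pixel chunks, so the
// intermediate SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]) staging buffer costs
// 2048 * 4 = 8192 bytes of stack per call, regardless of the caller's width.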

#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_SSSE3)
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_SSSE3)
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_SSSE3)
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_uv,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB24ROW_SSSE3)
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_uv += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV21TORGB24ROW_SSSE3)
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB24ROW_AVX2)
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV21TORGB24ROW_AVX2)
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TORGB565ROW_AVX2)
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_AVX2)
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_AVX2)
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TORGB24ROW_AVX2)
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#ifdef HAS_RGB24TOYJROW_AVX2
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
    src_rgb24 += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_AVX2

#ifdef HAS_RAWTOYJROW_AVX2
// Convert 16 RAW pixels (64 bytes) to 16 YJ values.
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
    src_raw += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RAWTOYJROW_AVX2

#ifdef HAS_RGB24TOYJROW_SSSE3
// Convert 16 RGB24 pixels (64 bytes) to 16 YJ values.
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
    src_rgb24 += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_SSSE3

#ifdef HAS_RAWTOYJROW_SSSE3
// Convert 16 RAW pixels (64 bytes) to 16 YJ values.
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
    src_raw += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RAWTOYJROW_SSSE3

float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float fsum = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    fsum += v * v;
    *dst++ = v * scale;
  }
  return fsum;
}

float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float fmax = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    float vs = v * scale;
    fmax = (v > fmax) ? v : fmax;
    *dst++ = vs;
  }
  return fmax;
}

void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src++ * scale;
  }
}

void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ =
        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8;
    ++src;
  }
}

// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}
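
// Worked example (illustrative only): 1, 4, 6, 4, 1 are the 5-tap binomial
// coefficients, which sum to 16. GaussCol_C applies them vertically without
// scaling and GaussRow_C applies them horizontally, so a fully filtered pixel
// carries a combined weight of 16 * 16 = 256; the + 128 and >> 8 in
// GaussRow_C perform the final rounded normalization.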

void GaussRow_F32_C(const float* src, float* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) *
             (1.0f / 256.0f);
    ++src;
  }
}

// filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}

// Convert biplanar NV21 to packed YUV24
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
    dst_yuv24[3] = src_vu[0];  // V
    dst_yuv24[4] = src_vu[1];  // U
    dst_yuv24[5] = src_y[1];   // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
  }
}

// Filter 2 rows of AYUV UV's (444) into UV (420).
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Output a row of UV values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    src_ayuv += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 0] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 1] + 2) >>
                2;
  }
}

// Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Output a row of VU values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width; x += 2) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    src_ayuv += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 0] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 1] + 2) >>
                2;
  }
}

// Copy row of AYUV Y's into Y
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[2];  // v,u,y,a
    src_ayuv += 4;
  }
}

// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[0];
    uint8_t v = src_uv[1];
    dst_vu[0] = v;
    dst_vu[1] = u;
    src_uv += 2;
    dst_vu += 2;
  }
}

void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_u[0] + src_u[1] + src_u[src_stride_u] +
                 src_u[src_stride_u + 1] + 2) >>
                2;
    dst_uv[1] = (src_v[0] + src_v[1] + src_v[src_stride_v] +
                 src_v[src_stride_v + 1] + 2) >>
                2;
    src_u += 2;
    src_v += 2;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_u[0] + src_u[src_stride_u] + 1) >> 1;
    dst_uv[1] = (src_v[0] + src_v[src_stride_v] + 1) >> 1;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif