/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include <assert.h>
#include <string.h>  // For memcpy and memset.

#include "libyuv/basic_types.h"
#include "libyuv/convert_argb.h"  // For kYuvI601Constants

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#ifdef __cplusplus
#define STATIC_CAST(type, expr) static_cast<type>(expr)
#else
#define STATIC_CAST(type, expr) (type)(expr)
#endif

// This macro controls YUV to RGB using unsigned math to extend range of
// YUV to RGB coefficients to 0 to 4 instead of 0 to 2 for more accuracy on B:
// LIBYUV_UNLIMITED_DATA

// Macros to enable unlimited data for each colorspace
// LIBYUV_UNLIMITED_BT601
// LIBYUV_UNLIMITED_BT709
// LIBYUV_UNLIMITED_BT2020

// The following macro from row_win makes the C code match the row_win code,
// which is 7 bit fixed point for ARGBToI420:
#if !defined(LIBYUV_BIT_EXACT) && !defined(LIBYUV_DISABLE_X86) && \
    defined(_MSC_VER) && !defined(__clang__) &&                   \
    (defined(_M_IX86) || defined(_M_X64))
#define LIBYUV_RGB7 1
#endif

#if !defined(LIBYUV_BIT_EXACT) && (defined(__x86_64__) || defined(_M_X64) || \
                                   defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
#endif
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
#endif

// llvm x86 is poor at ternary operator, so use branchless min/max.

#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return -(v >= 0) & v;
}
// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}
67 
clamp1023(int32_t v)68 static __inline int32_t clamp1023(int32_t v) {
69   return (-(v >= 1023) | v) & 1023;
70 }
71 
72 // clamp to max
ClampMax(int32_t v,int32_t max)73 static __inline int32_t ClampMax(int32_t v, int32_t max) {
74   return (-(v >= max) | v) & max;
75 }
76 
Abs(int32_t v)77 static __inline uint32_t Abs(int32_t v) {
78   int m = -(v < 0);
79   return (v + m) ^ m;
80 }
81 #else   // USE_BRANCHLESS
82 static __inline int32_t clamp0(int32_t v) {
83   return (v < 0) ? 0 : v;
84 }
85 
86 static __inline int32_t clamp255(int32_t v) {
87   return (v > 255) ? 255 : v;
88 }
89 
90 static __inline int32_t clamp1023(int32_t v) {
91   return (v > 1023) ? 1023 : v;
92 }
93 
94 static __inline int32_t ClampMax(int32_t v, int32_t max) {
95   return (v > max) ? max : v;
96 }
97 
98 static __inline uint32_t Abs(int32_t v) {
99   return (v < 0) ? -v : v;
100 }
101 #endif  // USE_BRANCHLESS
Clamp(int32_t val)102 static __inline uint32_t Clamp(int32_t val) {
103   int v = clamp0(val);
104   return (uint32_t)(clamp255(v));
105 }
106 
Clamp10(int32_t val)107 static __inline uint32_t Clamp10(int32_t val) {
108   int v = clamp0(val);
109   return (uint32_t)(clamp1023(v));
110 }
111 
112 // Little Endian
113 #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
114     defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
115     (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
116 #define WRITEWORD(p, v) *(uint32_t*)(p) = v
117 #else
WRITEWORD(uint8_t * p,uint32_t v)118 static inline void WRITEWORD(uint8_t* p, uint32_t v) {
119   p[0] = (uint8_t)(v & 255);
120   p[1] = (uint8_t)((v >> 8) & 255);
121   p[2] = (uint8_t)((v >> 16) & 255);
122   p[3] = (uint8_t)((v >> 24) & 255);
123 }
124 #endif
125 
RGB24ToARGBRow_C(const uint8_t * src_rgb24,uint8_t * dst_argb,int width)126 void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
127   int x;
128   for (x = 0; x < width; ++x) {
129     uint8_t b = src_rgb24[0];
130     uint8_t g = src_rgb24[1];
131     uint8_t r = src_rgb24[2];
132     dst_argb[0] = b;
133     dst_argb[1] = g;
134     dst_argb[2] = r;
135     dst_argb[3] = 255u;
136     dst_argb += 4;
137     src_rgb24 += 3;
138   }
139 }
140 
RAWToARGBRow_C(const uint8_t * src_raw,uint8_t * dst_argb,int width)141 void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
142   int x;
143   for (x = 0; x < width; ++x) {
144     uint8_t r = src_raw[0];
145     uint8_t g = src_raw[1];
146     uint8_t b = src_raw[2];
147     dst_argb[0] = b;
148     dst_argb[1] = g;
149     dst_argb[2] = r;
150     dst_argb[3] = 255u;
151     dst_argb += 4;
152     src_raw += 3;
153   }
154 }
155 
RAWToRGBARow_C(const uint8_t * src_raw,uint8_t * dst_rgba,int width)156 void RAWToRGBARow_C(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
157   int x;
158   for (x = 0; x < width; ++x) {
159     uint8_t r = src_raw[0];
160     uint8_t g = src_raw[1];
161     uint8_t b = src_raw[2];
162     dst_rgba[0] = 255u;
163     dst_rgba[1] = b;
164     dst_rgba[2] = g;
165     dst_rgba[3] = r;
166     dst_rgba += 4;
167     src_raw += 3;
168   }
169 }
170 
RAWToRGB24Row_C(const uint8_t * src_raw,uint8_t * dst_rgb24,int width)171 void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
172   int x;
173   for (x = 0; x < width; ++x) {
174     uint8_t r = src_raw[0];
175     uint8_t g = src_raw[1];
176     uint8_t b = src_raw[2];
177     dst_rgb24[0] = b;
178     dst_rgb24[1] = g;
179     dst_rgb24[2] = r;
180     dst_rgb24 += 3;
181     src_raw += 3;
182   }
183 }
184 
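// The low bit depth components below are expanded to 8 bits by replicating
// their high bits into the low bits, e.g. a 5 bit value b becomes
// (b << 3) | (b >> 2), so 0 maps to 0 and 31 maps to 255.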
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
    uint8_t g = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
    dst_argb[3] = 255u;
    dst_argb += 4;
    src_rgb565 += 2;
  }
}

void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
    uint8_t g = STATIC_CAST(
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
    uint8_t r = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
    uint8_t a = STATIC_CAST(uint8_t, src_argb1555[1] >> 7);
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
    dst_argb[3] = -a;
    dst_argb += 4;
    src_argb1555 += 2;
  }
}

void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = STATIC_CAST(uint8_t, src_argb4444[0] & 0x0f);
    uint8_t g = STATIC_CAST(uint8_t, src_argb4444[0] >> 4);
    uint8_t r = STATIC_CAST(uint8_t, src_argb4444[1] & 0x0f);
    uint8_t a = STATIC_CAST(uint8_t, src_argb4444[1] >> 4);
    dst_argb[0] = STATIC_CAST(uint8_t, (b << 4) | b);
    dst_argb[1] = STATIC_CAST(uint8_t, (g << 4) | g);
    dst_argb[2] = STATIC_CAST(uint8_t, (r << 4) | r);
    dst_argb[3] = STATIC_CAST(uint8_t, (a << 4) | a);
    dst_argb += 4;
    src_argb4444 += 2;
  }
}

void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    *(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24);
    dst_argb += 4;
    src_ar30 += 4;
  }
}

void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    *(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}

void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30;
    memcpy(&ar30, src_ar30, sizeof ar30);
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;
    uint32_t r = (ar30 >> 20) & 0x3ff;
    *(uint32_t*)(dst_ab30) = r | ga | (b << 20);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}

void ARGBToABGRRow_C(const uint8_t* src_argb, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    uint8_t a = src_argb[3];
    dst_abgr[0] = r;
    dst_abgr[1] = g;
    dst_abgr[2] = b;
    dst_abgr[3] = a;
    dst_abgr += 4;
    src_argb += 4;
  }
}

void ARGBToBGRARow_C(const uint8_t* src_argb, uint8_t* dst_bgra, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    uint8_t a = src_argb[3];
    dst_bgra[0] = a;
    dst_bgra[1] = r;
    dst_bgra[2] = g;
    dst_bgra[3] = b;
    dst_bgra += 4;
    src_argb += 4;
  }
}

void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgba, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    uint8_t a = src_argb[3];
    dst_rgba[0] = a;
    dst_rgba[1] = b;
    dst_rgba[2] = g;
    dst_rgba[3] = r;
    dst_rgba += 4;
    src_argb += 4;
  }
}

void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = b;
    dst_rgb[1] = g;
    dst_rgb[2] = r;
    dst_rgb += 3;
    src_argb += 4;
  }
}

void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = r;
    dst_rgb[1] = g;
    dst_rgb[2] = b;
    dst_rgb += 3;
    src_argb += 4;
  }
}

void RGBAToARGBRow_C(const uint8_t* src_rgba, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t a = src_rgba[0];
    uint8_t b = src_rgba[1];
    uint8_t g = src_rgba[2];
    uint8_t r = src_rgba[3];
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    dst_argb += 4;
    src_rgba += 4;
  }
}

void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t b1 = src_argb[4] >> 3;
    uint8_t g1 = src_argb[5] >> 2;
    uint8_t r1 = src_argb[6] >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
  }
}

// dither4 is a row of 4 values from a 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB.  When converting to
// fewer bits (565) this provides an ordered dither.
// The first byte of the matrix corresponds to the upper left pixel.
// The 4 values are passed as an int, then referenced as an array, so
// endian will not affect the order of the original matrix.  But dither4
// will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             uint32_t dither4,
                             int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    int dither0 = ((const unsigned char*)(&dither4))[x & 3];
    int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
    uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
    uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
    uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
    uint8_t b1 = STATIC_CAST(uint8_t, clamp255(src_argb[4] + dither1) >> 3);
    uint8_t g1 = STATIC_CAST(uint8_t, clamp255(src_argb[5] + dither1) >> 2);
    uint8_t r1 = STATIC_CAST(uint8_t, clamp255(src_argb[6] + dither1) >> 3);
    *(uint16_t*)(dst_rgb + 0) =
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
    *(uint16_t*)(dst_rgb + 2) =
        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
    uint8_t b0 = STATIC_CAST(uint8_t, clamp255(src_argb[0] + dither0) >> 3);
    uint8_t g0 = STATIC_CAST(uint8_t, clamp255(src_argb[1] + dither0) >> 2);
    uint8_t r0 = STATIC_CAST(uint8_t, clamp255(src_argb[2] + dither0) >> 3);
    *(uint16_t*)(dst_rgb) = STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
  }
}
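// Illustrative use (not part of the library API): a caller could pack one row
// of a dither matrix with hypothetical values {d0, d1, d2, d3}, d0 for the
// leftmost pixel, as
//   uint32_t dither4 = d0 | (d1 << 8) | (d2 << 16) | ((uint32_t)d3 << 24);
// On a little-endian host byte 0 (d0) is then applied to pixels 0, 4, 8, ...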

void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 3;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t a0 = src_argb[3] >> 7;
    uint8_t b1 = src_argb[4] >> 3;
    uint8_t g1 = src_argb[5] >> 3;
    uint8_t r1 = src_argb[6] >> 3;
    uint8_t a1 = src_argb[7] >> 7;
    *(uint16_t*)(dst_rgb + 0) =
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
    *(uint16_t*)(dst_rgb + 2) =
        STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | (a1 << 15));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 3;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t a0 = src_argb[3] >> 7;
    *(uint16_t*)(dst_rgb) =
        STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | (a0 << 15));
  }
}

void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 4;
    uint8_t g0 = src_argb[1] >> 4;
    uint8_t r0 = src_argb[2] >> 4;
    uint8_t a0 = src_argb[3] >> 4;
    uint8_t b1 = src_argb[4] >> 4;
    uint8_t g1 = src_argb[5] >> 4;
    uint8_t r1 = src_argb[6] >> 4;
    uint8_t a1 = src_argb[7] >> 4;
    *(uint16_t*)(dst_rgb + 0) =
        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
    *(uint16_t*)(dst_rgb + 2) =
        STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | (a1 << 12));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 4;
    uint8_t g0 = src_argb[1] >> 4;
    uint8_t r0 = src_argb[2] >> 4;
    uint8_t a0 = src_argb[3] >> 4;
    *(uint16_t*)(dst_rgb) =
        STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | (a0 << 12));
  }
}

void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t r0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    uint32_t b0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    uint32_t a0 = (src_abgr[3] >> 6);
    *(uint32_t*)(dst_ar30) =
        STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
    dst_ar30 += 4;
    src_abgr += 4;
  }
}

void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    uint32_t a0 = (src_argb[3] >> 6);
    *(uint32_t*)(dst_ar30) =
        STATIC_CAST(uint32_t, b0 | (g0 << 10) | (r0 << 20) | (a0 << 30));
    dst_ar30 += 4;
    src_argb += 4;
  }
}
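// The 8 to 10 bit expansion above replicates the 2 high bits into the 2 low
// bits: (v >> 6) | (v << 2), so 0 -> 0 and 255 -> 1023, covering the full
// 10 bit range without a multiply.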

void ARGBToAR64Row_C(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint16_t b = src_argb[0] * 0x0101;
    uint16_t g = src_argb[1] * 0x0101;
    uint16_t r = src_argb[2] * 0x0101;
    uint16_t a = src_argb[3] * 0x0101;
    dst_ar64[0] = b;
    dst_ar64[1] = g;
    dst_ar64[2] = r;
    dst_ar64[3] = a;
    dst_ar64 += 4;
    src_argb += 4;
  }
}
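// Multiplying an 8 bit value by 0x0101 replicates it into both bytes of the
// 16 bit result (0xff * 0x0101 = 0xffff), giving an exact full range 8 to
// 16 bit expansion.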

void ARGBToAB64Row_C(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint16_t b = src_argb[0] * 0x0101;
    uint16_t g = src_argb[1] * 0x0101;
    uint16_t r = src_argb[2] * 0x0101;
    uint16_t a = src_argb[3] * 0x0101;
    dst_ab64[0] = r;
    dst_ab64[1] = g;
    dst_ab64[2] = b;
    dst_ab64[3] = a;
    dst_ab64 += 4;
    src_argb += 4;
  }
}

void AR64ToARGBRow_C(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_ar64[0] >> 8;
    uint8_t g = src_ar64[1] >> 8;
    uint8_t r = src_ar64[2] >> 8;
    uint8_t a = src_ar64[3] >> 8;
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    dst_argb += 4;
    src_ar64 += 4;
  }
}

void AB64ToARGBRow_C(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t r = src_ab64[0] >> 8;
    uint8_t g = src_ab64[1] >> 8;
    uint8_t b = src_ab64[2] >> 8;
    uint8_t a = src_ab64[3] >> 8;
    dst_argb[0] = b;
    dst_argb[1] = g;
    dst_argb[2] = r;
    dst_argb[3] = a;
    dst_argb += 4;
    src_ab64 += 4;
  }
}

void AR64ToAB64Row_C(const uint16_t* src_ar64, uint16_t* dst_ab64, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint16_t b = src_ar64[0];
    uint16_t g = src_ar64[1];
    uint16_t r = src_ar64[2];
    uint16_t a = src_ar64[3];
    dst_ab64[0] = r;
    dst_ab64[1] = g;
    dst_ab64[2] = b;
    dst_ab64[3] = a;
    dst_ab64 += 4;
    src_ar64 += 4;
  }
}

// TODO(fbarchard): Make shuffle compatible with SIMD versions
void AR64ShuffleRow_C(const uint8_t* src_ar64,
                      uint8_t* dst_ar64,
                      const uint8_t* shuffler,
                      int width) {
  const uint16_t* src_ar64_16 = (const uint16_t*)src_ar64;
  uint16_t* dst_ar64_16 = (uint16_t*)dst_ar64;
  int index0 = shuffler[0] / 2;
  int index1 = shuffler[2] / 2;
  int index2 = shuffler[4] / 2;
  int index3 = shuffler[6] / 2;
  // Shuffle a row of AR64.
  int x;
  for (x = 0; x < width / 2; ++x) {
    // To support in-place conversion.
    uint16_t b = src_ar64_16[index0];
    uint16_t g = src_ar64_16[index1];
    uint16_t r = src_ar64_16[index2];
    uint16_t a = src_ar64_16[index3];
    dst_ar64_16[0] = b;
    dst_ar64_16[1] = g;
    dst_ar64_16[2] = r;
    dst_ar64_16[3] = a;
    src_ar64_16 += 4;
    dst_ar64_16 += 4;
  }
}
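// Illustrative example (hypothetical shuffler, not a named library constant):
// a shuffler whose byte pairs are {4, 5, 2, 3, 0, 1, 6, 7} selects 16 bit
// components 2, 1, 0, 3, swapping the B and R channels (AR64 <-> AB64).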

#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, ((33 * r + 65 * g + 13 * b) >> 7) + 16);
}
#else
// 8 bit
// Intel SSE/AVX uses the following equivalent formula
// 0x7e80 = (66 + 129 + 25) * 128 (undoing the -128 bias) + 0x1000 (for +16)
// and 0x0080 for round.
//  return (66 * ((int)r - 128) + 129 * ((int)g - 128) + 25 * ((int)b - 128) +
//  0x7e80) >> 8;

static __inline uint8_t RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (66 * r + 129 * g + 25 * b + 0x1080) >> 8);
}
#endif
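// Worked example for the 8 bit path: white (r = g = b = 255) gives
// (66 + 129 + 25) * 255 + 0x1080 = 60324, >> 8 = 235, and black gives
// 0x1080 >> 8 = 16, the BT.601 limited range extremes.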

#define AVGB(a, b) (((a) + (b) + 1) >> 1)

// LIBYUV_RGBTOU_TRUNCATE mimics x86 code that does not round.
#ifdef LIBYUV_RGBTOU_TRUNCATE
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8000) >> 8);
}
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8000) >> 8);
}
#else
// TODO(fbarchard): Add rounding to x86 SIMD and use this
static __inline uint8_t RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (112 * b - 74 * g - 38 * r + 0x8080) >> 8);
}
static __inline uint8_t RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  return STATIC_CAST(uint8_t, (112 * r - 94 * g - 18 * b + 0x8080) >> 8);
}
#endif
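// In both variants 0x8000 supplies the +128 chroma bias (128 << 8); the
// rounding variant adds a further 0x80 so the >> 8 rounds to nearest instead
// of truncating.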

// LIBYUV_ARGBTOUV_PAVGB mimics x86 code that subsamples with 2 pavgb.
#if !defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline int RGB2xToU(uint16_t r, uint16_t g, uint16_t b) {
  return STATIC_CAST(
      uint8_t, ((112 / 2) * b - (74 / 2) * g - (38 / 2) * r + 0x8080) >> 8);
}
static __inline int RGB2xToV(uint16_t r, uint16_t g, uint16_t b) {
  return STATIC_CAST(
      uint8_t, ((112 / 2) * r - (94 / 2) * g - (18 / 2) * b + 0x8080) >> 8);
}
#endif

// ARGBToY_C and ARGBToUV_C
// Intel version mimics SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                 \
    for (x = 0; x < width; ++x) {                                          \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                      \
      dst_y += 1;                                                          \
    }                                                                      \
  }                                                                        \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
    int x;                                                                 \
    for (x = 0; x < width - 1; x += 2) {                                   \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                     \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));        \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                     \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));        \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                     \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));        \
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
      src_rgb += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                 \
      dst_u += 1;                                                          \
      dst_v += 1;                                                          \
    }                                                                      \
    if (width & 1) {                                                       \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                          \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                          \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                          \
      dst_u[0] = RGBToU(ar, ag, ab);                                       \
      dst_v[0] = RGBToV(ar, ag, ab);                                       \
    }                                                                      \
  }
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWY(NAME, R, G, B, BPP)                                       \
  void NAME##ToYRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                 \
    for (x = 0; x < width; ++x) {                                          \
      dst_y[0] = RGBToY(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                      \
      dst_y += 1;                                                          \
    }                                                                      \
  }                                                                        \
  void NAME##ToUVRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                    \
    int x;                                                                 \
    for (x = 0; x < width - 1; x += 2) {                                   \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +         \
                     src_rgb1[B + BPP] + 1) >>                             \
                    1;                                                     \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +         \
                     src_rgb1[G + BPP] + 1) >>                             \
                    1;                                                     \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +         \
                     src_rgb1[R + BPP] + 1) >>                             \
                    1;                                                     \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                     \
      src_rgb += BPP * 2;                                                  \
      src_rgb1 += BPP * 2;                                                 \
      dst_u += 1;                                                          \
      dst_v += 1;                                                          \
    }                                                                      \
    if (width & 1) {                                                       \
      uint16_t ab = src_rgb[B] + src_rgb1[B];                              \
      uint16_t ag = src_rgb[G] + src_rgb1[G];                              \
      uint16_t ar = src_rgb[R] + src_rgb1[R];                              \
      dst_u[0] = RGB2xToU(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToV(ar, ag, ab);                                     \
    }                                                                      \
  }
#endif

MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
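// For example, MAKEROWY(ARGB, 2, 1, 0, 4) above expands to ARGBToYRow_C and
// ARGBToUVRow_C for 4 byte ARGB pixels with B in byte 0, G in byte 1 and R in
// byte 2; the UV rows average a 2x2 block of pixels across two source rows.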

// JPeg uses a variation on BT.601-1 full range
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPeg 7 bit Y (deprecated)
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPeg 8 bit Y:
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPeg 8 bit U:
// b  0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPeg 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r  0.50000 * 255 = 127.5 = 127

#ifdef LIBYUV_RGB7
// Old 7 bit math for compatibility on unsupported platforms.
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (38 * r + 75 * g + 15 * b + 64) >> 7;
}
#else
// 8 bit
static __inline uint8_t RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  return (77 * r + 150 * g + 29 * b + 128) >> 8;
}
#endif
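// Worked example for the 8 bit full range path: white (r = g = b = 255) gives
// (77 + 150 + 29) * 255 + 128 = 65408, >> 8 = 255, and black gives 0, unlike
// the limited range RGBToY above which maps them to 235 and 16.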

#if defined(LIBYUV_ARGBTOUV_PAVGB)
static __inline uint8_t RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
}
static __inline uint8_t RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
}
#else
static __inline uint8_t RGB2xToUJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * b - (84 / 2) * g - (43 / 2) * r + 0x8080) >> 8;
}
static __inline uint8_t RGB2xToVJ(uint16_t r, uint16_t g, uint16_t b) {
  return ((127 / 2) * r - (107 / 2) * g - (20 / 2) * b + 0x8080) >> 8;
}
#endif

// ARGBToYJ_C and ARGBToUVJ_C
// Intel version mimics SSE/AVX which does 2 pavgb
#if LIBYUV_ARGBTOUV_PAVGB
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                  \
    for (x = 0; x < width; ++x) {                                           \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                       \
      dst_y += 1;                                                           \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
    int x;                                                                  \
    for (x = 0; x < width - 1; x += 2) {                                    \
      uint8_t ab = AVGB(AVGB(src_rgb[B], src_rgb1[B]),                      \
                        AVGB(src_rgb[B + BPP], src_rgb1[B + BPP]));         \
      uint8_t ag = AVGB(AVGB(src_rgb[G], src_rgb1[G]),                      \
                        AVGB(src_rgb[G + BPP], src_rgb1[G + BPP]));         \
      uint8_t ar = AVGB(AVGB(src_rgb[R], src_rgb1[R]),                      \
                        AVGB(src_rgb[R + BPP], src_rgb1[R + BPP]));         \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
      src_rgb += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                  \
      dst_u += 1;                                                           \
      dst_v += 1;                                                           \
    }                                                                       \
    if (width & 1) {                                                        \
      uint8_t ab = AVGB(src_rgb[B], src_rgb1[B]);                           \
      uint8_t ag = AVGB(src_rgb[G], src_rgb1[G]);                           \
      uint8_t ar = AVGB(src_rgb[R], src_rgb1[R]);                           \
      dst_u[0] = RGBToUJ(ar, ag, ab);                                       \
      dst_v[0] = RGBToVJ(ar, ag, ab);                                       \
    }                                                                       \
  }
#else
// ARM version does sum / 2 then multiply by 2x smaller coefficients
#define MAKEROWYJ(NAME, R, G, B, BPP)                                       \
  void NAME##ToYJRow_C(const uint8_t* src_rgb, uint8_t* dst_y, int width) { \
    int x;                                                                  \
    for (x = 0; x < width; ++x) {                                           \
      dst_y[0] = RGBToYJ(src_rgb[R], src_rgb[G], src_rgb[B]);               \
      src_rgb += BPP;                                                       \
      dst_y += 1;                                                           \
    }                                                                       \
  }                                                                         \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb, int src_stride_rgb,         \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {        \
    const uint8_t* src_rgb1 = src_rgb + src_stride_rgb;                     \
    int x;                                                                  \
    for (x = 0; x < width - 1; x += 2) {                                    \
      uint16_t ab = (src_rgb[B] + src_rgb[B + BPP] + src_rgb1[B] +          \
                     src_rgb1[B + BPP] + 1) >>                              \
                    1;                                                      \
      uint16_t ag = (src_rgb[G] + src_rgb[G + BPP] + src_rgb1[G] +          \
                     src_rgb1[G + BPP] + 1) >>                              \
                    1;                                                      \
      uint16_t ar = (src_rgb[R] + src_rgb[R + BPP] + src_rgb1[R] +          \
                     src_rgb1[R + BPP] + 1) >>                              \
                    1;                                                      \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                     \
      src_rgb += BPP * 2;                                                   \
      src_rgb1 += BPP * 2;                                                  \
      dst_u += 1;                                                           \
      dst_v += 1;                                                           \
    }                                                                       \
    if (width & 1) {                                                        \
      uint16_t ab = (src_rgb[B] + src_rgb1[B]);                             \
      uint16_t ag = (src_rgb[G] + src_rgb1[G]);                             \
      uint16_t ar = (src_rgb[R] + src_rgb1[R]);                             \
      dst_u[0] = RGB2xToUJ(ar, ag, ab);                                     \
      dst_v[0] = RGB2xToVJ(ar, ag, ab);                                     \
    }                                                                       \
  }

#endif

MAKEROWYJ(ARGB, 2, 1, 0, 4)
MAKEROWYJ(ABGR, 0, 1, 2, 4)
MAKEROWYJ(RGBA, 3, 2, 1, 4)
MAKEROWYJ(RGB24, 2, 1, 0, 3)
MAKEROWYJ(RAW, 0, 1, 2, 3)
#undef MAKEROWYJ

void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_rgb565[0] & 0x1f;
    uint8_t g = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r = src_rgb565[1] >> 3;
    b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
    g = STATIC_CAST(uint8_t, (g << 2) | (g >> 4));
    r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
    dst_y[0] = RGBToY(r, g, b);
    src_rgb565 += 2;
    dst_y += 1;
  }
}

void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb1555[0] & 0x1f;
    uint8_t g = STATIC_CAST(
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
    uint8_t r = (src_argb1555[1] & 0x7c) >> 2;
    b = STATIC_CAST(uint8_t, (b << 3) | (b >> 2));
    g = STATIC_CAST(uint8_t, (g << 3) | (g >> 2));
    r = STATIC_CAST(uint8_t, (r << 3) | (r >> 2));
    dst_y[0] = RGBToY(r, g, b);
    src_argb1555 += 2;
    dst_y += 1;
  }
}

void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb4444[0] & 0x0f;
    uint8_t g = src_argb4444[0] >> 4;
    uint8_t r = src_argb4444[1] & 0x0f;
    b = STATIC_CAST(uint8_t, (b << 4) | b);
    g = STATIC_CAST(uint8_t, (g << 4) | g);
    r = STATIC_CAST(uint8_t, (r << 4) | r);
    dst_y[0] = RGBToY(r, g, b);
    src_argb4444 += 2;
    dst_y += 1;
  }
}

void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
    uint8_t b1 = STATIC_CAST(uint8_t, src_rgb565[2] & 0x1f);
    uint8_t g1 = STATIC_CAST(
        uint8_t, (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3));
    uint8_t r1 = STATIC_CAST(uint8_t, src_rgb565[3] >> 3);
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
    uint8_t b3 = STATIC_CAST(uint8_t, next_rgb565[2] & 0x1f);
    uint8_t g3 = STATIC_CAST(
        uint8_t, (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3));
    uint8_t r3 = STATIC_CAST(uint8_t, next_rgb565[3] >> 3);

    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
    g1 = STATIC_CAST(uint8_t, (g1 << 2) | (g1 >> 4));
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
    g3 = STATIC_CAST(uint8_t, (g3 << 2) | (g3 >> 4));
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_rgb565 += 4;
    next_rgb565 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = STATIC_CAST(uint8_t, src_rgb565[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, src_rgb565[1] >> 3);
    uint8_t b2 = STATIC_CAST(uint8_t, next_rgb565[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, next_rgb565[1] >> 3);
    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 2) | (g0 >> 4));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 2) | (g2 >> 4));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}

void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
    uint8_t b1 = STATIC_CAST(uint8_t, src_argb1555[2] & 0x1f);
    uint8_t g1 = STATIC_CAST(
        uint8_t, (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3));
    uint8_t r1 = STATIC_CAST(uint8_t, (src_argb1555[3] & 0x7c) >> 2);
    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);
    uint8_t b3 = STATIC_CAST(uint8_t, next_argb1555[2] & 0x1f);
    uint8_t g3 = STATIC_CAST(
        uint8_t, (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3));
    uint8_t r3 = STATIC_CAST(uint8_t, (next_argb1555[3] & 0x7c) >> 2);

    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b1 = STATIC_CAST(uint8_t, (b1 << 3) | (b1 >> 2));
    g1 = STATIC_CAST(uint8_t, (g1 << 3) | (g1 >> 2));
    r1 = STATIC_CAST(uint8_t, (r1 << 3) | (r1 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));
    b3 = STATIC_CAST(uint8_t, (b3 << 3) | (b3 >> 2));
    g3 = STATIC_CAST(uint8_t, (g3 << 3) | (g3 >> 2));
    r3 = STATIC_CAST(uint8_t, (r3 << 3) | (r3 >> 2));

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = STATIC_CAST(uint8_t, src_argb1555[0] & 0x1f);
    uint8_t g0 = STATIC_CAST(
        uint8_t, (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3));
    uint8_t r0 = STATIC_CAST(uint8_t, (src_argb1555[1] & 0x7c) >> 2);
    uint8_t b2 = STATIC_CAST(uint8_t, next_argb1555[0] & 0x1f);
    uint8_t g2 = STATIC_CAST(
        uint8_t, (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
    uint8_t r2 = STATIC_CAST(uint8_t, (next_argb1555[1] & 0x7c) >> 2);

    b0 = STATIC_CAST(uint8_t, (b0 << 3) | (b0 >> 2));
    g0 = STATIC_CAST(uint8_t, (g0 << 3) | (g0 >> 2));
    r0 = STATIC_CAST(uint8_t, (r0 << 3) | (r0 >> 2));
    b2 = STATIC_CAST(uint8_t, (b2 << 3) | (b2 >> 2));
    g2 = STATIC_CAST(uint8_t, (g2 << 3) | (g2 >> 2));
    r2 = STATIC_CAST(uint8_t, (r2 << 3) | (r2 >> 2));

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}

void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b1 = src_argb4444[2] & 0x0f;
    uint8_t g1 = src_argb4444[2] >> 4;
    uint8_t r1 = src_argb4444[3] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b3 = next_argb4444[2] & 0x0f;
    uint8_t g3 = next_argb4444[2] >> 4;
    uint8_t r3 = next_argb4444[3] & 0x0f;

    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
    b1 = STATIC_CAST(uint8_t, (b1 << 4) | b1);
    g1 = STATIC_CAST(uint8_t, (g1 << 4) | g1);
    r1 = STATIC_CAST(uint8_t, (r1 << 4) | r1);
    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);
    b3 = STATIC_CAST(uint8_t, (b3 << 4) | b3);
    g3 = STATIC_CAST(uint8_t, (g3 << 4) | g3);
    r3 = STATIC_CAST(uint8_t, (r3 << 4) | r3);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(AVGB(b0, b2), AVGB(b1, b3));
    uint8_t ag = AVGB(AVGB(g0, g2), AVGB(g1, g3));
    uint8_t ar = AVGB(AVGB(r0, r2), AVGB(r1, r3));
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = (b0 + b1 + b2 + b3 + 1) >> 1;
    uint16_t g = (g0 + g1 + g2 + g3 + 1) >> 1;
    uint16_t r = (r0 + r1 + r2 + r3 + 1) >> 1;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif

    src_argb4444 += 4;
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;

    b0 = STATIC_CAST(uint8_t, (b0 << 4) | b0);
    g0 = STATIC_CAST(uint8_t, (g0 << 4) | g0);
    r0 = STATIC_CAST(uint8_t, (r0 << 4) | r0);
    b2 = STATIC_CAST(uint8_t, (b2 << 4) | b2);
    g2 = STATIC_CAST(uint8_t, (g2 << 4) | g2);
    r2 = STATIC_CAST(uint8_t, (r2 << 4) | r2);

#if LIBYUV_ARGBTOUV_PAVGB
    uint8_t ab = AVGB(b0, b2);
    uint8_t ag = AVGB(g0, g2);
    uint8_t ar = AVGB(r0, r2);
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
#else
    uint16_t b = b0 + b2;
    uint16_t g = g0 + g2;
    uint16_t r = r0 + r2;
    dst_u[0] = RGB2xToU(r, g, b);
    dst_v[0] = RGB2xToV(r, g, b);
#endif
  }
}

void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t ab = src_argb[0];
    uint8_t ag = src_argb[1];
    uint8_t ar = src_argb[2];
    dst_u[0] = RGBToU(ar, ag, ab);
    dst_v[0] = RGBToV(ar, ag, ab);
    src_argb += 4;
    dst_u += 1;
    dst_v += 1;
  }
}

void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    dst_argb[3] = src_argb[3];
    dst_argb += 4;
    src_argb += 4;
  }
}

// Convert a row of image to Sepia tone.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int b = dst_argb[0];
    int g = dst_argb[1];
    int r = dst_argb[2];
    int sb = (b * 17 + g * 68 + r * 35) >> 7;
    int sg = (b * 22 + g * 88 + r * 45) >> 7;
    int sr = (b * 24 + g * 98 + r * 50) >> 7;
    // b does not overflow. a is preserved from original.
    dst_argb[0] = STATIC_CAST(uint8_t, sb);
    dst_argb[1] = STATIC_CAST(uint8_t, clamp255(sg));
    dst_argb[2] = STATIC_CAST(uint8_t, clamp255(sr));
    dst_argb += 4;
  }
}
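// With the coefficients above the blue result is at most
// (17 + 68 + 35) * 255 >> 7 = 239, so only sg and sr need clamp255.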
1261 
1262 // Apply color matrix to a row of image. Matrix is signed.
1263 // TODO(fbarchard): Consider adding rounding (+32).
ARGBColorMatrixRow_C(const uint8_t * src_argb,uint8_t * dst_argb,const int8_t * matrix_argb,int width)1264 void ARGBColorMatrixRow_C(const uint8_t* src_argb,
1265                           uint8_t* dst_argb,
1266                           const int8_t* matrix_argb,
1267                           int width) {
1268   int x;
1269   for (x = 0; x < width; ++x) {
1270     int b = src_argb[0];
1271     int g = src_argb[1];
1272     int r = src_argb[2];
1273     int a = src_argb[3];
1274     int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] +
1275               a * matrix_argb[3]) >>
1276              6;
1277     int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] +
1278               a * matrix_argb[7]) >>
1279              6;
1280     int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] +
1281               a * matrix_argb[11]) >>
1282              6;
1283     int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] +
1284               a * matrix_argb[15]) >>
1285              6;
1286     dst_argb[0] = STATIC_CAST(uint8_t, Clamp(sb));
1287     dst_argb[1] = STATIC_CAST(uint8_t, Clamp(sg));
1288     dst_argb[2] = STATIC_CAST(uint8_t, Clamp(sr));
1289     dst_argb[3] = STATIC_CAST(uint8_t, Clamp(sa));
1290     src_argb += 4;
1291     dst_argb += 4;
1292   }
1293 }
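
// The matrix coefficients are 6 bit fixed point (1.0 == 64), so an identity
// transform would use matrix_argb = {64, 0, 0, 0,  0, 64, 0, 0,
//                                    0, 0, 64, 0,  0, 0, 0, 64}.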
1294 
1295 // Apply a color table to a row of an image.
1296 void ARGBColorTableRow_C(uint8_t* dst_argb,
1297                          const uint8_t* table_argb,
1298                          int width) {
1299   int x;
1300   for (x = 0; x < width; ++x) {
1301     int b = dst_argb[0];
1302     int g = dst_argb[1];
1303     int r = dst_argb[2];
1304     int a = dst_argb[3];
1305     dst_argb[0] = table_argb[b * 4 + 0];
1306     dst_argb[1] = table_argb[g * 4 + 1];
1307     dst_argb[2] = table_argb[r * 4 + 2];
1308     dst_argb[3] = table_argb[a * 4 + 3];
1309     dst_argb += 4;
1310   }
1311 }
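
// table_argb is laid out as 256 interleaved 4 byte entries, one per channel:
// channel c of value v is looked up at table_argb[v * 4 + c].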
1312 
1313 // Apply a color table to a row of an image.
1314 void RGBColorTableRow_C(uint8_t* dst_argb,
1315                         const uint8_t* table_argb,
1316                         int width) {
1317   int x;
1318   for (x = 0; x < width; ++x) {
1319     int b = dst_argb[0];
1320     int g = dst_argb[1];
1321     int r = dst_argb[2];
1322     dst_argb[0] = table_argb[b * 4 + 0];
1323     dst_argb[1] = table_argb[g * 4 + 1];
1324     dst_argb[2] = table_argb[r * 4 + 2];
1325     dst_argb += 4;
1326   }
1327 }
1328 
1329 void ARGBQuantizeRow_C(uint8_t* dst_argb,
1330                        int scale,
1331                        int interval_size,
1332                        int interval_offset,
1333                        int width) {
1334   int x;
1335   for (x = 0; x < width; ++x) {
1336     int b = dst_argb[0];
1337     int g = dst_argb[1];
1338     int r = dst_argb[2];
1339     dst_argb[0] = STATIC_CAST(
1340         uint8_t, (b * scale >> 16) * interval_size + interval_offset);
1341     dst_argb[1] = STATIC_CAST(
1342         uint8_t, (g * scale >> 16) * interval_size + interval_offset);
1343     dst_argb[2] = STATIC_CAST(
1344         uint8_t, (r * scale >> 16) * interval_size + interval_offset);
1345     dst_argb += 4;
1346   }
1347 }
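
// scale acts as a 16.16 fixed point multiplier, typically chosen by the
// caller as roughly 65536 / interval_size. For example, assuming
// interval_size = 32, interval_offset = 16 and scale = 2048, a value of 100
// maps to (100 * 2048 >> 16) * 32 + 16 = 112.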
1348 
1349 #define REPEAT8(v) (v) | ((v) << 8)
1350 #define SHADE(f, v) ((v) * (f) >> 24)
1351 
1352 void ARGBShadeRow_C(const uint8_t* src_argb,
1353                     uint8_t* dst_argb,
1354                     int width,
1355                     uint32_t value) {
1356   const uint32_t b_scale = REPEAT8(value & 0xff);
1357   const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
1358   const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
1359   const uint32_t a_scale = REPEAT8(value >> 24);
1360 
1361   int i;
1362   for (i = 0; i < width; ++i) {
1363     const uint32_t b = REPEAT8(src_argb[0]);
1364     const uint32_t g = REPEAT8(src_argb[1]);
1365     const uint32_t r = REPEAT8(src_argb[2]);
1366     const uint32_t a = REPEAT8(src_argb[3]);
1367     dst_argb[0] = SHADE(b, b_scale);
1368     dst_argb[1] = SHADE(g, g_scale);
1369     dst_argb[2] = SHADE(r, r_scale);
1370     dst_argb[3] = SHADE(a, a_scale);
1371     src_argb += 4;
1372     dst_argb += 4;
1373   }
1374 }
1375 #undef REPEAT8
1376 #undef SHADE
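
// REPEAT8 widens an 8 bit value v to 16 bits (v | v << 8 == v * 257), and
// SHADE multiplies two widened values and drops 24 bits, which is
// approximately src * shade / 255. For example, shading 255 by 0x80 gives
// (65535 * 32896) >> 24 = 128.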
1377 
1378 #define REPEAT8(v) (v) | ((v) << 8)
1379 #define SHADE(f, v) ((v) * (f) >> 16)
1380 
1381 void ARGBMultiplyRow_C(const uint8_t* src_argb,
1382                        const uint8_t* src_argb1,
1383                        uint8_t* dst_argb,
1384                        int width) {
1385   int i;
1386   for (i = 0; i < width; ++i) {
1387     const uint32_t b = REPEAT8(src_argb[0]);
1388     const uint32_t g = REPEAT8(src_argb[1]);
1389     const uint32_t r = REPEAT8(src_argb[2]);
1390     const uint32_t a = REPEAT8(src_argb[3]);
1391     const uint32_t b_scale = src_argb1[0];
1392     const uint32_t g_scale = src_argb1[1];
1393     const uint32_t r_scale = src_argb1[2];
1394     const uint32_t a_scale = src_argb1[3];
1395     dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_scale));
1396     dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_scale));
1397     dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_scale));
1398     dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_scale));
1399     src_argb += 4;
1400     src_argb1 += 4;
1401     dst_argb += 4;
1402   }
1403 }
1404 #undef REPEAT8
1405 #undef SHADE
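
// Here only the first operand is widened, so the product >> 16 is
// approximately src0 * src1 / 255. For example, 128 * 128 gives
// (32896 * 128) >> 16 = 64.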
1406 
1407 #define SHADE(f, v) clamp255(v + f)
1408 
1409 void ARGBAddRow_C(const uint8_t* src_argb,
1410                   const uint8_t* src_argb1,
1411                   uint8_t* dst_argb,
1412                   int width) {
1413   int i;
1414   for (i = 0; i < width; ++i) {
1415     const int b = src_argb[0];
1416     const int g = src_argb[1];
1417     const int r = src_argb[2];
1418     const int a = src_argb[3];
1419     const int b_add = src_argb1[0];
1420     const int g_add = src_argb1[1];
1421     const int r_add = src_argb1[2];
1422     const int a_add = src_argb1[3];
1423     dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_add));
1424     dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_add));
1425     dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_add));
1426     dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_add));
1427     src_argb += 4;
1428     src_argb1 += 4;
1429     dst_argb += 4;
1430   }
1431 }
1432 #undef SHADE
1433 
1434 #define SHADE(f, v) clamp0(f - v)
1435 
1436 void ARGBSubtractRow_C(const uint8_t* src_argb,
1437                        const uint8_t* src_argb1,
1438                        uint8_t* dst_argb,
1439                        int width) {
1440   int i;
1441   for (i = 0; i < width; ++i) {
1442     const int b = src_argb[0];
1443     const int g = src_argb[1];
1444     const int r = src_argb[2];
1445     const int a = src_argb[3];
1446     const int b_sub = src_argb1[0];
1447     const int g_sub = src_argb1[1];
1448     const int r_sub = src_argb1[2];
1449     const int a_sub = src_argb1[3];
1450     dst_argb[0] = STATIC_CAST(uint8_t, SHADE(b, b_sub));
1451     dst_argb[1] = STATIC_CAST(uint8_t, SHADE(g, g_sub));
1452     dst_argb[2] = STATIC_CAST(uint8_t, SHADE(r, r_sub));
1453     dst_argb[3] = STATIC_CAST(uint8_t, SHADE(a, a_sub));
1454     src_argb += 4;
1455     src_argb1 += 4;
1456     dst_argb += 4;
1457   }
1458 }
1459 #undef SHADE
1460 
1461 // Sobel functions that mimic the SSSE3 versions.
1462 void SobelXRow_C(const uint8_t* src_y0,
1463                  const uint8_t* src_y1,
1464                  const uint8_t* src_y2,
1465                  uint8_t* dst_sobelx,
1466                  int width) {
1467   int i;
1468   for (i = 0; i < width; ++i) {
1469     int a = src_y0[i];
1470     int b = src_y1[i];
1471     int c = src_y2[i];
1472     int a_sub = src_y0[i + 2];
1473     int b_sub = src_y1[i + 2];
1474     int c_sub = src_y2[i + 2];
1475     int a_diff = a - a_sub;
1476     int b_diff = b - b_sub;
1477     int c_diff = c - c_sub;
1478     int sobel = Abs(a_diff + b_diff * 2 + c_diff);
1479     dst_sobelx[i] = (uint8_t)(clamp255(sobel));
1480   }
1481 }
1482 
1483 void SobelYRow_C(const uint8_t* src_y0,
1484                  const uint8_t* src_y1,
1485                  uint8_t* dst_sobely,
1486                  int width) {
1487   int i;
1488   for (i = 0; i < width; ++i) {
1489     int a = src_y0[i + 0];
1490     int b = src_y0[i + 1];
1491     int c = src_y0[i + 2];
1492     int a_sub = src_y1[i + 0];
1493     int b_sub = src_y1[i + 1];
1494     int c_sub = src_y1[i + 2];
1495     int a_diff = a - a_sub;
1496     int b_diff = b - b_sub;
1497     int c_diff = c - c_sub;
1498     int sobel = Abs(a_diff + b_diff * 2 + c_diff);
1499     dst_sobely[i] = (uint8_t)(clamp255(sobel));
1500   }
1501 }
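
// SobelXRow_C above applies the horizontal 3x3 kernel
//   { 1, 0, -1,
//     2, 0, -2,
//     1, 0, -1 }
// across rows y0, y1, y2, and SobelYRow_C applies { 1, 2, 1, -1, -2, -1 }
// across the two rows y0 and y1; both take the absolute value and clamp the
// result to 255.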
1502 
1503 void SobelRow_C(const uint8_t* src_sobelx,
1504                 const uint8_t* src_sobely,
1505                 uint8_t* dst_argb,
1506                 int width) {
1507   int i;
1508   for (i = 0; i < width; ++i) {
1509     int r = src_sobelx[i];
1510     int b = src_sobely[i];
1511     int s = clamp255(r + b);
1512     dst_argb[0] = (uint8_t)(s);
1513     dst_argb[1] = (uint8_t)(s);
1514     dst_argb[2] = (uint8_t)(s);
1515     dst_argb[3] = (uint8_t)(255u);
1516     dst_argb += 4;
1517   }
1518 }
1519 
1520 void SobelToPlaneRow_C(const uint8_t* src_sobelx,
1521                        const uint8_t* src_sobely,
1522                        uint8_t* dst_y,
1523                        int width) {
1524   int i;
1525   for (i = 0; i < width; ++i) {
1526     int r = src_sobelx[i];
1527     int b = src_sobely[i];
1528     int s = clamp255(r + b);
1529     dst_y[i] = (uint8_t)(s);
1530   }
1531 }
1532 
1533 void SobelXYRow_C(const uint8_t* src_sobelx,
1534                   const uint8_t* src_sobely,
1535                   uint8_t* dst_argb,
1536                   int width) {
1537   int i;
1538   for (i = 0; i < width; ++i) {
1539     int r = src_sobelx[i];
1540     int b = src_sobely[i];
1541     int g = clamp255(r + b);
1542     dst_argb[0] = (uint8_t)(b);
1543     dst_argb[1] = (uint8_t)(g);
1544     dst_argb[2] = (uint8_t)(r);
1545     dst_argb[3] = (uint8_t)(255u);
1546     dst_argb += 4;
1547   }
1548 }
1549 
1550 void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
1551   // Copy a Y to RGB.
1552   int x;
1553   for (x = 0; x < width; ++x) {
1554     uint8_t y = src_y[0];
1555     dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
1556     dst_argb[3] = 255u;
1557     dst_argb += 4;
1558     ++src_y;
1559   }
1560 }
1561 
1562 // Macros to create SIMD specific YUV to RGB conversion constants.
1563 
1564 // clang-format off
1565 
1566 #if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1567 // Bias values include the subtraction of 128 from U and V, the Y bias and rounding.
1568 // For B and R the bias is negative; for G it is positive.
1569 #define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                             \
1570   {{UB, VR, UG, VG, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},                     \
1571    {YG, (UB * 128 - YB), (UG * 128 + VG * 128 + YB), (VR * 128 - YB), YB, 0, \
1572     0, 0}}
1573 #else
1574 #define YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR)                     \
1575   {{UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,          \
1576     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},         \
1577    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,  \
1578     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, \
1579    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,          \
1580     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},         \
1581    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}, \
1582    {YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB, YB}}
1583 #endif
1584 
1585 // clang-format on
1586 
1587 #define MAKEYUVCONSTANTS(name, YG, YB, UB, UG, VG, VR)            \
1588   const struct YuvConstants SIMD_ALIGNED(kYuv##name##Constants) = \
1589       YUVCONSTANTSBODY(YG, YB, UB, UG, VG, VR);                   \
1590   const struct YuvConstants SIMD_ALIGNED(kYvu##name##Constants) = \
1591       YUVCONSTANTSBODY(YG, YB, VR, VG, UG, UB);
1592 
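// Each MAKEYUVCONSTANTS invocation emits two tables, kYuv<name>Constants and
// kYvu<name>Constants; the Yvu variant simply swaps the U and V coefficients
// so the roles of the two chroma channels can be exchanged by callers.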
1593 // TODO(fbarchard): Generate SIMD structures from float matrix.
1594 
1595 // BT.601 limited range YUV to RGB reference
1596 //  R = (Y - 16) * 1.164             + V * 1.596
1597 //  G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
1598 //  B = (Y - 16) * 1.164 + U * 2.018
1599 // KR = 0.299; KB = 0.114
1600 
1601 // U and V contributions to R,G,B.
1602 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT601)
1603 #define UB 129 /* round(2.018 * 64) */
1604 #else
1605 #define UB 128 /* min(128, round(2.018 * 64)) */
1606 #endif
1607 #define UG 25  /* round(0.391 * 64) */
1608 #define VG 52  /* round(0.813 * 64) */
1609 #define VR 102 /* round(1.596 * 64) */
1610 
1611 // Y contribution to R,G,B.  Scale and bias.
1612 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1613 #define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1614 
1615 MAKEYUVCONSTANTS(I601, YG, YB, UB, UG, VG, VR)
1616 
1617 #undef YG
1618 #undef YB
1619 #undef UB
1620 #undef UG
1621 #undef VG
1622 #undef VR
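
// Quick check of the I601 constants using the non-ARM CALC_RGB16 path defined
// below, with U = V = 128 (no chroma):
//   Y = 16:  ((16 * 0x0101 * 18997) >> 16) - 1160 = 31;     31 >> 6 = 0
//   Y = 235: ((235 * 0x0101 * 18997) >> 16) - 1160 = 16346; 16346 >> 6 = 255
// i.e. limited range black and white expand to full range 0 and 255.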
1623 
1624 // BT.601 full range YUV to RGB reference (aka JPEG)
1625 // *  R = Y               + V * 1.40200
1626 // *  G = Y - U * 0.34414 - V * 0.71414
1627 // *  B = Y + U * 1.77200
1628 // KR = 0.299; KB = 0.114
1629 
1630 // U and V contributions to R,G,B.
1631 #define UB 113 /* round(1.77200 * 64) */
1632 #define UG 22  /* round(0.34414 * 64) */
1633 #define VG 46  /* round(0.71414 * 64) */
1634 #define VR 90  /* round(1.40200 * 64) */
1635 
1636 // Y contribution to R,G,B.  Scale and bias.
1637 #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
1638 #define YB 32    /* 64 / 2 */
1639 
1640 MAKEYUVCONSTANTS(JPEG, YG, YB, UB, UG, VG, VR)
1641 
1642 #undef YG
1643 #undef YB
1644 #undef UB
1645 #undef UG
1646 #undef VG
1647 #undef VR
1648 
1649 // BT.709 limited range YUV to RGB reference
1650 //  R = (Y - 16) * 1.164             + V * 1.793
1651 //  G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
1652 //  B = (Y - 16) * 1.164 + U * 2.112
1653 //  KR = 0.2126, KB = 0.0722
1654 
1655 // U and V contributions to R,G,B.
1656 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT709)
1657 #define UB 135 /* round(2.112 * 64) */
1658 #else
1659 #define UB 128 /* min(128, round(2.112 * 64)) */
1660 #endif
1661 #define UG 14  /* round(0.213 * 64) */
1662 #define VG 34  /* round(0.533 * 64) */
1663 #define VR 115 /* round(1.793 * 64) */
1664 
1665 // Y contribution to R,G,B.  Scale and bias.
1666 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
1667 #define YB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
1668 
1669 MAKEYUVCONSTANTS(H709, YG, YB, UB, UG, VG, VR)
1670 
1671 #undef YG
1672 #undef YB
1673 #undef UB
1674 #undef UG
1675 #undef VG
1676 #undef VR
1677 
1678 // BT.709 full range YUV to RGB reference
1679 //  R = Y               + V * 1.5748
1680 //  G = Y - U * 0.18732 - V * 0.46812
1681 //  B = Y + U * 1.8556
1682 //  KR = 0.2126, KB = 0.0722
1683 
1684 // U and V contributions to R,G,B.
1685 #define UB 119 /* round(1.8556 * 64) */
1686 #define UG 12  /* round(0.18732 * 64) */
1687 #define VG 30  /* round(0.46812 * 64) */
1688 #define VR 101 /* round(1.5748 * 64) */
1689 
1690 // Y contribution to R,G,B.  Scale and bias.  (same as jpeg)
1691 #define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1692 #define YB 32    /* 64 / 2 */
1693 
1694 MAKEYUVCONSTANTS(F709, YG, YB, UB, UG, VG, VR)
1695 
1696 #undef YG
1697 #undef YB
1698 #undef UB
1699 #undef UG
1700 #undef VG
1701 #undef VR
1702 
1703 // BT.2020 limited range YUV to RGB reference
1704 //  R = (Y - 16) * 1.164384                + V * 1.67867
1705 //  G = (Y - 16) * 1.164384 - U * 0.187326 - V * 0.65042
1706 //  B = (Y - 16) * 1.164384 + U * 2.14177
1707 // KR = 0.2627; KB = 0.0593
1708 
1709 // U and V contributions to R,G,B.
1710 #if defined(LIBYUV_UNLIMITED_DATA) || defined(LIBYUV_UNLIMITED_BT2020)
1711 #define UB 137 /* round(2.142 * 64) */
1712 #else
1713 #define UB 128 /* min(128, round(2.142 * 64)) */
1714 #endif
1715 #define UG 12  /* round(0.187326 * 64) */
1716 #define VG 42  /* round(0.65042 * 64) */
1717 #define VR 107 /* round(1.67867 * 64) */
1718 
1719 // Y contribution to R,G,B.  Scale and bias.
1720 #define YG 19003 /* round(1.164384 * 64 * 256 * 256 / 257) */
1721 #define YB -1160 /* 1.164384 * 64 * -16 + 64 / 2 */
1722 
1723 MAKEYUVCONSTANTS(2020, YG, YB, UB, UG, VG, VR)
1724 
1725 #undef YG
1726 #undef YB
1727 #undef UB
1728 #undef UG
1729 #undef VG
1730 #undef VR
1731 
1732 // BT.2020 full range YUV to RGB reference
1733 //  R = Y                + V * 1.474600
1734 //  G = Y - U * 0.164553 - V * 0.571353
1735 //  B = Y + U * 1.881400
1736 // KR = 0.2627; KB = 0.0593
1737 
1738 #define UB 120 /* round(1.881400 * 64) */
1739 #define UG 11  /* round(0.164553 * 64) */
1740 #define VG 37  /* round(0.571353 * 64) */
1741 #define VR 94  /* round(1.474600 * 64) */
1742 
1743 // Y contribution to R,G,B.  Scale and bias.  (same as jpeg)
1744 #define YG 16320 /* round(1 * 64 * 256 * 256 / 257) */
1745 #define YB 32    /* 64 / 2 */
1746 
1747 MAKEYUVCONSTANTS(V2020, YG, YB, UB, UG, VG, VR)
1748 
1749 #undef YG
1750 #undef YB
1751 #undef UB
1752 #undef UG
1753 #undef VG
1754 #undef VR
1755 
1756 #undef BB
1757 #undef BG
1758 #undef BR
1759 
1760 #undef MAKEYUVCONSTANTS
1761 
1762 #if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1763 #define LOAD_YUV_CONSTANTS                 \
1764   int ub = yuvconstants->kUVCoeff[0];      \
1765   int vr = yuvconstants->kUVCoeff[1];      \
1766   int ug = yuvconstants->kUVCoeff[2];      \
1767   int vg = yuvconstants->kUVCoeff[3];      \
1768   int yg = yuvconstants->kRGBCoeffBias[0]; \
1769   int bb = yuvconstants->kRGBCoeffBias[1]; \
1770   int bg = yuvconstants->kRGBCoeffBias[2]; \
1771   int br = yuvconstants->kRGBCoeffBias[3]
1772 
1773 #define CALC_RGB16                         \
1774   int32_t y1 = (uint32_t)(y32 * yg) >> 16; \
1775   int b16 = y1 + (u * ub) - bb;            \
1776   int g16 = y1 + bg - (u * ug + v * vg);   \
1777   int r16 = y1 + (v * vr) - br
1778 #else
1779 #define LOAD_YUV_CONSTANTS           \
1780   int ub = yuvconstants->kUVToB[0];  \
1781   int ug = yuvconstants->kUVToG[0];  \
1782   int vg = yuvconstants->kUVToG[1];  \
1783   int vr = yuvconstants->kUVToR[1];  \
1784   int yg = yuvconstants->kYToRgb[0]; \
1785   int yb = yuvconstants->kYBiasToRgb[0]
1786 
1787 #define CALC_RGB16                                \
1788   int32_t y1 = ((uint32_t)(y32 * yg) >> 16) + yb; \
1789   int8_t ui = (int8_t)u;                          \
1790   int8_t vi = (int8_t)v;                          \
1791   ui -= 0x80;                                     \
1792   vi -= 0x80;                                     \
1793   int b16 = y1 + (ui * ub);                       \
1794   int g16 = y1 - (ui * ug + vi * vg);             \
1795   int r16 = y1 + (vi * vr)
1796 #endif
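
// In both variants above the UB/UG/VG/VR coefficients are 6 bit fixed point
// (coefficient * 64) and Y is first widened to a 16 bit y32, so b16/g16/r16
// carry 6 fractional bits; callers shift down by 6 for 8 bit output or by 4
// for the 10 bit AR30 path.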
1797 
1798 // C reference code that mimics the YUV assembly.
1799 // Reads 8 bit YUV and clamps down to 8 bit RGB.
1800 static __inline void YuvPixel(uint8_t y,
1801                               uint8_t u,
1802                               uint8_t v,
1803                               uint8_t* b,
1804                               uint8_t* g,
1805                               uint8_t* r,
1806                               const struct YuvConstants* yuvconstants) {
1807   LOAD_YUV_CONSTANTS;
1808   uint32_t y32 = y * 0x0101;
1809   CALC_RGB16;
1810   *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
1811   *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
1812   *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
1813 }
1814 
1815 // Reads 8 bit YUV and leaves result as 16 bit.
1816 static __inline void YuvPixel8_16(uint8_t y,
1817                                   uint8_t u,
1818                                   uint8_t v,
1819                                   int* b,
1820                                   int* g,
1821                                   int* r,
1822                                   const struct YuvConstants* yuvconstants) {
1823   LOAD_YUV_CONSTANTS;
1824   uint32_t y32 = y * 0x0101;
1825   CALC_RGB16;
1826   *b = b16;
1827   *g = g16;
1828   *r = r16;
1829 }
1830 
1831 // C reference code that mimics the YUV 16 bit assembly.
1832 // Reads 10 bit YUV and leaves result as 16 bit.
1833 static __inline void YuvPixel10_16(uint16_t y,
1834                                    uint16_t u,
1835                                    uint16_t v,
1836                                    int* b,
1837                                    int* g,
1838                                    int* r,
1839                                    const struct YuvConstants* yuvconstants) {
1840   LOAD_YUV_CONSTANTS;
1841   uint32_t y32 = (y << 6) | (y >> 4);
1842   u = STATIC_CAST(uint8_t, clamp255(u >> 2));
1843   v = STATIC_CAST(uint8_t, clamp255(v >> 2));
1844   CALC_RGB16;
1845   *b = b16;
1846   *g = g16;
1847   *r = r16;
1848 }
1849 
1850 // C reference code that mimics the YUV 16 bit assembly.
1851 // Reads 12 bit YUV and leaves result as 16 bit.
1852 static __inline void YuvPixel12_16(int16_t y,
1853                                    int16_t u,
1854                                    int16_t v,
1855                                    int* b,
1856                                    int* g,
1857                                    int* r,
1858                                    const struct YuvConstants* yuvconstants) {
1859   LOAD_YUV_CONSTANTS;
1860   uint32_t y32 = (y << 4) | (y >> 8);
1861   u = STATIC_CAST(uint8_t, clamp255(u >> 4));
1862   v = STATIC_CAST(uint8_t, clamp255(v >> 4));
1863   CALC_RGB16;
1864   *b = b16;
1865   *g = g16;
1866   *r = r16;
1867 }
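
// The shifts above widen the Y sample to 16 bits the same way y * 0x0101 does
// for 8 bit data: (y << 6) | (y >> 4) replicates a 10 bit value
// (1023 -> 65535) and (y << 4) | (y >> 8) replicates a 12 bit value, while
// U and V are reduced to 8 bits before the shared CALC_RGB16 math.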
1868 
1869 // C reference code that mimics the YUV 10 bit assembly.
1870 // Reads 10 bit YUV and clamps down to 8 bit RGB.
1871 static __inline void YuvPixel10(uint16_t y,
1872                                 uint16_t u,
1873                                 uint16_t v,
1874                                 uint8_t* b,
1875                                 uint8_t* g,
1876                                 uint8_t* r,
1877                                 const struct YuvConstants* yuvconstants) {
1878   int b16;
1879   int g16;
1880   int r16;
1881   YuvPixel10_16(y, u, v, &b16, &g16, &r16, yuvconstants);
1882   *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
1883   *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
1884   *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
1885 }
1886 
1887 // C reference code that mimics the YUV 12 bit assembly.
1888 // Reads 12 bit YUV and clamps down to 8 bit RGB.
1889 static __inline void YuvPixel12(uint16_t y,
1890                                 uint16_t u,
1891                                 uint16_t v,
1892                                 uint8_t* b,
1893                                 uint8_t* g,
1894                                 uint8_t* r,
1895                                 const struct YuvConstants* yuvconstants) {
1896   int b16;
1897   int g16;
1898   int r16;
1899   YuvPixel12_16(y, u, v, &b16, &g16, &r16, yuvconstants);
1900   *b = STATIC_CAST(uint8_t, Clamp(b16 >> 6));
1901   *g = STATIC_CAST(uint8_t, Clamp(g16 >> 6));
1902   *r = STATIC_CAST(uint8_t, Clamp(r16 >> 6));
1903 }
1904 
1905 // C reference code that mimics the YUV 16 bit assembly.
1906 // Reads 16 bit YUV and leaves result as 8 bit.
1907 static __inline void YuvPixel16_8(uint16_t y,
1908                                   uint16_t u,
1909                                   uint16_t v,
1910                                   uint8_t* b,
1911                                   uint8_t* g,
1912                                   uint8_t* r,
1913                                   const struct YuvConstants* yuvconstants) {
1914   LOAD_YUV_CONSTANTS;
1915   uint32_t y32 = y;
1916   u = STATIC_CAST(uint16_t, clamp255(u >> 8));
1917   v = STATIC_CAST(uint16_t, clamp255(v >> 8));
1918   CALC_RGB16;
1919   *b = STATIC_CAST(uint8_t, Clamp((int32_t)(b16) >> 6));
1920   *g = STATIC_CAST(uint8_t, Clamp((int32_t)(g16) >> 6));
1921   *r = STATIC_CAST(uint8_t, Clamp((int32_t)(r16) >> 6));
1922 }
1923 
1924 // C reference code that mimics the YUV 16 bit assembly.
1925 // Reads 16 bit YUV and leaves result as 16 bit.
1926 static __inline void YuvPixel16_16(uint16_t y,
1927                                    uint16_t u,
1928                                    uint16_t v,
1929                                    int* b,
1930                                    int* g,
1931                                    int* r,
1932                                    const struct YuvConstants* yuvconstants) {
1933   LOAD_YUV_CONSTANTS;
1934   uint32_t y32 = y;
1935   u = STATIC_CAST(uint16_t, clamp255(u >> 8));
1936   v = STATIC_CAST(uint16_t, clamp255(v >> 8));
1937   CALC_RGB16;
1938   *b = b16;
1939   *g = g16;
1940   *r = r16;
1941 }
1942 
1943 // C reference code that mimics the YUV assembly.
1944 // Reads 8 bit Y and leaves result as 8 bit gray RGB.
1945 static __inline void YPixel(uint8_t y,
1946                             uint8_t* b,
1947                             uint8_t* g,
1948                             uint8_t* r,
1949                             const struct YuvConstants* yuvconstants) {
1950 #if defined(__aarch64__) || defined(__arm__) || defined(__riscv)
1951   int yg = yuvconstants->kRGBCoeffBias[0];
1952   int ygb = yuvconstants->kRGBCoeffBias[4];
1953 #else
1954   int ygb = yuvconstants->kYBiasToRgb[0];
1955   int yg = yuvconstants->kYToRgb[0];
1956 #endif
1957   uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1958   uint8_t b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
1959   *b = b8;
1960   *g = b8;
1961   *r = b8;
1962 }
1963 
1964 void I444ToARGBRow_C(const uint8_t* src_y,
1965                      const uint8_t* src_u,
1966                      const uint8_t* src_v,
1967                      uint8_t* rgb_buf,
1968                      const struct YuvConstants* yuvconstants,
1969                      int width) {
1970   int x;
1971   for (x = 0; x < width; ++x) {
1972     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1973              rgb_buf + 2, yuvconstants);
1974     rgb_buf[3] = 255;
1975     src_y += 1;
1976     src_u += 1;
1977     src_v += 1;
1978     rgb_buf += 4;  // Advance 1 pixel.
1979   }
1980 }
1981 
1982 void I444ToRGB24Row_C(const uint8_t* src_y,
1983                       const uint8_t* src_u,
1984                       const uint8_t* src_v,
1985                       uint8_t* rgb_buf,
1986                       const struct YuvConstants* yuvconstants,
1987                       int width) {
1988   int x;
1989   for (x = 0; x < width; ++x) {
1990     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
1991              rgb_buf + 2, yuvconstants);
1992     src_y += 1;
1993     src_u += 1;
1994     src_v += 1;
1995     rgb_buf += 3;  // Advance 1 pixel.
1996   }
1997 }
1998 
1999 // Also used for 420
2000 void I422ToARGBRow_C(const uint8_t* src_y,
2001                      const uint8_t* src_u,
2002                      const uint8_t* src_v,
2003                      uint8_t* rgb_buf,
2004                      const struct YuvConstants* yuvconstants,
2005                      int width) {
2006   int x;
2007   for (x = 0; x < width - 1; x += 2) {
2008     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2009              rgb_buf + 2, yuvconstants);
2010     rgb_buf[3] = 255;
2011     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2012              rgb_buf + 6, yuvconstants);
2013     rgb_buf[7] = 255;
2014     src_y += 2;
2015     src_u += 1;
2016     src_v += 1;
2017     rgb_buf += 8;  // Advance 2 pixels.
2018   }
2019   if (width & 1) {
2020     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2021              rgb_buf + 2, yuvconstants);
2022     rgb_buf[3] = 255;
2023   }
2024 }
2025 
2026 // 10 bit YUV to ARGB
2027 void I210ToARGBRow_C(const uint16_t* src_y,
2028                      const uint16_t* src_u,
2029                      const uint16_t* src_v,
2030                      uint8_t* rgb_buf,
2031                      const struct YuvConstants* yuvconstants,
2032                      int width) {
2033   int x;
2034   for (x = 0; x < width - 1; x += 2) {
2035     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2036                rgb_buf + 2, yuvconstants);
2037     rgb_buf[3] = 255;
2038     YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2039                rgb_buf + 6, yuvconstants);
2040     rgb_buf[7] = 255;
2041     src_y += 2;
2042     src_u += 1;
2043     src_v += 1;
2044     rgb_buf += 8;  // Advance 2 pixels.
2045   }
2046   if (width & 1) {
2047     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2048                rgb_buf + 2, yuvconstants);
2049     rgb_buf[3] = 255;
2050   }
2051 }
2052 
2053 void I410ToARGBRow_C(const uint16_t* src_y,
2054                      const uint16_t* src_u,
2055                      const uint16_t* src_v,
2056                      uint8_t* rgb_buf,
2057                      const struct YuvConstants* yuvconstants,
2058                      int width) {
2059   int x;
2060   for (x = 0; x < width; ++x) {
2061     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2062                rgb_buf + 2, yuvconstants);
2063     rgb_buf[3] = 255;
2064     src_y += 1;
2065     src_u += 1;
2066     src_v += 1;
2067     rgb_buf += 4;  // Advance 1 pixel.
2068   }
2069 }
2070 
2071 void I210AlphaToARGBRow_C(const uint16_t* src_y,
2072                           const uint16_t* src_u,
2073                           const uint16_t* src_v,
2074                           const uint16_t* src_a,
2075                           uint8_t* rgb_buf,
2076                           const struct YuvConstants* yuvconstants,
2077                           int width) {
2078   int x;
2079   for (x = 0; x < width - 1; x += 2) {
2080     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2081                rgb_buf + 2, yuvconstants);
2082     rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2083     YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2084                rgb_buf + 6, yuvconstants);
2085     rgb_buf[7] = STATIC_CAST(uint8_t, clamp255(src_a[1] >> 2));
2086     src_y += 2;
2087     src_u += 1;
2088     src_v += 1;
2089     src_a += 2;
2090     rgb_buf += 8;  // Advance 2 pixels.
2091   }
2092   if (width & 1) {
2093     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2094                rgb_buf + 2, yuvconstants);
2095     rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2096   }
2097 }
2098 
2099 void I410AlphaToARGBRow_C(const uint16_t* src_y,
2100                           const uint16_t* src_u,
2101                           const uint16_t* src_v,
2102                           const uint16_t* src_a,
2103                           uint8_t* rgb_buf,
2104                           const struct YuvConstants* yuvconstants,
2105                           int width) {
2106   int x;
2107   for (x = 0; x < width; ++x) {
2108     YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2109                rgb_buf + 2, yuvconstants);
2110     rgb_buf[3] = STATIC_CAST(uint8_t, clamp255(src_a[0] >> 2));
2111     src_y += 1;
2112     src_u += 1;
2113     src_v += 1;
2114     src_a += 1;
2115     rgb_buf += 4;  // Advance 1 pixel.
2116   }
2117 }
2118 
2119 // 12 bit YUV to ARGB
2120 void I212ToARGBRow_C(const uint16_t* src_y,
2121                      const uint16_t* src_u,
2122                      const uint16_t* src_v,
2123                      uint8_t* rgb_buf,
2124                      const struct YuvConstants* yuvconstants,
2125                      int width) {
2126   int x;
2127   for (x = 0; x < width - 1; x += 2) {
2128     YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2129                rgb_buf + 2, yuvconstants);
2130     rgb_buf[3] = 255;
2131     YuvPixel12(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2132                rgb_buf + 6, yuvconstants);
2133     rgb_buf[7] = 255;
2134     src_y += 2;
2135     src_u += 1;
2136     src_v += 1;
2137     rgb_buf += 8;  // Advance 2 pixels.
2138   }
2139   if (width & 1) {
2140     YuvPixel12(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2141                rgb_buf + 2, yuvconstants);
2142     rgb_buf[3] = 255;
2143   }
2144 }
2145 
2146 static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
2147   uint32_t ar30;
2148   b = b >> 4;  // convert 8 bit 10.6 to 10 bit.
2149   g = g >> 4;
2150   r = r >> 4;
2151   b = Clamp10(b);
2152   g = Clamp10(g);
2153   r = Clamp10(r);
2154   ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
2155   (*(uint32_t*)rgb_buf) = ar30;
2156 }
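
// AR30 little endian layout: 10 bit B in bits 0..9, G in bits 10..19,
// R in bits 20..29 and a 2 bit alpha in bits 30..31; 0xc0000000 sets the
// alpha to 3 (fully opaque).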
2157 
2158 // 10 bit YUV to 10 bit AR30
2159 void I210ToAR30Row_C(const uint16_t* src_y,
2160                      const uint16_t* src_u,
2161                      const uint16_t* src_v,
2162                      uint8_t* rgb_buf,
2163                      const struct YuvConstants* yuvconstants,
2164                      int width) {
2165   int x;
2166   int b;
2167   int g;
2168   int r;
2169   for (x = 0; x < width - 1; x += 2) {
2170     YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2171     StoreAR30(rgb_buf, b, g, r);
2172     YuvPixel10_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2173     StoreAR30(rgb_buf + 4, b, g, r);
2174     src_y += 2;
2175     src_u += 1;
2176     src_v += 1;
2177     rgb_buf += 8;  // Advance 2 pixels.
2178   }
2179   if (width & 1) {
2180     YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2181     StoreAR30(rgb_buf, b, g, r);
2182   }
2183 }
2184 
2185 // 12 bit YUV to 10 bit AR30
2186 void I212ToAR30Row_C(const uint16_t* src_y,
2187                      const uint16_t* src_u,
2188                      const uint16_t* src_v,
2189                      uint8_t* rgb_buf,
2190                      const struct YuvConstants* yuvconstants,
2191                      int width) {
2192   int x;
2193   int b;
2194   int g;
2195   int r;
2196   for (x = 0; x < width - 1; x += 2) {
2197     YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2198     StoreAR30(rgb_buf, b, g, r);
2199     YuvPixel12_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2200     StoreAR30(rgb_buf + 4, b, g, r);
2201     src_y += 2;
2202     src_u += 1;
2203     src_v += 1;
2204     rgb_buf += 8;  // Advance 2 pixels.
2205   }
2206   if (width & 1) {
2207     YuvPixel12_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2208     StoreAR30(rgb_buf, b, g, r);
2209   }
2210 }
2211 
2212 void I410ToAR30Row_C(const uint16_t* src_y,
2213                      const uint16_t* src_u,
2214                      const uint16_t* src_v,
2215                      uint8_t* rgb_buf,
2216                      const struct YuvConstants* yuvconstants,
2217                      int width) {
2218   int x;
2219   int b;
2220   int g;
2221   int r;
2222   for (x = 0; x < width; ++x) {
2223     YuvPixel10_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2224     StoreAR30(rgb_buf, b, g, r);
2225     src_y += 1;
2226     src_u += 1;
2227     src_v += 1;
2228     rgb_buf += 4;  // Advance 1 pixel.
2229   }
2230 }
2231 
2232 // P210 has 10 bits in the msbs of a 16 bit, NV12 style layout.
2233 void P210ToARGBRow_C(const uint16_t* src_y,
2234                      const uint16_t* src_uv,
2235                      uint8_t* dst_argb,
2236                      const struct YuvConstants* yuvconstants,
2237                      int width) {
2238   int x;
2239   for (x = 0; x < width - 1; x += 2) {
2240     YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2241                  dst_argb + 2, yuvconstants);
2242     dst_argb[3] = 255;
2243     YuvPixel16_8(src_y[1], src_uv[0], src_uv[1], dst_argb + 4, dst_argb + 5,
2244                  dst_argb + 6, yuvconstants);
2245     dst_argb[7] = 255;
2246     src_y += 2;
2247     src_uv += 2;
2248     dst_argb += 8;  // Advance 2 pixels.
2249   }
2250   if (width & 1) {
2251     YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2252                  dst_argb + 2, yuvconstants);
2253     dst_argb[3] = 255;
2254   }
2255 }
2256 
2257 void P410ToARGBRow_C(const uint16_t* src_y,
2258                      const uint16_t* src_uv,
2259                      uint8_t* dst_argb,
2260                      const struct YuvConstants* yuvconstants,
2261                      int width) {
2262   int x;
2263   for (x = 0; x < width; ++x) {
2264     YuvPixel16_8(src_y[0], src_uv[0], src_uv[1], dst_argb + 0, dst_argb + 1,
2265                  dst_argb + 2, yuvconstants);
2266     dst_argb[3] = 255;
2267     src_y += 1;
2268     src_uv += 2;
2269     dst_argb += 4;  // Advance 1 pixel.
2270   }
2271 }
2272 
2273 void P210ToAR30Row_C(const uint16_t* src_y,
2274                      const uint16_t* src_uv,
2275                      uint8_t* dst_ar30,
2276                      const struct YuvConstants* yuvconstants,
2277                      int width) {
2278   int x;
2279   int b;
2280   int g;
2281   int r;
2282   for (x = 0; x < width - 1; x += 2) {
2283     YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2284     StoreAR30(dst_ar30, b, g, r);
2285     YuvPixel16_16(src_y[1], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2286     StoreAR30(dst_ar30 + 4, b, g, r);
2287     src_y += 2;
2288     src_uv += 2;
2289     dst_ar30 += 8;  // Advance 2 pixels.
2290   }
2291   if (width & 1) {
2292     YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2293     StoreAR30(dst_ar30, b, g, r);
2294   }
2295 }
2296 
2297 void P410ToAR30Row_C(const uint16_t* src_y,
2298                      const uint16_t* src_uv,
2299                      uint8_t* dst_ar30,
2300                      const struct YuvConstants* yuvconstants,
2301                      int width) {
2302   int x;
2303   int b;
2304   int g;
2305   int r;
2306   for (x = 0; x < width; ++x) {
2307     YuvPixel16_16(src_y[0], src_uv[0], src_uv[1], &b, &g, &r, yuvconstants);
2308     StoreAR30(dst_ar30, b, g, r);
2309     src_y += 1;
2310     src_uv += 2;
2311     dst_ar30 += 4;  // Advance 1 pixel.
2312   }
2313 }
2314 
2315 // 8 bit YUV to 10 bit AR30
2316 // Uses the same code as 10 bit YUV but shifts the 8 bit values up to 10 bits.
2317 void I422ToAR30Row_C(const uint8_t* src_y,
2318                      const uint8_t* src_u,
2319                      const uint8_t* src_v,
2320                      uint8_t* rgb_buf,
2321                      const struct YuvConstants* yuvconstants,
2322                      int width) {
2323   int x;
2324   int b;
2325   int g;
2326   int r;
2327   for (x = 0; x < width - 1; x += 2) {
2328     YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2329     StoreAR30(rgb_buf, b, g, r);
2330     YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2331     StoreAR30(rgb_buf + 4, b, g, r);
2332     src_y += 2;
2333     src_u += 1;
2334     src_v += 1;
2335     rgb_buf += 8;  // Advance 2 pixels.
2336   }
2337   if (width & 1) {
2338     YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants);
2339     StoreAR30(rgb_buf, b, g, r);
2340   }
2341 }
2342 
2343 void I444AlphaToARGBRow_C(const uint8_t* src_y,
2344                           const uint8_t* src_u,
2345                           const uint8_t* src_v,
2346                           const uint8_t* src_a,
2347                           uint8_t* rgb_buf,
2348                           const struct YuvConstants* yuvconstants,
2349                           int width) {
2350   int x;
2351   for (x = 0; x < width; ++x) {
2352     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2353              rgb_buf + 2, yuvconstants);
2354     rgb_buf[3] = src_a[0];
2355     src_y += 1;
2356     src_u += 1;
2357     src_v += 1;
2358     src_a += 1;
2359     rgb_buf += 4;  // Advance 1 pixel.
2360   }
2361 }
2362 
2363 void I422AlphaToARGBRow_C(const uint8_t* src_y,
2364                           const uint8_t* src_u,
2365                           const uint8_t* src_v,
2366                           const uint8_t* src_a,
2367                           uint8_t* rgb_buf,
2368                           const struct YuvConstants* yuvconstants,
2369                           int width) {
2370   int x;
2371   for (x = 0; x < width - 1; x += 2) {
2372     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2373              rgb_buf + 2, yuvconstants);
2374     rgb_buf[3] = src_a[0];
2375     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
2376              rgb_buf + 6, yuvconstants);
2377     rgb_buf[7] = src_a[1];
2378     src_y += 2;
2379     src_u += 1;
2380     src_v += 1;
2381     src_a += 2;
2382     rgb_buf += 8;  // Advance 2 pixels.
2383   }
2384   if (width & 1) {
2385     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2386              rgb_buf + 2, yuvconstants);
2387     rgb_buf[3] = src_a[0];
2388   }
2389 }
2390 
2391 void I422ToRGB24Row_C(const uint8_t* src_y,
2392                       const uint8_t* src_u,
2393                       const uint8_t* src_v,
2394                       uint8_t* rgb_buf,
2395                       const struct YuvConstants* yuvconstants,
2396                       int width) {
2397   int x;
2398   for (x = 0; x < width - 1; x += 2) {
2399     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2400              rgb_buf + 2, yuvconstants);
2401     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
2402              rgb_buf + 5, yuvconstants);
2403     src_y += 2;
2404     src_u += 1;
2405     src_v += 1;
2406     rgb_buf += 6;  // Advance 2 pixels.
2407   }
2408   if (width & 1) {
2409     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
2410              rgb_buf + 2, yuvconstants);
2411   }
2412 }
2413 
2414 void I422ToARGB4444Row_C(const uint8_t* src_y,
2415                          const uint8_t* src_u,
2416                          const uint8_t* src_v,
2417                          uint8_t* dst_argb4444,
2418                          const struct YuvConstants* yuvconstants,
2419                          int width) {
2420   uint8_t b0;
2421   uint8_t g0;
2422   uint8_t r0;
2423   uint8_t b1;
2424   uint8_t g1;
2425   uint8_t r1;
2426   int x;
2427   for (x = 0; x < width - 1; x += 2) {
2428     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2429     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2430     b0 = b0 >> 4;
2431     g0 = g0 >> 4;
2432     r0 = r0 >> 4;
2433     b1 = b1 >> 4;
2434     g1 = g1 >> 4;
2435     r1 = r1 >> 4;
2436     *(uint16_t*)(dst_argb4444 + 0) =
2437         STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
2438     *(uint16_t*)(dst_argb4444 + 2) =
2439         STATIC_CAST(uint16_t, b1 | (g1 << 4) | (r1 << 8) | 0xf000);
2440     src_y += 2;
2441     src_u += 1;
2442     src_v += 1;
2443     dst_argb4444 += 4;  // Advance 2 pixels.
2444   }
2445   if (width & 1) {
2446     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2447     b0 = b0 >> 4;
2448     g0 = g0 >> 4;
2449     r0 = r0 >> 4;
2450     *(uint16_t*)(dst_argb4444) =
2451         STATIC_CAST(uint16_t, b0 | (g0 << 4) | (r0 << 8) | 0xf000);
2452   }
2453 }
2454 
2455 void I422ToARGB1555Row_C(const uint8_t* src_y,
2456                          const uint8_t* src_u,
2457                          const uint8_t* src_v,
2458                          uint8_t* dst_argb1555,
2459                          const struct YuvConstants* yuvconstants,
2460                          int width) {
2461   uint8_t b0;
2462   uint8_t g0;
2463   uint8_t r0;
2464   uint8_t b1;
2465   uint8_t g1;
2466   uint8_t r1;
2467   int x;
2468   for (x = 0; x < width - 1; x += 2) {
2469     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2470     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2471     b0 = b0 >> 3;
2472     g0 = g0 >> 3;
2473     r0 = r0 >> 3;
2474     b1 = b1 >> 3;
2475     g1 = g1 >> 3;
2476     r1 = r1 >> 3;
2477     *(uint16_t*)(dst_argb1555 + 0) =
2478         STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
2479     *(uint16_t*)(dst_argb1555 + 2) =
2480         STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 10) | 0x8000);
2481     src_y += 2;
2482     src_u += 1;
2483     src_v += 1;
2484     dst_argb1555 += 4;  // Advance 2 pixels.
2485   }
2486   if (width & 1) {
2487     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2488     b0 = b0 >> 3;
2489     g0 = g0 >> 3;
2490     r0 = r0 >> 3;
2491     *(uint16_t*)(dst_argb1555) =
2492         STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 10) | 0x8000);
2493   }
2494 }
2495 
2496 void I422ToRGB565Row_C(const uint8_t* src_y,
2497                        const uint8_t* src_u,
2498                        const uint8_t* src_v,
2499                        uint8_t* dst_rgb565,
2500                        const struct YuvConstants* yuvconstants,
2501                        int width) {
2502   uint8_t b0;
2503   uint8_t g0;
2504   uint8_t r0;
2505   uint8_t b1;
2506   uint8_t g1;
2507   uint8_t r1;
2508   int x;
2509   for (x = 0; x < width - 1; x += 2) {
2510     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2511     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
2512     b0 = b0 >> 3;
2513     g0 = g0 >> 2;
2514     r0 = r0 >> 3;
2515     b1 = b1 >> 3;
2516     g1 = g1 >> 2;
2517     r1 = r1 >> 3;
2518     *(uint16_t*)(dst_rgb565 + 0) =
2519         STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
2520     *(uint16_t*)(dst_rgb565 + 2) =
2521         STATIC_CAST(uint16_t, b1 | (g1 << 5) | (r1 << 11));
2522     src_y += 2;
2523     src_u += 1;
2524     src_v += 1;
2525     dst_rgb565 += 4;  // Advance 2 pixels.
2526   }
2527   if (width & 1) {
2528     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
2529     b0 = b0 >> 3;
2530     g0 = g0 >> 2;
2531     r0 = r0 >> 3;
2532     *(uint16_t*)(dst_rgb565 + 0) =
2533         STATIC_CAST(uint16_t, b0 | (g0 << 5) | (r0 << 11));
2534   }
2535 }
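
// RGB565 packs B into bits 0..4, G into bits 5..10 and R into bits 11..15,
// hence the >> 3 / >> 2 / >> 3 above; e.g. white (255, 255, 255) packs to
// 0xffff.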
2536 
2537 void NV12ToARGBRow_C(const uint8_t* src_y,
2538                      const uint8_t* src_uv,
2539                      uint8_t* rgb_buf,
2540                      const struct YuvConstants* yuvconstants,
2541                      int width) {
2542   int x;
2543   for (x = 0; x < width - 1; x += 2) {
2544     YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2545              rgb_buf + 2, yuvconstants);
2546     rgb_buf[3] = 255;
2547     YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5,
2548              rgb_buf + 6, yuvconstants);
2549     rgb_buf[7] = 255;
2550     src_y += 2;
2551     src_uv += 2;
2552     rgb_buf += 8;  // Advance 2 pixels.
2553   }
2554   if (width & 1) {
2555     YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2556              rgb_buf + 2, yuvconstants);
2557     rgb_buf[3] = 255;
2558   }
2559 }
2560 
2561 void NV21ToARGBRow_C(const uint8_t* src_y,
2562                      const uint8_t* src_vu,
2563                      uint8_t* rgb_buf,
2564                      const struct YuvConstants* yuvconstants,
2565                      int width) {
2566   int x;
2567   for (x = 0; x < width - 1; x += 2) {
2568     YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2569              rgb_buf + 2, yuvconstants);
2570     rgb_buf[3] = 255;
2571     YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5,
2572              rgb_buf + 6, yuvconstants);
2573     rgb_buf[7] = 255;
2574     src_y += 2;
2575     src_vu += 2;
2576     rgb_buf += 8;  // Advance 2 pixels.
2577   }
2578   if (width & 1) {
2579     YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2580              rgb_buf + 2, yuvconstants);
2581     rgb_buf[3] = 255;
2582   }
2583 }
2584 
2585 void NV12ToRGB24Row_C(const uint8_t* src_y,
2586                       const uint8_t* src_uv,
2587                       uint8_t* rgb_buf,
2588                       const struct YuvConstants* yuvconstants,
2589                       int width) {
2590   int x;
2591   for (x = 0; x < width - 1; x += 2) {
2592     YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2593              rgb_buf + 2, yuvconstants);
2594     YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4,
2595              rgb_buf + 5, yuvconstants);
2596     src_y += 2;
2597     src_uv += 2;
2598     rgb_buf += 6;  // Advance 2 pixels.
2599   }
2600   if (width & 1) {
2601     YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1,
2602              rgb_buf + 2, yuvconstants);
2603   }
2604 }
2605 
2606 void NV21ToRGB24Row_C(const uint8_t* src_y,
2607                       const uint8_t* src_vu,
2608                       uint8_t* rgb_buf,
2609                       const struct YuvConstants* yuvconstants,
2610                       int width) {
2611   int x;
2612   for (x = 0; x < width - 1; x += 2) {
2613     YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2614              rgb_buf + 2, yuvconstants);
2615     YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4,
2616              rgb_buf + 5, yuvconstants);
2617     src_y += 2;
2618     src_vu += 2;
2619     rgb_buf += 6;  // Advance 2 pixels.
2620   }
2621   if (width & 1) {
2622     YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1,
2623              rgb_buf + 2, yuvconstants);
2624   }
2625 }
2626 
2627 void NV12ToRGB565Row_C(const uint8_t* src_y,
2628                        const uint8_t* src_uv,
2629                        uint8_t* dst_rgb565,
2630                        const struct YuvConstants* yuvconstants,
2631                        int width) {
2632   uint8_t b0;
2633   uint8_t g0;
2634   uint8_t r0;
2635   uint8_t b1;
2636   uint8_t g1;
2637   uint8_t r1;
2638   int x;
2639   for (x = 0; x < width - 1; x += 2) {
2640     YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2641     YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
2642     b0 = b0 >> 3;
2643     g0 = g0 >> 2;
2644     r0 = r0 >> 3;
2645     b1 = b1 >> 3;
2646     g1 = g1 >> 2;
2647     r1 = r1 >> 3;
2648     *(uint16_t*)(dst_rgb565 + 0) = STATIC_CAST(uint16_t, b0) |
2649                                    STATIC_CAST(uint16_t, g0 << 5) |
2650                                    STATIC_CAST(uint16_t, r0 << 11);
2651     *(uint16_t*)(dst_rgb565 + 2) = STATIC_CAST(uint16_t, b1) |
2652                                    STATIC_CAST(uint16_t, g1 << 5) |
2653                                    STATIC_CAST(uint16_t, r1 << 11);
2654     src_y += 2;
2655     src_uv += 2;
2656     dst_rgb565 += 4;  // Advance 2 pixels.
2657   }
2658   if (width & 1) {
2659     YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
2660     b0 = b0 >> 3;
2661     g0 = g0 >> 2;
2662     r0 = r0 >> 3;
2663     *(uint16_t*)(dst_rgb565) = STATIC_CAST(uint16_t, b0) |
2664                                STATIC_CAST(uint16_t, g0 << 5) |
2665                                STATIC_CAST(uint16_t, r0 << 11);
2666   }
2667 }
2668 
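// YUY2 stores pixels as Y0 U Y1 V and UYVY as U Y0 V Y1; each U/V pair is
// shared by two pixels, which is why both loops below index the chroma bytes
// once per pixel pair.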
2669 void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
2670                      uint8_t* rgb_buf,
2671                      const struct YuvConstants* yuvconstants,
2672                      int width) {
2673   int x;
2674   for (x = 0; x < width - 1; x += 2) {
2675     YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
2676              rgb_buf + 2, yuvconstants);
2677     rgb_buf[3] = 255;
2678     YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5,
2679              rgb_buf + 6, yuvconstants);
2680     rgb_buf[7] = 255;
2681     src_yuy2 += 4;
2682     rgb_buf += 8;  // Advance 2 pixels.
2683   }
2684   if (width & 1) {
2685     YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1,
2686              rgb_buf + 2, yuvconstants);
2687     rgb_buf[3] = 255;
2688   }
2689 }
2690 
2691 void UYVYToARGBRow_C(const uint8_t* src_uyvy,
2692                      uint8_t* rgb_buf,
2693                      const struct YuvConstants* yuvconstants,
2694                      int width) {
2695   int x;
2696   for (x = 0; x < width - 1; x += 2) {
2697     YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
2698              rgb_buf + 2, yuvconstants);
2699     rgb_buf[3] = 255;
2700     YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5,
2701              rgb_buf + 6, yuvconstants);
2702     rgb_buf[7] = 255;
2703     src_uyvy += 4;
2704     rgb_buf += 8;  // Advance 2 pixels.
2705   }
2706   if (width & 1) {
2707     YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1,
2708              rgb_buf + 2, yuvconstants);
2709     rgb_buf[3] = 255;
2710   }
2711 }
2712 
2713 void I422ToRGBARow_C(const uint8_t* src_y,
2714                      const uint8_t* src_u,
2715                      const uint8_t* src_v,
2716                      uint8_t* rgb_buf,
2717                      const struct YuvConstants* yuvconstants,
2718                      int width) {
2719   int x;
2720   for (x = 0; x < width - 1; x += 2) {
2721     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
2722              rgb_buf + 3, yuvconstants);
2723     rgb_buf[0] = 255;
2724     YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6,
2725              rgb_buf + 7, yuvconstants);
2726     rgb_buf[4] = 255;
2727     src_y += 2;
2728     src_u += 1;
2729     src_v += 1;
2730     rgb_buf += 8;  // Advance 2 pixels.
2731   }
2732   if (width & 1) {
2733     YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2,
2734              rgb_buf + 3, yuvconstants);
2735     rgb_buf[0] = 255;
2736   }
2737 }
2738 
2739 void I400ToARGBRow_C(const uint8_t* src_y,
2740                      uint8_t* rgb_buf,
2741                      const struct YuvConstants* yuvconstants,
2742                      int width) {
2743   int x;
2744   for (x = 0; x < width - 1; x += 2) {
2745     YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
2746     rgb_buf[3] = 255;
2747     YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
2748     rgb_buf[7] = 255;
2749     src_y += 2;
2750     rgb_buf += 8;  // Advance 2 pixels.
2751   }
2752   if (width & 1) {
2753     YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
2754     rgb_buf[3] = 255;
2755   }
2756 }
2757 
2758 void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
2759   int x;
2760   src += width - 1;
2761   for (x = 0; x < width - 1; x += 2) {
2762     dst[x] = src[0];
2763     dst[x + 1] = src[-1];
2764     src -= 2;
2765   }
2766   if (width & 1) {
2767     dst[width - 1] = src[0];
2768   }
2769 }
2770 
2771 void MirrorRow_16_C(const uint16_t* src, uint16_t* dst, int width) {
2772   int x;
2773   src += width - 1;
2774   for (x = 0; x < width - 1; x += 2) {
2775     dst[x] = src[0];
2776     dst[x + 1] = src[-1];
2777     src -= 2;
2778   }
2779   if (width & 1) {
2780     dst[width - 1] = src[0];
2781   }
2782 }
2783 
2784 void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_uv, int width) {
2785   int x;
2786   src_uv += (width - 1) << 1;
2787   for (x = 0; x < width; ++x) {
2788     dst_uv[0] = src_uv[0];
2789     dst_uv[1] = src_uv[1];
2790     src_uv -= 2;
2791     dst_uv += 2;
2792   }
2793 }
2794 
2795 void MirrorSplitUVRow_C(const uint8_t* src_uv,
2796                         uint8_t* dst_u,
2797                         uint8_t* dst_v,
2798                         int width) {
2799   int x;
2800   src_uv += (width - 1) << 1;
2801   for (x = 0; x < width - 1; x += 2) {
2802     dst_u[x] = src_uv[0];
2803     dst_u[x + 1] = src_uv[-2];
2804     dst_v[x] = src_uv[1];
2805     dst_v[x + 1] = src_uv[-2 + 1];
2806     src_uv -= 4;
2807   }
2808   if (width & 1) {
2809     dst_u[width - 1] = src_uv[0];
2810     dst_v[width - 1] = src_uv[1];
2811   }
2812 }
2813 
2814 void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
2815   int x;
2816   const uint32_t* src32 = (const uint32_t*)(src);
2817   uint32_t* dst32 = (uint32_t*)(dst);
2818   src32 += width - 1;
2819   for (x = 0; x < width - 1; x += 2) {
2820     dst32[x] = src32[0];
2821     dst32[x + 1] = src32[-1];
2822     src32 -= 2;
2823   }
2824   if (width & 1) {
2825     dst32[width - 1] = src32[0];
2826   }
2827 }
2828 
2829 void RGB24MirrorRow_C(const uint8_t* src_rgb24, uint8_t* dst_rgb24, int width) {
2830   int x;
2831   src_rgb24 += width * 3 - 3;
2832   for (x = 0; x < width; ++x) {
2833     uint8_t b = src_rgb24[0];
2834     uint8_t g = src_rgb24[1];
2835     uint8_t r = src_rgb24[2];
2836     dst_rgb24[0] = b;
2837     dst_rgb24[1] = g;
2838     dst_rgb24[2] = r;
2839     src_rgb24 -= 3;
2840     dst_rgb24 += 3;
2841   }
2842 }
2843 
2844 void SplitUVRow_C(const uint8_t* src_uv,
2845                   uint8_t* dst_u,
2846                   uint8_t* dst_v,
2847                   int width) {
2848   int x;
2849   for (x = 0; x < width - 1; x += 2) {
2850     dst_u[x] = src_uv[0];
2851     dst_u[x + 1] = src_uv[2];
2852     dst_v[x] = src_uv[1];
2853     dst_v[x + 1] = src_uv[3];
2854     src_uv += 4;
2855   }
2856   if (width & 1) {
2857     dst_u[width - 1] = src_uv[0];
2858     dst_v[width - 1] = src_uv[1];
2859   }
2860 }
2861 
2862 void MergeUVRow_C(const uint8_t* src_u,
2863                   const uint8_t* src_v,
2864                   uint8_t* dst_uv,
2865                   int width) {
2866   int x;
2867   for (x = 0; x < width - 1; x += 2) {
2868     dst_uv[0] = src_u[x];
2869     dst_uv[1] = src_v[x];
2870     dst_uv[2] = src_u[x + 1];
2871     dst_uv[3] = src_v[x + 1];
2872     dst_uv += 4;
2873   }
2874   if (width & 1) {
2875     dst_uv[0] = src_u[width - 1];
2876     dst_uv[1] = src_v[width - 1];
2877   }
2878 }
2879 
2880 void DetileRow_C(const uint8_t* src,
2881                  ptrdiff_t src_tile_stride,
2882                  uint8_t* dst,
2883                  int width) {
2884   int x;
2885   for (x = 0; x < width - 15; x += 16) {
2886     memcpy(dst, src, 16);
2887     dst += 16;
2888     src += src_tile_stride;
2889   }
2890   if (width & 15) {
2891     memcpy(dst, src, width & 15);
2892   }
2893 }
2894 
2895 void DetileRow_16_C(const uint16_t* src,
2896                     ptrdiff_t src_tile_stride,
2897                     uint16_t* dst,
2898                     int width) {
2899   int x;
2900   for (x = 0; x < width - 15; x += 16) {
2901     memcpy(dst, src, 16 * sizeof(uint16_t));
2902     dst += 16;
2903     src += src_tile_stride;
2904   }
2905   if (width & 15) {
2906     memcpy(dst, src, (width & 15) * sizeof(uint16_t));
2907   }
2908 }
2909 
2910 void DetileSplitUVRow_C(const uint8_t* src_uv,
2911                         ptrdiff_t src_tile_stride,
2912                         uint8_t* dst_u,
2913                         uint8_t* dst_v,
2914                         int width) {
2915   int x;
2916   for (x = 0; x < width - 15; x += 16) {
2917     SplitUVRow_C(src_uv, dst_u, dst_v, 8);
2918     dst_u += 8;
2919     dst_v += 8;
2920     src_uv += src_tile_stride;
2921   }
2922   if (width & 15) {
2923     SplitUVRow_C(src_uv, dst_u, dst_v, ((width & 15) + 1) / 2);
2924   }
2925 }
2926 
2927 void DetileToYUY2_C(const uint8_t* src_y,
2928                     ptrdiff_t src_y_tile_stride,
2929                     const uint8_t* src_uv,
2930                     ptrdiff_t src_uv_tile_stride,
2931                     uint8_t* dst_yuy2,
2932                     int width) {
2933   for (int x = 0; x < width - 15; x += 16) {
2934     for (int i = 0; i < 8; i++) {
2935       dst_yuy2[0] = src_y[0];
2936       dst_yuy2[1] = src_uv[0];
2937       dst_yuy2[2] = src_y[1];
2938       dst_yuy2[3] = src_uv[1];
2939       dst_yuy2 += 4;
2940       src_y += 2;
2941       src_uv += 2;
2942     }
2943     src_y += src_y_tile_stride - 16;
2944     src_uv += src_uv_tile_stride - 16;
2945   }
2946 }
2947 
2948 // Unpack MT2T into tiled P010 64 pixels at a time. MT2T's bitstream is encoded
2949 // in 80-byte blocks representing 64 pixels each. The first 16 bytes of the
2950 // block contain all of the lower 2 bits of each pixel packed together, and the
2951 // next 64 bytes represent all the upper 8 bits of the pixel. The lower bits are
2952 // packed into 1x4 blocks, whereas the upper bits are packed in normal raster
2953 // order.
2954 void UnpackMT2T_C(const uint8_t* src, uint16_t* dst, size_t size) {
2955   for (size_t i = 0; i < size; i += 80) {
2956     const uint8_t* src_lower_bits = src;
2957     const uint8_t* src_upper_bits = src + 16;
2958 
2959     for (int j = 0; j < 4; j++) {
2960       for (int k = 0; k < 16; k++) {
2961         *dst++ = ((src_lower_bits[k] >> (j * 2)) & 0x3) << 6 |
2962                  (uint16_t)*src_upper_bits << 8 |
2963                  (uint16_t)*src_upper_bits >> 2;
2964         src_upper_bits++;
2965       }
2966     }
2967 
2968     src += 80;
2969   }
2970 }
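// Worked example of the MT2T unpack above (illustrative, not from the
// original source): with lower-bits byte 0x1B and upper byte 0xA5, pass j = 0
// extracts (0x1B >> 0) & 3 = 3, so the output is
// (3 << 6) | (0xA5 << 8) | (0xA5 >> 2) = 0xA5E9: the 10-bit sample
// ((0xA5 << 2) | 3) left-aligned in 16 bits with its top bits replicated
// into the low 6 bits.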
2971 
2972 void SplitRGBRow_C(const uint8_t* src_rgb,
2973                    uint8_t* dst_r,
2974                    uint8_t* dst_g,
2975                    uint8_t* dst_b,
2976                    int width) {
2977   int x;
2978   for (x = 0; x < width; ++x) {
2979     dst_r[x] = src_rgb[0];
2980     dst_g[x] = src_rgb[1];
2981     dst_b[x] = src_rgb[2];
2982     src_rgb += 3;
2983   }
2984 }
2985 
2986 void MergeRGBRow_C(const uint8_t* src_r,
2987                    const uint8_t* src_g,
2988                    const uint8_t* src_b,
2989                    uint8_t* dst_rgb,
2990                    int width) {
2991   int x;
2992   for (x = 0; x < width; ++x) {
2993     dst_rgb[0] = src_r[x];
2994     dst_rgb[1] = src_g[x];
2995     dst_rgb[2] = src_b[x];
2996     dst_rgb += 3;
2997   }
2998 }
2999 
3000 void SplitARGBRow_C(const uint8_t* src_argb,
3001                     uint8_t* dst_r,
3002                     uint8_t* dst_g,
3003                     uint8_t* dst_b,
3004                     uint8_t* dst_a,
3005                     int width) {
3006   int x;
3007   for (x = 0; x < width; ++x) {
3008     dst_b[x] = src_argb[0];
3009     dst_g[x] = src_argb[1];
3010     dst_r[x] = src_argb[2];
3011     dst_a[x] = src_argb[3];
3012     src_argb += 4;
3013   }
3014 }
3015 
3016 void MergeARGBRow_C(const uint8_t* src_r,
3017                     const uint8_t* src_g,
3018                     const uint8_t* src_b,
3019                     const uint8_t* src_a,
3020                     uint8_t* dst_argb,
3021                     int width) {
3022   int x;
3023   for (x = 0; x < width; ++x) {
3024     dst_argb[0] = src_b[x];
3025     dst_argb[1] = src_g[x];
3026     dst_argb[2] = src_r[x];
3027     dst_argb[3] = src_a[x];
3028     dst_argb += 4;
3029   }
3030 }
3031 
3032 void MergeXR30Row_C(const uint16_t* src_r,
3033                     const uint16_t* src_g,
3034                     const uint16_t* src_b,
3035                     uint8_t* dst_ar30,
3036                     int depth,
3037                     int width) {
3038   assert(depth >= 10);
3039   assert(depth <= 16);
3040   int x;
3041   int shift = depth - 10;
3042   uint32_t* dst_ar30_32 = (uint32_t*)dst_ar30;
3043   for (x = 0; x < width; ++x) {
3044     uint32_t r = clamp1023(src_r[x] >> shift);
3045     uint32_t g = clamp1023(src_g[x] >> shift);
3046     uint32_t b = clamp1023(src_b[x] >> shift);
3047     dst_ar30_32[x] = b | (g << 10) | (r << 20) | 0xc0000000;
3048   }
3049 }
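// Illustrative AR30 layout for the store above (not from the original
// source): bits 0-9 hold B, bits 10-19 hold G, bits 20-29 hold R, and bits
// 30-31 hold the 2-bit alpha, which 0xc0000000 forces to opaque. For example,
// 10-bit white (1023, 1023, 1023) packs to 0xFFFFFFFF.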
3050 
3051 void MergeAR64Row_C(const uint16_t* src_r,
3052                     const uint16_t* src_g,
3053                     const uint16_t* src_b,
3054                     const uint16_t* src_a,
3055                     uint16_t* dst_ar64,
3056                     int depth,
3057                     int width) {
3058   assert(depth >= 1);
3059   assert(depth <= 16);
3060   int x;
3061   int shift = 16 - depth;
3062   int max = (1 << depth) - 1;
3063   for (x = 0; x < width; ++x) {
3064     dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
3065     dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
3066     dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
3067     dst_ar64[3] = STATIC_CAST(uint16_t, ClampMax(src_a[x], max) << shift);
3068     dst_ar64 += 4;
3069   }
3070 }
3071 
3072 void MergeARGB16To8Row_C(const uint16_t* src_r,
3073                          const uint16_t* src_g,
3074                          const uint16_t* src_b,
3075                          const uint16_t* src_a,
3076                          uint8_t* dst_argb,
3077                          int depth,
3078                          int width) {
3079   assert(depth >= 8);
3080   assert(depth <= 16);
3081   int x;
3082   int shift = depth - 8;
3083   for (x = 0; x < width; ++x) {
3084     dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
3085     dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
3086     dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
3087     dst_argb[3] = STATIC_CAST(uint8_t, clamp255(src_a[x] >> shift));
3088     dst_argb += 4;
3089   }
3090 }
3091 
3092 void MergeXR64Row_C(const uint16_t* src_r,
3093                     const uint16_t* src_g,
3094                     const uint16_t* src_b,
3095                     uint16_t* dst_ar64,
3096                     int depth,
3097                     int width) {
3098   assert(depth >= 1);
3099   assert(depth <= 16);
3100   int x;
3101   int shift = 16 - depth;
3102   int max = (1 << depth) - 1;
3103   for (x = 0; x < width; ++x) {
3104     dst_ar64[0] = STATIC_CAST(uint16_t, ClampMax(src_b[x], max) << shift);
3105     dst_ar64[1] = STATIC_CAST(uint16_t, ClampMax(src_g[x], max) << shift);
3106     dst_ar64[2] = STATIC_CAST(uint16_t, ClampMax(src_r[x], max) << shift);
3107     dst_ar64[3] = 0xffff;
3108     dst_ar64 += 4;
3109   }
3110 }
3111 
3112 void MergeXRGB16To8Row_C(const uint16_t* src_r,
3113                          const uint16_t* src_g,
3114                          const uint16_t* src_b,
3115                          uint8_t* dst_argb,
3116                          int depth,
3117                          int width) {
3118   assert(depth >= 8);
3119   assert(depth <= 16);
3120   int x;
3121   int shift = depth - 8;
3122   for (x = 0; x < width; ++x) {
3123     dst_argb[0] = STATIC_CAST(uint8_t, clamp255(src_b[x] >> shift));
3124     dst_argb[1] = STATIC_CAST(uint8_t, clamp255(src_g[x] >> shift));
3125     dst_argb[2] = STATIC_CAST(uint8_t, clamp255(src_r[x] >> shift));
3126     dst_argb[3] = 0xff;
3127     dst_argb += 4;
3128   }
3129 }
3130 
3131 void SplitXRGBRow_C(const uint8_t* src_argb,
3132                     uint8_t* dst_r,
3133                     uint8_t* dst_g,
3134                     uint8_t* dst_b,
3135                     int width) {
3136   int x;
3137   for (x = 0; x < width; ++x) {
3138     dst_b[x] = src_argb[0];
3139     dst_g[x] = src_argb[1];
3140     dst_r[x] = src_argb[2];
3141     src_argb += 4;
3142   }
3143 }
3144 
3145 void MergeXRGBRow_C(const uint8_t* src_r,
3146                     const uint8_t* src_g,
3147                     const uint8_t* src_b,
3148                     uint8_t* dst_argb,
3149                     int width) {
3150   int x;
3151   for (x = 0; x < width; ++x) {
3152     dst_argb[0] = src_b[x];
3153     dst_argb[1] = src_g[x];
3154     dst_argb[2] = src_r[x];
3155     dst_argb[3] = 255;
3156     dst_argb += 4;
3157   }
3158 }
3159 
3160 // Convert lsb formats to msb, depending on sample depth.
3161 void MergeUVRow_16_C(const uint16_t* src_u,
3162                      const uint16_t* src_v,
3163                      uint16_t* dst_uv,
3164                      int depth,
3165                      int width) {
3166   int shift = 16 - depth;
3167   assert(depth >= 8);
3168   assert(depth <= 16);
3169   int x;
3170   for (x = 0; x < width; ++x) {
3171     dst_uv[0] = STATIC_CAST(uint16_t, src_u[x] << shift);
3172     dst_uv[1] = STATIC_CAST(uint16_t, src_v[x] << shift);
3173     dst_uv += 2;
3174   }
3175 }
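// Worked example (illustrative, not from the original source): with
// depth = 10 the shift is 6, so a 10-bit chroma value of 0x3FF is stored as
// 0x3FF << 6 = 0xFFC0, i.e. msb-aligned in the 16-bit output.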
3176 
3177 // Convert msb formats to lsb, depending on sample depth.
3178 void SplitUVRow_16_C(const uint16_t* src_uv,
3179                      uint16_t* dst_u,
3180                      uint16_t* dst_v,
3181                      int depth,
3182                      int width) {
3183   int shift = 16 - depth;
3184   int x;
3185   assert(depth >= 8);
3186   assert(depth <= 16);
3187   for (x = 0; x < width; ++x) {
3188     dst_u[x] = src_uv[0] >> shift;
3189     dst_v[x] = src_uv[1] >> shift;
3190     src_uv += 2;
3191   }
3192 }
3193 
3194 void MultiplyRow_16_C(const uint16_t* src_y,
3195                       uint16_t* dst_y,
3196                       int scale,
3197                       int width) {
3198   int x;
3199   for (x = 0; x < width; ++x) {
3200     dst_y[x] = STATIC_CAST(uint16_t, src_y[x] * scale);
3201   }
3202 }
3203 
3204 void DivideRow_16_C(const uint16_t* src_y,
3205                     uint16_t* dst_y,
3206                     int scale,
3207                     int width) {
3208   int x;
3209   for (x = 0; x < width; ++x) {
3210     dst_y[x] = (src_y[x] * scale) >> 16;
3211   }
3212 }
3213 
3214 // Use scale to convert lsb formats to msb, depending on how many bits there are:
3215 // 32768 = 9 bits
3216 // 16384 = 10 bits
3217 // 4096 = 12 bits
3218 // 256 = 16 bits
3219 // TODO(fbarchard): change scale to bits
3220 #define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
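// Worked example (illustrative, not from the original source): for 10-bit
// sources, scale = 16384, so C16TO8(1023, 16384) =
// clamp255((1023 * 16384) >> 16) = clamp255(255) = 255, mapping the full
// 10-bit range onto 0..255.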
3221 
3222 void Convert16To8Row_C(const uint16_t* src_y,
3223                        uint8_t* dst_y,
3224                        int scale,
3225                        int width) {
3226   int x;
3227   assert(scale >= 256);
3228   assert(scale <= 32768);
3229 
3230   for (x = 0; x < width; ++x) {
3231     dst_y[x] = STATIC_CAST(uint8_t, C16TO8(src_y[x], scale));
3232   }
3233 }
3234 
3235 // Use scale to convert lsb formats to msb, depending how many bits there are:
3236 // 1024 = 10 bits
3237 void Convert8To16Row_C(const uint8_t* src_y,
3238                        uint16_t* dst_y,
3239                        int scale,
3240                        int width) {
3241   int x;
3242   scale *= 0x0101;  // replicates the byte.
3243   for (x = 0; x < width; ++x) {
3244     dst_y[x] = (src_y[x] * scale) >> 16;
3245   }
3246 }
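// Worked example for Convert8To16Row_C (illustrative, not from the original
// source): with scale = 1024 (10 bits) the multiplier becomes
// 1024 * 0x0101 = 0x40400, so an input of 255 yields
// (255 * 0x40400) >> 16 = 1023, i.e. 0..255 maps onto 0..1023.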
3247 
3248 void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
3249   memcpy(dst, src, count);
3250 }
3251 
3252 void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
3253   memcpy(dst, src, count * 2);
3254 }
3255 
3256 void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
3257   memset(dst, v8, width);
3258 }
3259 
3260 void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
3261   int x;
3262   for (x = 0; x < width; ++x) {
3263     memcpy(dst_argb + x * sizeof v32, &v32, sizeof v32);
3264   }
3265 }
3266 
3267 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
3268 void YUY2ToUVRow_C(const uint8_t* src_yuy2,
3269                    int src_stride_yuy2,
3270                    uint8_t* dst_u,
3271                    uint8_t* dst_v,
3272                    int width) {
3273   // Output a row of UV values, filtering 2 rows of YUY2.
3274   int x;
3275   for (x = 0; x < width; x += 2) {
3276     dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
3277     dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
3278     src_yuy2 += 4;
3279     dst_u += 1;
3280     dst_v += 1;
3281   }
3282 }
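// Illustrative note on the byte layout assumed by the YUY2 routines above and
// below (derived from the indexing, not stated in the original source): each
// 4-byte group is [Y0, U, Y1, V], so indices 1 and 3 are the chroma samples
// shared by the two luma samples at indices 0 and 2.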
3283 
3284 // Filter 2 rows of YUY2 UV's (422) into UV (NV12).
3285 void YUY2ToNVUVRow_C(const uint8_t* src_yuy2,
3286                      int src_stride_yuy2,
3287                      uint8_t* dst_uv,
3288                      int width) {
3289   // Output a row of UV values, filtering 2 rows of YUY2.
3290   int x;
3291   for (x = 0; x < width; x += 2) {
3292     dst_uv[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
3293     dst_uv[1] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
3294     src_yuy2 += 4;
3295     dst_uv += 2;
3296   }
3297 }
3298 
3299 // Copy row of YUY2 UV's (422) into U and V (422).
3300 void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
3301                       uint8_t* dst_u,
3302                       uint8_t* dst_v,
3303                       int width) {
3304   // Output a row of UV values.
3305   int x;
3306   for (x = 0; x < width; x += 2) {
3307     dst_u[0] = src_yuy2[1];
3308     dst_v[0] = src_yuy2[3];
3309     src_yuy2 += 4;
3310     dst_u += 1;
3311     dst_v += 1;
3312   }
3313 }
3314 
3315 // Copy row of YUY2 Y's (422) into Y (420/422).
3316 void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
3317   // Output a row of Y values.
3318   int x;
3319   for (x = 0; x < width - 1; x += 2) {
3320     dst_y[x] = src_yuy2[0];
3321     dst_y[x + 1] = src_yuy2[2];
3322     src_yuy2 += 4;
3323   }
3324   if (width & 1) {
3325     dst_y[width - 1] = src_yuy2[0];
3326   }
3327 }
3328 
3329 // Filter 2 rows of UYVY UV's (422) into U and V (420).
3330 void UYVYToUVRow_C(const uint8_t* src_uyvy,
3331                    int src_stride_uyvy,
3332                    uint8_t* dst_u,
3333                    uint8_t* dst_v,
3334                    int width) {
3335   // Output a row of UV values.
3336   int x;
3337   for (x = 0; x < width; x += 2) {
3338     dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
3339     dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
3340     src_uyvy += 4;
3341     dst_u += 1;
3342     dst_v += 1;
3343   }
3344 }
3345 
3346 // Copy row of UYVY UV's (422) into U and V (422).
3347 void UYVYToUV422Row_C(const uint8_t* src_uyvy,
3348                       uint8_t* dst_u,
3349                       uint8_t* dst_v,
3350                       int width) {
3351   // Output a row of UV values.
3352   int x;
3353   for (x = 0; x < width; x += 2) {
3354     dst_u[0] = src_uyvy[0];
3355     dst_v[0] = src_uyvy[2];
3356     src_uyvy += 4;
3357     dst_u += 1;
3358     dst_v += 1;
3359   }
3360 }
3361 
3362 // Copy row of UYVY Y's (422) into Y (420/422).
3363 void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
3364   // Output a row of Y values.
3365   int x;
3366   for (x = 0; x < width - 1; x += 2) {
3367     dst_y[x] = src_uyvy[1];
3368     dst_y[x + 1] = src_uyvy[3];
3369     src_uyvy += 4;
3370   }
3371   if (width & 1) {
3372     dst_y[width - 1] = src_uyvy[1];
3373   }
3374 }
3375 
3376 #define BLEND(f, b, a) clamp255((((256 - a) * b) >> 8) + f)
3377 
3378 // Blend src_argb over src_argb1 and store to dst_argb.
3379 // dst_argb may be src_argb or src_argb1.
3380 // This code mimics the SSSE3 version for better testability.
3381 void ARGBBlendRow_C(const uint8_t* src_argb,
3382                     const uint8_t* src_argb1,
3383                     uint8_t* dst_argb,
3384                     int width) {
3385   int x;
3386   for (x = 0; x < width - 1; x += 2) {
3387     uint32_t fb = src_argb[0];
3388     uint32_t fg = src_argb[1];
3389     uint32_t fr = src_argb[2];
3390     uint32_t a = src_argb[3];
3391     uint32_t bb = src_argb1[0];
3392     uint32_t bg = src_argb1[1];
3393     uint32_t br = src_argb1[2];
3394     dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3395     dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3396     dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3397     dst_argb[3] = 255u;
3398 
3399     fb = src_argb[4 + 0];
3400     fg = src_argb[4 + 1];
3401     fr = src_argb[4 + 2];
3402     a = src_argb[4 + 3];
3403     bb = src_argb1[4 + 0];
3404     bg = src_argb1[4 + 1];
3405     br = src_argb1[4 + 2];
3406     dst_argb[4 + 0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3407     dst_argb[4 + 1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3408     dst_argb[4 + 2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3409     dst_argb[4 + 3] = 255u;
3410     src_argb += 8;
3411     src_argb1 += 8;
3412     dst_argb += 8;
3413   }
3414 
3415   if (width & 1) {
3416     uint32_t fb = src_argb[0];
3417     uint32_t fg = src_argb[1];
3418     uint32_t fr = src_argb[2];
3419     uint32_t a = src_argb[3];
3420     uint32_t bb = src_argb1[0];
3421     uint32_t bg = src_argb1[1];
3422     uint32_t br = src_argb1[2];
3423     dst_argb[0] = STATIC_CAST(uint8_t, BLEND(fb, bb, a));
3424     dst_argb[1] = STATIC_CAST(uint8_t, BLEND(fg, bg, a));
3425     dst_argb[2] = STATIC_CAST(uint8_t, BLEND(fr, br, a));
3426     dst_argb[3] = 255u;
3427   }
3428 }
3429 #undef BLEND
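// Worked example of the BLEND macro used above (illustrative, not from the
// original source): with premultiplied foreground fb = 128, alpha a = 128 and
// background bb = 100, BLEND(128, 100, 128) =
// clamp255(((256 - 128) * 100 >> 8) + 128) = 178, i.e. the background
// contributes (256 - a) / 256 of its value on top of the foreground.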
3430 
3431 #define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8
3432 void BlendPlaneRow_C(const uint8_t* src0,
3433                      const uint8_t* src1,
3434                      const uint8_t* alpha,
3435                      uint8_t* dst,
3436                      int width) {
3437   int x;
3438   for (x = 0; x < width - 1; x += 2) {
3439     dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
3440     dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
3441     src0 += 2;
3442     src1 += 2;
3443     alpha += 2;
3444     dst += 2;
3445   }
3446   if (width & 1) {
3447     dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
3448   }
3449 }
3450 #undef UBLEND
3451 
3452 #define ATTENUATE(f, a) (f * a + 255) >> 8
3453 
3454 // Multiply source RGB by alpha and store to destination.
3455 void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
3456   int i;
3457   for (i = 0; i < width - 1; i += 2) {
3458     uint32_t b = src_argb[0];
3459     uint32_t g = src_argb[1];
3460     uint32_t r = src_argb[2];
3461     uint32_t a = src_argb[3];
3462     dst_argb[0] = ATTENUATE(b, a);
3463     dst_argb[1] = ATTENUATE(g, a);
3464     dst_argb[2] = ATTENUATE(r, a);
3465     dst_argb[3] = STATIC_CAST(uint8_t, a);
3466     b = src_argb[4];
3467     g = src_argb[5];
3468     r = src_argb[6];
3469     a = src_argb[7];
3470     dst_argb[4] = ATTENUATE(b, a);
3471     dst_argb[5] = ATTENUATE(g, a);
3472     dst_argb[6] = ATTENUATE(r, a);
3473     dst_argb[7] = STATIC_CAST(uint8_t, a);
3474     src_argb += 8;
3475     dst_argb += 8;
3476   }
3477 
3478   if (width & 1) {
3479     const uint32_t b = src_argb[0];
3480     const uint32_t g = src_argb[1];
3481     const uint32_t r = src_argb[2];
3482     const uint32_t a = src_argb[3];
3483     dst_argb[0] = ATTENUATE(b, a);
3484     dst_argb[1] = ATTENUATE(g, a);
3485     dst_argb[2] = ATTENUATE(r, a);
3486     dst_argb[3] = STATIC_CAST(uint8_t, a);
3487   }
3488 }
3489 #undef ATTENUATE
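// Worked example of the ATTENUATE macro used above (illustrative, not from
// the original source): ATTENUATE(255, 128) = (255 * 128 + 255) >> 8 = 128,
// so a half-transparent white pixel attenuates to mid grey while its alpha
// of 128 is copied through unchanged.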
3490 
3491 // Divide source RGB by alpha and store to destination.
3492 // b = (b * 255 + (a / 2)) / a;
3493 // g = (g * 255 + (a / 2)) / a;
3494 // r = (r * 255 + (a / 2)) / a;
3495 // The reciprocal method is off by 1 on some values, e.g. 125.
3496 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
3497 #define T(a) 0x01000000 + (0x10000 / a)
3498 const uint32_t fixed_invtbl8[256] = {
3499     0x01000000, 0x0100ffff, T(0x02), T(0x03),   T(0x04), T(0x05), T(0x06),
3500     T(0x07),    T(0x08),    T(0x09), T(0x0a),   T(0x0b), T(0x0c), T(0x0d),
3501     T(0x0e),    T(0x0f),    T(0x10), T(0x11),   T(0x12), T(0x13), T(0x14),
3502     T(0x15),    T(0x16),    T(0x17), T(0x18),   T(0x19), T(0x1a), T(0x1b),
3503     T(0x1c),    T(0x1d),    T(0x1e), T(0x1f),   T(0x20), T(0x21), T(0x22),
3504     T(0x23),    T(0x24),    T(0x25), T(0x26),   T(0x27), T(0x28), T(0x29),
3505     T(0x2a),    T(0x2b),    T(0x2c), T(0x2d),   T(0x2e), T(0x2f), T(0x30),
3506     T(0x31),    T(0x32),    T(0x33), T(0x34),   T(0x35), T(0x36), T(0x37),
3507     T(0x38),    T(0x39),    T(0x3a), T(0x3b),   T(0x3c), T(0x3d), T(0x3e),
3508     T(0x3f),    T(0x40),    T(0x41), T(0x42),   T(0x43), T(0x44), T(0x45),
3509     T(0x46),    T(0x47),    T(0x48), T(0x49),   T(0x4a), T(0x4b), T(0x4c),
3510     T(0x4d),    T(0x4e),    T(0x4f), T(0x50),   T(0x51), T(0x52), T(0x53),
3511     T(0x54),    T(0x55),    T(0x56), T(0x57),   T(0x58), T(0x59), T(0x5a),
3512     T(0x5b),    T(0x5c),    T(0x5d), T(0x5e),   T(0x5f), T(0x60), T(0x61),
3513     T(0x62),    T(0x63),    T(0x64), T(0x65),   T(0x66), T(0x67), T(0x68),
3514     T(0x69),    T(0x6a),    T(0x6b), T(0x6c),   T(0x6d), T(0x6e), T(0x6f),
3515     T(0x70),    T(0x71),    T(0x72), T(0x73),   T(0x74), T(0x75), T(0x76),
3516     T(0x77),    T(0x78),    T(0x79), T(0x7a),   T(0x7b), T(0x7c), T(0x7d),
3517     T(0x7e),    T(0x7f),    T(0x80), T(0x81),   T(0x82), T(0x83), T(0x84),
3518     T(0x85),    T(0x86),    T(0x87), T(0x88),   T(0x89), T(0x8a), T(0x8b),
3519     T(0x8c),    T(0x8d),    T(0x8e), T(0x8f),   T(0x90), T(0x91), T(0x92),
3520     T(0x93),    T(0x94),    T(0x95), T(0x96),   T(0x97), T(0x98), T(0x99),
3521     T(0x9a),    T(0x9b),    T(0x9c), T(0x9d),   T(0x9e), T(0x9f), T(0xa0),
3522     T(0xa1),    T(0xa2),    T(0xa3), T(0xa4),   T(0xa5), T(0xa6), T(0xa7),
3523     T(0xa8),    T(0xa9),    T(0xaa), T(0xab),   T(0xac), T(0xad), T(0xae),
3524     T(0xaf),    T(0xb0),    T(0xb1), T(0xb2),   T(0xb3), T(0xb4), T(0xb5),
3525     T(0xb6),    T(0xb7),    T(0xb8), T(0xb9),   T(0xba), T(0xbb), T(0xbc),
3526     T(0xbd),    T(0xbe),    T(0xbf), T(0xc0),   T(0xc1), T(0xc2), T(0xc3),
3527     T(0xc4),    T(0xc5),    T(0xc6), T(0xc7),   T(0xc8), T(0xc9), T(0xca),
3528     T(0xcb),    T(0xcc),    T(0xcd), T(0xce),   T(0xcf), T(0xd0), T(0xd1),
3529     T(0xd2),    T(0xd3),    T(0xd4), T(0xd5),   T(0xd6), T(0xd7), T(0xd8),
3530     T(0xd9),    T(0xda),    T(0xdb), T(0xdc),   T(0xdd), T(0xde), T(0xdf),
3531     T(0xe0),    T(0xe1),    T(0xe2), T(0xe3),   T(0xe4), T(0xe5), T(0xe6),
3532     T(0xe7),    T(0xe8),    T(0xe9), T(0xea),   T(0xeb), T(0xec), T(0xed),
3533     T(0xee),    T(0xef),    T(0xf0), T(0xf1),   T(0xf2), T(0xf3), T(0xf4),
3534     T(0xf5),    T(0xf6),    T(0xf7), T(0xf8),   T(0xf9), T(0xfa), T(0xfb),
3535     T(0xfc),    T(0xfd),    T(0xfe), 0x01000100};
3536 #undef T
3537 
3538 #if LIBYUV_UNATTENUATE_DUP
3539 // This code mimics the Intel SIMD version for better testability.
3540 #define UNATTENUATE(f, ia) clamp255(((f | (f << 8)) * ia) >> 16)
3541 #else
3542 #define UNATTENUATE(f, ia) clamp255((f * ia) >> 8)
3543 #endif
3544 
3545 // mimics the Intel SIMD code for exactness.
3546 void ARGBUnattenuateRow_C(const uint8_t* src_argb,
3547                           uint8_t* dst_argb,
3548                           int width) {
3549   int i;
3550   for (i = 0; i < width; ++i) {
3551     uint32_t b = src_argb[0];
3552     uint32_t g = src_argb[1];
3553     uint32_t r = src_argb[2];
3554     const uint32_t a = src_argb[3];
3555     const uint32_t ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
3556 
3557     // Clamping should not be necessary but is free in assembly.
3558     dst_argb[0] = STATIC_CAST(uint8_t, UNATTENUATE(b, ia));
3559     dst_argb[1] = STATIC_CAST(uint8_t, UNATTENUATE(g, ia));
3560     dst_argb[2] = STATIC_CAST(uint8_t, UNATTENUATE(r, ia));
3561     dst_argb[3] = STATIC_CAST(uint8_t, a);
3562     src_argb += 4;
3563     dst_argb += 4;
3564   }
3565 }
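// Worked example (illustrative, not from the original source): for a = 128,
// fixed_invtbl8[128] & 0xffff is 0x10000 / 128 = 512 in 8.8 fixed point.
// Without LIBYUV_UNATTENUATE_DUP, UNATTENUATE(64, 512) =
// clamp255((64 * 512) >> 8) = 128, recovering the premultiplied value 64
// back to its unattenuated value of 128.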
3566 
3567 void ComputeCumulativeSumRow_C(const uint8_t* row,
3568                                int32_t* cumsum,
3569                                const int32_t* previous_cumsum,
3570                                int width) {
3571   int32_t row_sum[4] = {0, 0, 0, 0};
3572   int x;
3573   for (x = 0; x < width; ++x) {
3574     row_sum[0] += row[x * 4 + 0];
3575     row_sum[1] += row[x * 4 + 1];
3576     row_sum[2] += row[x * 4 + 2];
3577     row_sum[3] += row[x * 4 + 3];
3578     cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0];
3579     cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1];
3580     cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2];
3581     cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3];
3582   }
3583 }
3584 
3585 void CumulativeSumToAverageRow_C(const int32_t* tl,
3586                                  const int32_t* bl,
3587                                  int w,
3588                                  int area,
3589                                  uint8_t* dst,
3590                                  int count) {
3591   float ooa;
3592   int i;
3593   assert(area != 0);
3594 
3595   ooa = 1.0f / STATIC_CAST(float, area);
3596   for (i = 0; i < count; ++i) {
3597     dst[0] =
3598         (uint8_t)(STATIC_CAST(float, bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) *
3599                   ooa);
3600     dst[1] =
3601         (uint8_t)(STATIC_CAST(float, bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) *
3602                   ooa);
3603     dst[2] =
3604         (uint8_t)(STATIC_CAST(float, bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) *
3605                   ooa);
3606     dst[3] =
3607         (uint8_t)(STATIC_CAST(float, bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) *
3608                   ooa);
3609     dst += 4;
3610     tl += 4;
3611     bl += 4;
3612   }
3613 }
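// Illustrative note on the summed-area math above (not stated in the
// original source): tl and bl point at the top-left and bottom-left corners
// of a box in the cumulative-sum image, with w the int32 offset to the right
// edge (likely the box width in pixels times 4 channels), so
// bl[w] + tl[0] - bl[0] - tl[w] is the box total for one channel and
// multiplying by 1/area gives its average.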
3614 
3615 // Copy pixels from rotated source to destination row with a slope.
3616 LIBYUV_API
3617 void ARGBAffineRow_C(const uint8_t* src_argb,
3618                      int src_argb_stride,
3619                      uint8_t* dst_argb,
3620                      const float* uv_dudv,
3621                      int width) {
3622   int i;
3623   // Render a row of pixels from source into a buffer.
3624   float uv[2];
3625   uv[0] = uv_dudv[0];
3626   uv[1] = uv_dudv[1];
3627   for (i = 0; i < width; ++i) {
3628     int x = (int)(uv[0]);
3629     int y = (int)(uv[1]);
3630     *(uint32_t*)(dst_argb) =
3631         *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
3632     dst_argb += 4;
3633     uv[0] += uv_dudv[2];
3634     uv[1] += uv_dudv[3];
3635   }
3636 }
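// Illustrative (derived from the code above, not stated in the original
// source): uv_dudv holds {u, v, du, dv}, the starting source coordinate
// followed by its per-destination-pixel increment, so each output pixel is
// fetched from src_argb + (int)v * src_argb_stride + (int)u * 4.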
3637 
3638 // Blend 2 rows into 1.
3639 static void HalfRow_C(const uint8_t* src_uv,
3640                       ptrdiff_t src_uv_stride,
3641                       uint8_t* dst_uv,
3642                       int width) {
3643   int x;
3644   for (x = 0; x < width; ++x) {
3645     dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
3646   }
3647 }
3648 
3649 static void HalfRow_16_C(const uint16_t* src_uv,
3650                          ptrdiff_t src_uv_stride,
3651                          uint16_t* dst_uv,
3652                          int width) {
3653   int x;
3654   for (x = 0; x < width; ++x) {
3655     dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
3656   }
3657 }
3658 
3659 static void HalfRow_16To8_C(const uint16_t* src_uv,
3660                             ptrdiff_t src_uv_stride,
3661                             uint8_t* dst_uv,
3662                             int scale,
3663                             int width) {
3664   int x;
3665   for (x = 0; x < width; ++x) {
3666     dst_uv[x] = STATIC_CAST(
3667         uint8_t,
3668         C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale));
3669   }
3670 }
3671 
3672 // C version 2x2 -> 2x1.
3673 void InterpolateRow_C(uint8_t* dst_ptr,
3674                       const uint8_t* src_ptr,
3675                       ptrdiff_t src_stride,
3676                       int width,
3677                       int source_y_fraction) {
3678   int y1_fraction = source_y_fraction;
3679   int y0_fraction = 256 - y1_fraction;
3680   const uint8_t* src_ptr1 = src_ptr + src_stride;
3681   int x;
3682   assert(source_y_fraction >= 0);
3683   assert(source_y_fraction < 256);
3684 
3685   if (y1_fraction == 0) {
3686     memcpy(dst_ptr, src_ptr, width);
3687     return;
3688   }
3689   if (y1_fraction == 128) {
3690     HalfRow_C(src_ptr, src_stride, dst_ptr, width);
3691     return;
3692   }
3693   for (x = 0; x < width; ++x) {
3694     dst_ptr[0] = STATIC_CAST(
3695         uint8_t,
3696         (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
3697     ++src_ptr;
3698     ++src_ptr1;
3699     ++dst_ptr;
3700   }
3701 }
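// Worked example for InterpolateRow_C (illustrative, not from the original
// source): with source_y_fraction = 64, each output is
// (src * 192 + src1 * 64 + 128) >> 8, i.e. a 3/4 : 1/4 vertical blend of the
// two input rows with rounding.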
3702 
3703 // C version 2x2 -> 2x1.
3704 void InterpolateRow_16_C(uint16_t* dst_ptr,
3705                          const uint16_t* src_ptr,
3706                          ptrdiff_t src_stride,
3707                          int width,
3708                          int source_y_fraction) {
3709   int y1_fraction = source_y_fraction;
3710   int y0_fraction = 256 - y1_fraction;
3711   const uint16_t* src_ptr1 = src_ptr + src_stride;
3712   int x;
3713   assert(source_y_fraction >= 0);
3714   assert(source_y_fraction < 256);
3715 
3716   if (y1_fraction == 0) {
3717     memcpy(dst_ptr, src_ptr, width * 2);
3718     return;
3719   }
3720   if (y1_fraction == 128) {
3721     HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
3722     return;
3723   }
3724   for (x = 0; x < width; ++x) {
3725     dst_ptr[0] = STATIC_CAST(
3726         uint16_t,
3727         (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8);
3728     ++src_ptr;
3729     ++src_ptr1;
3730     ++dst_ptr;
3731   }
3732 }
3733 
3734 // C version 2x2 16 bit-> 2x1 8 bit.
3735 // Use scale to convert lsb formats to msb, depending on how many bits there are:
3736 // 32768 = 9 bits
3737 // 16384 = 10 bits
3738 // 4096 = 12 bits
3739 // 256 = 16 bits
3740 // TODO(fbarchard): change scale to bits
3741 
3742 void InterpolateRow_16To8_C(uint8_t* dst_ptr,
3743                             const uint16_t* src_ptr,
3744                             ptrdiff_t src_stride,
3745                             int scale,
3746                             int width,
3747                             int source_y_fraction) {
3748   int y1_fraction = source_y_fraction;
3749   int y0_fraction = 256 - y1_fraction;
3750   const uint16_t* src_ptr1 = src_ptr + src_stride;
3751   int x;
3752   assert(source_y_fraction >= 0);
3753   assert(source_y_fraction < 256);
3754 
3755   if (source_y_fraction == 0) {
3756     Convert16To8Row_C(src_ptr, dst_ptr, scale, width);
3757     return;
3758   }
3759   if (source_y_fraction == 128) {
3760     HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width);
3761     return;
3762   }
3763   for (x = 0; x < width; ++x) {
3764     dst_ptr[0] = STATIC_CAST(
3765         uint8_t,
3766         C16TO8(
3767             (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
3768             scale));
3769     src_ptr += 1;
3770     src_ptr1 += 1;
3771     dst_ptr += 1;
3772   }
3773 }
3774 
3775 // Use first 4 shuffler values to reorder ARGB channels.
3776 void ARGBShuffleRow_C(const uint8_t* src_argb,
3777                       uint8_t* dst_argb,
3778                       const uint8_t* shuffler,
3779                       int width) {
3780   int index0 = shuffler[0];
3781   int index1 = shuffler[1];
3782   int index2 = shuffler[2];
3783   int index3 = shuffler[3];
3784   // Shuffle a row of ARGB.
3785   int x;
3786   for (x = 0; x < width; ++x) {
3787     // To support in-place conversion.
3788     uint8_t b = src_argb[index0];
3789     uint8_t g = src_argb[index1];
3790     uint8_t r = src_argb[index2];
3791     uint8_t a = src_argb[index3];
3792     dst_argb[0] = b;
3793     dst_argb[1] = g;
3794     dst_argb[2] = r;
3795     dst_argb[3] = a;
3796     src_argb += 4;
3797     dst_argb += 4;
3798   }
3799 }
3800 
3801 void I422ToYUY2Row_C(const uint8_t* src_y,
3802                      const uint8_t* src_u,
3803                      const uint8_t* src_v,
3804                      uint8_t* dst_frame,
3805                      int width) {
3806   int x;
3807   for (x = 0; x < width - 1; x += 2) {
3808     dst_frame[0] = src_y[0];
3809     dst_frame[1] = src_u[0];
3810     dst_frame[2] = src_y[1];
3811     dst_frame[3] = src_v[0];
3812     dst_frame += 4;
3813     src_y += 2;
3814     src_u += 1;
3815     src_v += 1;
3816   }
3817   if (width & 1) {
3818     dst_frame[0] = src_y[0];
3819     dst_frame[1] = src_u[0];
3820     dst_frame[2] = 0;
3821     dst_frame[3] = src_v[0];
3822   }
3823 }
3824 
3825 void I422ToUYVYRow_C(const uint8_t* src_y,
3826                      const uint8_t* src_u,
3827                      const uint8_t* src_v,
3828                      uint8_t* dst_frame,
3829                      int width) {
3830   int x;
3831   for (x = 0; x < width - 1; x += 2) {
3832     dst_frame[0] = src_u[0];
3833     dst_frame[1] = src_y[0];
3834     dst_frame[2] = src_v[0];
3835     dst_frame[3] = src_y[1];
3836     dst_frame += 4;
3837     src_y += 2;
3838     src_u += 1;
3839     src_v += 1;
3840   }
3841   if (width & 1) {
3842     dst_frame[0] = src_u[0];
3843     dst_frame[1] = src_y[0];
3844     dst_frame[2] = src_v[0];
3845     dst_frame[3] = 0;
3846   }
3847 }
3848 
3849 void ARGBPolynomialRow_C(const uint8_t* src_argb,
3850                          uint8_t* dst_argb,
3851                          const float* poly,
3852                          int width) {
3853   int i;
3854   for (i = 0; i < width; ++i) {
3855     float b = (float)(src_argb[0]);
3856     float g = (float)(src_argb[1]);
3857     float r = (float)(src_argb[2]);
3858     float a = (float)(src_argb[3]);
3859     float b2 = b * b;
3860     float g2 = g * g;
3861     float r2 = r * r;
3862     float a2 = a * a;
3863     float db = poly[0] + poly[4] * b;
3864     float dg = poly[1] + poly[5] * g;
3865     float dr = poly[2] + poly[6] * r;
3866     float da = poly[3] + poly[7] * a;
3867     float b3 = b2 * b;
3868     float g3 = g2 * g;
3869     float r3 = r2 * r;
3870     float a3 = a2 * a;
3871     db += poly[8] * b2;
3872     dg += poly[9] * g2;
3873     dr += poly[10] * r2;
3874     da += poly[11] * a2;
3875     db += poly[12] * b3;
3876     dg += poly[13] * g3;
3877     dr += poly[14] * r3;
3878     da += poly[15] * a3;
3879 
3880     dst_argb[0] = STATIC_CAST(uint8_t, Clamp((int32_t)(db)));
3881     dst_argb[1] = STATIC_CAST(uint8_t, Clamp((int32_t)(dg)));
3882     dst_argb[2] = STATIC_CAST(uint8_t, Clamp((int32_t)(dr)));
3883     dst_argb[3] = STATIC_CAST(uint8_t, Clamp((int32_t)(da)));
3884     src_argb += 4;
3885     dst_argb += 4;
3886   }
3887 }
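// Illustrative (derived from the code above, not stated in the original
// source): each channel is mapped through an independent cubic polynomial
// c0 + c1*x + c2*x^2 + c3*x^3, with the B, G, R and A coefficients
// interleaved in poly[0..15] in steps of 4.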
3888 
3889 // Samples are assumed to be unsigned, in the low 9, 10 or 12 bits. The scale
3890 // factor adjusts the source integer range to the desired half-float range.
3891 
3892 // This magic constant is 2^-112. Multiplying by this
3893 // is the same as subtracting 112 from the exponent, which
3894 // is the difference in exponent bias between 32-bit and
3895 // 16-bit floats. Once we've done this subtraction, we can
3896 // simply extract the low bits of the exponent and the high
3897 // bits of the mantissa from our float and we're done.
3898 
3899 // Work around GCC 7 punning warning -Wstrict-aliasing
3900 #if defined(__GNUC__)
3901 typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
3902 #else
3903 typedef uint32_t uint32_alias_t;
3904 #endif
3905 
3906 void HalfFloatRow_C(const uint16_t* src,
3907                     uint16_t* dst,
3908                     float scale,
3909                     int width) {
3910   int i;
3911   float mult = 1.9259299444e-34f * scale;
3912   for (i = 0; i < width; ++i) {
3913     float value = src[i] * mult;
3914     dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13);
3915   }
3916 }
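// Worked example of the half-float trick above (illustrative, not from the
// original source): with scale = 1.0f and src[i] = 1, value = 2^-112, whose
// float bit pattern is 0x07800000; shifting right by 13 keeps the low
// exponent bits and high mantissa bits, giving 0x3C00, which is exactly 1.0
// in IEEE half precision.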
3917 
3918 void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
3919   int i;
3920   for (i = 0; i < width; ++i) {
3921     float value = src[i] * scale;
3922     dst[i] = value;
3923   }
3924 }
3925 
3926 void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
3927                              uint8_t* dst_argb,
3928                              int width,
3929                              const uint8_t* luma,
3930                              uint32_t lumacoeff) {
3931   uint32_t bc = lumacoeff & 0xff;
3932   uint32_t gc = (lumacoeff >> 8) & 0xff;
3933   uint32_t rc = (lumacoeff >> 16) & 0xff;
3934 
3935   int i;
3936   for (i = 0; i < width - 1; i += 2) {
3937     // Luminance in rows, color values in columns.
3938     const uint8_t* luma0 =
3939         ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
3940         luma;
3941     const uint8_t* luma1;
3942     dst_argb[0] = luma0[src_argb[0]];
3943     dst_argb[1] = luma0[src_argb[1]];
3944     dst_argb[2] = luma0[src_argb[2]];
3945     dst_argb[3] = src_argb[3];
3946     luma1 =
3947         ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) +
3948         luma;
3949     dst_argb[4] = luma1[src_argb[4]];
3950     dst_argb[5] = luma1[src_argb[5]];
3951     dst_argb[6] = luma1[src_argb[6]];
3952     dst_argb[7] = src_argb[7];
3953     src_argb += 8;
3954     dst_argb += 8;
3955   }
3956   if (width & 1) {
3957     // Luminance in rows, color values in columns.
3958     const uint8_t* luma0 =
3959         ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
3960         luma;
3961     dst_argb[0] = luma0[src_argb[0]];
3962     dst_argb[1] = luma0[src_argb[1]];
3963     dst_argb[2] = luma0[src_argb[2]];
3964     dst_argb[3] = src_argb[3];
3965   }
3966 }
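// Illustrative (derived from the code above, not stated in the original
// source): the weighted sum masked with 0x7F00 selects one of 128 rows of
// 256 bytes in the luma table, and each channel is then looked up within
// that row by its own 8-bit value.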
3967 
3968 void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
3969   int i;
3970   for (i = 0; i < width - 1; i += 2) {
3971     dst[3] = src[3];
3972     dst[7] = src[7];
3973     dst += 8;
3974     src += 8;
3975   }
3976   if (width & 1) {
3977     dst[3] = src[3];
3978   }
3979 }
3980 
3981 void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
3982   int i;
3983   for (i = 0; i < width - 1; i += 2) {
3984     dst_a[0] = src_argb[3];
3985     dst_a[1] = src_argb[7];
3986     dst_a += 2;
3987     src_argb += 8;
3988   }
3989   if (width & 1) {
3990     dst_a[0] = src_argb[3];
3991   }
3992 }
3993 
3994 void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
3995   int i;
3996   for (i = 0; i < width - 1; i += 2) {
3997     dst[3] = src[0];
3998     dst[7] = src[1];
3999     dst += 8;
4000     src += 2;
4001   }
4002   if (width & 1) {
4003     dst[3] = src[0];
4004   }
4005 }
4006 
4007 // Maximum temporary width for wrappers to process at a time, in pixels.
4008 #define MAXTWIDTH 2048
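// Illustrative note (not from the original source): the wrappers below all
// follow the same two-step pattern. Each pass converts up to MAXTWIDTH
// pixels into the local ARGB row buffer with an existing SIMD YUV-to-ARGB
// row function, then repacks that buffer into the narrower destination
// format with an existing ARGB packing row function.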
4009 
4010 #if !(defined(_MSC_VER) && !defined(__clang__) && defined(_M_IX86)) && \
4011     defined(HAS_I422TORGB565ROW_SSSE3)
4012 // row_win.cc has an asm version, but GCC uses a 2-step wrapper.
4013 void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
4014                            const uint8_t* src_u,
4015                            const uint8_t* src_v,
4016                            uint8_t* dst_rgb565,
4017                            const struct YuvConstants* yuvconstants,
4018                            int width) {
4019   SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4020   while (width > 0) {
4021     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4022     I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
4023     ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
4024     src_y += twidth;
4025     src_u += twidth / 2;
4026     src_v += twidth / 2;
4027     dst_rgb565 += twidth * 2;
4028     width -= twidth;
4029   }
4030 }
4031 #endif
4032 
4033 #if defined(HAS_I422TOARGB1555ROW_SSSE3)
4034 void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
4035                              const uint8_t* src_u,
4036                              const uint8_t* src_v,
4037                              uint8_t* dst_argb1555,
4038                              const struct YuvConstants* yuvconstants,
4039                              int width) {
4040   // Row buffer for intermediate ARGB pixels.
4041   SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4042   while (width > 0) {
4043     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4044     I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
4045     ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
4046     src_y += twidth;
4047     src_u += twidth / 2;
4048     src_v += twidth / 2;
4049     dst_argb1555 += twidth * 2;
4050     width -= twidth;
4051   }
4052 }
4053 #endif
4054 
4055 #if defined(HAS_I422TOARGB4444ROW_SSSE3)
4056 void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
4057                              const uint8_t* src_u,
4058                              const uint8_t* src_v,
4059                              uint8_t* dst_argb4444,
4060                              const struct YuvConstants* yuvconstants,
4061                              int width) {
4062   // Row buffer for intermediate ARGB pixels.
4063   SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4064   while (width > 0) {
4065     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4066     I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
4067     ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
4068     src_y += twidth;
4069     src_u += twidth / 2;
4070     src_v += twidth / 2;
4071     dst_argb4444 += twidth * 2;
4072     width -= twidth;
4073   }
4074 }
4075 #endif
4076 
4077 #if defined(HAS_NV12TORGB565ROW_SSSE3)
4078 void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
4079                            const uint8_t* src_uv,
4080                            uint8_t* dst_rgb565,
4081                            const struct YuvConstants* yuvconstants,
4082                            int width) {
4083   // Row buffer for intermediate ARGB pixels.
4084   SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4085   while (width > 0) {
4086     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4087     NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
4088     ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
4089     src_y += twidth;
4090     src_uv += twidth;
4091     dst_rgb565 += twidth * 2;
4092     width -= twidth;
4093   }
4094 }
4095 #endif
4096 
4097 #if defined(HAS_NV12TORGB24ROW_SSSE3)
4098 void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
4099                           const uint8_t* src_uv,
4100                           uint8_t* dst_rgb24,
4101                           const struct YuvConstants* yuvconstants,
4102                           int width) {
4103   // Row buffer for intermediate ARGB pixels.
4104   SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4105   while (width > 0) {
4106     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4107     NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
4108     ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4109     src_y += twidth;
4110     src_uv += twidth;
4111     dst_rgb24 += twidth * 3;
4112     width -= twidth;
4113   }
4114 }
4115 #endif
4116 
4117 #if defined(HAS_NV21TORGB24ROW_SSSE3)
NV21ToRGB24Row_SSSE3(const uint8_t * src_y,const uint8_t * src_vu,uint8_t * dst_rgb24,const struct YuvConstants * yuvconstants,int width)4118 void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
4119                           const uint8_t* src_vu,
4120                           uint8_t* dst_rgb24,
4121                           const struct YuvConstants* yuvconstants,
4122                           int width) {
4123   // Row buffer for intermediate ARGB pixels.
4124   SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
4125   while (width > 0) {
4126     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
4127     NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth);
4128     ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
4129     src_y += twidth;
4130     src_vu += twidth;
4131     dst_rgb24 += twidth * 3;
4132     width -= twidth;
4133   }
4134 }
4135 #endif
4136 
#if defined(HAS_NV12TORGB24ROW_AVX2)
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV21TORGB24ROW_AVX2)
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_vu += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TORGB565ROW_AVX2)
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
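  // Row buffer for intermediate ARGB pixels.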
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB1555ROW_AVX2)
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb1555 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TOARGB4444ROW_AVX2)
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_argb4444 += twidth * 2;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I422TORGB24ROW_AVX2)
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_u += twidth / 2;
    src_v += twidth / 2;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_I444TORGB24ROW_AVX2)
void I444ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    I444ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
#endif
    src_y += twidth;
    src_u += twidth;
    src_v += twidth;
    dst_rgb24 += twidth * 3;
    width -= twidth;
  }
}
#endif

#if defined(HAS_NV12TORGB565ROW_AVX2)
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
#endif
    src_y += twidth;
    src_uv += twidth;
    dst_rgb565 += twidth * 2;
    width -= twidth;
  }
}
#endif

#ifdef HAS_RGB24TOYJROW_AVX2
// Convert 16 RGB24 pixels (48 bytes) to 16 YJ values.
void RGB24ToYJRow_AVX2(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
    src_rgb24 += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_AVX2

#ifdef HAS_RAWTOYJROW_AVX2
// Convert 16 RAW pixels (48 bytes) to 16 YJ values.
void RAWToYJRow_AVX2(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
    ARGBToYJRow_AVX2(row, dst_yj, twidth);
    src_raw += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RAWTOYJROW_AVX2

#ifdef HAS_RGB24TOYJROW_SSSE3
// Convert 16 RGB24 pixels (48 bytes) to 16 YJ values.
void RGB24ToYJRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RGB24ToARGBRow_SSSE3(src_rgb24, row, twidth);
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
    src_rgb24 += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RGB24TOYJROW_SSSE3

#ifdef HAS_RAWTOYJROW_SSSE3
// Convert 16 RAW pixels (48 bytes) to 16 YJ values.
void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
  // Row buffer for intermediate ARGB pixels.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    RAWToARGBRow_SSSE3(src_raw, row, twidth);
    ARGBToYJRow_SSSE3(row, dst_yj, twidth);
    src_raw += twidth * 3;
    dst_yj += twidth;
    width -= twidth;
  }
}
#endif  // HAS_RAWTOYJROW_SSSE3

#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
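// Blend two rows of 16 bit pixels by 'source_y_fraction', then convert the
// interpolated row to 8 bit using 'scale'.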
void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
                               const uint16_t* src_ptr,
                               ptrdiff_t src_stride,
                               int scale,
                               int width,
                               int source_y_fraction) {
  // Row buffer for intermediate 16 bit pixels.
  SIMD_ALIGNED(uint16_t row[MAXTWIDTH]);
  while (width > 0) {
    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
    InterpolateRow_16_C(row, src_ptr, src_stride, twidth, source_y_fraction);
    Convert16To8Row_AVX2(row, dst_ptr, scale, twidth);
    src_ptr += twidth;
    dst_ptr += twidth;
    width -= twidth;
  }
}
#endif  // HAS_INTERPOLATEROW_16TO8_AVX2

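// Scale float samples by 'scale' and return the sum of squares of the
// unscaled source samples.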
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float fsum = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    fsum += v * v;
    *dst++ = v * scale;
  }
  return fsum;
}

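// Scale float samples by 'scale' and return the maximum unscaled source
// sample.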
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float fmax = 0.f;
  int i;
  for (i = 0; i < width; ++i) {
    float v = *src++;
    float vs = v * scale;
    fmax = (v > fmax) ? v : fmax;
    *dst++ = vs;
  }
  return fmax;
}

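// Scale float samples by 'scale'.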
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src++ * scale;
  }
}

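// Filter a row of 32 bit sums with 1, 4, 6, 4, 1 coefficients, round and
// divide by 256 to produce 16 bit values.  Reads 'width' + 4 source values.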
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = STATIC_CAST(
        uint16_t,
        (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8);
    ++src;
  }
}

// Filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}

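// Filter a row of floats with 1, 4, 6, 4, 1 coefficients and scale by
// 1 / 256.  Reads 'width' + 4 source values.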
void GaussRow_F32_C(const float* src, float* dst, int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4]) *
             (1.0f / 256.0f);
    ++src;
  }
}

// Filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row.
void GaussCol_F32_C(const float* src0,
                    const float* src1,
                    const float* src2,
                    const float* src3,
                    const float* src4,
                    float* dst,
                    int width) {
  int i;
  for (i = 0; i < width; ++i) {
    *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++;
  }
}

// Convert biplanar NV21 to packed YUV24.
void NV21ToYUV24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* dst_yuv24,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
    dst_yuv24[3] = src_vu[0];  // V
    dst_yuv24[4] = src_vu[1];  // U
    dst_yuv24[5] = src_y[1];   // Y1
    src_y += 2;
    src_vu += 2;
    dst_yuv24 += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    dst_yuv24[0] = src_vu[0];  // V
    dst_yuv24[1] = src_vu[1];  // U
    dst_yuv24[2] = src_y[0];   // Y0
  }
}

// Filter 2 rows of AYUV UV's (444) into UV (420).
// AYUV is VUYA in memory.  UV for NV12 is UV order in memory.
void AYUVToUVRow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_uv,
                   int width) {
  // Output a row of UV values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    src_ayuv += 8;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
    dst_uv[1] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
  }
}

// Filter 2 rows of AYUV UV's (444) into VU (420).
void AYUVToVURow_C(const uint8_t* src_ayuv,
                   int src_stride_ayuv,
                   uint8_t* dst_vu,
                   int width) {
  // Output a row of VU values, filtering 2x2 rows of AYUV.
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[4] + src_ayuv[src_stride_ayuv + 0] +
                 src_ayuv[src_stride_ayuv + 4] + 2) >>
                2;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[5] + src_ayuv[src_stride_ayuv + 1] +
                 src_ayuv[src_stride_ayuv + 5] + 2) >>
                2;
    src_ayuv += 8;
    dst_vu += 2;
  }
  if (width & 1) {
    dst_vu[0] = (src_ayuv[0] + src_ayuv[src_stride_ayuv + 0] + 1) >> 1;
    dst_vu[1] = (src_ayuv[1] + src_ayuv[src_stride_ayuv + 1] + 1) >> 1;
  }
}

// Copy row of AYUV Y's into Y.
void AYUVToYRow_C(const uint8_t* src_ayuv, uint8_t* dst_y, int width) {
  // Output a row of Y values.
  int x;
  for (x = 0; x < width; ++x) {
    dst_y[x] = src_ayuv[2];  // v,u,y,a
    src_ayuv += 4;
  }
}

// Convert UV plane of NV12 to VU of NV21.
void SwapUVRow_C(const uint8_t* src_uv, uint8_t* dst_vu, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t u = src_uv[0];
    uint8_t v = src_uv[1];
    dst_vu[0] = v;
    dst_vu[1] = u;
    src_uv += 2;
    dst_vu += 2;
  }
}

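// 2x2 average rows of the U and V planes and interleave them into a half
// width UV row (NV12 chroma order).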
void HalfMergeUVRow_C(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    dst_uv[0] = (src_u[0] + src_u[1] + src_u[src_stride_u] +
                 src_u[src_stride_u + 1] + 2) >>
                2;
    dst_uv[1] = (src_v[0] + src_v[1] + src_v[src_stride_v] +
                 src_v[src_stride_v + 1] + 2) >>
                2;
    src_u += 2;
    src_v += 2;
    dst_uv += 2;
  }
  if (width & 1) {
    dst_uv[0] = (src_u[0] + src_u[src_stride_u] + 1) >> 1;
    dst_uv[1] = (src_v[0] + src_v[src_stride_v] + 1) >> 1;
  }
}

#undef STATIC_CAST

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif