1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/row.h"
12
13 #include <stdio.h>
14 #include <string.h> // For memcpy and memset.
15
16 #include "libyuv/basic_types.h"
17
18 #ifdef __cplusplus
19 namespace libyuv {
20 extern "C" {
21 #endif
22
// llvm x86 is poor at ternary operator, so use branchless min/max.
// The branchless variants assume that >> on a negative int32_t is an
// arithmetic (sign-extending) shift.

#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
// Returns v when v is positive, otherwise 0.
static __inline int32_t clamp0(int32_t v) {
  const int32_t positive_mask = -v >> 31;  // All ones only when v > 0.
  return positive_mask & v;
}

// Returns 255 when v is above 255, otherwise the low 8 bits of v.
static __inline int32_t clamp255(int32_t v) {
  const int32_t over_mask = (255 - v) >> 31;  // All ones only when v > 255.
  return (over_mask | v) & 255;
}

// Returns 1023 when v is above 1023, otherwise the low 10 bits of v.
static __inline int32_t clamp1023(int32_t v) {
  const int32_t over_mask = (1023 - v) >> 31;  // All ones only when v > 1023.
  return (over_mask | v) & 1023;
}

// Returns the magnitude of v.
static __inline uint32_t Abs(int32_t v) {
  const int sign = v >> 31;  // -1 for negative v, 0 otherwise.
  return (v + sign) ^ sign;
}
#else   // USE_BRANCHLESS
// Returns v, or 0 when v is negative.
static __inline int32_t clamp0(int32_t v) {
  if (v < 0) {
    return 0;
  }
  return v;
}

// Returns v, or 255 when v is above 255.
static __inline int32_t clamp255(int32_t v) {
  if (v > 255) {
    return 255;
  }
  return v;
}

// Returns v, or 1023 when v is above 1023.
static __inline int32_t clamp1023(int32_t v) {
  if (v > 1023) {
    return 1023;
  }
  return v;
}

// Returns the magnitude of v.
static __inline uint32_t Abs(int32_t v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
#endif  // USE_BRANCHLESS
// Clamps val to the 8 bit range 0..255.
static __inline uint32_t Clamp(int32_t val) {
  // Negative values are removed first so clamp255 only sees values >= 0.
  return (uint32_t)clamp255(clamp0(val));
}
64
// Clamps val to the 10 bit range 0..1023.
static __inline uint32_t Clamp10(int32_t val) {
  // Negative values are removed first so clamp1023 only sees values >= 0.
  return (uint32_t)clamp1023(clamp0(val));
}
69
// WRITEWORD(p, v) stores the 32 bit value v at p, low byte first.
// Little Endian targets use a single native store; the expansion and the
// value argument are fully parenthesized so the macro behaves like one
// expression wherever it is used.
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
    defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) ||     \
    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define WRITEWORD(p, v) (*(uint32_t*)(p) = (v))
#else
// Other targets write the four bytes individually, low byte first.
static inline void WRITEWORD(uint8_t* p, uint32_t v) {
  p[0] = (uint8_t)(v & 255);
  p[1] = (uint8_t)((v >> 8) & 255);
  p[2] = (uint8_t)((v >> 16) & 255);
  p[3] = (uint8_t)((v >> 24) & 255);
}
#endif
83
// Expands 3 byte pixels to 4 byte pixels: the three source bytes are copied
// in order and a fourth byte of 255 (opaque alpha) is appended.
void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_rgb24[0];
    const uint8_t green = src_rgb24[1];
    const uint8_t red = src_rgb24[2];
    src_rgb24 += 3;
    *dst_argb++ = blue;
    *dst_argb++ = green;
    *dst_argb++ = red;
    *dst_argb++ = 255u;
  }
}
98
// Expands 3 byte RAW pixels (R, G, B byte order) to 4 byte pixels with the
// first and third bytes swapped and a fourth byte of 255 appended.
void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t red = src_raw[0];
    const uint8_t green = src_raw[1];
    const uint8_t blue = src_raw[2];
    src_raw += 3;
    *dst_argb++ = blue;
    *dst_argb++ = green;
    *dst_argb++ = red;
    *dst_argb++ = 255u;
  }
}
113
// Swaps the first and third byte of every 3 byte pixel. Each pixel is read
// completely before it is written.
void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t red = src_raw[0];
    const uint8_t green = src_raw[1];
    const uint8_t blue = src_raw[2];
    src_raw += 3;
    *dst_rgb24++ = blue;
    *dst_rgb24++ = green;
    *dst_rgb24++ = red;
  }
}
127
// Converts 2 byte RGB565 pixels (low byte first) to 4 byte pixels.
// Each field is widened to 8 bits by replicating its top bits into the
// vacated low bits; the fourth byte is set to 255.
void RGB565ToARGBRow_C(const uint8_t* src_rgb565,
                       uint8_t* dst_argb,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned pixel = src_rgb565[0] | (src_rgb565[1] << 8);
    const unsigned b5 = pixel & 0x1f;
    const unsigned g6 = (pixel >> 5) & 0x3f;
    const unsigned r5 = pixel >> 11;
    dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8_t)((g6 << 2) | (g6 >> 4));
    dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = 255u;
    src_rgb565 += 2;
    dst_argb += 4;
  }
}
144
// Converts 2 byte ARGB1555 pixels (low byte first) to 4 byte pixels.
// The three 5 bit fields are widened to 8 bits by bit replication and the
// single alpha bit becomes 0 or 255.
void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned pixel = src_argb1555[0] | (src_argb1555[1] << 8);
    const unsigned b5 = pixel & 0x1f;
    const unsigned g5 = (pixel >> 5) & 0x1f;
    const unsigned r5 = (pixel >> 10) & 0x1f;
    dst_argb[0] = (uint8_t)((b5 << 3) | (b5 >> 2));
    dst_argb[1] = (uint8_t)((g5 << 3) | (g5 >> 2));
    dst_argb[2] = (uint8_t)((r5 << 3) | (r5 >> 2));
    dst_argb[3] = (pixel & 0x8000) ? 255u : 0u;
    src_argb1555 += 2;
    dst_argb += 4;
  }
}
162
// Converts 2 byte ARGB4444 pixels to 4 byte pixels by duplicating every
// nibble into both halves of its output byte.
void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
                         uint8_t* dst_argb,
                         int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned lo = src_argb4444[0];
    const unsigned hi = src_argb4444[1];
    const unsigned b4 = lo & 0x0f;
    const unsigned g4 = lo >> 4;
    const unsigned r4 = hi & 0x0f;
    const unsigned a4 = hi >> 4;
    // n * 0x11 places the nibble n in both the high and the low half.
    dst_argb[0] = (uint8_t)(b4 * 0x11);
    dst_argb[1] = (uint8_t)(g4 * 0x11);
    dst_argb[2] = (uint8_t)(r4 * 0x11);
    dst_argb[3] = (uint8_t)(a4 * 0x11);
    src_argb4444 += 2;
    dst_argb += 4;
  }
}
180
// Converts AR30 pixels (32 bit little endian words holding three 10 bit
// colour fields and a 2 bit alpha field) to 4 byte pixels. The top 8 bits of
// every colour field are kept and alpha is widened from 2 to 8 bits.
// Source and destination are accessed byte by byte, so neither pointer needs
// 4 byte alignment and the result does not depend on host byte order.
void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t ar30 = (uint32_t)src_ar30[0] |
                          ((uint32_t)src_ar30[1] << 8) |
                          ((uint32_t)src_ar30[2] << 16) |
                          ((uint32_t)src_ar30[3] << 24);
    dst_argb[0] = (uint8_t)((ar30 >> 2) & 0xff);
    dst_argb[1] = (uint8_t)((ar30 >> 12) & 0xff);
    dst_argb[2] = (uint8_t)((ar30 >> 22) & 0xff);
    dst_argb[3] = (uint8_t)((ar30 >> 30) * 0x55);  // Replicate 2 bits to 8 bits.
    dst_argb += 4;
    src_ar30 += 4;
  }
}
194
// Converts AR30 pixels (32 bit little endian words) to 4 byte pixels whose
// first byte comes from bits 22..29, second from bits 12..19 and third from
// bits 2..9 of the word; alpha is widened from 2 to 8 bits.
// Source and destination are accessed byte by byte, so neither pointer needs
// 4 byte alignment and the result does not depend on host byte order.
void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t ar30 = (uint32_t)src_ar30[0] |
                          ((uint32_t)src_ar30[1] << 8) |
                          ((uint32_t)src_ar30[2] << 16) |
                          ((uint32_t)src_ar30[3] << 24);
    dst_abgr[0] = (uint8_t)((ar30 >> 22) & 0xff);
    dst_abgr[1] = (uint8_t)((ar30 >> 12) & 0xff);
    dst_abgr[2] = (uint8_t)((ar30 >> 2) & 0xff);
    dst_abgr[3] = (uint8_t)((ar30 >> 30) * 0x55);  // Replicate 2 bits to 8 bits.
    dst_abgr += 4;
    src_ar30 += 4;
  }
}
208
// Swaps the lowest and the highest 10 bit colour field of every AR30 pixel
// (32 bit little endian word); the middle field and the 2 alpha bits stay
// in place.
// Source and destination are accessed byte by byte, so neither pointer needs
// 4 byte alignment and the result does not depend on host byte order.
void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t ar30 = (uint32_t)src_ar30[0] |
                          ((uint32_t)src_ar30[1] << 8) |
                          ((uint32_t)src_ar30[2] << 16) |
                          ((uint32_t)src_ar30[3] << 24);
    const uint32_t b = ar30 & 0x3ff;
    const uint32_t ga = ar30 & 0xc00ffc00;
    const uint32_t r = (ar30 >> 20) & 0x3ff;
    const uint32_t ab30 = r | ga | (b << 20);
    dst_ab30[0] = (uint8_t)(ab30 & 0xff);
    dst_ab30[1] = (uint8_t)((ab30 >> 8) & 0xff);
    dst_ab30[2] = (uint8_t)((ab30 >> 16) & 0xff);
    dst_ab30[3] = (uint8_t)(ab30 >> 24);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}
221
// Packs 4 byte pixels into 3 byte pixels by dropping the fourth byte.
// Each pixel is read completely before it is written.
void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    src_argb += 4;
    *dst_rgb++ = blue;
    *dst_rgb++ = green;
    *dst_rgb++ = red;
  }
}
235
// Packs 4 byte pixels into 3 byte pixels, dropping the fourth byte and
// swapping the first and third. Each pixel is read completely before it is
// written.
void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    src_argb += 4;
    *dst_rgb++ = red;
    *dst_rgb++ = green;
    *dst_rgb++ = blue;
  }
}
249
// Converts 4 byte pixels to 2 byte RGB565 pixels (low byte first) by keeping
// the top 5/6/5 bits of the first three bytes; the fourth byte is ignored.
// Fields are combined in unsigned arithmetic and stored one byte at a time,
// which avoids shifting into the sign bit of int, needs no destination
// alignment and produces the same byte layout on every host.
// A width of zero or less writes nothing.
void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t b = src_argb[0] >> 3;
    const uint32_t g = src_argb[1] >> 2;
    const uint32_t r = src_argb[2] >> 3;
    const uint32_t rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8_t)(rgb565 & 0xff);
    dst_rgb[1] = (uint8_t)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
271
// dither4 is a row of 4 values from 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB.  When converting to
// fewer bits (565) this provides an ordered dither.
// The order in the 4x4 matrix in first byte is upper left.
// The 4 values are passed as an int, then referenced as an array, so
// endian will not affect order of the original matrix.  But the dither4
// will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
//
// Pixel x uses byte (x & 3) of dither4. The dither value is added to the
// first three source bytes, the sums are limited to 255, and the result is
// packed as RGB565 (low byte first). Fields are combined in unsigned
// arithmetic and stored one byte at a time, which avoids shifting into the
// sign bit of int and needs no destination alignment.
// A width of zero or less writes nothing.
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
                             uint8_t* dst_rgb,
                             const uint32_t dither4,
                             int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int dither = ((const unsigned char*)(&dither4))[x & 3];
    const uint32_t b = (uint32_t)clamp255(src_argb[0] + dither) >> 3;
    const uint32_t g = (uint32_t)clamp255(src_argb[1] + dither) >> 2;
    const uint32_t r = (uint32_t)clamp255(src_argb[2] + dither) >> 3;
    const uint32_t rgb565 = b | (g << 5) | (r << 11);
    dst_rgb[0] = (uint8_t)(rgb565 & 0xff);
    dst_rgb[1] = (uint8_t)(rgb565 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
307
// Converts 4 byte pixels to 2 byte ARGB1555 pixels (low byte first): the top
// 5 bits of the first three bytes and the top bit of the fourth are kept.
// Fields are combined in unsigned arithmetic and stored one byte at a time,
// which avoids shifting into the sign bit of int, needs no destination
// alignment and produces the same byte layout on every host.
// A width of zero or less writes nothing.
void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t b = src_argb[0] >> 3;
    const uint32_t g = src_argb[1] >> 3;
    const uint32_t r = src_argb[2] >> 3;
    const uint32_t a = src_argb[3] >> 7;
    const uint32_t argb1555 = b | (g << 5) | (r << 10) | (a << 15);
    dst_rgb[0] = (uint8_t)(argb1555 & 0xff);
    dst_rgb[1] = (uint8_t)(argb1555 >> 8);
    dst_rgb += 2;
    src_argb += 4;
  }
}
332
// Converts 4 byte pixels to 2 byte ARGB4444 pixels by keeping the top nibble
// of every byte: byte 0 of the output holds the nibbles of source bytes 0
// (low half) and 1 (high half), byte 1 those of source bytes 2 and 3.
// Writing bytes directly avoids shifting into the sign bit of int, needs no
// destination alignment and produces the same layout on every host.
// A width of zero or less writes nothing.
void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t b = src_argb[0] >> 4;
    const uint32_t g = src_argb[1] >> 4;
    const uint32_t r = src_argb[2] >> 4;
    const uint32_t a = src_argb[3] >> 4;
    dst_rgb[0] = (uint8_t)(b | (g << 4));
    dst_rgb[1] = (uint8_t)(r | (a << 4));
    dst_rgb += 2;
    src_argb += 4;
  }
}
357
// Converts 4 byte pixels to AR30 (32 bit little endian words). Source byte 2
// fills bits 0..9, byte 1 bits 10..19, byte 0 bits 20..29 and the top two
// bits of byte 3 fill bits 30..31. Each 8 bit value is widened to 10 bits by
// appending its own top 2 bits.
// The word is stored one byte at a time, so dst_ar30 needs no 4 byte
// alignment and the layout does not depend on host byte order.
void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t c0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2);
    const uint32_t c1 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2);
    const uint32_t c2 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2);
    const uint32_t a2 = (uint32_t)(src_abgr[3] >> 6);
    const uint32_t ar30 = c2 | (c1 << 10) | (c0 << 20) | (a2 << 30);
    dst_ar30[0] = (uint8_t)(ar30 & 0xff);
    dst_ar30[1] = (uint8_t)((ar30 >> 8) & 0xff);
    dst_ar30[2] = (uint8_t)((ar30 >> 16) & 0xff);
    dst_ar30[3] = (uint8_t)(ar30 >> 24);
    dst_ar30 += 4;
    src_abgr += 4;
  }
}
370
// Converts 4 byte pixels to AR30 (32 bit little endian words). Source byte 0
// fills bits 0..9, byte 1 bits 10..19, byte 2 bits 20..29 and the top two
// bits of byte 3 fill bits 30..31. Each 8 bit value is widened to 10 bits by
// appending its own top 2 bits.
// The word is stored one byte at a time, so dst_ar30 needs no 4 byte
// alignment and the layout does not depend on host byte order.
void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t c0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2);
    const uint32_t c1 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2);
    const uint32_t c2 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2);
    const uint32_t a2 = (uint32_t)(src_argb[3] >> 6);
    const uint32_t ar30 = c0 | (c1 << 10) | (c2 << 20) | (a2 << 30);
    dst_ar30[0] = (uint8_t)(ar30 & 0xff);
    dst_ar30[1] = (uint8_t)((ar30 >> 8) & 0xff);
    dst_ar30[2] = (uint8_t)((ar30 >> 16) & 0xff);
    dst_ar30[3] = (uint8_t)(ar30 >> 24);
    dst_ar30 += 4;
    src_argb += 4;
  }
}
383
// Returns the Y (luma) value for an RGB triple: an 8 bit fixed point
// weighted sum with a bias of 0x1080, which yields 16 for black.
static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 66 * r + 129 * g + 25 * b;
  return (weighted + 0x1080) >> 8;
}
387
// Returns the U (blue difference) value for an RGB triple: an 8 bit fixed
// point weighted difference with a bias of 0x8080, which yields 128 for grey.
static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 112 * b - 74 * g - 38 * r;
  return (weighted + 0x8080) >> 8;
}
// Returns the V (red difference) value for an RGB triple: an 8 bit fixed
// point weighted difference with a bias of 0x8080, which yields 128 for grey.
static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 112 * r - 94 * g - 18 * b;
  return (weighted + 0x8080) >> 8;
}
394
// ARGBToY_C and ARGBToUV_C
// MAKEROWY(NAME, R, G, B, BPP) defines NAME##ToYRow_C and NAME##ToUVRow_C for
// a packed layout where R, G and B are the byte offsets of the channels
// inside a pixel and BPP is the pixel size in bytes.
// The Y function emits one value per pixel. The UV function emits one U and
// one V per pair of pixels, computed from the truncated average of a 2x2
// block taken from the row at src_rgb0 and the row src_stride_rgb bytes
// after it; an odd trailing pixel averages the two rows only.
#define MAKEROWY(NAME, R, G, B, BPP)                                         \
  void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x;                                                                   \
    for (x = 0; x < width; ++x) {                                            \
      *dst_y++ = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                      \
    }                                                                        \
  }                                                                          \
  void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                       uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* row0 = src_rgb0;                                          \
    const uint8_t* row1 = src_rgb0 + src_stride_rgb;                         \
    int x;                                                                   \
    for (x = 0; x < width - 1; x += 2) {                                     \
      const int sum_b = row0[B] + row0[B + BPP] + row1[B] + row1[B + BPP];   \
      const int sum_g = row0[G] + row0[G + BPP] + row1[G] + row1[G + BPP];   \
      const int sum_r = row0[R] + row0[R + BPP] + row1[R] + row1[R + BPP];   \
      const uint8_t avg_b = (uint8_t)(sum_b >> 2);                           \
      const uint8_t avg_g = (uint8_t)(sum_g >> 2);                           \
      const uint8_t avg_r = (uint8_t)(sum_r >> 2);                           \
      *dst_u++ = RGBToU(avg_r, avg_g, avg_b);                                \
      *dst_v++ = RGBToV(avg_r, avg_g, avg_b);                                \
      row0 += BPP * 2;                                                       \
      row1 += BPP * 2;                                                       \
    }                                                                        \
    if (width & 1) {                                                         \
      const uint8_t avg_b = (uint8_t)((row0[B] + row1[B]) >> 1);             \
      const uint8_t avg_g = (uint8_t)((row0[G] + row1[G]) >> 1);             \
      const uint8_t avg_r = (uint8_t)((row0[R] + row1[R]) >> 1);             \
      *dst_u = RGBToU(avg_r, avg_g, avg_b);                                  \
      *dst_v = RGBToV(avg_r, avg_g, avg_b);                                  \
    }                                                                        \
  }

MAKEROWY(ARGB, 2, 1, 0, 4)
MAKEROWY(BGRA, 1, 2, 3, 4)
MAKEROWY(ABGR, 0, 1, 2, 4)
MAKEROWY(RGBA, 3, 2, 1, 4)
MAKEROWY(RGB24, 2, 1, 0, 3)
MAKEROWY(RAW, 0, 1, 2, 3)
#undef MAKEROWY
442
// JPeg uses a variation on BT.601-1 full range
// y =  0.29900 * r + 0.58700 * g + 0.11400 * b
// u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
// v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
// BT.601 Mpeg range uses:
// b 0.1016 * 255 = 25.908 = 25
// g 0.5078 * 255 = 129.489 = 129
// r 0.2578 * 255 = 65.739 = 66
// JPeg 8 bit Y (not used):
// b 0.11400 * 256 = 29.184 = 29
// g 0.58700 * 256 = 150.272 = 150
// r 0.29900 * 256 = 76.544 = 77
// JPeg 7 bit Y:
// b 0.11400 * 128 = 14.592 = 15
// g 0.58700 * 128 = 75.136 = 75
// r 0.29900 * 128 = 38.272 = 38
// JPeg 8 bit U:
// b  0.50000 * 255 = 127.5 = 127
// g -0.33126 * 255 = -84.4713 = -84
// r -0.16874 * 255 = -43.0287 = -43
// JPeg 8 bit V:
// b -0.08131 * 255 = -20.73405 = -20
// g -0.41869 * 255 = -106.76595 = -107
// r  0.50000 * 255 = 127.5 = 127

// Returns the JPeg (full range) Y value using the 7 bit coefficients above,
// with 64 added so the final shift rounds to nearest.
static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 38 * r + 75 * g + 15 * b;
  return (weighted + 64) >> 7;
}
471
// Returns the JPeg (full range) U value: an 8 bit fixed point weighted
// difference with a bias of 0x8080, which yields 128 for grey.
static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 127 * b - 84 * g - 43 * r;
  return (weighted + 0x8080) >> 8;
}
// Returns the JPeg (full range) V value: an 8 bit fixed point weighted
// difference with a bias of 0x8080, which yields 128 for grey.
static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) {
  const int weighted = 127 * r - 107 * g - 20 * b;
  return (weighted + 0x8080) >> 8;
}
478
// Average of two values, rounding halves up.
#define AVGB(a, b) (((a) + (b) + 1) >> 1)

// ARGBToYJ_C and ARGBToUVJ_C
// MAKEROWYJ(NAME, R, G, B, BPP) defines NAME##ToYJRow_C and NAME##ToUVJRow_C
// for a packed layout where R, G and B are the byte offsets of the channels
// inside a pixel and BPP is the pixel size in bytes.
// The UV function averages each column of a 2x2 block vertically with AVGB,
// then averages the two column results; an odd trailing pixel uses the
// vertical average only.
#define MAKEROWYJ(NAME, R, G, B, BPP)                                         \
  void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \
    int x;                                                                    \
    for (x = 0; x < width; ++x) {                                             \
      *dst_y++ = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);           \
      src_argb0 += BPP;                                                       \
    }                                                                         \
  }                                                                           \
  void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb,          \
                        uint8_t* dst_u, uint8_t* dst_v, int width) {          \
    const uint8_t* row0 = src_rgb0;                                           \
    const uint8_t* row1 = src_rgb0 + src_stride_rgb;                          \
    int x;                                                                    \
    for (x = 0; x < width - 1; x += 2) {                                      \
      const uint8_t avg_b = AVGB(AVGB(row0[B], row1[B]),                      \
                                 AVGB(row0[B + BPP], row1[B + BPP]));         \
      const uint8_t avg_g = AVGB(AVGB(row0[G], row1[G]),                      \
                                 AVGB(row0[G + BPP], row1[G + BPP]));         \
      const uint8_t avg_r = AVGB(AVGB(row0[R], row1[R]),                      \
                                 AVGB(row0[R + BPP], row1[R + BPP]));         \
      *dst_u++ = RGBToUJ(avg_r, avg_g, avg_b);                                \
      *dst_v++ = RGBToVJ(avg_r, avg_g, avg_b);                                \
      row0 += BPP * 2;                                                        \
      row1 += BPP * 2;                                                        \
    }                                                                         \
    if (width & 1) {                                                          \
      const uint8_t avg_b = AVGB(row0[B], row1[B]);                           \
      const uint8_t avg_g = AVGB(row0[G], row1[G]);                           \
      const uint8_t avg_r = AVGB(row0[R], row1[R]);                           \
      *dst_u = RGBToUJ(avg_r, avg_g, avg_b);                                  \
      *dst_v = RGBToVJ(avg_r, avg_g, avg_b);                                  \
    }                                                                         \
  }

MAKEROWYJ(ARGB, 2, 1, 0, 4)
#undef MAKEROWYJ
520
// Computes one Y value per RGB565 pixel (2 bytes, low byte first). Each
// field is widened to 8 bits by bit replication before calling RGBToY.
void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned pixel = src_rgb565[0] | (src_rgb565[1] << 8);
    const unsigned b5 = pixel & 0x1f;
    const unsigned g6 = (pixel >> 5) & 0x3f;
    const unsigned r5 = pixel >> 11;
    const uint8_t b = (uint8_t)((b5 << 3) | (b5 >> 2));
    const uint8_t g = (uint8_t)((g6 << 2) | (g6 >> 4));
    const uint8_t r = (uint8_t)((r5 << 3) | (r5 >> 2));
    *dst_y++ = RGBToY(r, g, b);
    src_rgb565 += 2;
  }
}
535
// Computes one Y value per ARGB1555 pixel (2 bytes, low byte first). Each
// 5 bit field is widened to 8 bits by bit replication before calling
// RGBToY; the alpha bit is ignored.
void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned pixel = src_argb1555[0] | (src_argb1555[1] << 8);
    const unsigned b5 = pixel & 0x1f;
    const unsigned g5 = (pixel >> 5) & 0x1f;
    const unsigned r5 = (pixel >> 10) & 0x1f;
    const uint8_t b = (uint8_t)((b5 << 3) | (b5 >> 2));
    const uint8_t g = (uint8_t)((g5 << 3) | (g5 >> 2));
    const uint8_t r = (uint8_t)((r5 << 3) | (r5 >> 2));
    *dst_y++ = RGBToY(r, g, b);
    src_argb1555 += 2;
  }
}
550
// Computes one Y value per ARGB4444 pixel (2 bytes). Each colour nibble is
// duplicated into both halves of a byte before calling RGBToY; the alpha
// nibble is ignored.
void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const unsigned b4 = src_argb4444[0] & 0x0f;
    const unsigned g4 = src_argb4444[0] >> 4;
    const unsigned r4 = src_argb4444[1] & 0x0f;
    const uint8_t b = (uint8_t)(b4 * 0x11);
    const uint8_t g = (uint8_t)(g4 * 0x11);
    const uint8_t r = (uint8_t)(r4 * 0x11);
    *dst_y++ = RGBToY(r, g, b);
    src_argb4444 += 2;
  }
}
565
// Computes one U and one V value per pair of RGB565 pixels from a 2x2 block
// taken from the row at src_rgb565 and the row src_stride_rgb565 bytes after
// it. Channel fields are summed (which adds precision bits) and the sums are
// then widened to 8 bits by bit replication. An odd trailing pixel uses a
// 1x2 block.
void RGB565ToUVRow_C(const uint8_t* src_rgb565,
                     int src_stride_rgb565,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint8_t* row0 = src_rgb565;
  const uint8_t* row1 = src_rgb565 + src_stride_rgb565;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    int sum_b = 0;
    int sum_g = 0;
    int sum_r = 0;
    int i;
    uint8_t b;
    uint8_t g;
    uint8_t r;
    // i selects the left (0) and right (2) pixel of the pair.
    for (i = 0; i < 4; i += 2) {
      sum_b += (row0[i] & 0x1f) + (row1[i] & 0x1f);
      sum_g += ((row0[i] >> 5) | ((row0[i + 1] & 0x07) << 3)) +
               ((row1[i] >> 5) | ((row1[i + 1] & 0x07) << 3));
      sum_r += (row0[i + 1] >> 3) + (row1[i + 1] >> 3);
    }
    // 565 * 4 = 787; 787 -> 888.
    b = (uint8_t)((sum_b << 1) | (sum_b >> 6));
    g = (uint8_t)sum_g;
    r = (uint8_t)((sum_r << 1) | (sum_r >> 6));
    *dst_u++ = RGBToU(r, g, b);
    *dst_v++ = RGBToV(r, g, b);
    row0 += 4;
    row1 += 4;
  }
  if (width & 1) {
    const int sum_b = (row0[0] & 0x1f) + (row1[0] & 0x1f);
    const int sum_g = ((row0[0] >> 5) | ((row0[1] & 0x07) << 3)) +
                      ((row1[0] >> 5) | ((row1[1] & 0x07) << 3));
    const int sum_r = (row0[1] >> 3) + (row1[1] >> 3);
    // 565 * 2 = 676; 676 -> 888.
    const uint8_t b = (uint8_t)((sum_b << 2) | (sum_b >> 4));
    const uint8_t g = (uint8_t)((sum_g << 1) | (sum_g >> 6));
    const uint8_t r = (uint8_t)((sum_r << 2) | (sum_r >> 4));
    *dst_u = RGBToU(r, g, b);
    *dst_v = RGBToV(r, g, b);
  }
}
615
// Computes one U and one V value per pair of ARGB1555 pixels from a 2x2
// block taken from the row at src_argb1555 and the row src_stride_argb1555
// bytes after it. The 5 bit fields are summed (which adds precision bits)
// and the sums are widened to 8 bits by bit replication. An odd trailing
// pixel uses a 1x2 block.
// Red is always decoded as (byte1 & 0x7c) >> 2, for both rows and for the
// trailing pixel, so the alpha bit and the green bits never leak into it.
void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,
                       int src_stride_argb1555,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    int sum_b = 0;
    int sum_g = 0;
    int sum_r = 0;
    int i;
    uint8_t b;
    uint8_t g;
    uint8_t r;
    // i selects the left (0) and right (2) pixel of the pair.
    for (i = 0; i < 4; i += 2) {
      sum_b += (src_argb1555[i] & 0x1f) + (next_argb1555[i] & 0x1f);
      sum_g += ((src_argb1555[i] >> 5) | ((src_argb1555[i + 1] & 0x03) << 3)) +
               ((next_argb1555[i] >> 5) | ((next_argb1555[i + 1] & 0x03) << 3));
      sum_r += ((src_argb1555[i + 1] & 0x7c) >> 2) +
               ((next_argb1555[i + 1] & 0x7c) >> 2);
    }
    // 555 * 4 = 777; 777 -> 888.
    b = (uint8_t)((sum_b << 1) | (sum_b >> 6));
    g = (uint8_t)((sum_g << 1) | (sum_g >> 6));
    r = (uint8_t)((sum_r << 1) | (sum_r >> 6));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    const int sum_b = (src_argb1555[0] & 0x1f) + (next_argb1555[0] & 0x1f);
    const int sum_g =
        ((src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3)) +
        ((next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3));
    const int sum_r =
        ((src_argb1555[1] & 0x7c) >> 2) + ((next_argb1555[1] & 0x7c) >> 2);
    // 555 * 2 = 666; 666 -> 888.
    const uint8_t b = (uint8_t)((sum_b << 2) | (sum_b >> 4));
    const uint8_t g = (uint8_t)((sum_g << 2) | (sum_g >> 4));
    const uint8_t r = (uint8_t)((sum_r << 2) | (sum_r >> 4));
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}
666
// Computes one U and one V value per pair of ARGB4444 pixels from a 2x2
// block taken from the row at src_argb4444 and the row src_stride_argb4444
// bytes after it. Colour nibbles are summed (which adds precision bits) and
// the sums are widened to 8 bits by bit replication. An odd trailing pixel
// uses a 1x2 block.
void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,
                       int src_stride_argb4444,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  const uint8_t* row0 = src_argb4444;
  const uint8_t* row1 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    int sum_b = 0;
    int sum_g = 0;
    int sum_r = 0;
    int i;
    uint8_t b;
    uint8_t g;
    uint8_t r;
    // i selects the left (0) and right (2) pixel of the pair.
    for (i = 0; i < 4; i += 2) {
      sum_b += (row0[i] & 0x0f) + (row1[i] & 0x0f);
      sum_g += (row0[i] >> 4) + (row1[i] >> 4);
      sum_r += (row0[i + 1] & 0x0f) + (row1[i + 1] & 0x0f);
    }
    // 444 * 4 = 666; 666 -> 888.
    b = (uint8_t)((sum_b << 2) | (sum_b >> 4));
    g = (uint8_t)((sum_g << 2) | (sum_g >> 4));
    r = (uint8_t)((sum_r << 2) | (sum_r >> 4));
    *dst_u++ = RGBToU(r, g, b);
    *dst_v++ = RGBToV(r, g, b);
    row0 += 4;
    row1 += 4;
  }
  if (width & 1) {
    const int sum_b = (row0[0] & 0x0f) + (row1[0] & 0x0f);
    const int sum_g = (row0[0] >> 4) + (row1[0] >> 4);
    const int sum_r = (row0[1] & 0x0f) + (row1[1] & 0x0f);
    // 444 * 2 = 555; 555 -> 888.
    const uint8_t b = (uint8_t)((sum_b << 3) | (sum_b >> 2));
    const uint8_t g = (uint8_t)((sum_g << 3) | (sum_g >> 2));
    const uint8_t r = (uint8_t)((sum_r << 3) | (sum_r >> 2));
    *dst_u = RGBToU(r, g, b);
    *dst_v = RGBToV(r, g, b);
  }
}
717
// Computes one U and one V value for every 4 byte pixel (no subsampling),
// using bytes 0, 1 and 2 as blue, green and red.
void ARGBToUV444Row_C(const uint8_t* src_argb,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t blue = src_argb[0];
    const uint8_t green = src_argb[1];
    const uint8_t red = src_argb[2];
    *dst_u++ = RGBToU(red, green, blue);
    *dst_v++ = RGBToV(red, green, blue);
    src_argb += 4;
  }
}
734
// Replaces the three colour bytes of every 4 byte pixel with its RGBToYJ
// luma value and copies the fourth byte unchanged.
void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const uint8_t luma = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    dst_argb[0] = luma;
    dst_argb[1] = luma;
    dst_argb[2] = luma;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
745
// Convert a row of image to Sepia tone.
// Works in place: each output colour is a 7 bit fixed point weighted sum of
// the original blue, green and red bytes. The fourth byte is left untouched.
void ARGBSepiaRow_C(uint8_t* dst_argb, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int sepia_b = (blue * 17 + green * 68 + red * 35) >> 7;
    const int sepia_g = (blue * 22 + green * 88 + red * 45) >> 7;
    const int sepia_r = (blue * 24 + green * 98 + red * 50) >> 7;
    // The blue weights sum to less than 128, so only green and red need
    // clamping.
    dst_argb[0] = sepia_b;
    dst_argb[1] = clamp255(sepia_g);
    dst_argb[2] = clamp255(sepia_r);
    dst_argb += 4;
  }
}
763
// Apply color matrix to a row of image. Matrix is signed.
// matrix_argb holds 4 rows of 4 coefficients; row c produces output byte c
// from the four input bytes. Products are summed, shifted right by 6 and
// clamped to 0..255.
// TODO(fbarchard): Consider adding rounding (+32).
void ARGBColorMatrixRow_C(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          const int8_t* matrix_argb,
                          int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // The whole pixel is read before any output byte is written.
    const int blue = src_argb[0];
    const int green = src_argb[1];
    const int red = src_argb[2];
    const int alpha = src_argb[3];
    int channel;
    for (channel = 0; channel < 4; ++channel) {
      const int8_t* coeff = matrix_argb + channel * 4;
      const int sum = blue * coeff[0] + green * coeff[1] + red * coeff[2] +
                      alpha * coeff[3];
      dst_argb[channel] = Clamp(sum >> 6);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
796
// Apply color table to a row of image.
// Works in place: byte c of every pixel is replaced by
// table_argb[value * 4 + c], for all four bytes.
void ARGBColorTableRow_C(uint8_t* dst_argb,
                         const uint8_t* table_argb,
                         int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    const int alpha = dst_argb[3];
    *dst_argb++ = table_argb[blue * 4 + 0];
    *dst_argb++ = table_argb[green * 4 + 1];
    *dst_argb++ = table_argb[red * 4 + 2];
    *dst_argb++ = table_argb[alpha * 4 + 3];
  }
}
814
// Apply color table to a row of image.
// Works in place: bytes 0..2 of every pixel are replaced by
// table_argb[value * 4 + c]; the fourth byte is left untouched.
void RGBColorTableRow_C(uint8_t* dst_argb,
                        const uint8_t* table_argb,
                        int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    const int blue = dst_argb[0];
    const int green = dst_argb[1];
    const int red = dst_argb[2];
    dst_argb[0] = table_argb[blue * 4 + 0];
    dst_argb[1] = table_argb[green * 4 + 1];
    dst_argb[2] = table_argb[red * 4 + 2];
    dst_argb += 4;
  }
}
830
// Quantizes bytes 0..2 of every pixel in place: the value is multiplied by
// scale and shifted right by 16 to get an interval index, which is then
// mapped back with index * interval_size + interval_offset. The fourth byte
// is left untouched.
void ARGBQuantizeRow_C(uint8_t* dst_argb,
                       int scale,
                       int interval_size,
                       int interval_offset,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int channel;
    for (channel = 0; channel < 3; ++channel) {
      const int value = dst_argb[channel];
      const int interval = value * scale >> 16;
      dst_argb[channel] = interval * interval_size + interval_offset;
    }
    dst_argb += 4;
  }
}
847
// REPEAT8 widens an 8 bit value to 16 bits by copying it into the high byte.
// SHADE multiplies two such 16 bit values and keeps the top 8 bits of the
// 32 bit product. Both macros are fully parenthesized so they expand safely
// inside larger expressions.
#define REPEAT8(v) ((v) | ((v) << 8))
#define SHADE(f, v) (((v) * (f)) >> 24)

// Scales every byte of each 4 byte pixel by the matching byte of value
// (bits 0..7 scale byte 0, bits 8..15 byte 1, bits 16..23 byte 2 and
// bits 24..31 byte 3).
void ARGBShadeRow_C(const uint8_t* src_argb,
                    uint8_t* dst_argb,
                    int width,
                    uint32_t value) {
  const uint32_t b_scale = REPEAT8(value & 0xff);
  const uint32_t g_scale = REPEAT8((value >> 8) & 0xff);
  const uint32_t r_scale = REPEAT8((value >> 16) & 0xff);
  const uint32_t a_scale = REPEAT8(value >> 24);

  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb[0]);
    const uint32_t g = REPEAT8(src_argb[1]);
    const uint32_t r = REPEAT8(src_argb[2]);
    const uint32_t a = REPEAT8(src_argb[3]);
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
876
// REPEAT8 widens an 8 bit value to 16 bits by copying it into the high byte.
// SHADE multiplies a 16 bit value by an 8 bit scale and keeps bits 16..23 of
// the product. Both macros are fully parenthesized so they expand safely
// inside larger expressions.
#define REPEAT8(v) ((v) | ((v) << 8))
#define SHADE(f, v) (((v) * (f)) >> 16)

// Multiplies every byte of src_argb0 by the matching byte of src_argb1 and
// writes the scaled result (255 * 255 yields 254) to dst_argb.
void ARGBMultiplyRow_C(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const uint32_t b = REPEAT8(src_argb0[0]);
    const uint32_t g = REPEAT8(src_argb0[1]);
    const uint32_t r = REPEAT8(src_argb0[2]);
    const uint32_t a = REPEAT8(src_argb0[3]);
    const uint32_t b_scale = src_argb1[0];
    const uint32_t g_scale = src_argb1[1];
    const uint32_t r_scale = src_argb1[2];
    const uint32_t a_scale = src_argb1[3];
    dst_argb[0] = SHADE(b, b_scale);
    dst_argb[1] = SHADE(g, g_scale);
    dst_argb[2] = SHADE(r, r_scale);
    dst_argb[3] = SHADE(a, a_scale);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
#undef REPEAT8
#undef SHADE
905
// Adds every byte of src_argb1 to the matching byte of src_argb0 and writes
// the sum, limited to 255, to dst_argb. Both source pixels are read
// completely before the destination pixel is written.
void ARGBAddRow_C(const uint8_t* src_argb0,
                  const uint8_t* src_argb1,
                  uint8_t* dst_argb,
                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    const int sum_b = src_argb1[0] + src_argb0[0];
    const int sum_g = src_argb1[1] + src_argb0[1];
    const int sum_r = src_argb1[2] + src_argb0[2];
    const int sum_a = src_argb1[3] + src_argb0[3];
    dst_argb[0] = clamp255(sum_b);
    dst_argb[1] = clamp255(sum_g);
    dst_argb[2] = clamp255(sum_r);
    dst_argb[3] = clamp255(sum_a);
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
932
// Subtracts src_argb1 from src_argb0 channel by channel.
// Every per-channel difference is passed through clamp0() before it is
// stored, so negative results become 0.
void ARGBSubtractRow_C(const uint8_t* src_argb0,
                       const uint8_t* src_argb1,
                       uint8_t* dst_argb,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    int diff[4];
    int c;
    // Gather all four differences first, then store them.
    for (c = 0; c < 4; ++c) {
      diff[c] = src_argb0[c] - src_argb1[c];
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (uint8_t)clamp0(diff[c]);
    }
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
959
// Sobel functions which mimic SSSE3.

// Horizontal Sobel over three input rows.
// For each x the difference between column x and column x + 2 is taken in
// every row, the middle row is weighted by 2, and the absolute value of the
// total is stored after clamp255(). Reads up to index width + 1 of each row.
void SobelXRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 const uint8_t* src_y2,
                 uint8_t* dst_sobelx,
                 int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int top = src_y0[x] - src_y0[x + 2];
    const int mid = src_y1[x] - src_y1[x + 2];
    const int bottom = src_y2[x] - src_y2[x + 2];
    const int sobel = Abs(top + mid * 2 + bottom);
    dst_sobelx[x] = (uint8_t)(clamp255(sobel));
  }
}
981
// Vertical Sobel over two input rows.
// For each x the differences src_y0 - src_y1 are taken at columns x, x + 1
// and x + 2, the middle column is weighted by 2, and the absolute value of
// the total is stored after clamp255(). Reads up to index width + 1 of each
// row.
void SobelYRow_C(const uint8_t* src_y0,
                 const uint8_t* src_y1,
                 uint8_t* dst_sobely,
                 int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int left = src_y0[x + 0] - src_y1[x + 0];
    const int center = src_y0[x + 1] - src_y1[x + 1];
    const int right = src_y0[x + 2] - src_y1[x + 2];
    const int sobel = Abs(left + center * 2 + right);
    dst_sobely[x] = (uint8_t)(clamp255(sobel));
  }
}
1001
// Combines Sobel X and Sobel Y rows into 4 byte pixels.
// The clamp255() of the sum is written to the first three bytes of each
// output pixel and 255 to the fourth.
void SobelRow_C(const uint8_t* src_sobelx,
                const uint8_t* src_sobely,
                uint8_t* dst_argb,
                int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int sum = src_sobelx[x] + src_sobely[x];
    const uint8_t gray = (uint8_t)(clamp255(sum));
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;
  }
}
1018
// Combines Sobel X and Sobel Y rows into a single plane:
// dst_y[x] = clamp255(src_sobelx[x] + src_sobely[x]).
void SobelToPlaneRow_C(const uint8_t* src_sobelx,
                       const uint8_t* src_sobely,
                       uint8_t* dst_y,
                       int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int sum = src_sobelx[x] + src_sobely[x];
    dst_y[x] = (uint8_t)(clamp255(sum));
  }
}
1031
// Packs Sobel X and Sobel Y into 4 byte pixels.
// Byte 0 receives Sobel Y, byte 2 receives Sobel X, byte 1 receives the
// clamp255() of their sum and byte 3 is set to 255.
void SobelXYRow_C(const uint8_t* src_sobelx,
                  const uint8_t* src_sobely,
                  uint8_t* dst_argb,
                  int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const int sx = src_sobelx[x];
    const int sy = src_sobely[x];
    const int both = clamp255(sx + sy);
    dst_argb[0] = (uint8_t)(sy);
    dst_argb[1] = (uint8_t)(both);
    dst_argb[2] = (uint8_t)(sx);
    dst_argb[3] = (uint8_t)(255u);
    dst_argb += 4;
  }
}
1048
// Expands a row of Y bytes to 4 byte pixels: the Y value is copied to the
// first three bytes of each pixel and the fourth byte is set to 255.
void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t gray = src_y[x];
    dst_argb[0] = gray;
    dst_argb[1] = gray;
    dst_argb[2] = gray;
    dst_argb[3] = 255u;
    dst_argb += 4;
  }
}
1060
1061 // TODO(fbarchard): Unify these structures to be platform independent.
1062 // TODO(fbarchard): Generate SIMD structures from float matrix.
1063
// BT.601 YUV to RGB reference
//  R = (Y - 16) * 1.164 - V * -1.596
//  G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
//  B = (Y - 16) * 1.164 - U * -2.018
//
// The macros below express those coefficients in fixed point with 6
// fractional bits (each is scaled by 64). They are only defined for the
// tables in this section and are #undef'd again afterwards.

// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// U and V contributions to R,G,B.
// UB is limited to -128, so the 2.018 factor is stored as 2.0.
#define UB -128 /* max(-128, round(-2.018 * 64)) */
#define UG 25   /* round(0.391 * 64) */
#define VG 52   /* round(0.813 * 64) */
#define VR -102 /* round(-1.596 * 64) */

// Bias values to subtract 16 from Y and 128 from U and V.
// YGB also carries the 64 / 2 rounding term.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Three initializer layouts follow, selected by target architecture. The
// Yvu table holds the same values as the Yuv table with the U and V (and
// therefore B and R) entries exchanged.
// NOTE(review): struct YuvConstants is declared outside this chunk; the
// order of the rows must match its per-architecture field order - confirm
// against the header.
#if defined(__aarch64__)  // 64 bit arm
// B/R coefficients are stored negated (positive) and interleaved as U,V
// pairs; the Y gain is stored multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)  // 32 bit arm
// B/R coefficients are stored negated, with the U entries in elements 0-3
// and the V entries in elements 4-7; the Y gain is stored times 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Other targets: separate rows for the B, G and R coefficients (as U,V
// pairs), one row per bias value and one row for the Y gain.
const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Release the coefficient macros so the next section can redefine them.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1144
// JPEG YUV to RGB reference
// *  R = Y - V * -1.40200
// *  G = Y - U * 0.34414 - V * 0.71414
// *  B = Y - U * -1.77200
//
// The macros below express those coefficients in fixed point with 6
// fractional bits (each is scaled by 64). They are only defined for the
// tables in this section and are #undef'd again afterwards.

// Y contribution to R,G,B.  Scale and bias.
// The Y gain is 1.0 here and the bias is only the rounding term.
#define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
#define YGB 32   /* 64 / 2 */

// U and V contributions to R,G,B.
#define UB -113 /* round(-1.77200 * 64) */
#define UG 22   /* round(0.34414 * 64) */
#define VG 46   /* round(0.71414 * 64) */
#define VR -90  /* round(-1.40200 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Three initializer layouts follow, selected by target architecture. The
// Yvu table holds the same values as the Yuv table with the U and V (and
// therefore B and R) entries exchanged.
// NOTE(review): struct YuvConstants is declared outside this chunk; the
// order of the rows must match its per-architecture field order - confirm
// against the header.
#if defined(__aarch64__)
// B/R coefficients are stored negated (positive) and interleaved as U,V
// pairs; the Y gain is stored multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)
// B/R coefficients are stored negated, with the U entries in elements 0-3
// and the V entries in elements 4-7; the Y gain is stored times 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Other targets: separate rows for the B, G and R coefficients (as U,V
// pairs), one row per bias value and one row for the Y gain.
const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Release the coefficient macros so the next section can redefine them.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1225
// BT.709 YUV to RGB reference
//  R = (Y - 16) * 1.164 - V * -1.793
//  G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533
//  B = (Y - 16) * 1.164 - U * -2.112
// See also http://www.equasys.de/colorconversion.html
//
// The macros below express those coefficients in fixed point with 6
// fractional bits (each is scaled by 64). They are only defined for the
// tables in this section and are #undef'd again afterwards.

// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// TODO(fbarchard): Find way to express 2.112 instead of 2.0.
// U and V contributions to R,G,B.
// UB is limited to -128, so the 2.112 factor is stored as 2.0.
#define UB -128 /* max(-128, round(-2.112 * 64)) */
#define UG 14   /* round(0.213 * 64) */
#define VG 34   /* round(0.533 * 64) */
#define VR -115 /* round(-1.793 * 64) */

// Bias values to round, and subtract 128 from U and V.
#define BB (UB * 128 + YGB)
#define BG (UG * 128 + VG * 128 + YGB)
#define BR (VR * 128 + YGB)

// Three initializer layouts follow, selected by target architecture. The
// Yvu table holds the same values as the Yuv table with the U and V (and
// therefore B and R) entries exchanged.
// NOTE(review): struct YuvConstants is declared outside this chunk; the
// order of the rows must match its per-architecture field order - confirm
// against the header.
#if defined(__aarch64__)
// B/R coefficients are stored negated (positive) and interleaved as U,V
// pairs; the Y gain is stored multiplied by 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {UG, VG, UG, VG, UG, VG, UG, VG},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {VG, UG, VG, UG, VG, UG, VG, UG},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#elif defined(__arm__)
// B/R coefficients are stored negated, with the U entries in elements 0-3
// and the V entries in elements 4-7; the Y gain is stored times 0x0101.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0},
    {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BB, BG, BR, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0},
    {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0},
    {BR, BG, BB, 0, 0, 0, 0, 0},
    {0x0101 * YG, 0, 0, 0}};
#else
// Other targets: separate rows for the B, G and R coefficients (as U,V
// pairs), one row per bias value and one row for the Y gain.
const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
    {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0},
    {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG},
    {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
    {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0},
    {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG},
    {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB},
    {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR},
    {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG},
    {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB},
    {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}};
#endif

// Release the coefficient macros so later code can redefine them.
#undef BB
#undef BG
#undef BR
#undef YGB
#undef UB
#undef UG
#undef VG
#undef VR
#undef YG
1308
1309 // C reference code that mimics the YUV assembly.
1310 // Reads 8 bit YUV and leaves result as 16 bit.
1311
YuvPixel(uint8_t y,uint8_t u,uint8_t v,uint8_t * b,uint8_t * g,uint8_t * r,const struct YuvConstants * yuvconstants)1312 static __inline void YuvPixel(uint8_t y,
1313 uint8_t u,
1314 uint8_t v,
1315 uint8_t* b,
1316 uint8_t* g,
1317 uint8_t* r,
1318 const struct YuvConstants* yuvconstants) {
1319 #if defined(__aarch64__)
1320 int ub = -yuvconstants->kUVToRB[0];
1321 int ug = yuvconstants->kUVToG[0];
1322 int vg = yuvconstants->kUVToG[1];
1323 int vr = -yuvconstants->kUVToRB[1];
1324 int bb = yuvconstants->kUVBiasBGR[0];
1325 int bg = yuvconstants->kUVBiasBGR[1];
1326 int br = yuvconstants->kUVBiasBGR[2];
1327 int yg = yuvconstants->kYToRgb[0] / 0x0101;
1328 #elif defined(__arm__)
1329 int ub = -yuvconstants->kUVToRB[0];
1330 int ug = yuvconstants->kUVToG[0];
1331 int vg = yuvconstants->kUVToG[4];
1332 int vr = -yuvconstants->kUVToRB[4];
1333 int bb = yuvconstants->kUVBiasBGR[0];
1334 int bg = yuvconstants->kUVBiasBGR[1];
1335 int br = yuvconstants->kUVBiasBGR[2];
1336 int yg = yuvconstants->kYToRgb[0] / 0x0101;
1337 #else
1338 int ub = yuvconstants->kUVToB[0];
1339 int ug = yuvconstants->kUVToG[0];
1340 int vg = yuvconstants->kUVToG[1];
1341 int vr = yuvconstants->kUVToR[1];
1342 int bb = yuvconstants->kUVBiasB[0];
1343 int bg = yuvconstants->kUVBiasG[0];
1344 int br = yuvconstants->kUVBiasR[0];
1345 int yg = yuvconstants->kYToRgb[0];
1346 #endif
1347
1348 uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1349 *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6);
1350 *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6);
1351 *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6);
1352 }
1353
1354 // Reads 8 bit YUV and leaves result as 16 bit.
YuvPixel8_16(uint8_t y,uint8_t u,uint8_t v,int * b,int * g,int * r,const struct YuvConstants * yuvconstants)1355 static __inline void YuvPixel8_16(uint8_t y,
1356 uint8_t u,
1357 uint8_t v,
1358 int* b,
1359 int* g,
1360 int* r,
1361 const struct YuvConstants* yuvconstants) {
1362 #if defined(__aarch64__)
1363 int ub = -yuvconstants->kUVToRB[0];
1364 int ug = yuvconstants->kUVToG[0];
1365 int vg = yuvconstants->kUVToG[1];
1366 int vr = -yuvconstants->kUVToRB[1];
1367 int bb = yuvconstants->kUVBiasBGR[0];
1368 int bg = yuvconstants->kUVBiasBGR[1];
1369 int br = yuvconstants->kUVBiasBGR[2];
1370 int yg = yuvconstants->kYToRgb[0] / 0x0101;
1371 #elif defined(__arm__)
1372 int ub = -yuvconstants->kUVToRB[0];
1373 int ug = yuvconstants->kUVToG[0];
1374 int vg = yuvconstants->kUVToG[4];
1375 int vr = -yuvconstants->kUVToRB[4];
1376 int bb = yuvconstants->kUVBiasBGR[0];
1377 int bg = yuvconstants->kUVBiasBGR[1];
1378 int br = yuvconstants->kUVBiasBGR[2];
1379 int yg = yuvconstants->kYToRgb[0] / 0x0101;
1380 #else
1381 int ub = yuvconstants->kUVToB[0];
1382 int ug = yuvconstants->kUVToG[0];
1383 int vg = yuvconstants->kUVToG[1];
1384 int vr = yuvconstants->kUVToR[1];
1385 int bb = yuvconstants->kUVBiasB[0];
1386 int bg = yuvconstants->kUVBiasG[0];
1387 int br = yuvconstants->kUVBiasR[0];
1388 int yg = yuvconstants->kYToRgb[0];
1389 #endif
1390
1391 uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
1392 *b = (int)(-(u * ub) + y1 + bb);
1393 *g = (int)(-(u * ug + v * vg) + y1 + bg);
1394 *r = (int)(-(v * vr) + y1 + br);
1395 }
1396
1397 // C reference code that mimics the YUV 16 bit assembly.
1398 // Reads 10 bit YUV and leaves result as 16 bit.
YuvPixel16(int16_t y,int16_t u,int16_t v,int * b,int * g,int * r,const struct YuvConstants * yuvconstants)1399 static __inline void YuvPixel16(int16_t y,
1400 int16_t u,
1401 int16_t v,
1402 int* b,
1403 int* g,
1404 int* r,
1405 const struct YuvConstants* yuvconstants) {
1406 #if defined(__aarch64__)
1407 int ub = -yuvconstants->kUVToRB[0];
1408 int ug = yuvconstants->kUVToG[0];
1409 int vg = yuvconstants->kUVToG[1];
1410 int vr = -yuvconstants->kUVToRB[1];
1411 int bb = yuvconstants->kUVBiasBGR[0];
1412 int bg = yuvconstants->kUVBiasBGR[1];
1413 int br = yuvconstants->kUVBiasBGR[2];
1414 int yg = yuvconstants->kYToRgb[0] / 0x0101;
1415 #elif defined(__arm__)
1416 int ub = -yuvconstants->kUVToRB[0];
1417 int ug = yuvconstants->kUVToG[0];
1418 int vg = yuvconstants->kUVToG[4];
1419 int vr = -yuvconstants->kUVToRB[4];
1420 int bb = yuvconstants->kUVBiasBGR[0];
1421 int bg = yuvconstants->kUVBiasBGR[1];
1422 int br = yuvconstants->kUVBiasBGR[2];
1423 int yg = yuvconstants->kYToRgb[0] / 0x0101;
1424 #else
1425 int ub = yuvconstants->kUVToB[0];
1426 int ug = yuvconstants->kUVToG[0];
1427 int vg = yuvconstants->kUVToG[1];
1428 int vr = yuvconstants->kUVToR[1];
1429 int bb = yuvconstants->kUVBiasB[0];
1430 int bg = yuvconstants->kUVBiasG[0];
1431 int br = yuvconstants->kUVBiasR[0];
1432 int yg = yuvconstants->kYToRgb[0];
1433 #endif
1434
1435 uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16;
1436 u = clamp255(u >> 2);
1437 v = clamp255(v >> 2);
1438 *b = (int)(-(u * ub) + y1 + bb);
1439 *g = (int)(-(u * ug + v * vg) + y1 + bg);
1440 *r = (int)(-(v * vr) + y1 + br);
1441 }
1442
// C reference code that mimics the YUV 10 bit assembly.
// Reads 10 bit YUV and clamps down to 8 bit RGB: the 16 bit results of
// YuvPixel16() are shifted right by 6 and passed through Clamp().
static __inline void YuvPixel10(uint16_t y,
                                uint16_t u,
                                uint16_t v,
                                uint8_t* b,
                                uint8_t* g,
                                uint8_t* r,
                                const struct YuvConstants* yuvconstants) {
  int blue16;
  int green16;
  int red16;
  YuvPixel16(y, u, v, &blue16, &green16, &red16, yuvconstants);
  *b = Clamp(blue16 >> 6);
  *g = Clamp(green16 >> 6);
  *r = Clamp(red16 >> 6);
}
1460
// Y contribution to R,G,B.  Scale and bias.
#define YG 18997  /* round(1.164 * 64 * 256 * 256 / 257) */
#define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */

// C reference code that mimics the YUV assembly.
// Converts one Y value to gray: the same scaled, biased and clamped value is
// written to b, g and r.
static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) {
  const uint32_t y1 = (uint32_t)(y * 0x0101 * YG) >> 16;
  const uint8_t gray = (uint8_t)Clamp((int32_t)(y1 + YGB) >> 6);
  *b = gray;
  *g = gray;
  *r = gray;
}

#undef YG
#undef YGB
1475
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
// C mimic assembly.
// TODO(fbarchard): Remove subsampling from Neon.
// Converts a row with one U and V sample per pixel to 4 byte pixels. In this
// variant each pair of pixels shares the rounded average of its two U and
// two V samples; a trailing odd pixel uses its own samples. Byte 3 of every
// output pixel is set to 255.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint8_t u_avg = (uint8_t)((src_u[0] + src_u[1] + 1) >> 1);
    const uint8_t v_avg = (uint8_t)((src_v[0] + src_v[1] + 1) >> 1);
    YuvPixel(src_y[0], u_avg, v_avg, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
             yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], u_avg, v_avg, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
             yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_u += 2;
    src_v += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
#else
// Converts a row with one U and V sample per pixel to 4 byte pixels, one
// pixel at a time. Byte 3 of every output pixel is set to 255.
void I444ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int x;
  for (x = 0; x < width; ++x) {
    YuvPixel(src_y[x], src_u[x], src_v[x], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
    rgb_buf += 4;  // Advance 1 pixel.
  }
}
#endif
1526
// Also used for 420
// Converts a row with one U and V sample per two pixels to 4 byte pixels.
// Both pixels of a pair use the same chroma; byte 3 is set to 255.
void I422ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint8_t u = src_u[0];
    const uint8_t v = src_v[0];
    YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
             yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
             yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Trailing odd pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1553
// 10 bit YUV to ARGB
// One U and V sample is shared by each pair of pixels; conversion goes
// through YuvPixel10() and byte 3 of every output pixel is set to 255.
void I210ToARGBRow_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    const uint16_t u = src_u[0];
    const uint16_t v = src_v[0];
    YuvPixel10(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
               yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel10(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
               yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Trailing odd pixel.
    YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
               rgb_buf + 2, yuvconstants);
    rgb_buf[3] = 255;
  }
}
1580
// Packs one pixel into a 32 bit AR30 word and stores it at rgb_buf.
// b, g and r are the 16 bit results of the YuvPixel helpers; shifting right
// by 4 (rather than the 6 used for 8 bit output) leaves a 10 bit range,
// which is then limited with Clamp10(). The word is laid out as B in bits
// 0-9, G in bits 10-19, R in bits 20-29 and both top bits set.
// The word is written in host byte order with memcpy so that rgb_buf does
// not need to be 4 byte aligned.
static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) {
  uint32_t ar30;
  b = b >> 4;  // drop 4 of the fractional bits, leaving 10 bit range.
  g = g >> 4;
  r = r >> 4;
  b = Clamp10(b);
  g = Clamp10(g);
  r = Clamp10(r);
  ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000;
  memcpy(rgb_buf, &ar30, sizeof(ar30));
}
1592
// 10 bit YUV to 10 bit AR30
// One U and V sample is shared by each pair of pixels. Each pixel is
// converted with YuvPixel16() and packed with StoreAR30() (4 bytes each).
void I210ToAR30Row_C(const uint16_t* src_y,
                     const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  int blue;
  int green;
  int red;
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
               yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
    YuvPixel16(src_y[1], src_u[0], src_v[0], &blue, &green, &red,
               yuvconstants);
    StoreAR30(rgb_buf + 4, blue, green, red);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Trailing odd pixel.
    YuvPixel16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
               yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
  }
}
1619
// 8 bit YUV to 10 bit AR30
// Each pixel is converted with YuvPixel8_16(), which keeps the unshifted
// 16 bit result, and packed with StoreAR30() (4 bytes each). One U and V
// sample is shared by each pair of pixels.
void I422ToAR30Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int remaining;
  int blue;
  int green;
  int red;
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                 yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
    YuvPixel8_16(src_y[1], src_u[0], src_v[0], &blue, &green, &red,
                 yuvconstants);
    StoreAR30(rgb_buf + 4, blue, green, red);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Trailing odd pixel.
    YuvPixel8_16(src_y[0], src_u[0], src_v[0], &blue, &green, &red,
                 yuvconstants);
    StoreAR30(rgb_buf, blue, green, red);
  }
}
1647
// Converts a row with one U and V sample per two pixels, plus a per-pixel
// alpha plane, to 4 byte pixels. Byte 3 of each output pixel is copied from
// src_a instead of being set to 255.
void I422AlphaToARGBRow_C(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          const uint8_t* src_a,
                          uint8_t* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5,
             rgb_buf + 6, yuvconstants);
    rgb_buf[7] = src_a[1];
    src_y += 2;
    ++src_u;
    ++src_v;
    src_a += 2;
    rgb_buf += 8;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Trailing odd pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    rgb_buf[3] = src_a[0];
  }
}
1675
// Converts a row with one U and V sample per two pixels to 3 byte pixels
// (B, G, R in that byte order, no alpha byte).
void I422ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_u,
                      const uint8_t* src_v,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int remaining;
  for (remaining = width; remaining > 1; remaining -= 2) {
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4,
             rgb_buf + 5, yuvconstants);
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 6;  // Advance 2 pixels.
  }
  if (width & 1) {
    // Trailing odd pixel.
    YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1,
             rgb_buf + 2, yuvconstants);
  }
}
1698
// Converts a row with one U and V sample per two pixels to 16 bit pixels
// with 4 bits per channel: B in bits 0-3, G in bits 4-7, R in bits 8-11 and
// the top 4 bits set. Two pixels are packed per 32 bit word, the first in
// the low half. Words are written in host byte order with memcpy so that
// dst_argb4444 does not need to be 2 or 4 byte aligned.
void I422ToARGB4444Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb4444,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32_t pair;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Keep the top 4 bits of each channel.
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    b1 = b1 >> 4;
    g1 = g1 >> 4;
    r1 = r1 >> 4;
    pair = (uint32_t)b0 | ((uint32_t)g0 << 4) | ((uint32_t)r0 << 8) |
           ((uint32_t)b1 << 16) | ((uint32_t)g1 << 20) |
           ((uint32_t)r1 << 24) | 0xf000f000u;
    memcpy(dst_argb4444, &pair, sizeof(pair));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb4444 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16_t single;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 4;
    g0 = g0 >> 4;
    r0 = r0 >> 4;
    single = (uint16_t)(b0 | (g0 << 4) | (r0 << 8) | 0xf000);
    memcpy(dst_argb4444, &single, sizeof(single));
  }
}
1736
// Converts a row with one U and V sample per two pixels to 16 bit pixels
// with 5 bits per colour channel: B in bits 0-4, G in bits 5-9, R in bits
// 10-14 and the top bit set. Two pixels are packed per 32 bit word, the
// first in the low half. Words are written in host byte order with memcpy
// so that dst_argb1555 does not need to be 2 or 4 byte aligned.
void I422ToARGB1555Row_C(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_argb1555,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32_t pair;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Keep the top 5 bits of each channel.
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 3;
    r1 = r1 >> 3;
    pair = (uint32_t)b0 | ((uint32_t)g0 << 5) | ((uint32_t)r0 << 10) |
           ((uint32_t)b1 << 16) | ((uint32_t)g1 << 21) |
           ((uint32_t)r1 << 26) | 0x80008000u;
    memcpy(dst_argb1555, &pair, sizeof(pair));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_argb1555 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16_t single;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 3;
    r0 = r0 >> 3;
    single = (uint16_t)(b0 | (g0 << 5) | (r0 << 10) | 0x8000);
    memcpy(dst_argb1555, &single, sizeof(single));
  }
}
1774
// Converts a row with one U and V sample per two pixels to 16 bit pixels:
// B in bits 0-4, G in bits 5-10 and R in bits 11-15. Two pixels are packed
// per 32 bit word, the first in the low half.
// Packing is done in unsigned arithmetic: the second pixel's red is shifted
// to bits 27-31, which would overflow a signed int. Words are written in
// host byte order with memcpy so that dst_rgb565 does not need to be 2 or
// 4 byte aligned.
void I422ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32_t pair;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
    // Keep 5 bits of blue and red, 6 bits of green.
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    pair = (uint32_t)b0 | ((uint32_t)g0 << 5) | ((uint32_t)r0 << 11) |
           ((uint32_t)b1 << 16) | ((uint32_t)g1 << 21) |
           ((uint32_t)r1 << 27);
    memcpy(dst_rgb565, &pair, sizeof(pair));
    src_y += 2;
    src_u += 1;
    src_v += 1;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  if (width & 1) {
    uint16_t single;
    YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    single = (uint16_t)(b0 | (g0 << 5) | (r0 << 11));
    memcpy(dst_rgb565, &single, sizeof(single));
  }
}
1812
// Convert one row of NV12 (Y plane plus interleaved U,V pairs) to 4-byte
// pixels. YuvPixel fills bytes 0..2 of each pixel and byte 3 is set to 255.
// Two Y samples share one U,V pair.
void NV12ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_uv,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_uv += 2;
    rgb_buf += 8;  // 2 pixels of 4 bytes each.
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1836
// Convert one row of NV21 (Y plane plus interleaved V,U pairs) to 4-byte
// pixels. Same as the NV12 conversion except that V comes first in each
// chroma pair, so U is read from src_vu[1] and V from src_vu[0].
void NV21ToARGBRow_C(const uint8_t* src_y,
                     const uint8_t* src_vu,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_y[1], src_vu[1], src_vu[0], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_y += 2;
    src_vu += 2;
    rgb_buf += 8;  // 2 pixels of 4 bytes each.
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1860
// Convert one row of NV12 (Y plane plus interleaved U,V pairs) to 3-byte
// pixels. YuvPixel fills all three bytes of each pixel; there is no alpha.
// Two Y samples share one U,V pair.
void NV12ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_uv,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &rgb_buf[3], &rgb_buf[4],
             &rgb_buf[5], yuvconstants);
    src_y += 2;
    src_uv += 2;
    rgb_buf += 6;  // 2 pixels of 3 bytes each.
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
1881
// Convert one row of NV21 (Y plane plus interleaved V,U pairs) to 3-byte
// pixels. Same as the NV12 conversion except that V comes first in each
// chroma pair, so U is read from src_vu[1] and V from src_vu[0].
void NV21ToRGB24Row_C(const uint8_t* src_y,
                      const uint8_t* src_vu,
                      uint8_t* rgb_buf,
                      const struct YuvConstants* yuvconstants,
                      int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    YuvPixel(src_y[1], src_vu[1], src_vu[0], &rgb_buf[3], &rgb_buf[4],
             &rgb_buf[5], yuvconstants);
    src_y += 2;
    src_vu += 2;
    rgb_buf += 6;  // 2 pixels of 3 bytes each.
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    YuvPixel(src_y[0], src_vu[1], src_vu[0], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
  }
}
1902
// Convert one row of NV12 (Y plane plus interleaved U,V pairs) to RGB565
// using the supplied YUV constants. Two Y samples share one U,V pair. Each
// output pixel is 16 bits: 5 bits of blue in the low bits, then 6 bits of
// green, then 5 bits of red.
void NV12ToRGB565Row_C(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* dst_rgb565,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint8_t b0;
  uint8_t g0;
  uint8_t r0;
  uint8_t b1;
  uint8_t g1;
  uint8_t r1;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint32_t pixel0;
    uint32_t pixel1;
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    b1 = b1 >> 3;
    g1 = g1 >> 2;
    r1 = r1 >> 3;
    // Build each 16-bit pixel first and combine them as unsigned values.
    // The uint8_t channels promote to signed int, so shifting red directly
    // into bits 27..31 (r1 << 27) overflows int for r1 >= 16, which is
    // undefined behavior.
    pixel0 = (uint32_t)(b0 | (g0 << 5) | (r0 << 11));
    pixel1 = (uint32_t)(b1 | (g1 << 5) | (r1 << 11));
    // One 32-bit store writes both pixels; pixel0 occupies the low 16 bits.
    *(uint32_t*)(dst_rgb565) = pixel0 | (pixel1 << 16);
    src_y += 2;
    src_uv += 2;
    dst_rgb565 += 4;  // Advance 2 pixels.
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
    b0 = b0 >> 3;
    g0 = g0 >> 2;
    r0 = r0 >> 3;
    *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
  }
}
1938
// Convert one row of packed YUY2 to 4-byte pixels. Every 4 source bytes hold
// Y0, U, Y1, V for two pixels. YuvPixel fills bytes 0..2 of each output pixel
// and byte 3 is set to 255.
void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_yuy2 += 4;
    rgb_buf += 8;  // 2 pixels of 4 bytes each.
  }
  // Odd width: one trailing pixel. U and V are still read from offsets 1 and 3.
  if (width & 1) {
    YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1960
// Convert one row of packed UYVY to 4-byte pixels. Every 4 source bytes hold
// U, Y0, V, Y1 for two pixels. YuvPixel fills bytes 0..2 of each output pixel
// and byte 3 is set to 255.
void UYVYToARGBRow_C(const uint8_t* src_uyvy,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
    YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], &rgb_buf[4], &rgb_buf[5],
             &rgb_buf[6], yuvconstants);
    rgb_buf[7] = 255;
    src_uyvy += 4;
    rgb_buf += 8;  // 2 pixels of 4 bytes each.
  }
  // Odd width: one trailing pixel. U and V are still read from offsets 0 and 2.
  if (width & 1) {
    YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], &rgb_buf[0], &rgb_buf[1],
             &rgb_buf[2], yuvconstants);
    rgb_buf[3] = 255;
  }
}
1982
// Convert one row of I422 to 4-byte pixels with the constant 255 in byte 0.
// YuvPixel fills bytes 1..3 of each pixel. Two Y samples share one U and one
// V sample.
void I422ToRGBARow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* rgb_buf,
                     const struct YuvConstants* yuvconstants,
                     int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
    YuvPixel(src_y[1], src_u[0], src_v[0], &rgb_buf[5], &rgb_buf[6],
             &rgb_buf[7], yuvconstants);
    rgb_buf[4] = 255;
    src_y += 2;
    ++src_u;
    ++src_v;
    rgb_buf += 8;  // 2 pixels of 4 bytes each.
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    YuvPixel(src_y[0], src_u[0], src_v[0], &rgb_buf[1], &rgb_buf[2],
             &rgb_buf[3], yuvconstants);
    rgb_buf[0] = 255;
  }
}
2008
// Convert one row of Y-only samples to 4-byte pixels. YPixel fills bytes 0..2
// of each pixel from a single Y sample and byte 3 is set to 255.
void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
    YPixel(src_y[1], &rgb_buf[4], &rgb_buf[5], &rgb_buf[6]);
    rgb_buf[7] = 255;
    src_y += 2;
    rgb_buf += 8;  // 2 pixels of 4 bytes each.
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    YPixel(src_y[0], &rgb_buf[0], &rgb_buf[1], &rgb_buf[2]);
    rgb_buf[3] = 255;
  }
}
2024
// Reverse a row of bytes: dst[i] = src[width - 1 - i].
void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint8_t* tail = src + (width - 1);  // Walks backwards from the end.
  int out = 0;
  int pairs;
  // Two bytes per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst[out] = tail[0];
    dst[out + 1] = tail[-1];
    out += 2;
    tail -= 2;
  }
  // Odd width: the remaining byte is the first source byte.
  if (width & 1) {
    dst[width - 1] = *tail;
  }
}
2037
// Reverse a row of interleaved U,V pairs and split it into separate planes:
// dst_u[i] and dst_v[i] receive the U and V of source pair (width - 1 - i).
void MirrorUVRow_C(const uint8_t* src_uv,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  // Start at the last U,V pair (2 bytes per pair) and walk backwards.
  const uint8_t* pair = src_uv + ((width - 1) << 1);
  int out = 0;
  int pairs;
  // Two U,V pairs per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_u[out] = pair[0];
    dst_u[out + 1] = pair[-2];
    dst_v[out] = pair[1];
    dst_v[out + 1] = pair[-1];
    out += 2;
    pair -= 4;
  }
  // Odd width: the remaining pair is the first source pair.
  if (width & 1) {
    dst_u[width - 1] = pair[0];
    dst_v[width - 1] = pair[1];
  }
}
2056
// Reverse a row of 4-byte pixels. Each pixel is moved as one 32-bit value,
// so the byte order inside a pixel is unchanged.
void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) {
  const uint32_t* tail = (const uint32_t*)(src) + (width - 1);
  uint32_t* out = (uint32_t*)(dst);
  int index = 0;
  int pairs;
  // Two pixels per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    out[index] = tail[0];
    out[index + 1] = tail[-1];
    index += 2;
    tail -= 2;
  }
  // Odd width: the remaining pixel is the first source pixel.
  if (width & 1) {
    out[width - 1] = *tail;
  }
}
2071
// De-interleave a row of U,V pairs into separate U and V rows.
// width is the number of U,V pairs.
void SplitUVRow_C(const uint8_t* src_uv,
                  uint8_t* dst_u,
                  uint8_t* dst_v,
                  int width) {
  int out = 0;
  int pairs;
  // Two U,V pairs per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_u[out] = src_uv[0];
    dst_u[out + 1] = src_uv[2];
    dst_v[out] = src_uv[1];
    dst_v[out + 1] = src_uv[3];
    out += 2;
    src_uv += 4;
  }
  // Odd width: one trailing pair.
  if (width & 1) {
    dst_u[width - 1] = src_uv[0];
    dst_v[width - 1] = src_uv[1];
  }
}
2089
// Interleave separate U and V rows into one row of U,V pairs.
// width is the number of U (and V) samples.
void MergeUVRow_C(const uint8_t* src_u,
                  const uint8_t* src_v,
                  uint8_t* dst_uv,
                  int width) {
  int in = 0;
  int pairs;
  // Two U,V pairs per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_uv[0] = src_u[in];
    dst_uv[1] = src_v[in];
    dst_uv[2] = src_u[in + 1];
    dst_uv[3] = src_v[in + 1];
    in += 2;
    dst_uv += 4;
  }
  // Odd width: one trailing pair.
  if (width & 1) {
    dst_uv[0] = src_u[width - 1];
    dst_uv[1] = src_v[width - 1];
  }
}
2107
// De-interleave a row of 3-byte pixels into three planes. Byte 0 of each
// pixel goes to dst_r, byte 1 to dst_g and byte 2 to dst_b.
void SplitRGBRow_C(const uint8_t* src_rgb,
                   uint8_t* dst_r,
                   uint8_t* dst_g,
                   uint8_t* dst_b,
                   int width) {
  const uint8_t* pixel = src_rgb;
  int x = 0;
  while (x < width) {
    dst_r[x] = pixel[0];
    dst_g[x] = pixel[1];
    dst_b[x] = pixel[2];
    pixel += 3;
    ++x;
  }
}
2121
// Interleave three planes into a row of 3-byte pixels. src_r supplies byte 0
// of each pixel, src_g byte 1 and src_b byte 2.
void MergeRGBRow_C(const uint8_t* src_r,
                   const uint8_t* src_g,
                   const uint8_t* src_b,
                   uint8_t* dst_rgb,
                   int width) {
  uint8_t* pixel = dst_rgb;
  int x = 0;
  while (x < width) {
    pixel[0] = src_r[x];
    pixel[1] = src_g[x];
    pixel[2] = src_b[x];
    pixel += 3;
    ++x;
  }
}
2135
// Interleave 16-bit U and V rows into one row of U,V pairs, multiplying every
// sample by scale. The scale moves samples stored in the low bits up to the
// most significant bits of the 16-bit word:
// 128 = 9 bits
// 64 = 10 bits
// 16 = 12 bits
// 1 = 16 bits
void MergeUVRow_16_C(const uint16_t* src_u,
                     const uint16_t* src_v,
                     uint16_t* dst_uv,
                     int scale,
                     int width) {
  int in = 0;
  int pairs;
  // Two U,V pairs per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_uv[0] = src_u[in] * scale;
    dst_uv[1] = src_v[in] * scale;
    dst_uv[2] = src_u[in + 1] * scale;
    dst_uv[3] = src_v[in + 1] * scale;
    in += 2;
    dst_uv += 4;
  }
  // Odd width: one trailing pair.
  if (width & 1) {
    dst_uv[0] = src_u[width - 1] * scale;
    dst_uv[1] = src_v[width - 1] * scale;
  }
}
2159
// Multiply every 16-bit sample of a row by scale. The product is stored back
// into 16 bits, so only its low 16 bits are kept.
void MultiplyRow_16_C(const uint16_t* src_y,
                      uint16_t* dst_y,
                      int scale,
                      int width) {
  int x = 0;
  while (x < width) {
    dst_y[x] = src_y[x] * scale;
    ++x;
  }
}
2169
// Convert a row of 16-bit samples to 8 bits: each sample is multiplied by
// scale, shifted right by 16 and limited to 255 with clamp255. The scale
// selects how many low bits of the source are significant:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
void Convert16To8Row_C(const uint16_t* src_y,
                       uint8_t* dst_y,
                       int scale,
                       int width) {
  int x = 0;
  while (x < width) {
    const int scaled = (src_y[x] * scale) >> 16;
    dst_y[x] = clamp255(scaled);
    ++x;
  }
}
2184
// Convert a row of 8-bit samples to wider samples stored in 16 bits. Each
// byte is replicated into both halves of a 16-bit value (multiply by 0x0101),
// multiplied by scale and shifted right by 16. The scale selects the number
// of significant bits produced:
// 1024 = 10 bits
void Convert8To16Row_C(const uint8_t* src_y,
                       uint16_t* dst_y,
                       int scale,
                       int width) {
  const int replicated_scale = scale * 0x0101;  // Replicates the byte.
  int x = 0;
  while (x < width) {
    dst_y[x] = (src_y[x] * replicated_scale) >> 16;
    ++x;
  }
}
2197
// Copy count bytes from src to dst with memcpy, so the two buffers must not
// overlap.
void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) {
  const size_t num_bytes = (size_t)count;
  memcpy(dst, src, num_bytes);
}
2201
// Copy count 16-bit samples (count * 2 bytes) from src to dst with memcpy,
// so the two buffers must not overlap.
void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) {
  const size_t num_bytes = (size_t)(count * 2);
  memcpy(dst, src, num_bytes);
}
2205
// Fill width bytes of dst with the value v8.
void SetRow_C(uint8_t* dst, uint8_t v8, int width) {
  const size_t num_bytes = (size_t)width;
  memset(dst, v8, num_bytes);
}
2209
// Fill width 4-byte pixels of dst_argb with the 32-bit value v32. Each pixel
// is written as one 32-bit store.
void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) {
  uint32_t* pixels = (uint32_t*)(dst_argb);
  int x = 0;
  while (x < width) {
    pixels[x] = v32;
    ++x;
  }
}
2217
// Filter 2 rows of YUY2 UV's (422) into U and V (420).
// U is at byte 1 and V at byte 3 of every 4-byte group. Each output sample
// is the rounded average of that byte and the byte src_stride_yuy2 further on.
// One U and one V are produced for every two pixels of width, rounding up.
void YUY2ToUVRow_C(const uint8_t* src_yuy2,
                   int src_stride_yuy2,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  int x = 0;
  while (x < width) {
    *dst_u = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
    *dst_v = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
    ++dst_u;
    ++dst_v;
    src_yuy2 += 4;
    x += 2;
  }
}
2234
// Copy row of YUY2 UV's (422) into U and V (422).
// U is at byte 1 and V at byte 3 of every 4-byte group. One U and one V are
// produced for every two pixels of width, rounding up.
void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x = 0;
  while (x < width) {
    *dst_u = src_yuy2[1];
    *dst_v = src_yuy2[3];
    ++dst_u;
    ++dst_v;
    src_yuy2 += 4;
    x += 2;
  }
}
2250
// Copy row of YUY2 Y's (422) into Y (420/422).
// The Y samples are bytes 0 and 2 of every 4-byte group.
void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  int out = 0;
  int pairs;
  // Two Y samples (one 4-byte group) per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_y[out] = src_yuy2[0];
    dst_y[out + 1] = src_yuy2[2];
    out += 2;
    src_yuy2 += 4;
  }
  // Odd width: only the first Y of the last group.
  if (width & 1) {
    dst_y[width - 1] = src_yuy2[0];
  }
}
2264
// Filter 2 rows of UYVY UV's (422) into U and V (420).
// U is at byte 0 and V at byte 2 of every 4-byte group. Each output sample
// is the rounded average of that byte and the byte src_stride_uyvy further on.
// One U and one V are produced for every two pixels of width, rounding up.
void UYVYToUVRow_C(const uint8_t* src_uyvy,
                   int src_stride_uyvy,
                   uint8_t* dst_u,
                   uint8_t* dst_v,
                   int width) {
  int x = 0;
  while (x < width) {
    *dst_u = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
    *dst_v = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
    ++dst_u;
    ++dst_v;
    src_uyvy += 4;
    x += 2;
  }
}
2281
// Copy row of UYVY UV's (422) into U and V (422).
// U is at byte 0 and V at byte 2 of every 4-byte group. One U and one V are
// produced for every two pixels of width, rounding up.
void UYVYToUV422Row_C(const uint8_t* src_uyvy,
                      uint8_t* dst_u,
                      uint8_t* dst_v,
                      int width) {
  int x = 0;
  while (x < width) {
    *dst_u = src_uyvy[0];
    *dst_v = src_uyvy[2];
    ++dst_u;
    ++dst_v;
    src_uyvy += 4;
    x += 2;
  }
}
2297
// Copy row of UYVY Y's (422) into Y (420/422).
// The Y samples are bytes 1 and 3 of every 4-byte group.
void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  int out = 0;
  int pairs;
  // Two Y samples (one 4-byte group) per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_y[out] = src_uyvy[1];
    dst_y[out + 1] = src_uyvy[3];
    out += 2;
    src_uyvy += 4;
  }
  // Odd width: only the first Y of the last group.
  if (width & 1) {
    dst_y[width - 1] = src_uyvy[1];
  }
}
2311
// Blend one foreground channel over one background channel:
//   f + ((256 - a) * b >> 8)
// The foreground value is added unscaled, i.e. it is treated as already
// multiplied by its alpha. The sum is saturated at 255; without that, inputs
// where f exceeds a could sum past 255 and wrap around when stored in a byte.
// NOTE(review): the SIMD version this mirrors is understood to use a
// saturating byte add - confirm against the assembly.
static __inline uint8_t BlendChannelSat(uint32_t f, uint32_t b, uint32_t a) {
  const uint32_t sum = (((256u - a) * b) >> 8) + f;
  return (uint8_t)(sum > 255u ? 255u : sum);
}

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code mimics the SSSE3 version for better testability.
// Bytes 0..2 of every pixel are blended using byte 3 of the src_argb0 pixel
// as alpha; byte 3 of the output is always 255. A width of zero or less
// writes nothing.
void ARGBBlendRow_C(const uint8_t* src_argb0,
                    const uint8_t* src_argb1,
                    uint8_t* dst_argb,
                    int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // Read the whole pixel before writing so in-place blending works.
    const uint32_t fb = src_argb0[0];
    const uint32_t fg = src_argb0[1];
    const uint32_t fr = src_argb0[2];
    const uint32_t a = src_argb0[3];
    const uint32_t bb = src_argb1[0];
    const uint32_t bg = src_argb1[1];
    const uint32_t br = src_argb1[2];
    dst_argb[0] = BlendChannelSat(fb, bb, a);
    dst_argb[1] = BlendChannelSat(fg, bg, a);
    dst_argb[2] = BlendChannelSat(fr, br, a);
    dst_argb[3] = 255u;
    src_argb0 += 4;
    src_argb1 += 4;
    dst_argb += 4;
  }
}
2366
// Blend two planes with a per-sample alpha plane:
//   dst = (alpha * src0 + (255 - alpha) * src1 + 255) >> 8
// so alpha 255 selects src0 and alpha 0 selects src1.
void BlendPlaneRow_C(const uint8_t* src0,
                     const uint8_t* src1,
                     const uint8_t* alpha,
                     uint8_t* dst,
                     int width) {
  int pairs;
  // Two samples per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst[0] =
        (uint8_t)((alpha[0] * src0[0] + (255 - alpha[0]) * src1[0] + 255) >> 8);
    dst[1] =
        (uint8_t)((alpha[1] * src0[1] + (255 - alpha[1]) * src1[1] + 255) >> 8);
    src0 += 2;
    src1 += 2;
    alpha += 2;
    dst += 2;
  }
  // Odd width: one trailing sample.
  if (width & 1) {
    dst[0] =
        (uint8_t)((alpha[0] * src0[0] + (255 - alpha[0]) * src1[0] + 255) >> 8);
  }
}
2387
// Multiply source RGB by alpha and store to destination.
// This code mimics the SSSE3 version for better testability.
// Alpha and each color byte are replicated into 16 bits (x | x << 8); the
// result is the top 8 bits of their 32-bit product. Byte 3 (alpha) is copied
// unchanged.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  int pairs;
  // Two pixels per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    uint32_t blue = src_argb[0];
    uint32_t green = src_argb[1];
    uint32_t red = src_argb[2];
    uint32_t alpha = src_argb[3];
    uint32_t alpha16 = alpha | (alpha << 8);
    dst_argb[0] = (alpha16 * (blue | (blue << 8))) >> 24;
    dst_argb[1] = (alpha16 * (green | (green << 8))) >> 24;
    dst_argb[2] = (alpha16 * (red | (red << 8))) >> 24;
    dst_argb[3] = alpha;
    blue = src_argb[4];
    green = src_argb[5];
    red = src_argb[6];
    alpha = src_argb[7];
    alpha16 = alpha | (alpha << 8);
    dst_argb[4] = (alpha16 * (blue | (blue << 8))) >> 24;
    dst_argb[5] = (alpha16 * (green | (green << 8))) >> 24;
    dst_argb[6] = (alpha16 * (red | (red << 8))) >> 24;
    dst_argb[7] = alpha;
    src_argb += 8;
    dst_argb += 8;
  }

  // Odd width: one trailing pixel.
  if (width & 1) {
    const uint32_t blue = src_argb[0];
    const uint32_t green = src_argb[1];
    const uint32_t red = src_argb[2];
    const uint32_t alpha = src_argb[3];
    const uint32_t alpha16 = alpha | (alpha << 8);
    dst_argb[0] = (alpha16 * (blue | (blue << 8))) >> 24;
    dst_argb[1] = (alpha16 * (green | (green << 8))) >> 24;
    dst_argb[2] = (alpha16 * (red | (red << 8))) >> 24;
    dst_argb[3] = alpha;
  }
}
2427
// Divide source RGB by alpha and store to destination.
// b = (b * 255 + (a / 2)) / a;
// g = (g * 255 + (a / 2)) / a;
// r = (r * 255 + (a / 2)) / a;
// Reciprocal method is off by 1 on some values. ie 125
// 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
//
// The table is indexed by alpha (0..255). T(a) puts 0x0100 in the upper 16
// bits and 0x10000 / a in the lower 16 bits. Three entries are written out by
// hand instead of using T():
//   [0]   lower half 0, which also avoids a division by zero in T();
//   [1]   lower half 0xffff, because 0x10000 does not fit in 16 bits;
//   [255] lower half 0x0100 (T(0xff) would give 0x0101), so that multiplying
//         by it and shifting right by 8 leaves a value unchanged.
#define T(a) 0x01000000 + (0x10000 / a)
const uint32_t fixed_invtbl8[256] = {
    0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06),
    T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d),
    T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14),
    T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b),
    T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22),
    T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29),
    T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30),
    T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
    T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e),
    T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45),
    T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c),
    T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53),
    T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a),
    T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61),
    T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68),
    T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
    T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76),
    T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d),
    T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84),
    T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b),
    T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92),
    T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99),
    T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0),
    T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
    T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae),
    T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5),
    T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc),
    T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3),
    T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca),
    T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1),
    T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8),
    T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
    T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6),
    T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed),
    T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4),
    T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb),
    T(0xfc), T(0xfd), T(0xfe), 0x01000100};
#undef T
2474
ARGBUnattenuateRow_C(const uint8_t * src_argb,uint8_t * dst_argb,int width)2475 void ARGBUnattenuateRow_C(const uint8_t* src_argb,
2476 uint8_t* dst_argb,
2477 int width) {
2478 int i;
2479 for (i = 0; i < width; ++i) {
2480 uint32_t b = src_argb[0];
2481 uint32_t g = src_argb[1];
2482 uint32_t r = src_argb[2];
2483 const uint32_t a = src_argb[3];
2484 const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point
2485 b = (b * ia) >> 8;
2486 g = (g * ia) >> 8;
2487 r = (r * ia) >> 8;
2488 // Clamping should not be necessary but is free in assembly.
2489 dst_argb[0] = clamp255(b);
2490 dst_argb[1] = clamp255(g);
2491 dst_argb[2] = clamp255(r);
2492 dst_argb[3] = a;
2493 src_argb += 4;
2494 dst_argb += 4;
2495 }
2496 }
2497
// Build one row of a 4-channel cumulative sum table. For every pixel x and
// channel c, cumsum receives the running sum of row[0..x] for that channel
// plus the value at the same position in previous_cumsum.
void ComputeCumulativeSumRow_C(const uint8_t* row,
                               int32_t* cumsum,
                               const int32_t* previous_cumsum,
                               int width) {
  int32_t running[4] = {0, 0, 0, 0};
  int x;
  for (x = 0; x < width; ++x) {
    const uint8_t* pixel = row + x * 4;
    const int32_t* above = previous_cumsum + x * 4;
    int32_t* out = cumsum + x * 4;
    int c;
    // Accumulate all four channels first, then write all four results.
    for (c = 0; c < 4; ++c) {
      running[c] += pixel[c];
    }
    for (c = 0; c < 4; ++c) {
      out[c] = running[c] + above[c];
    }
  }
}
2515
// Turn cumulative sums into averages. For each of count output pixels and
// each of the 4 channels, the sum is
//   bl[w + c] + tl[c] - bl[c] - tl[w + c]
// and the output byte is that sum multiplied by 1 / area, converted to
// uint8_t. tl and bl advance by 4 values per output pixel.
void CumulativeSumToAverageRow_C(const int32_t* tl,
                                 const int32_t* bl,
                                 int w,
                                 int area,
                                 uint8_t* dst,
                                 int count) {
  const float inv_area = 1.0f / area;
  int remaining;
  for (remaining = count; remaining > 0; --remaining) {
    int c;
    for (c = 0; c < 4; ++c) {
      dst[c] = (uint8_t)((bl[w + c] + tl[c] - bl[c] - tl[w + c]) * inv_area);
    }
    dst += 4;
    tl += 4;
    bl += 4;
  }
}
2534
2535 // Copy pixels from rotated source to destination row with a slope.
2536 LIBYUV_API
ARGBAffineRow_C(const uint8_t * src_argb,int src_argb_stride,uint8_t * dst_argb,const float * uv_dudv,int width)2537 void ARGBAffineRow_C(const uint8_t* src_argb,
2538 int src_argb_stride,
2539 uint8_t* dst_argb,
2540 const float* uv_dudv,
2541 int width) {
2542 int i;
2543 // Render a row of pixels from source into a buffer.
2544 float uv[2];
2545 uv[0] = uv_dudv[0];
2546 uv[1] = uv_dudv[1];
2547 for (i = 0; i < width; ++i) {
2548 int x = (int)(uv[0]);
2549 int y = (int)(uv[1]);
2550 *(uint32_t*)(dst_argb) =
2551 *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4);
2552 dst_argb += 4;
2553 uv[0] += uv_dudv[2];
2554 uv[1] += uv_dudv[3];
2555 }
2556 }
2557
// Blend 2 rows into 1.
// Each output byte is the rounded average of a source byte and the byte
// src_uv_stride further on.
static void HalfRow_C(const uint8_t* src_uv,
                      ptrdiff_t src_uv_stride,
                      uint8_t* dst_uv,
                      int width) {
  int x = 0;
  while (x < width) {
    const int first = src_uv[x];
    const int second = src_uv[src_uv_stride + x];
    dst_uv[x] = (uint8_t)((first + second + 1) >> 1);
    ++x;
  }
}
2568
// 16-bit version of HalfRow_C: each output sample is the rounded average of
// a source sample and the sample src_uv_stride elements further on.
static void HalfRow_16_C(const uint16_t* src_uv,
                         ptrdiff_t src_uv_stride,
                         uint16_t* dst_uv,
                         int width) {
  int x = 0;
  while (x < width) {
    const int first = src_uv[x];
    const int second = src_uv[src_uv_stride + x];
    dst_uv[x] = (uint16_t)((first + second + 1) >> 1);
    ++x;
  }
}
2578
// C version 2x2 -> 2x1.
// Blend the row at src_ptr with the row src_stride bytes after it.
// source_y_fraction is the weight of the second row out of 256:
//   0   copies the first row,
//   128 averages the two rows via HalfRow_C,
//   otherwise each byte is (first * (256 - f) + second * f + 128) >> 8.
void InterpolateRow_C(uint8_t* dst_ptr,
                      const uint8_t* src_ptr,
                      ptrdiff_t src_stride,
                      int width,
                      int source_y_fraction) {
  const int weight1 = source_y_fraction;
  const int weight0 = 256 - weight1;
  const uint8_t* next_row = src_ptr + src_stride;
  int pairs;
  if (weight1 == 0) {
    memcpy(dst_ptr, src_ptr, width);
    return;
  }
  if (weight1 == 128) {
    HalfRow_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  // Two bytes per iteration.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_ptr[0] = (src_ptr[0] * weight0 + next_row[0] * weight1 + 128) >> 8;
    dst_ptr[1] = (src_ptr[1] * weight0 + next_row[1] * weight1 + 128) >> 8;
    src_ptr += 2;
    next_row += 2;
    dst_ptr += 2;
  }
  // Odd width: one trailing byte.
  if (width & 1) {
    dst_ptr[0] = (src_ptr[0] * weight0 + next_row[0] * weight1 + 128) >> 8;
  }
}
2611
// 16-bit version of InterpolateRow_C.
// Blend the row at src_ptr with the row src_stride elements after it.
// source_y_fraction is the weight of the second row out of 256:
//   0   copies the first row,
//   128 averages the two rows via HalfRow_16_C,
//   otherwise each sample is (first * (256 - f) + second * f + 128) >> 8.
// The + 128 rounds to nearest, matching InterpolateRow_C and the rounding
// that HalfRow_16_C applies in the 128 case. Without it this path truncated,
// so neighbouring fractions (e.g. 127 and 128) rounded differently.
void InterpolateRow_16_C(uint16_t* dst_ptr,
                         const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         int width,
                         int source_y_fraction) {
  int y1_fraction = source_y_fraction;
  int y0_fraction = 256 - y1_fraction;
  const uint16_t* src_ptr1 = src_ptr + src_stride;
  int x;
  if (y1_fraction == 0) {
    memcpy(dst_ptr, src_ptr, width * 2);
    return;
  }
  if (y1_fraction == 128) {
    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
    return;
  }
  for (x = 0; x < width - 1; x += 2) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
    dst_ptr[1] =
        (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
    src_ptr += 2;
    src_ptr1 += 2;
    dst_ptr += 2;
  }
  // Odd width: one trailing sample.
  if (width & 1) {
    dst_ptr[0] =
        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
  }
}
2640
// Use first 4 shuffler values to reorder ARGB channels.
// Output byte k of every pixel is source byte shuffler[k] of the same pixel.
void ARGBShuffleRow_C(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      const uint8_t* shuffler,
                      int width) {
  const int pick0 = shuffler[0];
  const int pick1 = shuffler[1];
  const int pick2 = shuffler[2];
  const int pick3 = shuffler[3];
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    // Read all four bytes before writing, to support in-place conversion.
    const uint8_t out0 = src_argb[pick0];
    const uint8_t out1 = src_argb[pick1];
    const uint8_t out2 = src_argb[pick2];
    const uint8_t out3 = src_argb[pick3];
    dst_argb[0] = out0;
    dst_argb[1] = out1;
    dst_argb[2] = out2;
    dst_argb[3] = out3;
    src_argb += 4;
    dst_argb += 4;
  }
}
2666
// Pack a row of I422 (separate Y, U and V) into YUY2: every two pixels become
// the 4 bytes Y0, U, Y1, V. For an odd width the last group is still written
// in full, with 0 in place of the missing second Y.
void I422ToYUY2Row_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = *src_u;
    dst_frame[2] = src_y[1];
    dst_frame[3] = *src_v;
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    dst_frame[0] = src_y[0];
    dst_frame[1] = *src_u;
    dst_frame[2] = 0;
    dst_frame[3] = *src_v;
  }
}
2690
// Pack a row of I422 (separate Y, U and V) into UYVY: every two pixels become
// the 4 bytes U, Y0, V, Y1. For an odd width the last group is still written
// in full, with 0 in place of the missing second Y.
void I422ToUYVYRow_C(const uint8_t* src_y,
                     const uint8_t* src_u,
                     const uint8_t* src_v,
                     uint8_t* dst_frame,
                     int width) {
  int pairs;
  // Whole pairs of pixels.
  for (pairs = width / 2; pairs > 0; --pairs) {
    dst_frame[0] = *src_u;
    dst_frame[1] = src_y[0];
    dst_frame[2] = *src_v;
    dst_frame[3] = src_y[1];
    dst_frame += 4;
    src_y += 2;
    ++src_u;
    ++src_v;
  }
  // Odd width: one trailing pixel.
  if (width & 1) {
    dst_frame[0] = *src_u;
    dst_frame[1] = src_y[0];
    dst_frame[2] = *src_v;
    dst_frame[3] = 0;
  }
}
2714
// Apply a cubic polynomial to every byte of every 4-byte pixel. poly holds 16
// coefficients: for byte c of a pixel with value v the result is
//   poly[c] + poly[c + 4] * v + poly[c + 8] * v^2 + poly[c + 12] * v^3
// converted to int32_t and passed through Clamp.
void ARGBPolynomialRow_C(const uint8_t* src_argb,
                         uint8_t* dst_argb,
                         const float* poly,
                         int width) {
  int i;
  for (i = 0; i < width; ++i) {
    float value[4];
    float result[4];
    int c;
    // Read the whole pixel before writing anything.
    for (c = 0; c < 4; ++c) {
      value[c] = (float)(src_argb[c]);
    }
    for (c = 0; c < 4; ++c) {
      const float v = value[c];
      const float v2 = v * v;
      float acc = poly[c] + poly[c + 4] * v;
      const float v3 = v2 * v;
      acc += poly[c + 8] * v2;
      acc += poly[c + 12] * v3;
      result[c] = acc;
    }
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = Clamp((int32_t)(result[c]));
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
2754
// Samples are assumed to be unsigned in the low 9, 10 or 12 bits. The scale
// factor adjusts the source integer range to the desired half float range.
2757
2758 // This magic constant is 2^-112. Multiplying by this
2759 // is the same as subtracting 112 from the exponent, which
2760 // is the difference in exponent bias between 32-bit and
2761 // 16-bit floats. Once we've done this subtraction, we can
2762 // simply extract the low bits of the exponent and the high
2763 // bits of the mantissa from our float and we're done.
2764
// Work around GCC 7 punning warning -Wstrict-aliasing
// The alias type is kept so that any other code referring to it still
// compiles; HalfFloatRow_C itself reads the float bits with memcpy.
#if defined(__GNUC__)
typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t;
#else
typedef uint32_t uint32_alias_t;
#endif

// Converts `width` unsigned 16-bit samples to 16-bit half-float bit patterns.
//   src:   input samples.
//   dst:   receives one 16-bit result per input sample.
//   scale: multiplier applied to each sample before conversion.
//   width: number of samples to convert.
// Each sample is multiplied by scale * 2^-112 as a 32-bit float; the float's
// bit pattern shifted right by 13 is then stored, truncated to 16 bits.
void HalfFloatRow_C(const uint16_t* src,
                    uint16_t* dst,
                    float scale,
                    int width) {
  int i;
  // 1.9259299444e-34f is 2^-112, the exponent-bias adjustment between
  // 32-bit and 16-bit floats.
  float mult = 1.9259299444e-34f * scale;
  for (i = 0; i < width; ++i) {
    float value = src[i] * mult;
    uint32_t bits;
    // Copy the representation instead of dereferencing a cast pointer, so
    // the code does not depend on compiler-specific aliasing attributes.
    memcpy(&bits, &value, sizeof(bits));
    dst[i] = (uint16_t)(bits >> 13);
  }
}
2783
// Converts `width` bytes to floats, multiplying each one by `scale`.
void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) {
  int remaining;
  for (remaining = width; remaining > 0; --remaining) {
    *dst++ = *src++ * scale;
  }
}
2791
// Remaps a single 4-byte pixel through the luma-selected color table.
// The weighted sum of the first three bytes, masked with 0x7F00, picks a
// 256-byte aligned offset into `luma`; each of those three bytes is then
// looked up in the table at that offset. The fourth byte is copied as is.
static __inline void ARGBLumaColorTablePixel(const uint8_t* src_argb,
                                             uint8_t* dst_argb,
                                             const uint8_t* luma,
                                             uint32_t bc,
                                             uint32_t gc,
                                             uint32_t rc) {
  // Luminance in rows, color values in columns.
  const uint8_t* table =
      ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) +
      luma;
  dst_argb[0] = table[src_argb[0]];
  dst_argb[1] = table[src_argb[1]];
  dst_argb[2] = table[src_argb[2]];
  dst_argb[3] = src_argb[3];
}

// Applies a luma-dependent color lookup to a row of 4-byte pixels.
//   luma:      lookup table; offsets up to 0x7F00 + 255 are read.
//   lumacoeff: three 8-bit weights packed in bits 0-7, 8-15 and 16-23,
//              applied to pixel bytes 0, 1 and 2 respectively.
// Pixels are handled two at a time, followed by one more when width is odd.
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
                             uint8_t* dst_argb,
                             int width,
                             const uint8_t* luma,
                             uint32_t lumacoeff) {
  const uint32_t bc = lumacoeff & 0xff;
  const uint32_t gc = (lumacoeff >> 8) & 0xff;
  const uint32_t rc = (lumacoeff >> 16) & 0xff;
  int remaining = width;

  while (remaining > 1) {
    ARGBLumaColorTablePixel(src_argb, dst_argb, luma, bc, gc, rc);
    ARGBLumaColorTablePixel(src_argb + 4, dst_argb + 4, luma, bc, gc, rc);
    src_argb += 8;
    dst_argb += 8;
    remaining -= 2;
  }
  if (width & 1) {
    ARGBLumaColorTablePixel(src_argb, dst_argb, luma, bc, gc, rc);
  }
}
2833
// Copies byte 3 of every 4-byte source pixel into byte 3 of the matching
// destination pixel. The other destination bytes are left untouched.
// Pixels are handled two at a time, followed by one more when width is odd.
void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int remaining = width;
  while (remaining > 1) {
    dst[3] = src[3];
    dst[7] = src[7];
    src += 8;
    dst += 8;
    remaining -= 2;
  }
  if (width & 1) {
    dst[3] = src[3];
  }
}
2846
// Gathers byte 3 of every 4-byte source pixel into a tightly packed row of
// single bytes. Pixels are handled two at a time, followed by one more when
// width is odd.
void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) {
  int remaining = width;
  while (remaining > 1) {
    *dst_a++ = src_argb[3];
    *dst_a++ = src_argb[7];
    src_argb += 8;
    remaining -= 2;
  }
  if (width & 1) {
    *dst_a = src_argb[3];
  }
}
2859
// Spreads a packed row of single bytes into byte 3 of successive 4-byte
// destination pixels. The other destination bytes are left untouched.
// Pixels are handled two at a time, followed by one more when width is odd.
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) {
  int remaining = width;
  while (remaining > 1) {
    dst[3] = *src++;
    dst[7] = *src++;
    dst += 8;
    remaining -= 2;
  }
  if (width & 1) {
    dst[3] = *src;
  }
}
2872
// Maximum temporary width for wrappers to process at a time, in pixels.
// The two-step wrappers below declare an intermediate row buffer of
// MAXTWIDTH * 4 bytes and split wider rows into chunks of at most this size.
#define MAXTWIDTH 2048
2875
#if !(defined(_MSC_VER) && defined(_M_IX86)) && \
    defined(HAS_I422TORGB565ROW_SSSE3)
// row_win.cc has asm version, but GCC uses 2 step wrapper.
// Each chunk of at most MAXTWIDTH pixels is converted to ARGB in a stack
// buffer by I422ToARGBRow_SSSE3 and then packed by ARGBToRGB565Row_SSE2.
void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U and one V sample per two pixels.
    src_v += chunk / 2;
    dst_rgb565 += chunk * 2;  // 2 output bytes per pixel.
  }
}
#endif
2898
#if defined(HAS_I422TOARGB1555ROW_SSSE3)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by I422ToARGBRow_SSSE3 and then packed by
// ARGBToARGB1555Row_SSE2.
void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb1555,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, chunk);
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U and one V sample per two pixels.
    src_v += chunk / 2;
    dst_argb1555 += chunk * 2;  // 2 output bytes per pixel.
  }
}
#endif
2920
#if defined(HAS_I422TOARGB4444ROW_SSSE3)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by I422ToARGBRow_SSSE3 and then packed by
// ARGBToARGB4444Row_SSE2.
void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
                             const uint8_t* src_u,
                             const uint8_t* src_v,
                             uint8_t* dst_argb4444,
                             const struct YuvConstants* yuvconstants,
                             int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, chunk);
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, chunk);
    src_y += chunk;
    src_u += chunk / 2;  // One U and one V sample per two pixels.
    src_v += chunk / 2;
    dst_argb4444 += chunk * 2;  // 2 output bytes per pixel.
  }
}
#endif
2942
#if defined(HAS_NV12TORGB565ROW_SSSE3)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by NV12ToARGBRow_SSSE3 and then packed by
// ARGBToRGB565Row_SSE2.
void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
                           const uint8_t* src_uv,
                           uint8_t* dst_rgb565,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, chunk);
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
    src_y += chunk;
    src_uv += chunk;  // The chroma pointer advances one byte per pixel.
    dst_rgb565 += chunk * 2;  // 2 output bytes per pixel.
  }
}
#endif
2962
#if defined(HAS_NV12TORGB24ROW_SSSE3)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by NV12ToARGBRow_SSSE3 and then packed by
// ARGBToRGB24Row_SSSE3.
void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, chunk);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
    src_y += chunk;
    src_uv += chunk;  // The chroma pointer advances one byte per pixel.
    dst_rgb24 += chunk * 3;  // 3 output bytes per pixel.
  }
}
#endif
2982
#if defined(HAS_NV21TORGB24ROW_SSSE3)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by NV21ToARGBRow_SSSE3 and then packed by
// ARGBToRGB24Row_SSSE3.
void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
                          const uint8_t* src_vu,
                          uint8_t* dst_rgb24,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, chunk);
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
    src_y += chunk;
    src_vu += chunk;  // The chroma pointer advances one byte per pixel.
    dst_rgb24 += chunk * 3;  // 3 output bytes per pixel.
  }
}
#endif
3002
#if defined(HAS_NV12TORGB24ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by NV12ToARGBRow_AVX2 and then packed to RGB24,
// using the AVX2 packer when available and the SSSE3 one otherwise.
void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, chunk);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
#endif
    src_y += chunk;
    src_uv += chunk;  // The chroma pointer advances one byte per pixel.
    dst_rgb24 += chunk * 3;  // 3 output bytes per pixel.
  }
}
#endif
3026
#if defined(HAS_NV21TORGB24ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by NV21ToARGBRow_AVX2 and then packed to RGB24,
// using the AVX2 packer when available and the SSSE3 one otherwise.
void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_vu,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, chunk);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
#endif
    src_y += chunk;
    src_vu += chunk;  // The chroma pointer advances one byte per pixel.
    dst_rgb24 += chunk * 3;  // 3 output bytes per pixel.
  }
}
#endif
3050
#if defined(HAS_I422TORGB565ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by I422ToARGBRow_AVX2 and then packed to RGB565,
// using the AVX2 packer when available and the SSE2 one otherwise.
void I422ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_u,
                          const uint8_t* src_v,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
#endif
    src_y += chunk;
    src_u += chunk / 2;  // One U and one V sample per two pixels.
    src_v += chunk / 2;
    dst_rgb565 += chunk * 2;  // 2 output bytes per pixel.
  }
}
#endif
3075
#if defined(HAS_I422TOARGB1555ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by I422ToARGBRow_AVX2 and then packed to ARGB1555,
// using the AVX2 packer when available and the SSE2 one otherwise.
void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb1555,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, chunk);
#if defined(HAS_ARGBTOARGB1555ROW_AVX2)
    ARGBToARGB1555Row_AVX2(row, dst_argb1555, chunk);
#else
    ARGBToARGB1555Row_SSE2(row, dst_argb1555, chunk);
#endif
    src_y += chunk;
    src_u += chunk / 2;  // One U and one V sample per two pixels.
    src_v += chunk / 2;
    dst_argb1555 += chunk * 2;  // 2 output bytes per pixel.
  }
}
#endif
3101
#if defined(HAS_I422TOARGB4444ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by I422ToARGBRow_AVX2 and then packed to ARGB4444,
// using the AVX2 packer when available and the SSE2 one otherwise.
void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            uint8_t* dst_argb4444,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, chunk);
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
    ARGBToARGB4444Row_AVX2(row, dst_argb4444, chunk);
#else
    ARGBToARGB4444Row_SSE2(row, dst_argb4444, chunk);
#endif
    src_y += chunk;
    src_u += chunk / 2;  // One U and one V sample per two pixels.
    src_v += chunk / 2;
    dst_argb4444 += chunk * 2;  // 2 output bytes per pixel.
  }
}
#endif
3127
#if defined(HAS_I422TORGB24ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by I422ToARGBRow_AVX2 and then packed to RGB24,
// using the AVX2 packer when available and the SSSE3 one otherwise.
void I422ToRGB24Row_AVX2(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb24,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB24ROW_AVX2)
    ARGBToRGB24Row_AVX2(row, dst_rgb24, chunk);
#else
    ARGBToRGB24Row_SSSE3(row, dst_rgb24, chunk);
#endif
    src_y += chunk;
    src_u += chunk / 2;  // One U and one V sample per two pixels.
    src_v += chunk / 2;
    dst_rgb24 += chunk * 3;  // 3 output bytes per pixel.
  }
}
#endif
3153
#if defined(HAS_NV12TORGB565ROW_AVX2)
// Two-step wrapper: each chunk of at most MAXTWIDTH pixels is converted to
// ARGB in a stack buffer by NV12ToARGBRow_AVX2 and then packed to RGB565,
// using the AVX2 packer when available and the SSE2 one otherwise.
void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
                          const uint8_t* src_uv,
                          uint8_t* dst_rgb565,
                          const struct YuvConstants* yuvconstants,
                          int width) {
  // Row buffer for intermediate ARGB pixels, 4 bytes each.
  SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]);
  int chunk;
  for (; width > 0; width -= chunk) {
    chunk = width < MAXTWIDTH ? width : MAXTWIDTH;
    NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, chunk);
#if defined(HAS_ARGBTORGB565ROW_AVX2)
    ARGBToRGB565Row_AVX2(row, dst_rgb565, chunk);
#else
    ARGBToRGB565Row_SSE2(row, dst_rgb565, chunk);
#endif
    src_y += chunk;
    src_uv += chunk;  // The chroma pointer advances one byte per pixel.
    dst_rgb565 += chunk * 2;  // 2 output bytes per pixel.
  }
}
#endif
3177
// Writes src[i] * scale to dst[i] for `width` samples and returns the sum of
// the squares of the unscaled source samples.
float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
  float sum_sq = 0.f;
  int x;
#if defined(__clang__)
#pragma clang loop vectorize_width(4)
#endif
  for (x = 0; x < width; ++x) {
    const float sample = src[x];
    sum_sq += sample * sample;
    dst[x] = sample * scale;
  }
  return sum_sq;
}
3191
// Writes src[i] * scale to dst[i] for `width` samples and returns the largest
// unscaled source sample. The running maximum starts at 0, so the result is
// never negative.
float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) {
  float peak = 0.f;
  int x;
  for (x = 0; x < width; ++x) {
    const float sample = src[x];
    const float scaled = sample * scale;
    if (sample > peak) {
      peak = sample;
    }
    dst[x] = scaled;
  }
  return peak;
}
3203
// Writes src[i] * scale to dst[i] for `width` samples.
void ScaleSamples_C(const float* src, float* dst, float scale, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    dst[x] = src[x] * scale;
  }
}
3210
// Horizontal 5-tap filter with 1, 4, 6, 4, 1 coefficients.
// Output i combines src[i] .. src[i + 4], adds 128 for rounding, shifts right
// by 8 and stores the low 16 bits. width + 4 source values are read.
void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    const uint32_t* tap = src + x;
    // The kernel is symmetric, so taps with equal weight are summed first.
    const uint32_t outer = tap[0] + tap[4];
    const uint32_t inner = tap[1] + tap[3];
    dst[x] = (uint16_t)((outer + inner * 4 + tap[2] * 6 + 128) >> 8);
  }
}
3219
// Vertical 5-tap filter: combines 5 rows with 1, 4, 6, 4, 1 coefficients to
// produce 1 row. The weighted sum is stored without any shift or rounding.
void GaussCol_C(const uint16_t* src0,
                const uint16_t* src1,
                const uint16_t* src2,
                const uint16_t* src3,
                const uint16_t* src4,
                uint32_t* dst,
                int width) {
  int x;
  for (x = 0; x < width; ++x) {
    // The kernel is symmetric, so rows with equal weight are summed first.
    const int outer = src0[x] + src4[x];
    const int inner = src1[x] + src3[x];
    dst[x] = outer + inner * 4 + src2[x] * 6;
  }
}
3233
3234 #ifdef __cplusplus
3235 } // extern "C"
3236 } // namespace libyuv
3237 #endif
3238