1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/planar_functions.h"
12
13 #include <string.h> // for memset()
14
15 #include "libyuv/cpu_id.h"
16 #ifdef HAVE_JPEG
17 #include "libyuv/mjpeg_decoder.h"
18 #endif
19 #include "libyuv/row.h"
20 #include "libyuv/scale_row.h" // for ScaleRowDown2
21
22 #ifdef __cplusplus
23 namespace libyuv {
24 extern "C" {
25 #endif
26
// Copy a plane of data.
// src_y/dst_y are byte planes with the given strides; width/height are in
// pixels (bytes, for an 8-bit plane). Strides may be negative to walk a
// plane bottom-up; callers pre-adjust the base pointer in that case.
LIBYUV_API
void CopyPlane(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  int y;
  // Row copier; upgraded below when the CPU supports a faster variant.
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
  // Coalesce rows: when both planes are contiguous (stride == width) the
  // whole image can be copied as a single long row.
  if (src_stride_y == width &&
      dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
  // Nothing to do: source and destination reference the same rows.
  if (src_y == dst_y && src_stride_y == dst_stride_y) {
    return;
  }
  // Select the fastest available row copier. Later checks override earlier
  // ones; the "Any" variants handle widths that are not a multiple of the
  // SIMD block size.
#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
  }
#endif
#if defined(HAS_COPYROW_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
  }
#endif
#if defined(HAS_COPYROW_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_MIPS;
  }
#endif

  // Copy plane
  for (y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
78
// Copy a plane of 16-bit data (e.g. high-bit-depth luma).
// width/height are in 16-bit pixels; strides are in elements of uint16.
LIBYUV_API
void CopyPlane_16(const uint16* src_y, int src_stride_y,
                  uint16* dst_y, int dst_stride_y,
                  int width, int height) {
  int y;
  // Row copier; upgraded below when the CPU supports a faster variant.
  void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
  // Coalesce rows: contiguous planes can be copied as one long row.
  if (src_stride_y == width &&
      dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
  // Note: unlike the 8-bit SIMD paths, these variants require an aligned
  // width and have no "Any" fallback, so odd widths stay on the C path.
#if defined(HAS_COPYROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_16_SSE2;
  }
#endif
#if defined(HAS_COPYROW_16_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_16_ERMS;
  }
#endif
#if defined(HAS_COPYROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
    CopyRow = CopyRow_16_NEON;
  }
#endif
#if defined(HAS_COPYROW_16_MIPS)
  if (TestCpuFlag(kCpuHasMIPS)) {
    CopyRow = CopyRow_16_MIPS;
  }
#endif

  // Copy plane
  for (y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
120
121 // Copy I422.
122 LIBYUV_API
I422Copy(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)123 int I422Copy(const uint8* src_y, int src_stride_y,
124 const uint8* src_u, int src_stride_u,
125 const uint8* src_v, int src_stride_v,
126 uint8* dst_y, int dst_stride_y,
127 uint8* dst_u, int dst_stride_u,
128 uint8* dst_v, int dst_stride_v,
129 int width, int height) {
130 int halfwidth = (width + 1) >> 1;
131 if (!src_y || !src_u || !src_v ||
132 !dst_y || !dst_u || !dst_v ||
133 width <= 0 || height == 0) {
134 return -1;
135 }
136 // Negative height means invert the image.
137 if (height < 0) {
138 height = -height;
139 src_y = src_y + (height - 1) * src_stride_y;
140 src_u = src_u + (height - 1) * src_stride_u;
141 src_v = src_v + (height - 1) * src_stride_v;
142 src_stride_y = -src_stride_y;
143 src_stride_u = -src_stride_u;
144 src_stride_v = -src_stride_v;
145 }
146 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
147 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
148 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
149 return 0;
150 }
151
152 // Copy I444.
153 LIBYUV_API
I444Copy(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)154 int I444Copy(const uint8* src_y, int src_stride_y,
155 const uint8* src_u, int src_stride_u,
156 const uint8* src_v, int src_stride_v,
157 uint8* dst_y, int dst_stride_y,
158 uint8* dst_u, int dst_stride_u,
159 uint8* dst_v, int dst_stride_v,
160 int width, int height) {
161 if (!src_y || !src_u || !src_v ||
162 !dst_y || !dst_u || !dst_v ||
163 width <= 0 || height == 0) {
164 return -1;
165 }
166 // Negative height means invert the image.
167 if (height < 0) {
168 height = -height;
169 src_y = src_y + (height - 1) * src_stride_y;
170 src_u = src_u + (height - 1) * src_stride_u;
171 src_v = src_v + (height - 1) * src_stride_v;
172 src_stride_y = -src_stride_y;
173 src_stride_u = -src_stride_u;
174 src_stride_v = -src_stride_v;
175 }
176
177 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
178 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
179 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
180 return 0;
181 }
182
183 // Copy I400.
184 LIBYUV_API
I400ToI400(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)185 int I400ToI400(const uint8* src_y, int src_stride_y,
186 uint8* dst_y, int dst_stride_y,
187 int width, int height) {
188 if (!src_y || !dst_y || width <= 0 || height == 0) {
189 return -1;
190 }
191 // Negative height means invert the image.
192 if (height < 0) {
193 height = -height;
194 src_y = src_y + (height - 1) * src_stride_y;
195 src_stride_y = -src_stride_y;
196 }
197 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
198 return 0;
199 }
200
201 // Convert I420 to I400.
202 LIBYUV_API
I420ToI400(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,int width,int height)203 int I420ToI400(const uint8* src_y, int src_stride_y,
204 const uint8* src_u, int src_stride_u,
205 const uint8* src_v, int src_stride_v,
206 uint8* dst_y, int dst_stride_y,
207 int width, int height) {
208 if (!src_y || !dst_y || width <= 0 || height == 0) {
209 return -1;
210 }
211 // Negative height means invert the image.
212 if (height < 0) {
213 height = -height;
214 src_y = src_y + (height - 1) * src_stride_y;
215 src_stride_y = -src_stride_y;
216 }
217 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
218 return 0;
219 }
220
// Mirror a plane of data horizontally (left-right flip).
// Negative height additionally flips the image vertically.
void MirrorPlane(const uint8* src_y, int src_stride_y,
                 uint8* dst_y, int dst_stride_y,
                 int width, int height) {
  int y;
  // Row mirrorer; upgraded below when the CPU supports a faster variant.
  void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // "Any" variants handle widths not a multiple of the SIMD block size.
#if defined(HAS_MIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MirrorRow = MirrorRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MirrorRow = MirrorRow_NEON;
    }
  }
#endif
#if defined(HAS_MIRRORROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    MirrorRow = MirrorRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      MirrorRow = MirrorRow_SSSE3;
    }
  }
#endif
#if defined(HAS_MIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MirrorRow = MirrorRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      MirrorRow = MirrorRow_AVX2;
    }
  }
#endif
// TODO(fbarchard): Mirror on mips handle unaligned memory.
#if defined(HAS_MIRRORROW_DSPR2)
  // DSPR2 path requires 4-byte alignment of pointers and strides.
  if (TestCpuFlag(kCpuHasDSPR2) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(dst_y, 4) && IS_ALIGNED(dst_stride_y, 4)) {
    MirrorRow = MirrorRow_DSPR2;
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    MirrorRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
273
// Convert YUY2 (packed Y0 U Y1 V, 2 bytes/pixel) to planar I422.
// Each output U/V row is half the luma width: one chroma sample per pair
// of Y samples. Returns 0 on success (no argument validation here).
LIBYUV_API
int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
  // Row converters; upgraded below when the CPU supports faster variants.
  void (*YUY2ToUV422Row)(const uint8* src_yuy2,
                         uint8* dst_u, uint8* dst_v, int width) =
      YUY2ToUV422Row_C;
  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
      YUY2ToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // Coalesce rows: when every plane is contiguous, process the whole image
  // as a single long row.
  if (src_stride_yuy2 == width * 2 &&
      dst_stride_y == width &&
      dst_stride_u * 2 == width &&
      dst_stride_v * 2 == width) {
    width *= height;
    height = 1;
    src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_YUY2TOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
      YUY2ToYRow = YUY2ToYRow_SSE2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
      YUY2ToYRow = YUY2ToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_YUY2TOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    YUY2ToYRow = YUY2ToYRow_Any_NEON;
    // NEON UV extraction needs at least 16 pixels per row.
    if (width >= 16) {
      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
    }
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_NEON;
      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
    YUY2ToYRow(src_yuy2, dst_y, width);
    src_yuy2 += src_stride_yuy2;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
345
// Convert UYVY (packed U Y0 V Y1, 2 bytes/pixel) to planar I422.
// Same structure as YUY2ToI422, with the byte order swapped in the source.
LIBYUV_API
int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
  // Row converters; upgraded below when the CPU supports faster variants.
  void (*UYVYToUV422Row)(const uint8* src_uyvy,
                         uint8* dst_u, uint8* dst_v, int width) =
      UYVYToUV422Row_C;
  void (*UYVYToYRow)(const uint8* src_uyvy,
                     uint8* dst_y, int width) = UYVYToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // Coalesce rows: contiguous planes are processed as one long row.
  if (src_stride_uyvy == width * 2 &&
      dst_stride_y == width &&
      dst_stride_u * 2 == width &&
      dst_stride_v * 2 == width) {
    width *= height;
    height = 1;
    src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_UYVYTOYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
    UYVYToYRow = UYVYToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      UYVYToUV422Row = UYVYToUV422Row_SSE2;
      UYVYToYRow = UYVYToYRow_SSE2;
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
    UYVYToYRow = UYVYToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      UYVYToUV422Row = UYVYToUV422Row_AVX2;
      UYVYToYRow = UYVYToYRow_AVX2;
    }
  }
#endif
#if defined(HAS_UYVYTOYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    UYVYToYRow = UYVYToYRow_Any_NEON;
    // NEON UV extraction needs at least 16 pixels per row.
    if (width >= 16) {
      UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
    }
    if (IS_ALIGNED(width, 16)) {
      UYVYToYRow = UYVYToYRow_NEON;
      UYVYToUV422Row = UYVYToUV422Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
    UYVYToYRow(src_uyvy, dst_y, width);
    src_uyvy += src_stride_uyvy;
    dst_y += dst_stride_y;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
  return 0;
}
417
418 // Mirror I400 with optional flipping
419 LIBYUV_API
I400Mirror(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)420 int I400Mirror(const uint8* src_y, int src_stride_y,
421 uint8* dst_y, int dst_stride_y,
422 int width, int height) {
423 if (!src_y || !dst_y ||
424 width <= 0 || height == 0) {
425 return -1;
426 }
427 // Negative height means invert the image.
428 if (height < 0) {
429 height = -height;
430 src_y = src_y + (height - 1) * src_stride_y;
431 src_stride_y = -src_stride_y;
432 }
433
434 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
435 return 0;
436 }
437
438 // Mirror I420 with optional flipping
439 LIBYUV_API
I420Mirror(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)440 int I420Mirror(const uint8* src_y, int src_stride_y,
441 const uint8* src_u, int src_stride_u,
442 const uint8* src_v, int src_stride_v,
443 uint8* dst_y, int dst_stride_y,
444 uint8* dst_u, int dst_stride_u,
445 uint8* dst_v, int dst_stride_v,
446 int width, int height) {
447 int halfwidth = (width + 1) >> 1;
448 int halfheight = (height + 1) >> 1;
449 if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
450 width <= 0 || height == 0) {
451 return -1;
452 }
453 // Negative height means invert the image.
454 if (height < 0) {
455 height = -height;
456 halfheight = (height + 1) >> 1;
457 src_y = src_y + (height - 1) * src_stride_y;
458 src_u = src_u + (halfheight - 1) * src_stride_u;
459 src_v = src_v + (halfheight - 1) * src_stride_v;
460 src_stride_y = -src_stride_y;
461 src_stride_u = -src_stride_u;
462 src_stride_v = -src_stride_v;
463 }
464
465 if (dst_y) {
466 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
467 }
468 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
469 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
470 return 0;
471 }
472
// ARGB mirror: flip a 4-byte-per-pixel image horizontally, with optional
// vertical flip via negative height. Returns 0 on success, -1 on bad args.
LIBYUV_API
int ARGBMirror(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
  // Row mirrorer; upgraded below when the CPU supports a faster variant.
  void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
      ARGBMirrorRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Width alignment is counted in pixels (4 bytes each) here.
#if defined(HAS_ARGBMIRRORROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBMirrorRow = ARGBMirrorRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMirrorRow = ARGBMirrorRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMirrorRow = ARGBMirrorRow_AVX2;
    }
  }
#endif

  // Mirror plane
  for (y = 0; y < height; ++y) {
    ARGBMirrorRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
523
524 // Get a blender that optimized for the CPU and pixel count.
525 // As there are 6 blenders to choose from, the caller should try to use
526 // the same blend function for all pixels if possible.
527 LIBYUV_API
GetARGBBlend()528 ARGBBlendRow GetARGBBlend() {
529 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
530 uint8* dst_argb, int width) = ARGBBlendRow_C;
531 #if defined(HAS_ARGBBLENDROW_SSSE3)
532 if (TestCpuFlag(kCpuHasSSSE3)) {
533 ARGBBlendRow = ARGBBlendRow_SSSE3;
534 return ARGBBlendRow;
535 }
536 #endif
537 #if defined(HAS_ARGBBLENDROW_NEON)
538 if (TestCpuFlag(kCpuHasNEON)) {
539 ARGBBlendRow = ARGBBlendRow_NEON;
540 }
541 #endif
542 return ARGBBlendRow;
543 }
544
// Alpha Blend 2 ARGB images and store to destination.
// src_argb0 is blended over src_argb1 using the row blender chosen by
// GetARGBBlend(). Returns 0 on success, -1 on bad arguments.
LIBYUV_API
int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
              const uint8* src_argb1, int src_stride_argb1,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  int y;
  // CPU-optimized row blender selected once up front.
  void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
                       uint8* dst_argb, int width) = GetARGBBlend();
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (write destination bottom-up).
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous 4-byte-per-pixel planes become one long row.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }

  for (y = 0; y < height; ++y) {
    ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
580
// Alpha Blend plane and store to destination.
// Blends src_y0 over src_y1 using a per-pixel 8-bit alpha plane.
// Returns 0 on success, -1 on bad arguments.
LIBYUV_API
int BlendPlane(const uint8* src_y0, int src_stride_y0,
               const uint8* src_y1, int src_stride_y1,
               const uint8* alpha, int alpha_stride,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  int y;
  // Row blender; upgraded below when the CPU supports a faster variant.
  void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
      const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
  if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (write destination bottom-up).
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Coalesce rows for Y plane.
  if (src_stride_y0 == width &&
      src_stride_y1 == width &&
      alpha_stride == width &&
      dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
  }

#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
    src_y0 += src_stride_y0;
    src_y1 += src_stride_y1;
    alpha += alpha_stride;
    dst_y += dst_stride_y;
  }
  return 0;
}
637
#define MAXTWIDTH 2048
// Alpha Blend YUV images and store to destination.
// The full-resolution alpha plane is applied directly to Y; for U/V it is
// box-filtered 2x2 down to chroma resolution into a temporary row buffer.
// Returns 0 on success, -1 on bad arguments.
LIBYUV_API
int I420Blend(const uint8* src_y0, int src_stride_y0,
              const uint8* src_u0, int src_stride_u0,
              const uint8* src_v0, int src_stride_v0,
              const uint8* src_y1, int src_stride_y1,
              const uint8* src_u1, int src_stride_u1,
              const uint8* src_v1, int src_stride_v1,
              const uint8* alpha, int alpha_stride,
              uint8* dst_y, int dst_stride_y,
              uint8* dst_u, int dst_stride_u,
              uint8* dst_v, int dst_stride_v,
              int width, int height) {
  int y;
  // Half width/height for UV.
  int halfwidth = (width + 1) >> 1;
  void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
      const uint8* alpha, uint8* dst, int width) = BlendPlaneRow_C;
  // Downsamples 2 alpha rows to one half-width row (2x2 box average).
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
  if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
      !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  // NOTE(review): only dst_y is inverted here; dst_u/dst_v are still
  // written top-down by the loop below — confirm this is intended.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

  // Blend Y plane.
  BlendPlane(src_y0, src_stride_y0,
             src_y1, src_stride_y1,
             alpha, alpha_stride,
             dst_y, dst_stride_y,
             width, height);

  // Chroma-row blender dispatch uses halfwidth (the UV row length).
#if defined(HAS_BLENDPLANEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
    if (IS_ALIGNED(halfwidth, 8)) {
      BlendPlaneRow = BlendPlaneRow_SSSE3;
    }
  }
#endif
#if defined(HAS_BLENDPLANEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    BlendPlaneRow = BlendPlaneRow_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      BlendPlaneRow = BlendPlaneRow_AVX2;
    }
  }
#endif
  // Odd widths need the variant that replicates the last source column.
  if (!IS_ALIGNED(width, 2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
  }
#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_NEON;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
      if (IS_ALIGNED(halfwidth, 16)) {
        ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
    if (IS_ALIGNED(width, 2)) {
      ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
      if (IS_ALIGNED(halfwidth, 32)) {
        ScaleRowDown2 = ScaleRowDown2Box_AVX2;
      }
    }
  }
#endif

  // Row buffer for intermediate alpha pixels.
  align_buffer_64(halfalpha, halfwidth);
  // Consume 2 alpha rows per chroma row.
  for (y = 0; y < height; y += 2) {
    // last row of odd height image use 1 row of alpha instead of 2.
    if (y == (height - 1)) {
      alpha_stride = 0;
    }
    // Subsample 2 rows of UV to half width and half height.
    ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
    alpha += alpha_stride * 2;
    BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
    BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
    src_u0 += src_stride_u0;
    src_u1 += src_stride_u1;
    dst_u += dst_stride_u;
    src_v0 += src_stride_v0;
    src_v1 += src_stride_v1;
    dst_v += dst_stride_v;
  }
  free_aligned_buffer_64(halfalpha);
  return 0;
}
753
// Multiply 2 ARGB images and store to destination (per-channel multiply).
// Returns 0 on success, -1 on bad arguments.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
                 const uint8* src_argb1, int src_stride_argb1,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  int y;
  // Row multiplier; upgraded below when the CPU supports a faster variant.
  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBMultiplyRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (write destination bottom-up).
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous 4-byte-per-pixel planes become one long row.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    }
  }
#endif

  // Multiply plane
  for (y = 0; y < height; ++y) {
    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
814
// Add 2 ARGB images and store to destination (per-channel saturating add).
// Returns 0 on success, -1 on bad arguments.
LIBYUV_API
int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
            const uint8* src_argb1, int src_stride_argb1,
            uint8* dst_argb, int dst_stride_argb,
            int width, int height) {
  int y;
  // Row adder; upgraded below when the CPU supports a faster variant.
  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
                     int width) = ARGBAddRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (write destination bottom-up).
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous 4-byte-per-pixel planes become one long row.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // MSVC (non-clang) builds have no "Any" SSE2 variant, so they always use
  // the full-width SSE2 row; other compilers dispatch on alignment.
#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_SSE2;
  }
#endif
#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAddRow = ARGBAddRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAddRow = ARGBAddRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAddRow = ARGBAddRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_NEON;
    }
  }
#endif

  // Add plane
  for (y = 0; y < height; ++y) {
    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
880
// Subtract 2 ARGB images and store to destination (src0 - src1 per channel).
// Returns 0 on success, -1 on bad arguments.
LIBYUV_API
int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
                 const uint8* src_argb1, int src_stride_argb1,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  int y;
  // Row subtractor; upgraded below when the CPU supports a faster variant.
  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBSubtractRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (write destination bottom-up).
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous 4-byte-per-pixel planes become one long row.
  if (src_stride_argb0 == width * 4 &&
      src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBSubtractRow = ARGBSubtractRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_NEON;
    }
  }
#endif

  // Subtract plane
  for (y = 0; y < height; ++y) {
    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Convert I422 to RGBA using the supplied YUV-to-RGB conversion matrix.
// Internal helper shared by I422ToRGBA/I422ToBGRA-style wrappers.
// Returns 0 on success, -1 on bad arguments.
static int I422ToRGBAMatrix(const uint8* src_y, int src_stride_y,
                            const uint8* src_u, int src_stride_u,
                            const uint8* src_v, int src_stride_v,
                            uint8* dst_rgba, int dst_stride_rgba,
                            const struct YuvConstants* yuvconstants,
                            int width, int height) {
  int y;
  // Row converter; upgraded below when the CPU supports a faster variant.
  void (*I422ToRGBARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        const struct YuvConstants* yuvconstants,
                        int width) = I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v || !dst_rgba ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (write destination bottom-up).
  if (height < 0) {
    height = -height;
    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    dst_stride_rgba = -dst_stride_rgba;
  }
#if defined(HAS_I422TORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToRGBARow = I422ToRGBARow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I422ToRGBARow = I422ToRGBARow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGBARow = I422ToRGBARow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_NEON;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_DSPR2)
  // DSPR2 path requires aligned pointers and strides for all planes.
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
    I422ToRGBARow = I422ToRGBARow_DSPR2;
  }
#endif

  for (y = 0; y < height; ++y) {
    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
1008
1009 // Convert I422 to RGBA.
1010 LIBYUV_API
I422ToRGBA(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_rgba,int dst_stride_rgba,int width,int height)1011 int I422ToRGBA(const uint8* src_y, int src_stride_y,
1012 const uint8* src_u, int src_stride_u,
1013 const uint8* src_v, int src_stride_v,
1014 uint8* dst_rgba, int dst_stride_rgba,
1015 int width, int height) {
1016 return I422ToRGBAMatrix(src_y, src_stride_y,
1017 src_u, src_stride_u,
1018 src_v, src_stride_v,
1019 dst_rgba, dst_stride_rgba,
1020 &kYuvI601Constants,
1021 width, height);
1022 }
1023
1024 // Convert I422 to BGRA.
1025 LIBYUV_API
I422ToBGRA(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_bgra,int dst_stride_bgra,int width,int height)1026 int I422ToBGRA(const uint8* src_y, int src_stride_y,
1027 const uint8* src_u, int src_stride_u,
1028 const uint8* src_v, int src_stride_v,
1029 uint8* dst_bgra, int dst_stride_bgra,
1030 int width, int height) {
1031 return I422ToRGBAMatrix(src_y, src_stride_y,
1032 src_v, src_stride_v, // Swap U and V
1033 src_u, src_stride_u,
1034 dst_bgra, dst_stride_bgra,
1035 &kYvuI601Constants, // Use Yvu matrix
1036 width, height);
1037 }
1038
// Convert NV12 to RGB565.
// NV12 is 4:2:0 with an interleaved UV plane; output is 16 bit RGB565.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y, int src_stride_y,
                 const uint8* src_uv, int src_stride_uv,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
  // Row converter; C fallback, possibly replaced by a SIMD variant below.
  void (*NV12ToRGB565Row)(const uint8* y_buf,
                          const uint8* uv_buf,
                          uint8* rgb_buf,
                          const struct YuvConstants* yuvconstants,
                          int width) = NV12ToRGB565Row_C;
  if (!src_y || !src_uv || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
#if defined(HAS_NV12TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    }
  }
#endif
#if defined(HAS_NV12TORGB565ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
    }
  }
#endif
#if defined(HAS_NV12TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    // 4:2:0 chroma: advance the UV plane every other row only.
    if (y & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
1096
// Convert RAW to RGB24 (swaps the R and B channel order, 3 bytes per pixel).
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
               uint8* dst_rgb24, int dst_stride_rgb24,
               int width, int height) {
  int y;
  // Row converter; C fallback, possibly replaced by a SIMD variant below.
  void (*RAWToRGB24Row)(const uint8* src_rgb, uint8* dst_rgb24, int width) =
      RAWToRGB24Row_C;
  if (!src_raw || !dst_rgb24 ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_raw = src_raw + (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }
  // Coalesce rows: if both images are contiguous, process everything as one
  // long row to cut per-row overhead.
  if (src_stride_raw == width * 3 &&
      dst_stride_rgb24 == width * 3) {
    width *= height;
    height = 1;
    src_stride_raw = dst_stride_rgb24 = 0;
  }
#if defined(HAS_RAWTORGB24ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_SSSE3;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    RAWToRGB24Row(src_raw, dst_rgb24, width);
    src_raw += src_stride_raw;
    dst_rgb24 += dst_stride_rgb24;
  }
  return 0;
}
1146
// Fill a plane of data with a constant byte value.
// Only the low 8 bits of |value| are used (the row functions take uint8).
// Negative height inverts the fill direction.
LIBYUV_API
void SetPlane(uint8* dst_y, int dst_stride_y,
              int width, int height,
              uint32 value) {
  int y;
  // Row setter; C fallback, possibly replaced by a SIMD variant below.
  void (*SetRow)(uint8* dst, uint8 value, int width) = SetRow_C;
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows: a contiguous plane can be filled as one long row.
  if (dst_stride_y == width) {
    width *= height;
    height = 1;
    dst_stride_y = 0;
  }
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SetRow = SetRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SetRow = SetRow_NEON;
    }
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    SetRow = SetRow_Any_X86;
    if (IS_ALIGNED(width, 4)) {
      SetRow = SetRow_X86;
    }
  }
#endif
#if defined(HAS_SETROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    SetRow = SetRow_ERMS;
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    SetRow(dst_y, value, width);
    dst_y += dst_stride_y;
  }
}
1192
1193 // Draw a rectangle into I420
1194 LIBYUV_API
I420Rect(uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int x,int y,int width,int height,int value_y,int value_u,int value_v)1195 int I420Rect(uint8* dst_y, int dst_stride_y,
1196 uint8* dst_u, int dst_stride_u,
1197 uint8* dst_v, int dst_stride_v,
1198 int x, int y,
1199 int width, int height,
1200 int value_y, int value_u, int value_v) {
1201 int halfwidth = (width + 1) >> 1;
1202 int halfheight = (height + 1) >> 1;
1203 uint8* start_y = dst_y + y * dst_stride_y + x;
1204 uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
1205 uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
1206 if (!dst_y || !dst_u || !dst_v ||
1207 width <= 0 || height == 0 ||
1208 x < 0 || y < 0 ||
1209 value_y < 0 || value_y > 255 ||
1210 value_u < 0 || value_u > 255 ||
1211 value_v < 0 || value_v > 255) {
1212 return -1;
1213 }
1214
1215 SetPlane(start_y, dst_stride_y, width, height, value_y);
1216 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
1217 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
1218 return 0;
1219 }
1220
// Draw a rectangle into ARGB with a constant 32 bit pixel value.
// (dst_x, dst_y) is the top-left corner.  Negative height inverts the image.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBRect(uint8* dst_argb, int dst_stride_argb,
             int dst_x, int dst_y,
             int width, int height,
             uint32 value) {
  int y;
  // Row setter; C fallback, possibly replaced by a SIMD variant below.
  void (*ARGBSetRow)(uint8* dst_argb, uint32 value, int width) = ARGBSetRow_C;
  if (!dst_argb ||
      width <= 0 || height == 0 ||
      dst_x < 0 || dst_y < 0) {
    return -1;
  }
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Offset to the rectangle's top-left pixel (4 bytes per ARGB pixel).
  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
  // Coalesce rows: a contiguous region can be filled as one long row.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }

#if defined(HAS_ARGBSETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSetRow = ARGBSetRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBSetRow = ARGBSetRow_X86;
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    ARGBSetRow(dst_argb, value, width);
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1268
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha

// Premultiply each pixel's color channels by its alpha.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
                  uint8* dst_argb, int dst_stride_argb,
                  int width, int height) {
  int y;
  // Row worker; C fallback, possibly replaced by a SIMD variant below.
  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
                           int width) = ARGBAttenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBAttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1336
// Convert preattenuated ARGB to unattenuated ARGB (divide color by alpha).
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_argb, int dst_stride_argb,
                    int width, int height) {
  int y;
  // Row worker; C fallback, possibly replaced by a SIMD variant below.
  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
                             int width) = ARGBUnattenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
    }
  }
#endif
  // TODO(fbarchard): Neon version.

  for (y = 0; y < height; ++y) {
    ARGBUnattenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1385
// Convert ARGB to grayed ARGB (luminance replicated to R, G and B; alpha kept).
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
  // Row worker; C fallback, possibly replaced by a SIMD variant below.
  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
                      int width) = ARGBGrayRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
  // SIMD variants require width to be a multiple of 8; otherwise the C
  // fallback is used for the whole image.
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBGrayRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1427
1428 // Make a rectangle of ARGB gray scale.
1429 LIBYUV_API
ARGBGray(uint8 * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)1430 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
1431 int dst_x, int dst_y,
1432 int width, int height) {
1433 int y;
1434 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1435 int width) = ARGBGrayRow_C;
1436 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1437 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1438 return -1;
1439 }
1440 // Coalesce rows.
1441 if (dst_stride_argb == width * 4) {
1442 width *= height;
1443 height = 1;
1444 dst_stride_argb = 0;
1445 }
1446 #if defined(HAS_ARGBGRAYROW_SSSE3)
1447 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
1448 ARGBGrayRow = ARGBGrayRow_SSSE3;
1449 }
1450 #endif
1451 #if defined(HAS_ARGBGRAYROW_NEON)
1452 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1453 ARGBGrayRow = ARGBGrayRow_NEON;
1454 }
1455 #endif
1456 for (y = 0; y < height; ++y) {
1457 ARGBGrayRow(dst, dst, width);
1458 dst += dst_stride_argb;
1459 }
1460 return 0;
1461 }
1462
1463 // Make a rectangle of ARGB Sepia tone.
1464 LIBYUV_API
ARGBSepia(uint8 * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)1465 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
1466 int dst_x, int dst_y, int width, int height) {
1467 int y;
1468 void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
1469 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1470 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1471 return -1;
1472 }
1473 // Coalesce rows.
1474 if (dst_stride_argb == width * 4) {
1475 width *= height;
1476 height = 1;
1477 dst_stride_argb = 0;
1478 }
1479 #if defined(HAS_ARGBSEPIAROW_SSSE3)
1480 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
1481 ARGBSepiaRow = ARGBSepiaRow_SSSE3;
1482 }
1483 #endif
1484 #if defined(HAS_ARGBSEPIAROW_NEON)
1485 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1486 ARGBSepiaRow = ARGBSepiaRow_NEON;
1487 }
1488 #endif
1489 for (y = 0; y < height; ++y) {
1490 ARGBSepiaRow(dst, width);
1491 dst += dst_stride_argb;
1492 }
1493 return 0;
1494 }
1495
// Apply a 4x4 matrix to each ARGB pixel.
// Note: Normally for shading, but can be used to swizzle or invert.
// matrix_argb is 16 signed 6 bit fixed point coefficients, row major.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_argb, int dst_stride_argb,
                    const int8* matrix_argb,
                    int width, int height) {
  int y;
  // Row worker; C fallback, possibly replaced by a SIMD variant below.
  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
      const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1538
1539 // Apply a 4x3 matrix to each ARGB pixel.
1540 // Deprecated.
1541 LIBYUV_API
RGBColorMatrix(uint8 * dst_argb,int dst_stride_argb,const int8 * matrix_rgb,int dst_x,int dst_y,int width,int height)1542 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
1543 const int8* matrix_rgb,
1544 int dst_x, int dst_y, int width, int height) {
1545 SIMD_ALIGNED(int8 matrix_argb[16]);
1546 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1547 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
1548 dst_x < 0 || dst_y < 0) {
1549 return -1;
1550 }
1551
1552 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
1553 matrix_argb[0] = matrix_rgb[0] / 2;
1554 matrix_argb[1] = matrix_rgb[1] / 2;
1555 matrix_argb[2] = matrix_rgb[2] / 2;
1556 matrix_argb[3] = matrix_rgb[3] / 2;
1557 matrix_argb[4] = matrix_rgb[4] / 2;
1558 matrix_argb[5] = matrix_rgb[5] / 2;
1559 matrix_argb[6] = matrix_rgb[6] / 2;
1560 matrix_argb[7] = matrix_rgb[7] / 2;
1561 matrix_argb[8] = matrix_rgb[8] / 2;
1562 matrix_argb[9] = matrix_rgb[9] / 2;
1563 matrix_argb[10] = matrix_rgb[10] / 2;
1564 matrix_argb[11] = matrix_rgb[11] / 2;
1565 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
1566 matrix_argb[15] = 64; // 1.0
1567
1568 return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
1569 dst, dst_stride_argb,
1570 &matrix_argb[0], width, height);
1571 }
1572
1573 // Apply a color table each ARGB pixel.
1574 // Table contains 256 ARGB values.
1575 LIBYUV_API
ARGBColorTable(uint8 * dst_argb,int dst_stride_argb,const uint8 * table_argb,int dst_x,int dst_y,int width,int height)1576 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
1577 const uint8* table_argb,
1578 int dst_x, int dst_y, int width, int height) {
1579 int y;
1580 void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1581 int width) = ARGBColorTableRow_C;
1582 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1583 if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1584 dst_x < 0 || dst_y < 0) {
1585 return -1;
1586 }
1587 // Coalesce rows.
1588 if (dst_stride_argb == width * 4) {
1589 width *= height;
1590 height = 1;
1591 dst_stride_argb = 0;
1592 }
1593 #if defined(HAS_ARGBCOLORTABLEROW_X86)
1594 if (TestCpuFlag(kCpuHasX86)) {
1595 ARGBColorTableRow = ARGBColorTableRow_X86;
1596 }
1597 #endif
1598 for (y = 0; y < height; ++y) {
1599 ARGBColorTableRow(dst, table_argb, width);
1600 dst += dst_stride_argb;
1601 }
1602 return 0;
1603 }
1604
1605 // Apply a color table each ARGB pixel but preserve destination alpha.
1606 // Table contains 256 ARGB values.
1607 LIBYUV_API
RGBColorTable(uint8 * dst_argb,int dst_stride_argb,const uint8 * table_argb,int dst_x,int dst_y,int width,int height)1608 int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
1609 const uint8* table_argb,
1610 int dst_x, int dst_y, int width, int height) {
1611 int y;
1612 void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1613 int width) = RGBColorTableRow_C;
1614 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1615 if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1616 dst_x < 0 || dst_y < 0) {
1617 return -1;
1618 }
1619 // Coalesce rows.
1620 if (dst_stride_argb == width * 4) {
1621 width *= height;
1622 height = 1;
1623 dst_stride_argb = 0;
1624 }
1625 #if defined(HAS_RGBCOLORTABLEROW_X86)
1626 if (TestCpuFlag(kCpuHasX86)) {
1627 RGBColorTableRow = RGBColorTableRow_X86;
1628 }
1629 #endif
1630 for (y = 0; y < height; ++y) {
1631 RGBColorTableRow(dst, table_argb, width);
1632 dst += dst_stride_argb;
1633 }
1634 return 0;
1635 }
1636
1637 // ARGBQuantize is used to posterize art.
1638 // e.g. rgb / qvalue * qvalue + qvalue / 2
1639 // But the low levels implement efficiently with 3 parameters, and could be
1640 // used for other high level operations.
1641 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
1642 // where scale is 1 / interval_size as a fixed point value.
1643 // The divide is replaces with a multiply by reciprocal fixed point multiply.
1644 // Caveat - although SSE2 saturates, the C function does not and should be used
1645 // with care if doing anything but quantization.
1646 LIBYUV_API
ARGBQuantize(uint8 * dst_argb,int dst_stride_argb,int scale,int interval_size,int interval_offset,int dst_x,int dst_y,int width,int height)1647 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
1648 int scale, int interval_size, int interval_offset,
1649 int dst_x, int dst_y, int width, int height) {
1650 int y;
1651 void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
1652 int interval_offset, int width) = ARGBQuantizeRow_C;
1653 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1654 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
1655 interval_size < 1 || interval_size > 255) {
1656 return -1;
1657 }
1658 // Coalesce rows.
1659 if (dst_stride_argb == width * 4) {
1660 width *= height;
1661 height = 1;
1662 dst_stride_argb = 0;
1663 }
1664 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
1665 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
1666 ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
1667 }
1668 #endif
1669 #if defined(HAS_ARGBQUANTIZEROW_NEON)
1670 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1671 ARGBQuantizeRow = ARGBQuantizeRow_NEON;
1672 }
1673 #endif
1674 for (y = 0; y < height; ++y) {
1675 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
1676 dst += dst_stride_argb;
1677 }
1678 return 0;
1679 }
1680
// Computes table of cumulative sum for image where the value is the sum
// of all values above and to the left of the entry. Used by ARGBBlur.
// dst_cumsum holds 4 int32 per pixel (one per channel).
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
                             int32* dst_cumsum, int dst_stride32_cumsum,
                             int width, int height) {
  int y;
  // Row worker; C fallback, possibly replaced by SSE2 below.
  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
  // For the first row, "previous" aliases the destination row, which is
  // zeroed below so the first row's sums start from zero.
  int32* previous_cumsum = dst_cumsum;
  if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
  }
#endif
  memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4);  // 4 int per pixel.
  for (y = 0; y < height; ++y) {
    ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
    previous_cumsum = dst_cumsum;
    dst_cumsum += dst_stride32_cumsum;
    src_argb += src_stride_argb;
  }
  return 0;
}
1708
// Blur ARGB image.
// Caller should allocate CumulativeSum table of width * height * 16 bytes
// aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
// as the buffer is treated as circular.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBBlur(const uint8* src_argb, int src_stride_argb,
             uint8* dst_argb, int dst_stride_argb,
             int32* dst_cumsum, int dst_stride32_cumsum,
             int width, int height, int radius) {
  int y;
  // Row workers; C fallbacks, possibly replaced by SSE2 below.
  void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
      const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
      int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
  // Circular-buffer cursors into the cumulative-sum table.
  int32* cumsum_bot_row;
  int32* max_cumsum_bot_row;  // One past the last usable row; wrap point.
  int32* cumsum_top_row;

  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Clamp the radius to the image dimensions.
  if (radius > height) {
    radius = height;
  }
  if (radius > (width / 2 - 1)) {
    radius = width / 2 - 1;
  }
  if (radius <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
  }
#endif
  // Compute enough CumulativeSum for first row to be blurred. After this
  // one row of CumulativeSum is updated at a time.
  ARGBComputeCumulativeSum(src_argb, src_stride_argb,
                           dst_cumsum, dst_stride32_cumsum,
                           width, radius);

  src_argb = src_argb + radius * src_stride_argb;
  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];

  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
  cumsum_top_row = &dst_cumsum[0];

  for (y = 0; y < height; ++y) {
    // Vertical window [top_y, bot_y], clamped at the image edges.
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
    int area = radius * (bot_y - top_y);
    int boxwidth = radius * 4;
    int x;
    int n;

    // Increment cumsum_top_row pointer with circular buffer wrap around.
    if (top_y) {
      cumsum_top_row += dst_stride32_cumsum;
      if (cumsum_top_row >= max_cumsum_bot_row) {
        cumsum_top_row = dst_cumsum;
      }
    }
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
    // then fill in a row of CumulativeSum.
    if ((y + radius) < height) {
      const int32* prev_cumsum_bot_row = cumsum_bot_row;
      cumsum_bot_row += dst_stride32_cumsum;
      if (cumsum_bot_row >= max_cumsum_bot_row) {
        cumsum_bot_row = dst_cumsum;
      }
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
                              width);
      src_argb += src_stride_argb;
    }

    // Left clipped: box grows until it reaches full width.
    for (x = 0; x < radius + 1; ++x) {
      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
                                boxwidth, area, &dst_argb[x * 4], 1);
      area += (bot_y - top_y);
      boxwidth += 4;
    }

    // Middle unclipped: full-width box processed as one run.
    n = (width - 1) - radius - x + 1;
    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
                              boxwidth, area, &dst_argb[x * 4], n);

    // Right clipped: box shrinks toward the right edge.
    for (x += n; x <= width - 1; ++x) {
      area -= (bot_y - top_y);
      boxwidth -= 4;
      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
                                cumsum_bot_row + (x - radius - 1) * 4,
                                boxwidth, area, &dst_argb[x * 4], 1);
    }
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1815
// Multiply ARGB image by a specified ARGB value.
// value packs 4 channel multipliers; value of 0 is rejected as invalid.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBShade(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height, uint32 value) {
  int y;
  // Row worker; C fallback, possibly replaced by a SIMD variant below.
  void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
                       int width, uint32 value) = ARGBShadeRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSHADEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
    ARGBShadeRow = ARGBShadeRow_SSE2;
  }
#endif
#if defined(HAS_ARGBSHADEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBShadeRow = ARGBShadeRow_NEON;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShadeRow(src_argb, dst_argb, width, value);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1857
// Interpolate 2 planes by specified amount (0 to 255).
// interpolation 0 yields src0; larger values weight toward src1.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int InterpolatePlane(const uint8* src0, int src_stride0,
                     const uint8* src1, int src_stride1,
                     uint8* dst, int dst_stride,
                     int width, int height, int interpolation) {
  int y;
  // Row worker; C fallback, possibly replaced by a SIMD variant below.
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst = dst + (height - 1) * dst_stride;
    dst_stride = -dst_stride;
  }
  // Coalesce rows: contiguous planes are processed as one long row.
  if (src_stride0 == width &&
      src_stride1 == width &&
      dst_stride == width) {
    width *= height;
    height = 1;
    src_stride0 = src_stride1 = dst_stride = 0;
  }
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  // DSPR2 path requires aligned pointers, strides and width.
  if (TestCpuFlag(kCpuHasDSPR2) &&
      IS_ALIGNED(src0, 4) && IS_ALIGNED(src_stride0, 4) &&
      IS_ALIGNED(src1, 4) && IS_ALIGNED(src_stride1, 4) &&
      IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4) &&
      IS_ALIGNED(width, 4)) {
    InterpolateRow = InterpolateRow_DSPR2;
  }
#endif

  for (y = 0; y < height; ++y) {
    // src1 is expressed as a ptrdiff_t stride from src0, so the row function
    // blends src0 with the "next row" at src0 + (src1 - src0) == src1.
    InterpolateRow(dst, src0, src1 - src0, width, interpolation);
    src0 += src_stride0;
    src1 += src_stride1;
    dst += dst_stride;
  }
  return 0;
}
1927
1928 // Interpolate 2 ARGB images by specified amount (0 to 255).
1929 LIBYUV_API
ARGBInterpolate(const uint8 * src_argb0,int src_stride_argb0,const uint8 * src_argb1,int src_stride_argb1,uint8 * dst_argb,int dst_stride_argb,int width,int height,int interpolation)1930 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
1931 const uint8* src_argb1, int src_stride_argb1,
1932 uint8* dst_argb, int dst_stride_argb,
1933 int width, int height, int interpolation) {
1934 return InterpolatePlane(src_argb0, src_stride_argb0,
1935 src_argb1, src_stride_argb1,
1936 dst_argb, dst_stride_argb,
1937 width * 4, height, interpolation);
1938 }
1939
1940 // Interpolate 2 YUV images by specified amount (0 to 255).
1941 LIBYUV_API
I420Interpolate(const uint8 * src0_y,int src0_stride_y,const uint8 * src0_u,int src0_stride_u,const uint8 * src0_v,int src0_stride_v,const uint8 * src1_y,int src1_stride_y,const uint8 * src1_u,int src1_stride_u,const uint8 * src1_v,int src1_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height,int interpolation)1942 int I420Interpolate(const uint8* src0_y, int src0_stride_y,
1943 const uint8* src0_u, int src0_stride_u,
1944 const uint8* src0_v, int src0_stride_v,
1945 const uint8* src1_y, int src1_stride_y,
1946 const uint8* src1_u, int src1_stride_u,
1947 const uint8* src1_v, int src1_stride_v,
1948 uint8* dst_y, int dst_stride_y,
1949 uint8* dst_u, int dst_stride_u,
1950 uint8* dst_v, int dst_stride_v,
1951 int width, int height, int interpolation) {
1952 int halfwidth = (width + 1) >> 1;
1953 int halfheight = (height + 1) >> 1;
1954 if (!src0_y || !src0_u || !src0_v ||
1955 !src1_y || !src1_u || !src1_v ||
1956 !dst_y || !dst_u || !dst_v ||
1957 width <= 0 || height == 0) {
1958 return -1;
1959 }
1960 InterpolatePlane(src0_y, src0_stride_y,
1961 src1_y, src1_stride_y,
1962 dst_y, dst_stride_y,
1963 width, height, interpolation);
1964 InterpolatePlane(src0_u, src0_stride_u,
1965 src1_u, src1_stride_u,
1966 dst_u, dst_stride_u,
1967 halfwidth, halfheight, interpolation);
1968 InterpolatePlane(src0_v, src0_stride_v,
1969 src1_v, src1_stride_v,
1970 dst_v, dst_stride_v,
1971 halfwidth, halfheight, interpolation);
1972 return 0;
1973 }
1974
1975 // Shuffle ARGB channel order. e.g. BGRA to ARGB.
1976 LIBYUV_API
ARGBShuffle(const uint8 * src_bgra,int src_stride_bgra,uint8 * dst_argb,int dst_stride_argb,const uint8 * shuffler,int width,int height)1977 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
1978 uint8* dst_argb, int dst_stride_argb,
1979 const uint8* shuffler, int width, int height) {
1980 int y;
1981 void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
1982 const uint8* shuffler, int width) = ARGBShuffleRow_C;
1983 if (!src_bgra || !dst_argb ||
1984 width <= 0 || height == 0) {
1985 return -1;
1986 }
1987 // Negative height means invert the image.
1988 if (height < 0) {
1989 height = -height;
1990 src_bgra = src_bgra + (height - 1) * src_stride_bgra;
1991 src_stride_bgra = -src_stride_bgra;
1992 }
1993 // Coalesce rows.
1994 if (src_stride_bgra == width * 4 &&
1995 dst_stride_argb == width * 4) {
1996 width *= height;
1997 height = 1;
1998 src_stride_bgra = dst_stride_argb = 0;
1999 }
2000 #if defined(HAS_ARGBSHUFFLEROW_SSE2)
2001 if (TestCpuFlag(kCpuHasSSE2)) {
2002 ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
2003 if (IS_ALIGNED(width, 4)) {
2004 ARGBShuffleRow = ARGBShuffleRow_SSE2;
2005 }
2006 }
2007 #endif
2008 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
2009 if (TestCpuFlag(kCpuHasSSSE3)) {
2010 ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
2011 if (IS_ALIGNED(width, 8)) {
2012 ARGBShuffleRow = ARGBShuffleRow_SSSE3;
2013 }
2014 }
2015 #endif
2016 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
2017 if (TestCpuFlag(kCpuHasAVX2)) {
2018 ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
2019 if (IS_ALIGNED(width, 16)) {
2020 ARGBShuffleRow = ARGBShuffleRow_AVX2;
2021 }
2022 }
2023 #endif
2024 #if defined(HAS_ARGBSHUFFLEROW_NEON)
2025 if (TestCpuFlag(kCpuHasNEON)) {
2026 ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
2027 if (IS_ALIGNED(width, 4)) {
2028 ARGBShuffleRow = ARGBShuffleRow_NEON;
2029 }
2030 }
2031 #endif
2032
2033 for (y = 0; y < height; ++y) {
2034 ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
2035 src_bgra += src_stride_bgra;
2036 dst_argb += dst_stride_argb;
2037 }
2038 return 0;
2039 }
2040
2041 // Sobel ARGB effect.
ARGBSobelize(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height,void (* SobelRow)(const uint8 * src_sobelx,const uint8 * src_sobely,uint8 * dst,int width))2042 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
2043 uint8* dst_argb, int dst_stride_argb,
2044 int width, int height,
2045 void (*SobelRow)(const uint8* src_sobelx,
2046 const uint8* src_sobely,
2047 uint8* dst, int width)) {
2048 int y;
2049 void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
2050 ARGBToYJRow_C;
2051 void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
2052 uint8* dst_sobely, int width) = SobelYRow_C;
2053 void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
2054 const uint8* src_y2, uint8* dst_sobely, int width) =
2055 SobelXRow_C;
2056 const int kEdge = 16; // Extra pixels at start of row for extrude/align.
2057 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2058 return -1;
2059 }
2060 // Negative height means invert the image.
2061 if (height < 0) {
2062 height = -height;
2063 src_argb = src_argb + (height - 1) * src_stride_argb;
2064 src_stride_argb = -src_stride_argb;
2065 }
2066
2067 #if defined(HAS_ARGBTOYJROW_SSSE3)
2068 if (TestCpuFlag(kCpuHasSSSE3)) {
2069 ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
2070 if (IS_ALIGNED(width, 16)) {
2071 ARGBToYJRow = ARGBToYJRow_SSSE3;
2072 }
2073 }
2074 #endif
2075 #if defined(HAS_ARGBTOYJROW_AVX2)
2076 if (TestCpuFlag(kCpuHasAVX2)) {
2077 ARGBToYJRow = ARGBToYJRow_Any_AVX2;
2078 if (IS_ALIGNED(width, 32)) {
2079 ARGBToYJRow = ARGBToYJRow_AVX2;
2080 }
2081 }
2082 #endif
2083 #if defined(HAS_ARGBTOYJROW_NEON)
2084 if (TestCpuFlag(kCpuHasNEON)) {
2085 ARGBToYJRow = ARGBToYJRow_Any_NEON;
2086 if (IS_ALIGNED(width, 8)) {
2087 ARGBToYJRow = ARGBToYJRow_NEON;
2088 }
2089 }
2090 #endif
2091
2092 #if defined(HAS_SOBELYROW_SSE2)
2093 if (TestCpuFlag(kCpuHasSSE2)) {
2094 SobelYRow = SobelYRow_SSE2;
2095 }
2096 #endif
2097 #if defined(HAS_SOBELYROW_NEON)
2098 if (TestCpuFlag(kCpuHasNEON)) {
2099 SobelYRow = SobelYRow_NEON;
2100 }
2101 #endif
2102 #if defined(HAS_SOBELXROW_SSE2)
2103 if (TestCpuFlag(kCpuHasSSE2)) {
2104 SobelXRow = SobelXRow_SSE2;
2105 }
2106 #endif
2107 #if defined(HAS_SOBELXROW_NEON)
2108 if (TestCpuFlag(kCpuHasNEON)) {
2109 SobelXRow = SobelXRow_NEON;
2110 }
2111 #endif
2112 {
2113 // 3 rows with edges before/after.
2114 const int kRowSize = (width + kEdge + 31) & ~31;
2115 align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
2116 uint8* row_sobelx = rows;
2117 uint8* row_sobely = rows + kRowSize;
2118 uint8* row_y = rows + kRowSize * 2;
2119
2120 // Convert first row.
2121 uint8* row_y0 = row_y + kEdge;
2122 uint8* row_y1 = row_y0 + kRowSize;
2123 uint8* row_y2 = row_y1 + kRowSize;
2124 ARGBToYJRow(src_argb, row_y0, width);
2125 row_y0[-1] = row_y0[0];
2126 memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
2127 ARGBToYJRow(src_argb, row_y1, width);
2128 row_y1[-1] = row_y1[0];
2129 memset(row_y1 + width, row_y1[width - 1], 16);
2130 memset(row_y2 + width, 0, 16);
2131
2132 for (y = 0; y < height; ++y) {
2133 // Convert next row of ARGB to G.
2134 if (y < (height - 1)) {
2135 src_argb += src_stride_argb;
2136 }
2137 ARGBToYJRow(src_argb, row_y2, width);
2138 row_y2[-1] = row_y2[0];
2139 row_y2[width] = row_y2[width - 1];
2140
2141 SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
2142 SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
2143 SobelRow(row_sobelx, row_sobely, dst_argb, width);
2144
2145 // Cycle thru circular queue of 3 row_y buffers.
2146 {
2147 uint8* row_yt = row_y0;
2148 row_y0 = row_y1;
2149 row_y1 = row_y2;
2150 row_y2 = row_yt;
2151 }
2152
2153 dst_argb += dst_stride_argb;
2154 }
2155 free_aligned_buffer_64(rows);
2156 }
2157 return 0;
2158 }
2159
2160 // Sobel ARGB effect.
2161 LIBYUV_API
ARGBSobel(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2162 int ARGBSobel(const uint8* src_argb, int src_stride_argb,
2163 uint8* dst_argb, int dst_stride_argb,
2164 int width, int height) {
2165 void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
2166 uint8* dst_argb, int width) = SobelRow_C;
2167 #if defined(HAS_SOBELROW_SSE2)
2168 if (TestCpuFlag(kCpuHasSSE2)) {
2169 SobelRow = SobelRow_Any_SSE2;
2170 if (IS_ALIGNED(width, 16)) {
2171 SobelRow = SobelRow_SSE2;
2172 }
2173 }
2174 #endif
2175 #if defined(HAS_SOBELROW_NEON)
2176 if (TestCpuFlag(kCpuHasNEON)) {
2177 SobelRow = SobelRow_Any_NEON;
2178 if (IS_ALIGNED(width, 8)) {
2179 SobelRow = SobelRow_NEON;
2180 }
2181 }
2182 #endif
2183 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2184 width, height, SobelRow);
2185 }
2186
2187 // Sobel ARGB effect with planar output.
2188 LIBYUV_API
ARGBSobelToPlane(const uint8 * src_argb,int src_stride_argb,uint8 * dst_y,int dst_stride_y,int width,int height)2189 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
2190 uint8* dst_y, int dst_stride_y,
2191 int width, int height) {
2192 void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
2193 uint8* dst_, int width) = SobelToPlaneRow_C;
2194 #if defined(HAS_SOBELTOPLANEROW_SSE2)
2195 if (TestCpuFlag(kCpuHasSSE2)) {
2196 SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
2197 if (IS_ALIGNED(width, 16)) {
2198 SobelToPlaneRow = SobelToPlaneRow_SSE2;
2199 }
2200 }
2201 #endif
2202 #if defined(HAS_SOBELTOPLANEROW_NEON)
2203 if (TestCpuFlag(kCpuHasNEON)) {
2204 SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
2205 if (IS_ALIGNED(width, 16)) {
2206 SobelToPlaneRow = SobelToPlaneRow_NEON;
2207 }
2208 }
2209 #endif
2210 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
2211 width, height, SobelToPlaneRow);
2212 }
2213
2214 // SobelXY ARGB effect.
2215 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
2216 LIBYUV_API
ARGBSobelXY(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2217 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
2218 uint8* dst_argb, int dst_stride_argb,
2219 int width, int height) {
2220 void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
2221 uint8* dst_argb, int width) = SobelXYRow_C;
2222 #if defined(HAS_SOBELXYROW_SSE2)
2223 if (TestCpuFlag(kCpuHasSSE2)) {
2224 SobelXYRow = SobelXYRow_Any_SSE2;
2225 if (IS_ALIGNED(width, 16)) {
2226 SobelXYRow = SobelXYRow_SSE2;
2227 }
2228 }
2229 #endif
2230 #if defined(HAS_SOBELXYROW_NEON)
2231 if (TestCpuFlag(kCpuHasNEON)) {
2232 SobelXYRow = SobelXYRow_Any_NEON;
2233 if (IS_ALIGNED(width, 8)) {
2234 SobelXYRow = SobelXYRow_NEON;
2235 }
2236 }
2237 #endif
2238 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2239 width, height, SobelXYRow);
2240 }
2241
2242 // Apply a 4x4 polynomial to each ARGB pixel.
2243 LIBYUV_API
ARGBPolynomial(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,const float * poly,int width,int height)2244 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
2245 uint8* dst_argb, int dst_stride_argb,
2246 const float* poly,
2247 int width, int height) {
2248 int y;
2249 void (*ARGBPolynomialRow)(const uint8* src_argb,
2250 uint8* dst_argb, const float* poly,
2251 int width) = ARGBPolynomialRow_C;
2252 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
2253 return -1;
2254 }
2255 // Negative height means invert the image.
2256 if (height < 0) {
2257 height = -height;
2258 src_argb = src_argb + (height - 1) * src_stride_argb;
2259 src_stride_argb = -src_stride_argb;
2260 }
2261 // Coalesce rows.
2262 if (src_stride_argb == width * 4 &&
2263 dst_stride_argb == width * 4) {
2264 width *= height;
2265 height = 1;
2266 src_stride_argb = dst_stride_argb = 0;
2267 }
2268 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
2269 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
2270 ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
2271 }
2272 #endif
2273 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
2274 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
2275 IS_ALIGNED(width, 2)) {
2276 ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
2277 }
2278 #endif
2279
2280 for (y = 0; y < height; ++y) {
2281 ARGBPolynomialRow(src_argb, dst_argb, poly, width);
2282 src_argb += src_stride_argb;
2283 dst_argb += dst_stride_argb;
2284 }
2285 return 0;
2286 }
2287
2288 // Apply a lumacolortable to each ARGB pixel.
2289 LIBYUV_API
ARGBLumaColorTable(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,const uint8 * luma,int width,int height)2290 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
2291 uint8* dst_argb, int dst_stride_argb,
2292 const uint8* luma,
2293 int width, int height) {
2294 int y;
2295 void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
2296 int width, const uint8* luma, const uint32 lumacoeff) =
2297 ARGBLumaColorTableRow_C;
2298 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
2299 return -1;
2300 }
2301 // Negative height means invert the image.
2302 if (height < 0) {
2303 height = -height;
2304 src_argb = src_argb + (height - 1) * src_stride_argb;
2305 src_stride_argb = -src_stride_argb;
2306 }
2307 // Coalesce rows.
2308 if (src_stride_argb == width * 4 &&
2309 dst_stride_argb == width * 4) {
2310 width *= height;
2311 height = 1;
2312 src_stride_argb = dst_stride_argb = 0;
2313 }
2314 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
2315 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
2316 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
2317 }
2318 #endif
2319
2320 for (y = 0; y < height; ++y) {
2321 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
2322 src_argb += src_stride_argb;
2323 dst_argb += dst_stride_argb;
2324 }
2325 return 0;
2326 }
2327
2328 // Copy Alpha from one ARGB image to another.
2329 LIBYUV_API
ARGBCopyAlpha(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2330 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
2331 uint8* dst_argb, int dst_stride_argb,
2332 int width, int height) {
2333 int y;
2334 void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
2335 ARGBCopyAlphaRow_C;
2336 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2337 return -1;
2338 }
2339 // Negative height means invert the image.
2340 if (height < 0) {
2341 height = -height;
2342 src_argb = src_argb + (height - 1) * src_stride_argb;
2343 src_stride_argb = -src_stride_argb;
2344 }
2345 // Coalesce rows.
2346 if (src_stride_argb == width * 4 &&
2347 dst_stride_argb == width * 4) {
2348 width *= height;
2349 height = 1;
2350 src_stride_argb = dst_stride_argb = 0;
2351 }
2352 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
2353 if (TestCpuFlag(kCpuHasSSE2)) {
2354 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
2355 if (IS_ALIGNED(width, 8)) {
2356 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
2357 }
2358 }
2359 #endif
2360 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
2361 if (TestCpuFlag(kCpuHasAVX2)) {
2362 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
2363 if (IS_ALIGNED(width, 16)) {
2364 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
2365 }
2366 }
2367 #endif
2368
2369 for (y = 0; y < height; ++y) {
2370 ARGBCopyAlphaRow(src_argb, dst_argb, width);
2371 src_argb += src_stride_argb;
2372 dst_argb += dst_stride_argb;
2373 }
2374 return 0;
2375 }
2376
2377 // Extract just the alpha channel from ARGB.
2378 LIBYUV_API
ARGBExtractAlpha(const uint8 * src_argb,int src_stride,uint8 * dst_a,int dst_stride,int width,int height)2379 int ARGBExtractAlpha(const uint8* src_argb, int src_stride,
2380 uint8* dst_a, int dst_stride,
2381 int width, int height) {
2382 if (!src_argb || !dst_a || width <= 0 || height == 0) {
2383 return -1;
2384 }
2385 // Negative height means invert the image.
2386 if (height < 0) {
2387 height = -height;
2388 src_argb += (height - 1) * src_stride;
2389 src_stride = -src_stride;
2390 }
2391 // Coalesce rows.
2392 if (src_stride == width * 4 && dst_stride == width) {
2393 width *= height;
2394 height = 1;
2395 src_stride = dst_stride = 0;
2396 }
2397 void (*ARGBExtractAlphaRow)(const uint8 *src_argb, uint8 *dst_a, int width) =
2398 ARGBExtractAlphaRow_C;
2399 #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
2400 if (TestCpuFlag(kCpuHasSSE2)) {
2401 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
2402 : ARGBExtractAlphaRow_Any_SSE2;
2403 }
2404 #endif
2405 #if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
2406 if (TestCpuFlag(kCpuHasNEON)) {
2407 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
2408 : ARGBExtractAlphaRow_Any_NEON;
2409 }
2410 #endif
2411
2412 for (int y = 0; y < height; ++y) {
2413 ARGBExtractAlphaRow(src_argb, dst_a, width);
2414 src_argb += src_stride;
2415 dst_a += dst_stride;
2416 }
2417 return 0;
2418 }
2419
2420 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
2421 LIBYUV_API
ARGBCopyYToAlpha(const uint8 * src_y,int src_stride_y,uint8 * dst_argb,int dst_stride_argb,int width,int height)2422 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
2423 uint8* dst_argb, int dst_stride_argb,
2424 int width, int height) {
2425 int y;
2426 void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
2427 ARGBCopyYToAlphaRow_C;
2428 if (!src_y || !dst_argb || width <= 0 || height == 0) {
2429 return -1;
2430 }
2431 // Negative height means invert the image.
2432 if (height < 0) {
2433 height = -height;
2434 src_y = src_y + (height - 1) * src_stride_y;
2435 src_stride_y = -src_stride_y;
2436 }
2437 // Coalesce rows.
2438 if (src_stride_y == width &&
2439 dst_stride_argb == width * 4) {
2440 width *= height;
2441 height = 1;
2442 src_stride_y = dst_stride_argb = 0;
2443 }
2444 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
2445 if (TestCpuFlag(kCpuHasSSE2)) {
2446 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
2447 if (IS_ALIGNED(width, 8)) {
2448 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
2449 }
2450 }
2451 #endif
2452 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
2453 if (TestCpuFlag(kCpuHasAVX2)) {
2454 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
2455 if (IS_ALIGNED(width, 16)) {
2456 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
2457 }
2458 }
2459 #endif
2460
2461 for (y = 0; y < height; ++y) {
2462 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
2463 src_y += src_stride_y;
2464 dst_argb += dst_stride_argb;
2465 }
2466 return 0;
2467 }
2468
2469 // TODO(fbarchard): Consider if width is even Y channel can be split
2470 // directly. A SplitUVRow_Odd function could copy the remaining chroma.
2471
// Convert YUY2 (packed 4:2:2) to NV12 (planar Y + interleaved UV, 4:2:0):
// splits Y from the interleaved UV bytes, and vertically averages each
// pair of UV rows to halve the chroma height.
LIBYUV_API
int YUY2ToNV12(const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_uv, int dst_stride_uv,
               int width, int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) = SplitUVRow_C;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_yuy2 ||
      !dst_y || !dst_uv ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif

  {
    // Width in sample pairs, rounded up to even.
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    // Scratch layout: rows[0..awidth)       = deinterleaved Y,
    //                 rows[awidth..2*awidth)  = UV of the even row,
    //                 rows[2*awidth..3*awidth) = UV of the odd row.
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      // YUY2 stores Y in even bytes, interleaved UV in odd bytes.
      SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two UV rows (fraction 128 = halfway) into one NV12 row.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_yuy2 += src_stride_yuy2 * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Split Y from UV.
      // Odd trailing row: its UV goes to dst_uv unaveraged.
      SplitUVRow(src_yuy2, rows, dst_uv, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
2569
// Convert UYVY (packed 4:2:2) to NV12 (planar Y + interleaved UV, 4:2:0).
// Same scheme as YUY2ToNV12 above, but UYVY has UV in even bytes and Y in
// odd bytes, so the SplitUVRow destination arguments are swapped.
LIBYUV_API
int UYVYToNV12(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_uv, int dst_stride_uv,
               int width, int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) = SplitUVRow_C;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_uyvy ||
      !dst_y || !dst_uv ||
      width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif

  {
    // Width in sample pairs, rounded up to even.
    int awidth = halfwidth * 2;
    // row of y and 2 rows of uv
    // Scratch layout: rows[0..awidth)       = deinterleaved Y,
    //                 rows[awidth..2*awidth)  = UV of the even row,
    //                 rows[2*awidth..3*awidth) = UV of the odd row.
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      // UYVY stores interleaved UV in even bytes, Y in odd bytes.
      SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two UV rows (fraction 128 = halfway) into one NV12 row.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_uyvy += src_stride_uyvy * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Split Y from UV.
      // Odd trailing row: its UV goes to dst_uv unaveraged.
      SplitUVRow(src_uyvy, dst_uv, rows, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
2667
2668 #ifdef __cplusplus
2669 } // extern "C"
2670 } // namespace libyuv
2671 #endif
2672