/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/planar_functions.h"

#include <string.h>  // for memset()

#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/row.h"
#include "libyuv/scale_row.h"  // for ScaleRowDown2

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

27 // Copy a plane of data
28 LIBYUV_API
CopyPlane(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)29 void CopyPlane(const uint8* src_y,
30 int src_stride_y,
31 uint8* dst_y,
32 int dst_stride_y,
33 int width,
34 int height) {
35 int y;
36 void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
37 // Negative height means invert the image.
38 if (height < 0) {
39 height = -height;
40 dst_y = dst_y + (height - 1) * dst_stride_y;
41 dst_stride_y = -dst_stride_y;
42 }
43 // Coalesce rows.
44 if (src_stride_y == width && dst_stride_y == width) {
45 width *= height;
46 height = 1;
47 src_stride_y = dst_stride_y = 0;
48 }
49 // Nothing to do.
50 if (src_y == dst_y && src_stride_y == dst_stride_y) {
51 return;
52 }
53 #if defined(HAS_COPYROW_SSE2)
54 if (TestCpuFlag(kCpuHasSSE2)) {
55 CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
56 }
57 #endif
58 #if defined(HAS_COPYROW_AVX)
59 if (TestCpuFlag(kCpuHasAVX)) {
60 CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
61 }
62 #endif
63 #if defined(HAS_COPYROW_ERMS)
64 if (TestCpuFlag(kCpuHasERMS)) {
65 CopyRow = CopyRow_ERMS;
66 }
67 #endif
68 #if defined(HAS_COPYROW_NEON)
69 if (TestCpuFlag(kCpuHasNEON)) {
70 CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
71 }
72 #endif
73 #if defined(HAS_COPYROW_MIPS)
74 if (TestCpuFlag(kCpuHasMIPS)) {
75 CopyRow = CopyRow_MIPS;
76 }
77 #endif
78
79 // Copy plane
80 for (y = 0; y < height; ++y) {
81 CopyRow(src_y, dst_y, width);
82 src_y += src_stride_y;
83 dst_y += dst_stride_y;
84 }
85 }
86
87 // TODO(fbarchard): Consider support for negative height.
88 // TODO(fbarchard): Consider stride measured in bytes.
89 LIBYUV_API
CopyPlane_16(const uint16 * src_y,int src_stride_y,uint16 * dst_y,int dst_stride_y,int width,int height)90 void CopyPlane_16(const uint16* src_y,
91 int src_stride_y,
92 uint16* dst_y,
93 int dst_stride_y,
94 int width,
95 int height) {
96 int y;
97 void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
98 // Coalesce rows.
99 if (src_stride_y == width && dst_stride_y == width) {
100 width *= height;
101 height = 1;
102 src_stride_y = dst_stride_y = 0;
103 }
104 #if defined(HAS_COPYROW_16_SSE2)
105 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) {
106 CopyRow = CopyRow_16_SSE2;
107 }
108 #endif
109 #if defined(HAS_COPYROW_16_ERMS)
110 if (TestCpuFlag(kCpuHasERMS)) {
111 CopyRow = CopyRow_16_ERMS;
112 }
113 #endif
114 #if defined(HAS_COPYROW_16_NEON)
115 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
116 CopyRow = CopyRow_16_NEON;
117 }
118 #endif
119 #if defined(HAS_COPYROW_16_MIPS)
120 if (TestCpuFlag(kCpuHasMIPS)) {
121 CopyRow = CopyRow_16_MIPS;
122 }
123 #endif
124
125 // Copy plane
126 for (y = 0; y < height; ++y) {
127 CopyRow(src_y, dst_y, width);
128 src_y += src_stride_y;
129 dst_y += dst_stride_y;
130 }
131 }
132
133 // Copy I422.
134 LIBYUV_API
I422Copy(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)135 int I422Copy(const uint8* src_y,
136 int src_stride_y,
137 const uint8* src_u,
138 int src_stride_u,
139 const uint8* src_v,
140 int src_stride_v,
141 uint8* dst_y,
142 int dst_stride_y,
143 uint8* dst_u,
144 int dst_stride_u,
145 uint8* dst_v,
146 int dst_stride_v,
147 int width,
148 int height) {
149 int halfwidth = (width + 1) >> 1;
150 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
151 return -1;
152 }
153 // Negative height means invert the image.
154 if (height < 0) {
155 height = -height;
156 src_y = src_y + (height - 1) * src_stride_y;
157 src_u = src_u + (height - 1) * src_stride_u;
158 src_v = src_v + (height - 1) * src_stride_v;
159 src_stride_y = -src_stride_y;
160 src_stride_u = -src_stride_u;
161 src_stride_v = -src_stride_v;
162 }
163
164 if (dst_y) {
165 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
166 }
167 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
168 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
169 return 0;
170 }
171
172 // Copy I444.
173 LIBYUV_API
I444Copy(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)174 int I444Copy(const uint8* src_y,
175 int src_stride_y,
176 const uint8* src_u,
177 int src_stride_u,
178 const uint8* src_v,
179 int src_stride_v,
180 uint8* dst_y,
181 int dst_stride_y,
182 uint8* dst_u,
183 int dst_stride_u,
184 uint8* dst_v,
185 int dst_stride_v,
186 int width,
187 int height) {
188 if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) {
189 return -1;
190 }
191 // Negative height means invert the image.
192 if (height < 0) {
193 height = -height;
194 src_y = src_y + (height - 1) * src_stride_y;
195 src_u = src_u + (height - 1) * src_stride_u;
196 src_v = src_v + (height - 1) * src_stride_v;
197 src_stride_y = -src_stride_y;
198 src_stride_u = -src_stride_u;
199 src_stride_v = -src_stride_v;
200 }
201
202 if (dst_y) {
203 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
204 }
205 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
206 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
207 return 0;
208 }
209
210 // Copy I400.
211 LIBYUV_API
I400ToI400(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)212 int I400ToI400(const uint8* src_y,
213 int src_stride_y,
214 uint8* dst_y,
215 int dst_stride_y,
216 int width,
217 int height) {
218 if (!src_y || !dst_y || width <= 0 || height == 0) {
219 return -1;
220 }
221 // Negative height means invert the image.
222 if (height < 0) {
223 height = -height;
224 src_y = src_y + (height - 1) * src_stride_y;
225 src_stride_y = -src_stride_y;
226 }
227 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
228 return 0;
229 }
230
231 // Convert I420 to I400.
232 LIBYUV_API
I420ToI400(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,int width,int height)233 int I420ToI400(const uint8* src_y,
234 int src_stride_y,
235 const uint8* src_u,
236 int src_stride_u,
237 const uint8* src_v,
238 int src_stride_v,
239 uint8* dst_y,
240 int dst_stride_y,
241 int width,
242 int height) {
243 (void)src_u;
244 (void)src_stride_u;
245 (void)src_v;
246 (void)src_stride_v;
247 if (!src_y || !dst_y || width <= 0 || height == 0) {
248 return -1;
249 }
250 // Negative height means invert the image.
251 if (height < 0) {
252 height = -height;
253 src_y = src_y + (height - 1) * src_stride_y;
254 src_stride_y = -src_stride_y;
255 }
256
257 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
258 return 0;
259 }
260
261 // Support function for NV12 etc UV channels.
262 // Width and height are plane sizes (typically half pixel width).
263 LIBYUV_API
SplitUVPlane(const uint8 * src_uv,int src_stride_uv,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)264 void SplitUVPlane(const uint8* src_uv,
265 int src_stride_uv,
266 uint8* dst_u,
267 int dst_stride_u,
268 uint8* dst_v,
269 int dst_stride_v,
270 int width,
271 int height) {
272 int y;
273 void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
274 int width) = SplitUVRow_C;
275 // Negative height means invert the image.
276 if (height < 0) {
277 height = -height;
278 dst_u = dst_u + (height - 1) * dst_stride_u;
279 dst_v = dst_v + (height - 1) * dst_stride_v;
280 dst_stride_u = -dst_stride_u;
281 dst_stride_v = -dst_stride_v;
282 }
283 // Coalesce rows.
284 if (src_stride_uv == width * 2 && dst_stride_u == width &&
285 dst_stride_v == width) {
286 width *= height;
287 height = 1;
288 src_stride_uv = dst_stride_u = dst_stride_v = 0;
289 }
290 #if defined(HAS_SPLITUVROW_SSE2)
291 if (TestCpuFlag(kCpuHasSSE2)) {
292 SplitUVRow = SplitUVRow_Any_SSE2;
293 if (IS_ALIGNED(width, 16)) {
294 SplitUVRow = SplitUVRow_SSE2;
295 }
296 }
297 #endif
298 #if defined(HAS_SPLITUVROW_AVX2)
299 if (TestCpuFlag(kCpuHasAVX2)) {
300 SplitUVRow = SplitUVRow_Any_AVX2;
301 if (IS_ALIGNED(width, 32)) {
302 SplitUVRow = SplitUVRow_AVX2;
303 }
304 }
305 #endif
306 #if defined(HAS_SPLITUVROW_NEON)
307 if (TestCpuFlag(kCpuHasNEON)) {
308 SplitUVRow = SplitUVRow_Any_NEON;
309 if (IS_ALIGNED(width, 16)) {
310 SplitUVRow = SplitUVRow_NEON;
311 }
312 }
313 #endif
314 #if defined(HAS_SPLITUVROW_DSPR2)
315 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_u, 4) &&
316 IS_ALIGNED(dst_stride_u, 4) && IS_ALIGNED(dst_v, 4) &&
317 IS_ALIGNED(dst_stride_v, 4)) {
318 SplitUVRow = SplitUVRow_Any_DSPR2;
319 if (IS_ALIGNED(width, 16)) {
320 SplitUVRow = SplitUVRow_DSPR2;
321 }
322 }
323 #endif
324
325 for (y = 0; y < height; ++y) {
326 // Copy a row of UV.
327 SplitUVRow(src_uv, dst_u, dst_v, width);
328 dst_u += dst_stride_u;
329 dst_v += dst_stride_v;
330 src_uv += src_stride_uv;
331 }
332 }
333
334 LIBYUV_API
MergeUVPlane(const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_uv,int dst_stride_uv,int width,int height)335 void MergeUVPlane(const uint8* src_u,
336 int src_stride_u,
337 const uint8* src_v,
338 int src_stride_v,
339 uint8* dst_uv,
340 int dst_stride_uv,
341 int width,
342 int height) {
343 int y;
344 void (*MergeUVRow)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
345 int width) = MergeUVRow_C;
346 // Coalesce rows.
347 // Negative height means invert the image.
348 if (height < 0) {
349 height = -height;
350 dst_uv = dst_uv + (height - 1) * dst_stride_uv;
351 dst_stride_uv = -dst_stride_uv;
352 }
353 // Coalesce rows.
354 if (src_stride_u == width && src_stride_v == width &&
355 dst_stride_uv == width * 2) {
356 width *= height;
357 height = 1;
358 src_stride_u = src_stride_v = dst_stride_uv = 0;
359 }
360 #if defined(HAS_MERGEUVROW_SSE2)
361 if (TestCpuFlag(kCpuHasSSE2)) {
362 MergeUVRow = MergeUVRow_Any_SSE2;
363 if (IS_ALIGNED(width, 16)) {
364 MergeUVRow = MergeUVRow_SSE2;
365 }
366 }
367 #endif
368 #if defined(HAS_MERGEUVROW_AVX2)
369 if (TestCpuFlag(kCpuHasAVX2)) {
370 MergeUVRow = MergeUVRow_Any_AVX2;
371 if (IS_ALIGNED(width, 32)) {
372 MergeUVRow = MergeUVRow_AVX2;
373 }
374 }
375 #endif
376 #if defined(HAS_MERGEUVROW_NEON)
377 if (TestCpuFlag(kCpuHasNEON)) {
378 MergeUVRow = MergeUVRow_Any_NEON;
379 if (IS_ALIGNED(width, 16)) {
380 MergeUVRow = MergeUVRow_NEON;
381 }
382 }
383 #endif
384 #if defined(HAS_MERGEUVROW_MSA)
385 if (TestCpuFlag(kCpuHasMSA)) {
386 MergeUVRow = MergeUVRow_Any_MSA;
387 if (IS_ALIGNED(width, 16)) {
388 MergeUVRow = MergeUVRow_MSA;
389 }
390 }
391 #endif
392
393 for (y = 0; y < height; ++y) {
394 // Merge a row of U and V into a row of UV.
395 MergeUVRow(src_u, src_v, dst_uv, width);
396 src_u += src_stride_u;
397 src_v += src_stride_v;
398 dst_uv += dst_stride_uv;
399 }
400 }
401
402 // Mirror a plane of data.
MirrorPlane(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)403 void MirrorPlane(const uint8* src_y,
404 int src_stride_y,
405 uint8* dst_y,
406 int dst_stride_y,
407 int width,
408 int height) {
409 int y;
410 void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
411 // Negative height means invert the image.
412 if (height < 0) {
413 height = -height;
414 src_y = src_y + (height - 1) * src_stride_y;
415 src_stride_y = -src_stride_y;
416 }
417 #if defined(HAS_MIRRORROW_NEON)
418 if (TestCpuFlag(kCpuHasNEON)) {
419 MirrorRow = MirrorRow_Any_NEON;
420 if (IS_ALIGNED(width, 16)) {
421 MirrorRow = MirrorRow_NEON;
422 }
423 }
424 #endif
425 #if defined(HAS_MIRRORROW_SSSE3)
426 if (TestCpuFlag(kCpuHasSSSE3)) {
427 MirrorRow = MirrorRow_Any_SSSE3;
428 if (IS_ALIGNED(width, 16)) {
429 MirrorRow = MirrorRow_SSSE3;
430 }
431 }
432 #endif
433 #if defined(HAS_MIRRORROW_AVX2)
434 if (TestCpuFlag(kCpuHasAVX2)) {
435 MirrorRow = MirrorRow_Any_AVX2;
436 if (IS_ALIGNED(width, 32)) {
437 MirrorRow = MirrorRow_AVX2;
438 }
439 }
440 #endif
441 // TODO(fbarchard): Mirror on mips handle unaligned memory.
442 #if defined(HAS_MIRRORROW_DSPR2)
443 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_y, 4) &&
444 IS_ALIGNED(src_stride_y, 4) && IS_ALIGNED(dst_y, 4) &&
445 IS_ALIGNED(dst_stride_y, 4)) {
446 MirrorRow = MirrorRow_DSPR2;
447 }
448 #endif
449 #if defined(HAS_MIRRORROW_MSA)
450 if (TestCpuFlag(kCpuHasMSA)) {
451 MirrorRow = MirrorRow_Any_MSA;
452 if (IS_ALIGNED(width, 64)) {
453 MirrorRow = MirrorRow_MSA;
454 }
455 }
456 #endif
457
458 // Mirror plane
459 for (y = 0; y < height; ++y) {
460 MirrorRow(src_y, dst_y, width);
461 src_y += src_stride_y;
462 dst_y += dst_stride_y;
463 }
464 }
465
466 // Convert YUY2 to I422.
467 LIBYUV_API
YUY2ToI422(const uint8 * src_yuy2,int src_stride_yuy2,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)468 int YUY2ToI422(const uint8* src_yuy2,
469 int src_stride_yuy2,
470 uint8* dst_y,
471 int dst_stride_y,
472 uint8* dst_u,
473 int dst_stride_u,
474 uint8* dst_v,
475 int dst_stride_v,
476 int width,
477 int height) {
478 int y;
479 void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
480 int width) = YUY2ToUV422Row_C;
481 void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
482 YUY2ToYRow_C;
483 if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
484 return -1;
485 }
486 // Negative height means invert the image.
487 if (height < 0) {
488 height = -height;
489 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
490 src_stride_yuy2 = -src_stride_yuy2;
491 }
492 // Coalesce rows.
493 if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
494 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
495 width * height <= 32768) {
496 width *= height;
497 height = 1;
498 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
499 }
500 #if defined(HAS_YUY2TOYROW_SSE2)
501 if (TestCpuFlag(kCpuHasSSE2)) {
502 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
503 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
504 if (IS_ALIGNED(width, 16)) {
505 YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
506 YUY2ToYRow = YUY2ToYRow_SSE2;
507 }
508 }
509 #endif
510 #if defined(HAS_YUY2TOYROW_AVX2)
511 if (TestCpuFlag(kCpuHasAVX2)) {
512 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
513 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
514 if (IS_ALIGNED(width, 32)) {
515 YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
516 YUY2ToYRow = YUY2ToYRow_AVX2;
517 }
518 }
519 #endif
520 #if defined(HAS_YUY2TOYROW_NEON)
521 if (TestCpuFlag(kCpuHasNEON)) {
522 YUY2ToYRow = YUY2ToYRow_Any_NEON;
523 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
524 if (IS_ALIGNED(width, 16)) {
525 YUY2ToYRow = YUY2ToYRow_NEON;
526 YUY2ToUV422Row = YUY2ToUV422Row_NEON;
527 }
528 }
529 #endif
530 #if defined(HAS_YUY2TOYROW_MSA)
531 if (TestCpuFlag(kCpuHasMSA)) {
532 YUY2ToYRow = YUY2ToYRow_Any_MSA;
533 YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
534 if (IS_ALIGNED(width, 32)) {
535 YUY2ToYRow = YUY2ToYRow_MSA;
536 YUY2ToUV422Row = YUY2ToUV422Row_MSA;
537 }
538 }
539 #endif
540
541 for (y = 0; y < height; ++y) {
542 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
543 YUY2ToYRow(src_yuy2, dst_y, width);
544 src_yuy2 += src_stride_yuy2;
545 dst_y += dst_stride_y;
546 dst_u += dst_stride_u;
547 dst_v += dst_stride_v;
548 }
549 return 0;
550 }
551
552 // Convert UYVY to I422.
553 LIBYUV_API
UYVYToI422(const uint8 * src_uyvy,int src_stride_uyvy,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)554 int UYVYToI422(const uint8* src_uyvy,
555 int src_stride_uyvy,
556 uint8* dst_y,
557 int dst_stride_y,
558 uint8* dst_u,
559 int dst_stride_u,
560 uint8* dst_v,
561 int dst_stride_v,
562 int width,
563 int height) {
564 int y;
565 void (*UYVYToUV422Row)(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,
566 int width) = UYVYToUV422Row_C;
567 void (*UYVYToYRow)(const uint8* src_uyvy, uint8* dst_y, int width) =
568 UYVYToYRow_C;
569 if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
570 return -1;
571 }
572 // Negative height means invert the image.
573 if (height < 0) {
574 height = -height;
575 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
576 src_stride_uyvy = -src_stride_uyvy;
577 }
578 // Coalesce rows.
579 if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
580 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
581 width * height <= 32768) {
582 width *= height;
583 height = 1;
584 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
585 }
586 #if defined(HAS_UYVYTOYROW_SSE2)
587 if (TestCpuFlag(kCpuHasSSE2)) {
588 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
589 UYVYToYRow = UYVYToYRow_Any_SSE2;
590 if (IS_ALIGNED(width, 16)) {
591 UYVYToUV422Row = UYVYToUV422Row_SSE2;
592 UYVYToYRow = UYVYToYRow_SSE2;
593 }
594 }
595 #endif
596 #if defined(HAS_UYVYTOYROW_AVX2)
597 if (TestCpuFlag(kCpuHasAVX2)) {
598 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
599 UYVYToYRow = UYVYToYRow_Any_AVX2;
600 if (IS_ALIGNED(width, 32)) {
601 UYVYToUV422Row = UYVYToUV422Row_AVX2;
602 UYVYToYRow = UYVYToYRow_AVX2;
603 }
604 }
605 #endif
606 #if defined(HAS_UYVYTOYROW_NEON)
607 if (TestCpuFlag(kCpuHasNEON)) {
608 UYVYToYRow = UYVYToYRow_Any_NEON;
609 UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
610 if (IS_ALIGNED(width, 16)) {
611 UYVYToYRow = UYVYToYRow_NEON;
612 UYVYToUV422Row = UYVYToUV422Row_NEON;
613 }
614 }
615 #endif
616 #if defined(HAS_UYVYTOYROW_MSA)
617 if (TestCpuFlag(kCpuHasMSA)) {
618 UYVYToYRow = UYVYToYRow_Any_MSA;
619 UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
620 if (IS_ALIGNED(width, 32)) {
621 UYVYToYRow = UYVYToYRow_MSA;
622 UYVYToUV422Row = UYVYToUV422Row_MSA;
623 }
624 }
625 #endif
626
627 for (y = 0; y < height; ++y) {
628 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
629 UYVYToYRow(src_uyvy, dst_y, width);
630 src_uyvy += src_stride_uyvy;
631 dst_y += dst_stride_y;
632 dst_u += dst_stride_u;
633 dst_v += dst_stride_v;
634 }
635 return 0;
636 }
637
638 // Convert YUY2 to Y.
639 LIBYUV_API
YUY2ToY(const uint8 * src_yuy2,int src_stride_yuy2,uint8 * dst_y,int dst_stride_y,int width,int height)640 int YUY2ToY(const uint8* src_yuy2,
641 int src_stride_yuy2,
642 uint8* dst_y,
643 int dst_stride_y,
644 int width,
645 int height) {
646 int y;
647 void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int width) =
648 YUY2ToYRow_C;
649 if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
650 return -1;
651 }
652 // Negative height means invert the image.
653 if (height < 0) {
654 height = -height;
655 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
656 src_stride_yuy2 = -src_stride_yuy2;
657 }
658 // Coalesce rows.
659 if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
660 width *= height;
661 height = 1;
662 src_stride_yuy2 = dst_stride_y = 0;
663 }
664 #if defined(HAS_YUY2TOYROW_SSE2)
665 if (TestCpuFlag(kCpuHasSSE2)) {
666 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
667 if (IS_ALIGNED(width, 16)) {
668 YUY2ToYRow = YUY2ToYRow_SSE2;
669 }
670 }
671 #endif
672 #if defined(HAS_YUY2TOYROW_AVX2)
673 if (TestCpuFlag(kCpuHasAVX2)) {
674 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
675 if (IS_ALIGNED(width, 32)) {
676 YUY2ToYRow = YUY2ToYRow_AVX2;
677 }
678 }
679 #endif
680 #if defined(HAS_YUY2TOYROW_NEON)
681 if (TestCpuFlag(kCpuHasNEON)) {
682 YUY2ToYRow = YUY2ToYRow_Any_NEON;
683 if (IS_ALIGNED(width, 16)) {
684 YUY2ToYRow = YUY2ToYRow_NEON;
685 }
686 }
687 #endif
688 #if defined(HAS_YUY2TOYROW_MSA)
689 if (TestCpuFlag(kCpuHasMSA)) {
690 YUY2ToYRow = YUY2ToYRow_Any_MSA;
691 if (IS_ALIGNED(width, 32)) {
692 YUY2ToYRow = YUY2ToYRow_MSA;
693 }
694 }
695 #endif
696
697 for (y = 0; y < height; ++y) {
698 YUY2ToYRow(src_yuy2, dst_y, width);
699 src_yuy2 += src_stride_yuy2;
700 dst_y += dst_stride_y;
701 }
702 return 0;
703 }
704
705 // Mirror I400 with optional flipping
706 LIBYUV_API
I400Mirror(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)707 int I400Mirror(const uint8* src_y,
708 int src_stride_y,
709 uint8* dst_y,
710 int dst_stride_y,
711 int width,
712 int height) {
713 if (!src_y || !dst_y || width <= 0 || height == 0) {
714 return -1;
715 }
716 // Negative height means invert the image.
717 if (height < 0) {
718 height = -height;
719 src_y = src_y + (height - 1) * src_stride_y;
720 src_stride_y = -src_stride_y;
721 }
722
723 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
724 return 0;
725 }
726
727 // Mirror I420 with optional flipping
728 LIBYUV_API
I420Mirror(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)729 int I420Mirror(const uint8* src_y,
730 int src_stride_y,
731 const uint8* src_u,
732 int src_stride_u,
733 const uint8* src_v,
734 int src_stride_v,
735 uint8* dst_y,
736 int dst_stride_y,
737 uint8* dst_u,
738 int dst_stride_u,
739 uint8* dst_v,
740 int dst_stride_v,
741 int width,
742 int height) {
743 int halfwidth = (width + 1) >> 1;
744 int halfheight = (height + 1) >> 1;
745 if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 ||
746 height == 0) {
747 return -1;
748 }
749 // Negative height means invert the image.
750 if (height < 0) {
751 height = -height;
752 halfheight = (height + 1) >> 1;
753 src_y = src_y + (height - 1) * src_stride_y;
754 src_u = src_u + (halfheight - 1) * src_stride_u;
755 src_v = src_v + (halfheight - 1) * src_stride_v;
756 src_stride_y = -src_stride_y;
757 src_stride_u = -src_stride_u;
758 src_stride_v = -src_stride_v;
759 }
760
761 if (dst_y) {
762 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
763 }
764 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
765 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
766 return 0;
767 }
768
769 // ARGB mirror.
770 LIBYUV_API
ARGBMirror(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)771 int ARGBMirror(const uint8* src_argb,
772 int src_stride_argb,
773 uint8* dst_argb,
774 int dst_stride_argb,
775 int width,
776 int height) {
777 int y;
778 void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
779 ARGBMirrorRow_C;
780 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
781 return -1;
782 }
783 // Negative height means invert the image.
784 if (height < 0) {
785 height = -height;
786 src_argb = src_argb + (height - 1) * src_stride_argb;
787 src_stride_argb = -src_stride_argb;
788 }
789 #if defined(HAS_ARGBMIRRORROW_NEON)
790 if (TestCpuFlag(kCpuHasNEON)) {
791 ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
792 if (IS_ALIGNED(width, 4)) {
793 ARGBMirrorRow = ARGBMirrorRow_NEON;
794 }
795 }
796 #endif
797 #if defined(HAS_ARGBMIRRORROW_SSE2)
798 if (TestCpuFlag(kCpuHasSSE2)) {
799 ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
800 if (IS_ALIGNED(width, 4)) {
801 ARGBMirrorRow = ARGBMirrorRow_SSE2;
802 }
803 }
804 #endif
805 #if defined(HAS_ARGBMIRRORROW_AVX2)
806 if (TestCpuFlag(kCpuHasAVX2)) {
807 ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
808 if (IS_ALIGNED(width, 8)) {
809 ARGBMirrorRow = ARGBMirrorRow_AVX2;
810 }
811 }
812 #endif
813 #if defined(HAS_ARGBMIRRORROW_MSA)
814 if (TestCpuFlag(kCpuHasMSA)) {
815 ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
816 if (IS_ALIGNED(width, 16)) {
817 ARGBMirrorRow = ARGBMirrorRow_MSA;
818 }
819 }
820 #endif
821
822 // Mirror plane
823 for (y = 0; y < height; ++y) {
824 ARGBMirrorRow(src_argb, dst_argb, width);
825 src_argb += src_stride_argb;
826 dst_argb += dst_stride_argb;
827 }
828 return 0;
829 }
830
831 // Get a blender that optimized for the CPU and pixel count.
832 // As there are 6 blenders to choose from, the caller should try to use
833 // the same blend function for all pixels if possible.
834 LIBYUV_API
GetARGBBlend()835 ARGBBlendRow GetARGBBlend() {
836 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
837 uint8* dst_argb, int width) = ARGBBlendRow_C;
838 #if defined(HAS_ARGBBLENDROW_SSSE3)
839 if (TestCpuFlag(kCpuHasSSSE3)) {
840 ARGBBlendRow = ARGBBlendRow_SSSE3;
841 return ARGBBlendRow;
842 }
843 #endif
844 #if defined(HAS_ARGBBLENDROW_NEON)
845 if (TestCpuFlag(kCpuHasNEON)) {
846 ARGBBlendRow = ARGBBlendRow_NEON;
847 }
848 #endif
849 return ARGBBlendRow;
850 }
851
852 // Alpha Blend 2 ARGB images and store to destination.
853 LIBYUV_API
ARGBBlend(const uint8 * src_argb0,int src_stride_argb0,const uint8 * src_argb1,int src_stride_argb1,uint8 * dst_argb,int dst_stride_argb,int width,int height)854 int ARGBBlend(const uint8* src_argb0,
855 int src_stride_argb0,
856 const uint8* src_argb1,
857 int src_stride_argb1,
858 uint8* dst_argb,
859 int dst_stride_argb,
860 int width,
861 int height) {
862 int y;
863 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
864 uint8* dst_argb, int width) = GetARGBBlend();
865 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
866 return -1;
867 }
868 // Negative height means invert the image.
869 if (height < 0) {
870 height = -height;
871 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
872 dst_stride_argb = -dst_stride_argb;
873 }
874 // Coalesce rows.
875 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
876 dst_stride_argb == width * 4) {
877 width *= height;
878 height = 1;
879 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
880 }
881
882 for (y = 0; y < height; ++y) {
883 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
884 src_argb0 += src_stride_argb0;
885 src_argb1 += src_stride_argb1;
886 dst_argb += dst_stride_argb;
887 }
888 return 0;
889 }
890
891 // Alpha Blend plane and store to destination.
892 LIBYUV_API
BlendPlane(const uint8 * src_y0,int src_stride_y0,const uint8 * src_y1,int src_stride_y1,const uint8 * alpha,int alpha_stride,uint8 * dst_y,int dst_stride_y,int width,int height)893 int BlendPlane(const uint8* src_y0,
894 int src_stride_y0,
895 const uint8* src_y1,
896 int src_stride_y1,
897 const uint8* alpha,
898 int alpha_stride,
899 uint8* dst_y,
900 int dst_stride_y,
901 int width,
902 int height) {
903 int y;
904 void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
905 const uint8* alpha, uint8* dst, int width) =
906 BlendPlaneRow_C;
907 if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
908 return -1;
909 }
910 // Negative height means invert the image.
911 if (height < 0) {
912 height = -height;
913 dst_y = dst_y + (height - 1) * dst_stride_y;
914 dst_stride_y = -dst_stride_y;
915 }
916
917 // Coalesce rows for Y plane.
918 if (src_stride_y0 == width && src_stride_y1 == width &&
919 alpha_stride == width && dst_stride_y == width) {
920 width *= height;
921 height = 1;
922 src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
923 }
924
925 #if defined(HAS_BLENDPLANEROW_SSSE3)
926 if (TestCpuFlag(kCpuHasSSSE3)) {
927 BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
928 if (IS_ALIGNED(width, 8)) {
929 BlendPlaneRow = BlendPlaneRow_SSSE3;
930 }
931 }
932 #endif
933 #if defined(HAS_BLENDPLANEROW_AVX2)
934 if (TestCpuFlag(kCpuHasAVX2)) {
935 BlendPlaneRow = BlendPlaneRow_Any_AVX2;
936 if (IS_ALIGNED(width, 32)) {
937 BlendPlaneRow = BlendPlaneRow_AVX2;
938 }
939 }
940 #endif
941
942 for (y = 0; y < height; ++y) {
943 BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
944 src_y0 += src_stride_y0;
945 src_y1 += src_stride_y1;
946 alpha += alpha_stride;
947 dst_y += dst_stride_y;
948 }
949 return 0;
950 }
951
952 #define MAXTWIDTH 2048
953 // Alpha Blend YUV images and store to destination.
954 LIBYUV_API
I420Blend(const uint8 * src_y0,int src_stride_y0,const uint8 * src_u0,int src_stride_u0,const uint8 * src_v0,int src_stride_v0,const uint8 * src_y1,int src_stride_y1,const uint8 * src_u1,int src_stride_u1,const uint8 * src_v1,int src_stride_v1,const uint8 * alpha,int alpha_stride,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)955 int I420Blend(const uint8* src_y0,
956 int src_stride_y0,
957 const uint8* src_u0,
958 int src_stride_u0,
959 const uint8* src_v0,
960 int src_stride_v0,
961 const uint8* src_y1,
962 int src_stride_y1,
963 const uint8* src_u1,
964 int src_stride_u1,
965 const uint8* src_v1,
966 int src_stride_v1,
967 const uint8* alpha,
968 int alpha_stride,
969 uint8* dst_y,
970 int dst_stride_y,
971 uint8* dst_u,
972 int dst_stride_u,
973 uint8* dst_v,
974 int dst_stride_v,
975 int width,
976 int height) {
977 int y;
978 // Half width/height for UV.
979 int halfwidth = (width + 1) >> 1;
980 void (*BlendPlaneRow)(const uint8* src0, const uint8* src1,
981 const uint8* alpha, uint8* dst, int width) =
982 BlendPlaneRow_C;
983 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
984 uint8* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
985 if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
986 !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
987 return -1;
988 }
989
990 // Negative height means invert the image.
991 if (height < 0) {
992 height = -height;
993 dst_y = dst_y + (height - 1) * dst_stride_y;
994 dst_stride_y = -dst_stride_y;
995 }
996
997 // Blend Y plane.
998 BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
999 dst_y, dst_stride_y, width, height);
1000
1001 #if defined(HAS_BLENDPLANEROW_SSSE3)
1002 if (TestCpuFlag(kCpuHasSSSE3)) {
1003 BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
1004 if (IS_ALIGNED(halfwidth, 8)) {
1005 BlendPlaneRow = BlendPlaneRow_SSSE3;
1006 }
1007 }
1008 #endif
1009 #if defined(HAS_BLENDPLANEROW_AVX2)
1010 if (TestCpuFlag(kCpuHasAVX2)) {
1011 BlendPlaneRow = BlendPlaneRow_Any_AVX2;
1012 if (IS_ALIGNED(halfwidth, 32)) {
1013 BlendPlaneRow = BlendPlaneRow_AVX2;
1014 }
1015 }
1016 #endif
1017 if (!IS_ALIGNED(width, 2)) {
1018 ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
1019 }
1020 #if defined(HAS_SCALEROWDOWN2_NEON)
1021 if (TestCpuFlag(kCpuHasNEON)) {
1022 ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
1023 if (IS_ALIGNED(width, 2)) {
1024 ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
1025 if (IS_ALIGNED(halfwidth, 16)) {
1026 ScaleRowDown2 = ScaleRowDown2Box_NEON;
1027 }
1028 }
1029 }
1030 #endif
1031 #if defined(HAS_SCALEROWDOWN2_SSSE3)
1032 if (TestCpuFlag(kCpuHasSSSE3)) {
1033 ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
1034 if (IS_ALIGNED(width, 2)) {
1035 ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
1036 if (IS_ALIGNED(halfwidth, 16)) {
1037 ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
1038 }
1039 }
1040 }
1041 #endif
1042 #if defined(HAS_SCALEROWDOWN2_AVX2)
1043 if (TestCpuFlag(kCpuHasAVX2)) {
1044 ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
1045 if (IS_ALIGNED(width, 2)) {
1046 ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
1047 if (IS_ALIGNED(halfwidth, 32)) {
1048 ScaleRowDown2 = ScaleRowDown2Box_AVX2;
1049 }
1050 }
1051 }
1052 #endif
1053
1054 // Row buffer for intermediate alpha pixels.
1055 align_buffer_64(halfalpha, halfwidth);
1056 for (y = 0; y < height; y += 2) {
1057 // last row of odd height image use 1 row of alpha instead of 2.
1058 if (y == (height - 1)) {
1059 alpha_stride = 0;
1060 }
1061 // Subsample 2 rows of UV to half width and half height.
1062 ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
1063 alpha += alpha_stride * 2;
1064 BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
1065 BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
1066 src_u0 += src_stride_u0;
1067 src_u1 += src_stride_u1;
1068 dst_u += dst_stride_u;
1069 src_v0 += src_stride_v0;
1070 src_v1 += src_stride_v1;
1071 dst_v += dst_stride_v;
1072 }
1073 free_aligned_buffer_64(halfalpha);
1074 return 0;
1075 }
1076
// Multiply 2 ARGB images and store to destination.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBMultiply(const uint8* src_argb0,
                 int src_stride_argb0,
                 const uint8* src_argb1,
                 int src_stride_argb1,
                 uint8* dst_argb,
                 int dst_stride_argb,
                 int width,
                 int height) {
  int y;
  // Row function pointer; starts at the portable C implementation and is
  // upgraded below to the fastest SIMD variant the CPU supports.
  void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBMultiplyRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: when all strides equal the row width (4 bytes/pixel),
  // the image is contiguous and can be processed as one long row.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // SIMD dispatch: _Any_ variants handle arbitrary widths; the aligned
  // variants are used when width is a multiple of the vector width.
#if defined(HAS_ARGBMULTIPLYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBMultiplyRow = ARGBMultiplyRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBMULTIPLYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
    if (IS_ALIGNED(width, 4)) {
      ARGBMultiplyRow = ARGBMultiplyRow_MSA;
    }
  }
#endif

  // Multiply plane
  for (y = 0; y < height; ++y) {
    ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1148
// Add 2 ARGB images and store to destination.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBAdd(const uint8* src_argb0,
            int src_stride_argb0,
            const uint8* src_argb1,
            int src_stride_argb1,
            uint8* dst_argb,
            int dst_stride_argb,
            int width,
            int height) {
  int y;
  // Row function pointer; starts at the portable C implementation and is
  // upgraded below to the fastest SIMD variant the CPU supports.
  void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
                     int width) = ARGBAddRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
  // On Visual C (but not clang-cl) the SSE2 row is used directly without an
  // _Any_ fallback; other compilers get the usual Any/aligned pair.
#if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_SSE2;
  }
#endif
#if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__))
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBAddRow = ARGBAddRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBAddRow = ARGBAddRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAddRow = ARGBAddRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAddRow = ARGBAddRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBADDROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAddRow = ARGBAddRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAddRow = ARGBAddRow_MSA;
    }
  }
#endif

  // Add plane
  for (y = 0; y < height; ++y) {
    ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1225
// Subtract 2 ARGB images and store to destination.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBSubtract(const uint8* src_argb0,
                 int src_stride_argb0,
                 const uint8* src_argb1,
                 int src_stride_argb1,
                 uint8* dst_argb,
                 int dst_stride_argb,
                 int width,
                 int height) {
  int y;
  // Row function pointer; starts at the portable C implementation and is
  // upgraded below to the fastest SIMD variant the CPU supports.
  void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
                          int width) = ARGBSubtractRow_C;
  if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSUBTRACTROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBSubtractRow = ARGBSubtractRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSUBTRACTROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBSubtractRow = ARGBSubtractRow_MSA;
    }
  }
#endif

  // Subtract plane
  for (y = 0; y < height; ++y) {
    ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
    src_argb0 += src_stride_argb0;
    src_argb1 += src_stride_argb1;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
// Convert I422 to RGBA with matrix.
// Shared implementation for I422ToRGBA/I422ToBGRA; yuvconstants selects the
// YUV-to-RGB coefficient set. Returns 0 on success, -1 on invalid arguments.
static int I422ToRGBAMatrix(const uint8* src_y,
                            int src_stride_y,
                            const uint8* src_u,
                            int src_stride_u,
                            const uint8* src_v,
                            int src_stride_v,
                            uint8* dst_rgba,
                            int dst_stride_rgba,
                            const struct YuvConstants* yuvconstants,
                            int width,
                            int height) {
  int y;
  // Row converter; starts at the portable C implementation and is upgraded
  // below to the fastest SIMD variant the CPU supports.
  void (*I422ToRGBARow)(const uint8* y_buf, const uint8* u_buf,
                        const uint8* v_buf, uint8* rgb_buf,
                        const struct YuvConstants* yuvconstants, int width) =
      I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
    dst_stride_rgba = -dst_stride_rgba;
  }
#if defined(HAS_I422TORGBAROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_SSSE3;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    I422ToRGBARow = I422ToRGBARow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I422ToRGBARow = I422ToRGBARow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    I422ToRGBARow = I422ToRGBARow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_NEON;
    }
  }
#endif
#if defined(HAS_I422TORGBAROW_DSPR2)
  // DSPR2 has no _Any_ fallback and requires aligned pointers and strides.
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
    I422ToRGBARow = I422ToRGBARow_DSPR2;
  }
#endif
#if defined(HAS_I422TORGBAROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    I422ToRGBARow = I422ToRGBARow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      I422ToRGBARow = I422ToRGBARow_MSA;
    }
  }
#endif

  // I422 chroma rows are not vertically subsampled, so all three planes
  // advance one row per output row.
  for (y = 0; y < height; ++y) {
    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    src_u += src_stride_u;
    src_v += src_stride_v;
  }
  return 0;
}
1374
1375 // Convert I422 to RGBA.
1376 LIBYUV_API
I422ToRGBA(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_rgba,int dst_stride_rgba,int width,int height)1377 int I422ToRGBA(const uint8* src_y,
1378 int src_stride_y,
1379 const uint8* src_u,
1380 int src_stride_u,
1381 const uint8* src_v,
1382 int src_stride_v,
1383 uint8* dst_rgba,
1384 int dst_stride_rgba,
1385 int width,
1386 int height) {
1387 return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v,
1388 src_stride_v, dst_rgba, dst_stride_rgba,
1389 &kYuvI601Constants, width, height);
1390 }
1391
1392 // Convert I422 to BGRA.
1393 LIBYUV_API
I422ToBGRA(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_bgra,int dst_stride_bgra,int width,int height)1394 int I422ToBGRA(const uint8* src_y,
1395 int src_stride_y,
1396 const uint8* src_u,
1397 int src_stride_u,
1398 const uint8* src_v,
1399 int src_stride_v,
1400 uint8* dst_bgra,
1401 int dst_stride_bgra,
1402 int width,
1403 int height) {
1404 return I422ToRGBAMatrix(src_y, src_stride_y, src_v,
1405 src_stride_v, // Swap U and V
1406 src_u, src_stride_u, dst_bgra, dst_stride_bgra,
1407 &kYvuI601Constants, // Use Yvu matrix
1408 width, height);
1409 }
1410
// Convert NV12 to RGB565.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int NV12ToRGB565(const uint8* src_y,
                 int src_stride_y,
                 const uint8* src_uv,
                 int src_stride_uv,
                 uint8* dst_rgb565,
                 int dst_stride_rgb565,
                 int width,
                 int height) {
  int y;
  // Row converter; starts at the portable C implementation and is upgraded
  // below to the fastest SIMD variant the CPU supports.
  void (*NV12ToRGB565Row)(
      const uint8* y_buf, const uint8* uv_buf, uint8* rgb_buf,
      const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C;
  if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
    dst_stride_rgb565 = -dst_stride_rgb565;
  }
#if defined(HAS_NV12TORGB565ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
    }
  }
#endif
#if defined(HAS_NV12TORGB565ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      NV12ToRGB565Row = NV12ToRGB565Row_AVX2;
    }
  }
#endif
#if defined(HAS_NV12TORGB565ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_NEON;
    }
  }
#endif
#if defined(HAS_NV12TORGB565ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      NV12ToRGB565Row = NV12ToRGB565Row_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    // Interleaved UV is advanced only every other row: one chroma row is
    // shared by two luma rows.
    if (y & 1) {
      src_uv += src_stride_uv;
    }
  }
  return 0;
}
1477
// Convert RAW to RGB24 (both 3 bytes per pixel).
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int RAWToRGB24(const uint8* src_raw,
               int src_stride_raw,
               uint8* dst_rgb24,
               int dst_stride_rgb24,
               int width,
               int height) {
  int y;
  // Row converter; starts at the portable C implementation and is upgraded
  // below to the fastest SIMD variant the CPU supports.
  void (*RAWToRGB24Row)(const uint8* src_rgb, uint8* dst_rgb24, int width) =
      RAWToRGB24Row_C;
  if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (applied to the source here).
  if (height < 0) {
    height = -height;
    src_raw = src_raw + (height - 1) * src_stride_raw;
    src_stride_raw = -src_stride_raw;
  }
  // Coalesce rows: contiguous 3-byte-per-pixel images become one long row.
  if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
    width *= height;
    height = 1;
    src_stride_raw = dst_stride_rgb24 = 0;
  }
#if defined(HAS_RAWTORGB24ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_SSSE3;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToRGB24Row = RAWToRGB24Row_NEON;
    }
  }
#endif
#if defined(HAS_RAWTORGB24ROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      RAWToRGB24Row = RAWToRGB24Row_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    RAWToRGB24Row(src_raw, dst_rgb24, width);
    src_raw += src_stride_raw;
    dst_rgb24 += dst_stride_rgb24;
  }
  return 0;
}
1536
// Set a plane of data to a constant byte value.
// Note: value is declared uint32 but the row functions take a uint8, so only
// the low byte is used.
LIBYUV_API
void SetPlane(uint8* dst_y,
              int dst_stride_y,
              int width,
              int height,
              uint32 value) {
  int y;
  // Row function pointer; starts at the portable C implementation and is
  // upgraded below to the fastest variant the CPU supports.
  void (*SetRow)(uint8 * dst, uint8 value, int width) = SetRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows: a contiguous plane becomes one long row.
  if (dst_stride_y == width) {
    width *= height;
    height = 1;
    dst_stride_y = 0;
  }
#if defined(HAS_SETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SetRow = SetRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SetRow = SetRow_NEON;
    }
  }
#endif
#if defined(HAS_SETROW_X86)
  if (TestCpuFlag(kCpuHasX86)) {
    SetRow = SetRow_Any_X86;
    if (IS_ALIGNED(width, 4)) {
      SetRow = SetRow_X86;
    }
  }
#endif
#if defined(HAS_SETROW_ERMS)
  // Enhanced Rep MovSB/StosB variant has no width restriction.
  if (TestCpuFlag(kCpuHasERMS)) {
    SetRow = SetRow_ERMS;
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    SetRow(dst_y, value, width);
    dst_y += dst_stride_y;
  }
}
1584
1585 // Draw a rectangle into I420
1586 LIBYUV_API
I420Rect(uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int x,int y,int width,int height,int value_y,int value_u,int value_v)1587 int I420Rect(uint8* dst_y,
1588 int dst_stride_y,
1589 uint8* dst_u,
1590 int dst_stride_u,
1591 uint8* dst_v,
1592 int dst_stride_v,
1593 int x,
1594 int y,
1595 int width,
1596 int height,
1597 int value_y,
1598 int value_u,
1599 int value_v) {
1600 int halfwidth = (width + 1) >> 1;
1601 int halfheight = (height + 1) >> 1;
1602 uint8* start_y = dst_y + y * dst_stride_y + x;
1603 uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
1604 uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
1605 if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
1606 y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
1607 value_v < 0 || value_v > 255) {
1608 return -1;
1609 }
1610
1611 SetPlane(start_y, dst_stride_y, width, height, value_y);
1612 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
1613 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
1614 return 0;
1615 }
1616
// Draw a rectangle into ARGB.
// Fills the width x height rectangle at (dst_x, dst_y) with the 32-bit ARGB
// value. Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBRect(uint8* dst_argb,
             int dst_stride_argb,
             int dst_x,
             int dst_y,
             int width,
             int height,
             uint32 value) {
  int y;
  // Row function pointer; starts at the portable C implementation and is
  // upgraded below to the fastest variant the CPU supports.
  void (*ARGBSetRow)(uint8 * dst_argb, uint32 value, int width) = ARGBSetRow_C;
  if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }
  // Offset to the rectangle origin (4 bytes per ARGB pixel).
  dst_argb += dst_y * dst_stride_argb + dst_x * 4;
  // Coalesce rows: a contiguous rectangle becomes one long row.
  if (dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    dst_stride_argb = 0;
  }

#if defined(HAS_ARGBSETROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBSetRow = ARGBSetRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSETROW_X86)
  // X86 variant has no width restriction.
  if (TestCpuFlag(kCpuHasX86)) {
    ARGBSetRow = ARGBSetRow_X86;
  }
#endif
#if defined(HAS_ARGBSETROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBSetRow = ARGBSetRow_Any_MSA;
    if (IS_ALIGNED(width, 4)) {
      ARGBSetRow = ARGBSetRow_MSA;
    }
  }
#endif

  // Set plane
  for (y = 0; y < height; ++y) {
    ARGBSetRow(dst_argb, value, width);
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1673
// Convert unattenuated ARGB to preattenuated ARGB.
// An unattenuated ARGB alpha blend uses the formula
// p = a * f + (1 - a) * b
// where
//   p is output pixel
//   f is foreground pixel
//   b is background pixel
//   a is alpha value from foreground pixel
// A preattenuated ARGB alpha blend uses the formula
// p = f + (1 - a) * b
// where
//   f is foreground pixel premultiplied by alpha
1686
// Attenuate: premultiply each ARGB pixel's color channels by its alpha.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBAttenuate(const uint8* src_argb,
                  int src_stride_argb,
                  uint8* dst_argb,
                  int dst_stride_argb,
                  int width,
                  int height) {
  int y;
  // Row function pointer; starts at the portable C implementation and is
  // upgraded below to the fastest SIMD variant the CPU supports.
  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, int width) =
      ARGBAttenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (applied to the source here).
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 4)) {
      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBATTENUATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBAttenuateRow = ARGBAttenuateRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBAttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1751
// Convert preattenuated ARGB to unattenuated ARGB (divide color channels by
// alpha). Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBUnattenuate(const uint8* src_argb,
                    int src_stride_argb,
                    uint8* dst_argb,
                    int dst_stride_argb,
                    int width,
                    int height) {
  int y;
  // Row function pointer; starts at the portable C implementation and is
  // upgraded below to the fastest SIMD variant the CPU supports.
  void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
                             int width) = ARGBUnattenuateRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (applied to the source here).
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBUNATTENUATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBUNATTENUATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
    }
  }
#endif
  // TODO(fbarchard): Neon version.

  for (y = 0; y < height; ++y) {
    ARGBUnattenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1802
// Convert ARGB to Grayed ARGB (source to separate destination).
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBGrayTo(const uint8* src_argb,
               int src_stride_argb,
               uint8* dst_argb,
               int dst_stride_argb,
               int width,
               int height) {
  int y;
  // Row function pointer; SIMD variants below have no _Any_ fallback, so
  // they are only used when width is a multiple of 8.
  void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) =
      ARGBGrayRow_C;
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (applied to the source here).
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBGRAYROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBGRAYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_NEON;
  }
#endif
#if defined(HAS_ARGBGRAYROW_MSA)
  if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
    ARGBGrayRow = ARGBGrayRow_MSA;
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBGrayRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1851
1852 // Make a rectangle of ARGB gray scale.
1853 LIBYUV_API
ARGBGray(uint8 * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)1854 int ARGBGray(uint8* dst_argb,
1855 int dst_stride_argb,
1856 int dst_x,
1857 int dst_y,
1858 int width,
1859 int height) {
1860 int y;
1861 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, int width) =
1862 ARGBGrayRow_C;
1863 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1864 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1865 return -1;
1866 }
1867 // Coalesce rows.
1868 if (dst_stride_argb == width * 4) {
1869 width *= height;
1870 height = 1;
1871 dst_stride_argb = 0;
1872 }
1873 #if defined(HAS_ARGBGRAYROW_SSSE3)
1874 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
1875 ARGBGrayRow = ARGBGrayRow_SSSE3;
1876 }
1877 #endif
1878 #if defined(HAS_ARGBGRAYROW_NEON)
1879 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1880 ARGBGrayRow = ARGBGrayRow_NEON;
1881 }
1882 #endif
1883 #if defined(HAS_ARGBGRAYROW_MSA)
1884 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
1885 ARGBGrayRow = ARGBGrayRow_MSA;
1886 }
1887 #endif
1888
1889 for (y = 0; y < height; ++y) {
1890 ARGBGrayRow(dst, dst, width);
1891 dst += dst_stride_argb;
1892 }
1893 return 0;
1894 }
1895
1896 // Make a rectangle of ARGB Sepia tone.
1897 LIBYUV_API
ARGBSepia(uint8 * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)1898 int ARGBSepia(uint8* dst_argb,
1899 int dst_stride_argb,
1900 int dst_x,
1901 int dst_y,
1902 int width,
1903 int height) {
1904 int y;
1905 void (*ARGBSepiaRow)(uint8 * dst_argb, int width) = ARGBSepiaRow_C;
1906 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1907 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1908 return -1;
1909 }
1910 // Coalesce rows.
1911 if (dst_stride_argb == width * 4) {
1912 width *= height;
1913 height = 1;
1914 dst_stride_argb = 0;
1915 }
1916 #if defined(HAS_ARGBSEPIAROW_SSSE3)
1917 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
1918 ARGBSepiaRow = ARGBSepiaRow_SSSE3;
1919 }
1920 #endif
1921 #if defined(HAS_ARGBSEPIAROW_NEON)
1922 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1923 ARGBSepiaRow = ARGBSepiaRow_NEON;
1924 }
1925 #endif
1926 #if defined(HAS_ARGBSEPIAROW_MSA)
1927 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
1928 ARGBSepiaRow = ARGBSepiaRow_MSA;
1929 }
1930 #endif
1931
1932 for (y = 0; y < height; ++y) {
1933 ARGBSepiaRow(dst, width);
1934 dst += dst_stride_argb;
1935 }
1936 return 0;
1937 }
1938
// Apply a 4x4 matrix to each ARGB pixel.
// Note: Normally for shading, but can be used to swizzle or invert.
// matrix_argb points to 16 signed 8-bit coefficients.
// Returns 0 on success, -1 on invalid arguments.
LIBYUV_API
int ARGBColorMatrix(const uint8* src_argb,
                    int src_stride_argb,
                    uint8* dst_argb,
                    int dst_stride_argb,
                    const int8* matrix_argb,
                    int width,
                    int height) {
  int y;
  // Row function pointer; SIMD variants below have no _Any_ fallback, so
  // they are only used when width is a multiple of 8.
  void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
                             const int8* matrix_argb, int width) =
      ARGBColorMatrixRow_C;
  if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image (applied to the source here).
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
  }
#endif
#if defined(HAS_ARGBCOLORMATRIXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
    ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
  }
#endif
  for (y = 0; y < height; ++y) {
    ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
1984
1985 // Apply a 4x3 matrix to each ARGB pixel.
1986 // Deprecated.
1987 LIBYUV_API
RGBColorMatrix(uint8 * dst_argb,int dst_stride_argb,const int8 * matrix_rgb,int dst_x,int dst_y,int width,int height)1988 int RGBColorMatrix(uint8* dst_argb,
1989 int dst_stride_argb,
1990 const int8* matrix_rgb,
1991 int dst_x,
1992 int dst_y,
1993 int width,
1994 int height) {
1995 SIMD_ALIGNED(int8 matrix_argb[16]);
1996 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1997 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
1998 dst_y < 0) {
1999 return -1;
2000 }
2001
2002 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
2003 matrix_argb[0] = matrix_rgb[0] / 2;
2004 matrix_argb[1] = matrix_rgb[1] / 2;
2005 matrix_argb[2] = matrix_rgb[2] / 2;
2006 matrix_argb[3] = matrix_rgb[3] / 2;
2007 matrix_argb[4] = matrix_rgb[4] / 2;
2008 matrix_argb[5] = matrix_rgb[5] / 2;
2009 matrix_argb[6] = matrix_rgb[6] / 2;
2010 matrix_argb[7] = matrix_rgb[7] / 2;
2011 matrix_argb[8] = matrix_rgb[8] / 2;
2012 matrix_argb[9] = matrix_rgb[9] / 2;
2013 matrix_argb[10] = matrix_rgb[10] / 2;
2014 matrix_argb[11] = matrix_rgb[11] / 2;
2015 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
2016 matrix_argb[15] = 64; // 1.0
2017
2018 return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, dst,
2019 dst_stride_argb, &matrix_argb[0], width, height);
2020 }
2021
2022 // Apply a color table each ARGB pixel.
2023 // Table contains 256 ARGB values.
2024 LIBYUV_API
ARGBColorTable(uint8 * dst_argb,int dst_stride_argb,const uint8 * table_argb,int dst_x,int dst_y,int width,int height)2025 int ARGBColorTable(uint8* dst_argb,
2026 int dst_stride_argb,
2027 const uint8* table_argb,
2028 int dst_x,
2029 int dst_y,
2030 int width,
2031 int height) {
2032 int y;
2033 void (*ARGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb,
2034 int width) = ARGBColorTableRow_C;
2035 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2036 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
2037 dst_y < 0) {
2038 return -1;
2039 }
2040 // Coalesce rows.
2041 if (dst_stride_argb == width * 4) {
2042 width *= height;
2043 height = 1;
2044 dst_stride_argb = 0;
2045 }
2046 #if defined(HAS_ARGBCOLORTABLEROW_X86)
2047 if (TestCpuFlag(kCpuHasX86)) {
2048 ARGBColorTableRow = ARGBColorTableRow_X86;
2049 }
2050 #endif
2051 for (y = 0; y < height; ++y) {
2052 ARGBColorTableRow(dst, table_argb, width);
2053 dst += dst_stride_argb;
2054 }
2055 return 0;
2056 }
2057
2058 // Apply a color table each ARGB pixel but preserve destination alpha.
2059 // Table contains 256 ARGB values.
2060 LIBYUV_API
RGBColorTable(uint8 * dst_argb,int dst_stride_argb,const uint8 * table_argb,int dst_x,int dst_y,int width,int height)2061 int RGBColorTable(uint8* dst_argb,
2062 int dst_stride_argb,
2063 const uint8* table_argb,
2064 int dst_x,
2065 int dst_y,
2066 int width,
2067 int height) {
2068 int y;
2069 void (*RGBColorTableRow)(uint8 * dst_argb, const uint8* table_argb,
2070 int width) = RGBColorTableRow_C;
2071 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2072 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
2073 dst_y < 0) {
2074 return -1;
2075 }
2076 // Coalesce rows.
2077 if (dst_stride_argb == width * 4) {
2078 width *= height;
2079 height = 1;
2080 dst_stride_argb = 0;
2081 }
2082 #if defined(HAS_RGBCOLORTABLEROW_X86)
2083 if (TestCpuFlag(kCpuHasX86)) {
2084 RGBColorTableRow = RGBColorTableRow_X86;
2085 }
2086 #endif
2087 for (y = 0; y < height; ++y) {
2088 RGBColorTableRow(dst, table_argb, width);
2089 dst += dst_stride_argb;
2090 }
2091 return 0;
2092 }
2093
2094 // ARGBQuantize is used to posterize art.
2095 // e.g. rgb / qvalue * qvalue + qvalue / 2
2096 // But the low levels implement efficiently with 3 parameters, and could be
2097 // used for other high level operations.
2098 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
2099 // where scale is 1 / interval_size as a fixed point value.
2100 // The divide is replaces with a multiply by reciprocal fixed point multiply.
2101 // Caveat - although SSE2 saturates, the C function does not and should be used
2102 // with care if doing anything but quantization.
2103 LIBYUV_API
ARGBQuantize(uint8 * dst_argb,int dst_stride_argb,int scale,int interval_size,int interval_offset,int dst_x,int dst_y,int width,int height)2104 int ARGBQuantize(uint8* dst_argb,
2105 int dst_stride_argb,
2106 int scale,
2107 int interval_size,
2108 int interval_offset,
2109 int dst_x,
2110 int dst_y,
2111 int width,
2112 int height) {
2113 int y;
2114 void (*ARGBQuantizeRow)(uint8 * dst_argb, int scale, int interval_size,
2115 int interval_offset, int width) = ARGBQuantizeRow_C;
2116 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
2117 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
2118 interval_size < 1 || interval_size > 255) {
2119 return -1;
2120 }
2121 // Coalesce rows.
2122 if (dst_stride_argb == width * 4) {
2123 width *= height;
2124 height = 1;
2125 dst_stride_argb = 0;
2126 }
2127 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
2128 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
2129 ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
2130 }
2131 #endif
2132 #if defined(HAS_ARGBQUANTIZEROW_NEON)
2133 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2134 ARGBQuantizeRow = ARGBQuantizeRow_NEON;
2135 }
2136 #endif
2137 for (y = 0; y < height; ++y) {
2138 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
2139 dst += dst_stride_argb;
2140 }
2141 return 0;
2142 }
2143
2144 // Computes table of cumulative sum for image where the value is the sum
2145 // of all values above and to the left of the entry. Used by ARGBBlur.
2146 LIBYUV_API
ARGBComputeCumulativeSum(const uint8 * src_argb,int src_stride_argb,int32 * dst_cumsum,int dst_stride32_cumsum,int width,int height)2147 int ARGBComputeCumulativeSum(const uint8* src_argb,
2148 int src_stride_argb,
2149 int32* dst_cumsum,
2150 int dst_stride32_cumsum,
2151 int width,
2152 int height) {
2153 int y;
2154 void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
2155 const int32* previous_cumsum, int width) =
2156 ComputeCumulativeSumRow_C;
2157 int32* previous_cumsum = dst_cumsum;
2158 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
2159 return -1;
2160 }
2161 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
2162 if (TestCpuFlag(kCpuHasSSE2)) {
2163 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
2164 }
2165 #endif
2166 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
2167 for (y = 0; y < height; ++y) {
2168 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
2169 previous_cumsum = dst_cumsum;
2170 dst_cumsum += dst_stride32_cumsum;
2171 src_argb += src_stride_argb;
2172 }
2173 return 0;
2174 }
2175
2176 // Blur ARGB image.
2177 // Caller should allocate CumulativeSum table of width * height * 16 bytes
2178 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
2179 // as the buffer is treated as circular.
2180 LIBYUV_API
int ARGBBlur(const uint8* src_argb,
             int src_stride_argb,
             uint8* dst_argb,
             int dst_stride_argb,
             int32* dst_cumsum,
             int dst_stride32_cumsum,
             int width,
             int height,
             int radius) {
  // Box-blur via a cumulative (summed-area) table held in a circular buffer
  // of rows; dst_cumsum needs radius * 2 + 2 rows (see header comment).
  // Returns 0 on success, -1 on invalid arguments or degenerate radius.
  int y;
  void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
                                  const int32* previous_cumsum, int width) =
      ComputeCumulativeSumRow_C;
  void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
                                    int width, int area, uint8* dst,
                                    int count) = CumulativeSumToAverageRow_C;
  int32* cumsum_bot_row;
  int32* max_cumsum_bot_row;  // One past the last row of the circular buffer.
  int32* cumsum_top_row;

  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }
  // Clamp radius to the image dimensions.
  if (radius > height) {
    radius = height;
  }
  if (radius > (width / 2 - 1)) {
    radius = width / 2 - 1;
  }
  // A non-positive radius (e.g. width < 4) leaves nothing to blur.
  if (radius <= 0) {
    return -1;
  }
#if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
    CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
  }
#endif
  // Compute enough CumulativeSum for first row to be blurred. After this
  // one row of CumulativeSum is updated at a time.
  ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
                           dst_stride32_cumsum, width, radius);

  src_argb = src_argb + radius * src_stride_argb;
  cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];

  max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
  cumsum_top_row = &dst_cumsum[0];

  for (y = 0; y < height; ++y) {
    // Window rows clamped to the image; area/boxwidth start at the
    // left-clipped size and grow across the left edge loop below.
    int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
    int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
    int area = radius * (bot_y - top_y);
    int boxwidth = radius * 4;  // In int32 lanes: 4 per pixel.
    int x;
    int n;

    // Increment cumsum_top_row pointer with circular buffer wrap around.
    if (top_y) {
      cumsum_top_row += dst_stride32_cumsum;
      if (cumsum_top_row >= max_cumsum_bot_row) {
        cumsum_top_row = dst_cumsum;
      }
    }
    // Increment cumsum_bot_row pointer with circular buffer wrap around and
    // then fill in a row of CumulativeSum.
    if ((y + radius) < height) {
      const int32* prev_cumsum_bot_row = cumsum_bot_row;
      cumsum_bot_row += dst_stride32_cumsum;
      if (cumsum_bot_row >= max_cumsum_bot_row) {
        cumsum_bot_row = dst_cumsum;
      }
      ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
                              width);
      src_argb += src_stride_argb;
    }

    // Left clipped: box narrower than full width near the left edge.
    for (x = 0; x < radius + 1; ++x) {
      CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
                                &dst_argb[x * 4], 1);
      area += (bot_y - top_y);
      boxwidth += 4;
    }

    // Middle unclipped: full box, processed as one run of n pixels.
    n = (width - 1) - radius - x + 1;
    CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
                              &dst_argb[x * 4], n);

    // Right clipped: box shrinks again approaching the right edge.
    for (x += n; x <= width - 1; ++x) {
      area -= (bot_y - top_y);
      boxwidth -= 4;
      CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
                                cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
                                area, &dst_argb[x * 4], 1);
    }
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2288
2289 // Multiply ARGB image by a specified ARGB value.
2290 LIBYUV_API
ARGBShade(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height,uint32 value)2291 int ARGBShade(const uint8* src_argb,
2292 int src_stride_argb,
2293 uint8* dst_argb,
2294 int dst_stride_argb,
2295 int width,
2296 int height,
2297 uint32 value) {
2298 int y;
2299 void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, int width,
2300 uint32 value) = ARGBShadeRow_C;
2301 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
2302 return -1;
2303 }
2304 if (height < 0) {
2305 height = -height;
2306 src_argb = src_argb + (height - 1) * src_stride_argb;
2307 src_stride_argb = -src_stride_argb;
2308 }
2309 // Coalesce rows.
2310 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
2311 width *= height;
2312 height = 1;
2313 src_stride_argb = dst_stride_argb = 0;
2314 }
2315 #if defined(HAS_ARGBSHADEROW_SSE2)
2316 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
2317 ARGBShadeRow = ARGBShadeRow_SSE2;
2318 }
2319 #endif
2320 #if defined(HAS_ARGBSHADEROW_NEON)
2321 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2322 ARGBShadeRow = ARGBShadeRow_NEON;
2323 }
2324 #endif
2325 #if defined(HAS_ARGBSHADEROW_MSA)
2326 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
2327 ARGBShadeRow = ARGBShadeRow_MSA;
2328 }
2329 #endif
2330
2331 for (y = 0; y < height; ++y) {
2332 ARGBShadeRow(src_argb, dst_argb, width, value);
2333 src_argb += src_stride_argb;
2334 dst_argb += dst_stride_argb;
2335 }
2336 return 0;
2337 }
2338
2339 // Interpolate 2 planes by specified amount (0 to 255).
2340 LIBYUV_API
int InterpolatePlane(const uint8* src0,
                     int src_stride0,
                     const uint8* src1,
                     int src_stride1,
                     uint8* dst,
                     int dst_stride,
                     int width,
                     int height,
                     int interpolation) {
  // Blends src0 and src1 into dst by 'interpolation' (0 to 255; exact
  // endpoint semantics defined by the InterpolateRow implementations).
  // Returns 0 on success, -1 on invalid arguments.
  int y;
  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst = dst + (height - 1) * dst_stride;
    dst_stride = -dst_stride;
  }
  // Coalesce rows: contiguous planes can be processed as one long row.
  if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
    width *= height;
    height = 1;
    src_stride0 = src_stride1 = dst_stride = 0;
  }
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_DSPR2)
  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src0, 4) &&
      IS_ALIGNED(src_stride0, 4) && IS_ALIGNED(src1, 4) &&
      IS_ALIGNED(src_stride1, 4) && IS_ALIGNED(dst, 4) &&
      IS_ALIGNED(dst_stride, 4) && IS_ALIGNED(width, 4)) {
    InterpolateRow = InterpolateRow_DSPR2;
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // The row function takes one base pointer plus a stride to the second
    // row, so the offset between the two source rows is passed as 'stride'.
    InterpolateRow(dst, src0, src1 - src0, width, interpolation);
    src0 += src_stride0;
    src1 += src_stride1;
    dst += dst_stride;
  }
  return 0;
}
2418
2419 // Interpolate 2 ARGB images by specified amount (0 to 255).
2420 LIBYUV_API
ARGBInterpolate(const uint8 * src_argb0,int src_stride_argb0,const uint8 * src_argb1,int src_stride_argb1,uint8 * dst_argb,int dst_stride_argb,int width,int height,int interpolation)2421 int ARGBInterpolate(const uint8* src_argb0,
2422 int src_stride_argb0,
2423 const uint8* src_argb1,
2424 int src_stride_argb1,
2425 uint8* dst_argb,
2426 int dst_stride_argb,
2427 int width,
2428 int height,
2429 int interpolation) {
2430 return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
2431 src_stride_argb1, dst_argb, dst_stride_argb,
2432 width * 4, height, interpolation);
2433 }
2434
2435 // Interpolate 2 YUV images by specified amount (0 to 255).
2436 LIBYUV_API
I420Interpolate(const uint8 * src0_y,int src0_stride_y,const uint8 * src0_u,int src0_stride_u,const uint8 * src0_v,int src0_stride_v,const uint8 * src1_y,int src1_stride_y,const uint8 * src1_u,int src1_stride_u,const uint8 * src1_v,int src1_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height,int interpolation)2437 int I420Interpolate(const uint8* src0_y,
2438 int src0_stride_y,
2439 const uint8* src0_u,
2440 int src0_stride_u,
2441 const uint8* src0_v,
2442 int src0_stride_v,
2443 const uint8* src1_y,
2444 int src1_stride_y,
2445 const uint8* src1_u,
2446 int src1_stride_u,
2447 const uint8* src1_v,
2448 int src1_stride_v,
2449 uint8* dst_y,
2450 int dst_stride_y,
2451 uint8* dst_u,
2452 int dst_stride_u,
2453 uint8* dst_v,
2454 int dst_stride_v,
2455 int width,
2456 int height,
2457 int interpolation) {
2458 int halfwidth = (width + 1) >> 1;
2459 int halfheight = (height + 1) >> 1;
2460 if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
2461 !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
2462 return -1;
2463 }
2464 InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
2465 dst_stride_y, width, height, interpolation);
2466 InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
2467 dst_stride_u, halfwidth, halfheight, interpolation);
2468 InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
2469 dst_stride_v, halfwidth, halfheight, interpolation);
2470 return 0;
2471 }
2472
2473 // Shuffle ARGB channel order. e.g. BGRA to ARGB.
2474 LIBYUV_API
int ARGBShuffle(const uint8* src_bgra,
                int src_stride_bgra,
                uint8* dst_argb,
                int dst_stride_argb,
                const uint8* shuffler,
                int width,
                int height) {
  // Reorders the 4 channels of each pixel according to 'shuffler'.
  // NOTE(review): shuffler is presumably 4 byte indices into the source
  // pixel (row-function contract) — confirm against row implementations.
  int y;
  void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
                         const uint8* shuffler, int width) = ARGBShuffleRow_C;
  if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
    src_stride_bgra = -src_stride_bgra;
  }
  // Coalesce rows: contiguous images are processed as one long row.
  if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_bgra = dst_stride_argb = 0;
  }
#if defined(HAS_ARGBSHUFFLEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_SSE2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      ARGBShuffleRow = ARGBShuffleRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
    if (IS_ALIGNED(width, 4)) {
      ARGBShuffleRow = ARGBShuffleRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBSHUFFLEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
    if (IS_ALIGNED(width, 8)) {
      ARGBShuffleRow = ARGBShuffleRow_MSA;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
    src_bgra += src_stride_bgra;
    dst_argb += dst_stride_argb;
  }
  return 0;
}
2548
2549 // Sobel ARGB effect.
// Shared driver for the Sobel effects: converts ARGB to luma (YJ), runs the
// X and Y Sobel operators over a sliding window of 3 luma rows, and lets the
// caller-supplied SobelRow combine the two gradients into the destination.
static int ARGBSobelize(const uint8* src_argb,
                        int src_stride_argb,
                        uint8* dst_argb,
                        int dst_stride_argb,
                        int width,
                        int height,
                        void (*SobelRow)(const uint8* src_sobelx,
                                         const uint8* src_sobely,
                                         uint8* dst,
                                         int width)) {
  int y;
  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_g, int width) =
      ARGBToYJRow_C;
  void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, uint8* dst_sobely,
                    int width) = SobelYRow_C;
  void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
                    const uint8* src_y2, uint8* dst_sobely, int width) =
      SobelXRow_C;
  const int kEdge = 16;  // Extra pixels at start of row for extrude/align.
  if (!src_argb || !dst_argb || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
    src_stride_argb = -src_stride_argb;
  }

#if defined(HAS_ARGBTOYJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ARGBToYJRow = ARGBToYJRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      ARGBToYJRow = ARGBToYJRow_AVX2;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ARGBToYJRow = ARGBToYJRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYJRow = ARGBToYJRow_NEON;
    }
  }
#endif
#if defined(HAS_ARGBTOYJROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ARGBToYJRow = ARGBToYJRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      ARGBToYJRow = ARGBToYJRow_MSA;
    }
  }
#endif

#if defined(HAS_SOBELYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelYRow = SobelYRow_SSE2;
  }
#endif
#if defined(HAS_SOBELYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelYRow = SobelYRow_NEON;
  }
#endif
#if defined(HAS_SOBELXROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SobelXRow = SobelXRow_SSE2;
  }
#endif
#if defined(HAS_SOBELXROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SobelXRow = SobelXRow_NEON;
  }
#endif
  {
    // 3 rows with edges before/after.
    const int kRowSize = (width + kEdge + 31) & ~31;  // 32-byte aligned rows.
    align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
    uint8* row_sobelx = rows;
    uint8* row_sobely = rows + kRowSize;
    uint8* row_y = rows + kRowSize * 2;

    // Convert first row. row_y0..row_y2 are offset by kEdge so index -1
    // (left-edge extrusion) stays inside the allocation.
    uint8* row_y0 = row_y + kEdge;
    uint8* row_y1 = row_y0 + kRowSize;
    uint8* row_y2 = row_y1 + kRowSize;
    ARGBToYJRow(src_argb, row_y0, width);
    row_y0[-1] = row_y0[0];  // Extrude left edge.
    memset(row_y0 + width, row_y0[width - 1], 16);  // Extrude 16 for valgrind.
    ARGBToYJRow(src_argb, row_y1, width);
    row_y1[-1] = row_y1[0];
    memset(row_y1 + width, row_y1[width - 1], 16);
    memset(row_y2 + width, 0, 16);

    for (y = 0; y < height; ++y) {
      // Convert next row of ARGB to G. On the last row, the source pointer
      // is not advanced, so the bottom row is duplicated.
      if (y < (height - 1)) {
        src_argb += src_stride_argb;
      }
      ARGBToYJRow(src_argb, row_y2, width);
      row_y2[-1] = row_y2[0];
      row_y2[width] = row_y2[width - 1];

      // Gradients over the 3-row window, then combine via the caller's row.
      SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
      SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
      SobelRow(row_sobelx, row_sobely, dst_argb, width);

      // Cycle thru circular queue of 3 row_y buffers.
      {
        uint8* row_yt = row_y0;
        row_y0 = row_y1;
        row_y1 = row_y2;
        row_y2 = row_yt;
      }

      dst_argb += dst_stride_argb;
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
2679
2680 // Sobel ARGB effect.
2681 LIBYUV_API
ARGBSobel(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2682 int ARGBSobel(const uint8* src_argb,
2683 int src_stride_argb,
2684 uint8* dst_argb,
2685 int dst_stride_argb,
2686 int width,
2687 int height) {
2688 void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
2689 uint8* dst_argb, int width) = SobelRow_C;
2690 #if defined(HAS_SOBELROW_SSE2)
2691 if (TestCpuFlag(kCpuHasSSE2)) {
2692 SobelRow = SobelRow_Any_SSE2;
2693 if (IS_ALIGNED(width, 16)) {
2694 SobelRow = SobelRow_SSE2;
2695 }
2696 }
2697 #endif
2698 #if defined(HAS_SOBELROW_NEON)
2699 if (TestCpuFlag(kCpuHasNEON)) {
2700 SobelRow = SobelRow_Any_NEON;
2701 if (IS_ALIGNED(width, 8)) {
2702 SobelRow = SobelRow_NEON;
2703 }
2704 }
2705 #endif
2706 #if defined(HAS_SOBELROW_MSA)
2707 if (TestCpuFlag(kCpuHasMSA)) {
2708 SobelRow = SobelRow_Any_MSA;
2709 if (IS_ALIGNED(width, 16)) {
2710 SobelRow = SobelRow_MSA;
2711 }
2712 }
2713 #endif
2714 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2715 width, height, SobelRow);
2716 }
2717
2718 // Sobel ARGB effect with planar output.
2719 LIBYUV_API
ARGBSobelToPlane(const uint8 * src_argb,int src_stride_argb,uint8 * dst_y,int dst_stride_y,int width,int height)2720 int ARGBSobelToPlane(const uint8* src_argb,
2721 int src_stride_argb,
2722 uint8* dst_y,
2723 int dst_stride_y,
2724 int width,
2725 int height) {
2726 void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
2727 uint8* dst_, int width) = SobelToPlaneRow_C;
2728 #if defined(HAS_SOBELTOPLANEROW_SSE2)
2729 if (TestCpuFlag(kCpuHasSSE2)) {
2730 SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
2731 if (IS_ALIGNED(width, 16)) {
2732 SobelToPlaneRow = SobelToPlaneRow_SSE2;
2733 }
2734 }
2735 #endif
2736 #if defined(HAS_SOBELTOPLANEROW_NEON)
2737 if (TestCpuFlag(kCpuHasNEON)) {
2738 SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
2739 if (IS_ALIGNED(width, 16)) {
2740 SobelToPlaneRow = SobelToPlaneRow_NEON;
2741 }
2742 }
2743 #endif
2744 #if defined(HAS_SOBELTOPLANEROW_MSA)
2745 if (TestCpuFlag(kCpuHasMSA)) {
2746 SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
2747 if (IS_ALIGNED(width, 32)) {
2748 SobelToPlaneRow = SobelToPlaneRow_MSA;
2749 }
2750 }
2751 #endif
2752 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
2753 height, SobelToPlaneRow);
2754 }
2755
2756 // SobelXY ARGB effect.
2757 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
2758 LIBYUV_API
ARGBSobelXY(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2759 int ARGBSobelXY(const uint8* src_argb,
2760 int src_stride_argb,
2761 uint8* dst_argb,
2762 int dst_stride_argb,
2763 int width,
2764 int height) {
2765 void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
2766 uint8* dst_argb, int width) = SobelXYRow_C;
2767 #if defined(HAS_SOBELXYROW_SSE2)
2768 if (TestCpuFlag(kCpuHasSSE2)) {
2769 SobelXYRow = SobelXYRow_Any_SSE2;
2770 if (IS_ALIGNED(width, 16)) {
2771 SobelXYRow = SobelXYRow_SSE2;
2772 }
2773 }
2774 #endif
2775 #if defined(HAS_SOBELXYROW_NEON)
2776 if (TestCpuFlag(kCpuHasNEON)) {
2777 SobelXYRow = SobelXYRow_Any_NEON;
2778 if (IS_ALIGNED(width, 8)) {
2779 SobelXYRow = SobelXYRow_NEON;
2780 }
2781 }
2782 #endif
2783 #if defined(HAS_SOBELXYROW_MSA)
2784 if (TestCpuFlag(kCpuHasMSA)) {
2785 SobelXYRow = SobelXYRow_Any_MSA;
2786 if (IS_ALIGNED(width, 16)) {
2787 SobelXYRow = SobelXYRow_MSA;
2788 }
2789 }
2790 #endif
2791 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2792 width, height, SobelXYRow);
2793 }
2794
2795 // Apply a 4x4 polynomial to each ARGB pixel.
2796 LIBYUV_API
ARGBPolynomial(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,const float * poly,int width,int height)2797 int ARGBPolynomial(const uint8* src_argb,
2798 int src_stride_argb,
2799 uint8* dst_argb,
2800 int dst_stride_argb,
2801 const float* poly,
2802 int width,
2803 int height) {
2804 int y;
2805 void (*ARGBPolynomialRow)(const uint8* src_argb, uint8* dst_argb,
2806 const float* poly, int width) = ARGBPolynomialRow_C;
2807 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
2808 return -1;
2809 }
2810 // Negative height means invert the image.
2811 if (height < 0) {
2812 height = -height;
2813 src_argb = src_argb + (height - 1) * src_stride_argb;
2814 src_stride_argb = -src_stride_argb;
2815 }
2816 // Coalesce rows.
2817 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
2818 width *= height;
2819 height = 1;
2820 src_stride_argb = dst_stride_argb = 0;
2821 }
2822 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
2823 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
2824 ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
2825 }
2826 #endif
2827 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
2828 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
2829 IS_ALIGNED(width, 2)) {
2830 ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
2831 }
2832 #endif
2833
2834 for (y = 0; y < height; ++y) {
2835 ARGBPolynomialRow(src_argb, dst_argb, poly, width);
2836 src_argb += src_stride_argb;
2837 dst_argb += dst_stride_argb;
2838 }
2839 return 0;
2840 }
2841
2842 // Convert plane of 16 bit shorts to half floats.
2843 // Source values are multiplied by scale before storing as half float.
2844 LIBYUV_API
int HalfFloatPlane(const uint16* src_y,
                   int src_stride_y,
                   uint16* dst_y,
                   int dst_stride_y,
                   float scale,
                   int width,
                   int height) {
  // Converts 16-bit shorts to half floats, multiplying by 'scale' first.
  // Strides are given in bytes; returns 0 on success, -1 on bad arguments.
  int y;
  void (*HalfFloatRow)(const uint16* src, uint16* dst, float scale, int width) =
      HalfFloatRow_C;
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Convert byte strides to uint16-element strides for pointer arithmetic.
  src_stride_y >>= 1;
  dst_stride_y >>= 1;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  // Coalesce rows: contiguous planes are processed as one long row.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_HALFFLOATROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    HalfFloatRow = HalfFloatRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      HalfFloatRow = HalfFloatRow_SSE2;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    HalfFloatRow = HalfFloatRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = HalfFloatRow_AVX2;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_F16C)
  if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
    // scale == 1.0f selects a specialized no-multiply row function.
    HalfFloatRow =
        (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
    if (IS_ALIGNED(width, 16)) {
      HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
    }
  }
#endif
#if defined(HAS_HALFFLOATROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    HalfFloatRow =
        (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    HalfFloatRow(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
  return 0;
}
2914
2915 // Apply a lumacolortable to each ARGB pixel.
2916 LIBYUV_API
ARGBLumaColorTable(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,const uint8 * luma,int width,int height)2917 int ARGBLumaColorTable(const uint8* src_argb,
2918 int src_stride_argb,
2919 uint8* dst_argb,
2920 int dst_stride_argb,
2921 const uint8* luma,
2922 int width,
2923 int height) {
2924 int y;
2925 void (*ARGBLumaColorTableRow)(
2926 const uint8* src_argb, uint8* dst_argb, int width, const uint8* luma,
2927 const uint32 lumacoeff) = ARGBLumaColorTableRow_C;
2928 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
2929 return -1;
2930 }
2931 // Negative height means invert the image.
2932 if (height < 0) {
2933 height = -height;
2934 src_argb = src_argb + (height - 1) * src_stride_argb;
2935 src_stride_argb = -src_stride_argb;
2936 }
2937 // Coalesce rows.
2938 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
2939 width *= height;
2940 height = 1;
2941 src_stride_argb = dst_stride_argb = 0;
2942 }
2943 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
2944 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
2945 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
2946 }
2947 #endif
2948
2949 for (y = 0; y < height; ++y) {
2950 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
2951 src_argb += src_stride_argb;
2952 dst_argb += dst_stride_argb;
2953 }
2954 return 0;
2955 }
2956
2957 // Copy Alpha from one ARGB image to another.
2958 LIBYUV_API
ARGBCopyAlpha(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2959 int ARGBCopyAlpha(const uint8* src_argb,
2960 int src_stride_argb,
2961 uint8* dst_argb,
2962 int dst_stride_argb,
2963 int width,
2964 int height) {
2965 int y;
2966 void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
2967 ARGBCopyAlphaRow_C;
2968 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2969 return -1;
2970 }
2971 // Negative height means invert the image.
2972 if (height < 0) {
2973 height = -height;
2974 src_argb = src_argb + (height - 1) * src_stride_argb;
2975 src_stride_argb = -src_stride_argb;
2976 }
2977 // Coalesce rows.
2978 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
2979 width *= height;
2980 height = 1;
2981 src_stride_argb = dst_stride_argb = 0;
2982 }
2983 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
2984 if (TestCpuFlag(kCpuHasSSE2)) {
2985 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
2986 if (IS_ALIGNED(width, 8)) {
2987 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
2988 }
2989 }
2990 #endif
2991 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
2992 if (TestCpuFlag(kCpuHasAVX2)) {
2993 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
2994 if (IS_ALIGNED(width, 16)) {
2995 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
2996 }
2997 }
2998 #endif
2999
3000 for (y = 0; y < height; ++y) {
3001 ARGBCopyAlphaRow(src_argb, dst_argb, width);
3002 src_argb += src_stride_argb;
3003 dst_argb += dst_stride_argb;
3004 }
3005 return 0;
3006 }
3007
3008 // Extract just the alpha channel from ARGB.
3009 LIBYUV_API
ARGBExtractAlpha(const uint8 * src_argb,int src_stride,uint8 * dst_a,int dst_stride,int width,int height)3010 int ARGBExtractAlpha(const uint8* src_argb,
3011 int src_stride,
3012 uint8* dst_a,
3013 int dst_stride,
3014 int width,
3015 int height) {
3016 if (!src_argb || !dst_a || width <= 0 || height == 0) {
3017 return -1;
3018 }
3019 // Negative height means invert the image.
3020 if (height < 0) {
3021 height = -height;
3022 src_argb += (height - 1) * src_stride;
3023 src_stride = -src_stride;
3024 }
3025 // Coalesce rows.
3026 if (src_stride == width * 4 && dst_stride == width) {
3027 width *= height;
3028 height = 1;
3029 src_stride = dst_stride = 0;
3030 }
3031 void (*ARGBExtractAlphaRow)(const uint8* src_argb, uint8* dst_a, int width) =
3032 ARGBExtractAlphaRow_C;
3033 #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
3034 if (TestCpuFlag(kCpuHasSSE2)) {
3035 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
3036 : ARGBExtractAlphaRow_Any_SSE2;
3037 }
3038 #endif
3039 #if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
3040 if (TestCpuFlag(kCpuHasAVX2)) {
3041 ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
3042 : ARGBExtractAlphaRow_Any_AVX2;
3043 }
3044 #endif
3045 #if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
3046 if (TestCpuFlag(kCpuHasNEON)) {
3047 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
3048 : ARGBExtractAlphaRow_Any_NEON;
3049 }
3050 #endif
3051
3052 for (int y = 0; y < height; ++y) {
3053 ARGBExtractAlphaRow(src_argb, dst_a, width);
3054 src_argb += src_stride;
3055 dst_a += dst_stride;
3056 }
3057 return 0;
3058 }
3059
3060 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
3061 LIBYUV_API
ARGBCopyYToAlpha(const uint8 * src_y,int src_stride_y,uint8 * dst_argb,int dst_stride_argb,int width,int height)3062 int ARGBCopyYToAlpha(const uint8* src_y,
3063 int src_stride_y,
3064 uint8* dst_argb,
3065 int dst_stride_argb,
3066 int width,
3067 int height) {
3068 int y;
3069 void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
3070 ARGBCopyYToAlphaRow_C;
3071 if (!src_y || !dst_argb || width <= 0 || height == 0) {
3072 return -1;
3073 }
3074 // Negative height means invert the image.
3075 if (height < 0) {
3076 height = -height;
3077 src_y = src_y + (height - 1) * src_stride_y;
3078 src_stride_y = -src_stride_y;
3079 }
3080 // Coalesce rows.
3081 if (src_stride_y == width && dst_stride_argb == width * 4) {
3082 width *= height;
3083 height = 1;
3084 src_stride_y = dst_stride_argb = 0;
3085 }
3086 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
3087 if (TestCpuFlag(kCpuHasSSE2)) {
3088 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
3089 if (IS_ALIGNED(width, 8)) {
3090 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
3091 }
3092 }
3093 #endif
3094 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
3095 if (TestCpuFlag(kCpuHasAVX2)) {
3096 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
3097 if (IS_ALIGNED(width, 16)) {
3098 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
3099 }
3100 }
3101 #endif
3102
3103 for (y = 0; y < height; ++y) {
3104 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
3105 src_y += src_stride_y;
3106 dst_argb += dst_stride_argb;
3107 }
3108 return 0;
3109 }
3110
3111 // TODO(fbarchard): Consider if width is even Y channel can be split
3112 // directly. A SplitUVRow_Odd function could copy the remaining chroma.
3113
3114 LIBYUV_API
// Convert packed YUY2 (byte order Y0 U Y1 V) to NV12 (planar Y plus an
// interleaved UV plane at half vertical resolution). Chroma rows are averaged
// in pairs to produce the 4:2:0 UV plane.
int YUY2ToNV12(const uint8* src_yuy2,
               int src_stride_yuy2,
               uint8* dst_y,
               int dst_stride_y,
               uint8* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  // Splits interleaved bytes: even-indexed bytes go to the second argument,
  // odd-indexed bytes to the third. In YUY2 the even bytes are luma and the
  // odd bytes are chroma, so this separates Y from UV in one pass.
  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) = SplitUVRow_C;
  // Blends two rows src_stride apart; source_y_fraction of 128 is the
  // midpoint, i.e. a 50/50 average of the two rows.
  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
    src_stride_yuy2 = -src_stride_yuy2;
  }
  // Select the fastest SplitUVRow / InterpolateRow variants for this CPU.
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    int awidth = halfwidth * 2;
    // Scratch layout: [0, awidth) holds one row of Y, [awidth, 2*awidth) the
    // UV of the first source row, [2*awidth, 3*awidth) the UV of the second.
    // row of y and 2 rows of uv
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV. First row: Y into rows, UV into rows + awidth.
      SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
      memcpy(dst_y, rows, width);
      // Second row: Y reuses rows (already copied out), UV goes to
      // rows + awidth * 2 so both UV rows are available for averaging.
      SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two adjacent UV rows into one NV12 UV row.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_yuy2 += src_stride_yuy2 * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Odd trailing row: no partner row to average, so its UV is written
      // directly to the destination.
      // Split Y from UV.
      SplitUVRow(src_yuy2, rows, dst_uv, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
3221
3222 LIBYUV_API
// Convert packed UYVY (byte order U Y0 V Y1) to NV12 (planar Y plus an
// interleaved UV plane at half vertical resolution). Identical structure to
// YUY2ToNV12 except the even/odd byte roles are swapped: in UYVY the even
// bytes are chroma and the odd bytes are luma.
int UYVYToNV12(const uint8* src_uyvy,
               int src_stride_uyvy,
               uint8* dst_y,
               int dst_stride_y,
               uint8* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  // Splits interleaved bytes: even-indexed bytes to the second argument,
  // odd-indexed bytes to the third. For UYVY, even bytes = UV, odd bytes = Y.
  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                     int width) = SplitUVRow_C;
  // Blends two rows src_stride apart; source_y_fraction of 128 is the
  // midpoint, i.e. a 50/50 average of the two rows.
  void (*InterpolateRow)(uint8 * dst_ptr, const uint8* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
  // Select the fastest SplitUVRow / InterpolateRow variants for this CPU.
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif

  {
    int awidth = halfwidth * 2;
    // Scratch layout: [0, awidth) holds one row of Y, [awidth, 2*awidth) the
    // UV of the first source row, [2*awidth, 3*awidth) the UV of the second.
    // row of y and 2 rows of uv
    align_buffer_64(rows, awidth * 3);

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV. First row: UV into rows + awidth, Y into rows.
      // (Destination order is swapped relative to YUY2 because UYVY stores
      // chroma in the even bytes.)
      SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
      memcpy(dst_y, rows, width);
      // Second row: UV into rows + awidth * 2; Y reuses rows, which was
      // already copied to dst_y above.
      SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      // Average the two adjacent UV rows into one NV12 UV row.
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_uyvy += src_stride_uyvy * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Odd trailing row: no partner row to average; UV goes straight to the
      // destination plane.
      // Split Y from UV.
      SplitUVRow(src_uyvy, dst_uv, rows, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
3329
3330 #ifdef __cplusplus
3331 } // extern "C"
3332 } // namespace libyuv
3333 #endif
3334