/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/planar_functions.h"

#include <assert.h>
#include <string.h>  // for memset()

#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/row.h"
#include "libyuv/scale_row.h"  // for ScaleRowDown2

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Copy a plane of data
LIBYUV_API
void CopyPlane(const uint8_t* src_y,
               int src_stride_y,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  int y;
  void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
  // Nothing to do.
  if (src_y == dst_y && src_stride_y == dst_stride_y) {
    return;
  }

#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
  }
#endif
#if defined(HAS_COPYROW_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
  }
#endif

  // Copy plane
  for (y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
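
// Example (illustrative sketch, not part of the library; buffer names and
// sizes are hypothetical). Copy a 640x480 Y plane; passing a negative height
// writes the destination bottom-up, flipping the image vertically:
//
//   uint8_t src[640 * 480];
//   uint8_t dst[640 * 480];
//   CopyPlane(src, 640, dst, 640, 640, 480);   // straight copy
//   CopyPlane(src, 640, dst, 640, 640, -480);  // vertically inverted copy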

LIBYUV_API
void CopyPlane_16(const uint16_t* src_y,
                  int src_stride_y,
                  uint16_t* dst_y,
                  int dst_stride_y,
                  int width,
                  int height) {
  CopyPlane((const uint8_t*)src_y, src_stride_y * 2, (uint8_t*)dst_y,
            dst_stride_y * 2, width * 2, height);
}

// Convert a plane of 16 bit data to 8 bit
LIBYUV_API
void Convert16To8Plane(const uint16_t* src_y,
                       int src_stride_y,
                       uint8_t* dst_y,
                       int dst_stride_y,
                       int scale,  // 16384 for 10 bits
                       int width,
                       int height) {
  int y;
  void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
                          int width) = Convert16To8Row_C;

  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_CONVERT16TO8ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    Convert16To8Row = Convert16To8Row_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      Convert16To8Row = Convert16To8Row_NEON;
    }
  }
#endif
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    Convert16To8Row = Convert16To8Row_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      Convert16To8Row = Convert16To8Row_SSSE3;
    }
  }
#endif
#if defined(HAS_CONVERT16TO8ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    Convert16To8Row = Convert16To8Row_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      Convert16To8Row = Convert16To8Row_AVX2;
    }
  }
#endif

  // Convert plane
  for (y = 0; y < height; ++y) {
    Convert16To8Row(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}

// Convert a plane of 8 bit data to 16 bit
LIBYUV_API
void Convert8To16Plane(const uint8_t* src_y,
                       int src_stride_y,
                       uint16_t* dst_y,
                       int dst_stride_y,
                       int scale,  // 1024 for 10 bits
                       int width,
                       int height) {
  int y;
  void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale,
                          int width) = Convert8To16Row_C;

  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_CONVERT8TO16ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    Convert8To16Row = Convert8To16Row_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      Convert8To16Row = Convert8To16Row_SSE2;
    }
  }
#endif
#if defined(HAS_CONVERT8TO16ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    Convert8To16Row = Convert8To16Row_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      Convert8To16Row = Convert8To16Row_AVX2;
    }
  }
#endif

  // Convert plane
  for (y = 0; y < height; ++y) {
    Convert8To16Row(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
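
// Example (illustrative; the scale semantics are an assumption based on the
// row kernels, which scale and shift right by 16): a 10 bit <-> 8 bit round
// trip.
//
//   // 16 bit -> 8 bit computes clamp255((src * scale) >> 16), so
//   // scale = 16384 maps the 10 bit maximum 1023 to 255.
//   Convert16To8Plane(src10, stride10, dst8, stride8, 16384, width, height);
//
//   // 8 bit -> 16 bit replicates the byte (x 0x0101) before scaling, so
//   // scale = 1024 maps 255 to the 10 bit maximum 1023.
//   Convert8To16Plane(src8, stride8, dst10, stride10, 1024, width, height);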

// Copy I422.
LIBYUV_API
int I422Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_u,
             int src_stride_u,
             const uint8_t* src_v,
             int src_stride_v,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_u,
             int dst_stride_u,
             uint8_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  int halfwidth = (width + 1) >> 1;

  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
  return 0;
}

// Copy I444.
LIBYUV_API
int I444Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_u,
             int src_stride_u,
             const uint8_t* src_v,
             int src_stride_v,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_u,
             int dst_stride_u,
             uint8_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
  return 0;
}

// Copy I210.
LIBYUV_API
int I210Copy(const uint16_t* src_y,
             int src_stride_y,
             const uint16_t* src_u,
             int src_stride_u,
             const uint16_t* src_v,
             int src_stride_v,
             uint16_t* dst_y,
             int dst_stride_y,
             uint16_t* dst_u,
             int dst_stride_u,
             uint16_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  int halfwidth = (width + 1) >> 1;

  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  // Copy UV planes.
  CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
  CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
  return 0;
}

// Copy I400.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
               int src_stride_y,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

// Convert I420 to I400.
LIBYUV_API
int I420ToI400(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  (void)src_u;
  (void)src_stride_u;
  (void)src_v;
  (void)src_stride_v;
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }

  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

// Copy NV12. Supports inverting.
int NV12Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_uv,
             int src_stride_uv,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_uv,
             int dst_stride_uv,
             int width,
             int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;

  if (!src_y || !dst_y || !src_uv || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth * 2,
            halfheight);
  return 0;
}

// Copy NV21. Supports inverting.
int NV21Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_vu,
             int src_stride_vu,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_vu,
             int dst_stride_vu,
             int width,
             int height) {
  return NV12Copy(src_y, src_stride_y, src_vu, src_stride_vu, dst_y,
                  dst_stride_y, dst_vu, dst_stride_vu, width, height);
}

// Support function for NV12 etc UV channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
void SplitUVPlane(const uint8_t* src_uv,
                  int src_stride_uv,
                  uint8_t* dst_u,
                  int dst_stride_u,
                  uint8_t* dst_v,
                  int dst_stride_v,
                  int width,
                  int height) {
  int y;
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
  // Coalesce rows.
  if (src_stride_uv == width * 2 && dst_stride_u == width &&
      dst_stride_v == width) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    SplitUVRow = SplitUVRow_Any_LSX;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_LSX;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Split a row of UV into a row of U and a row of V.
    SplitUVRow(src_uv, dst_u, dst_v, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += src_stride_uv;
  }
}

LIBYUV_API
void MergeUVPlane(const uint8_t* src_u,
                  int src_stride_u,
                  const uint8_t* src_v,
                  int src_stride_v,
                  uint8_t* dst_uv,
                  int dst_stride_uv,
                  int width,
                  int height) {
  int y;
  void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
                     uint8_t* dst_uv, int width) = MergeUVRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_uv = dst_uv + (height - 1) * dst_stride_uv;
    dst_stride_uv = -dst_stride_uv;
  }
  // Coalesce rows.
  if (src_stride_u == width && src_stride_v == width &&
      dst_stride_uv == width * 2) {
    width *= height;
    height = 1;
    src_stride_u = src_stride_v = dst_stride_uv = 0;
  }
#if defined(HAS_MERGEUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeUVRow = MergeUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeUVRow = MergeUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      MergeUVRow = MergeUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeUVRow = MergeUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    MergeUVRow = MergeUVRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_MSA;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    MergeUVRow = MergeUVRow_Any_LSX;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_LSX;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge a row of U and V into a row of UV.
    MergeUVRow(src_u, src_v, dst_uv, width);
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uv += dst_stride_uv;
  }
}
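
// Example (illustrative; pointers and strides are hypothetical): convert the
// chroma of I420 to NV12 and back. For a width x height image the chroma
// planes are ((width + 1) / 2) x ((height + 1) / 2):
//
//   int halfwidth = (width + 1) / 2;
//   int halfheight = (height + 1) / 2;
//   // Interleave I420 U and V into NV12 UV.
//   MergeUVPlane(src_u, halfwidth, src_v, halfwidth,
//                dst_uv, halfwidth * 2, halfwidth, halfheight);
//   // Deinterleave NV12 UV back into separate U and V planes.
//   SplitUVPlane(dst_uv, halfwidth * 2, dst_u, halfwidth,
//                dst_v, halfwidth, halfwidth, halfheight);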

// Support function for P010 etc UV channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
void SplitUVPlane_16(const uint16_t* src_uv,
                     int src_stride_uv,
                     uint16_t* dst_u,
                     int dst_stride_u,
                     uint16_t* dst_v,
                     int dst_stride_v,
                     int width,
                     int height,
                     int depth) {
  int y;
  void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u,
                        uint16_t* dst_v, int depth, int width) =
      SplitUVRow_16_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
  // Coalesce rows.
  if (src_stride_uv == width * 2 && dst_stride_u == width &&
      dst_stride_v == width) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_SPLITUVROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow_16 = SplitUVRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow_16 = SplitUVRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow_16 = SplitUVRow_16_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      SplitUVRow_16 = SplitUVRow_16_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Split a row of UV into a row of U and a row of V.
    SplitUVRow_16(src_uv, dst_u, dst_v, depth, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += src_stride_uv;
  }
}

LIBYUV_API
void MergeUVPlane_16(const uint16_t* src_u,
                     int src_stride_u,
                     const uint16_t* src_v,
                     int src_stride_v,
                     uint16_t* dst_uv,
                     int dst_stride_uv,
                     int width,
                     int height,
                     int depth) {
  int y;
  void (*MergeUVRow_16)(const uint16_t* src_u, const uint16_t* src_v,
                        uint16_t* dst_uv, int depth, int width) =
      MergeUVRow_16_C;
  assert(depth >= 8);
  assert(depth <= 16);
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_uv = dst_uv + (height - 1) * dst_stride_uv;
    dst_stride_uv = -dst_stride_uv;
  }
  // Coalesce rows.
  if (src_stride_u == width && src_stride_v == width &&
      dst_stride_uv == width * 2) {
    width *= height;
    height = 1;
    src_stride_u = src_stride_v = dst_stride_uv = 0;
  }
#if defined(HAS_MERGEUVROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeUVRow_16 = MergeUVRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow_16 = MergeUVRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeUVRow_16 = MergeUVRow_16_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      MergeUVRow_16 = MergeUVRow_16_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge a row of U and V into a row of UV.
    MergeUVRow_16(src_u, src_v, dst_uv, depth, width);
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uv += dst_stride_uv;
  }
}

// Convert plane from lsb to msb
LIBYUV_API
void ConvertToMSBPlane_16(const uint16_t* src_y,
                          int src_stride_y,
                          uint16_t* dst_y,
                          int dst_stride_y,
                          int width,
                          int height,
                          int depth) {
  int y;
  int scale = 1 << (16 - depth);
  void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale,
                         int width) = MultiplyRow_16_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }

#if defined(HAS_MULTIPLYROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MultiplyRow_16 = MultiplyRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      MultiplyRow_16 = MultiplyRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_MULTIPLYROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MultiplyRow_16 = MultiplyRow_16_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MultiplyRow_16 = MultiplyRow_16_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MultiplyRow_16(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
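
// Example (illustrative): promote 10 bit LSB-aligned samples (I010 style,
// values 0..1023) to MSB-aligned samples (P010 style). With depth = 10 each
// sample is multiplied by 1 << (16 - 10) = 64, so 1023 becomes 65472;
// ConvertToLSBPlane_16 below performs the inverse:
//
//   ConvertToMSBPlane_16(src_lsb, src_stride, dst_msb, dst_stride,
//                        width, height, /*depth=*/10);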

// Convert plane from msb to lsb
LIBYUV_API
void ConvertToLSBPlane_16(const uint16_t* src_y,
                          int src_stride_y,
                          uint16_t* dst_y,
                          int dst_stride_y,
                          int width,
                          int height,
                          int depth) {
  int y;
  int scale = 1 << depth;
  void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale,
                    int width) = DivideRow_16_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }

#if defined(HAS_DIVIDEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    DivideRow = DivideRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      DivideRow = DivideRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_DIVIDEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DivideRow = DivideRow_16_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DivideRow = DivideRow_16_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    DivideRow(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}

// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
                 int src_stride_uv,
                 uint8_t* dst_vu,
                 int dst_stride_vu,
                 int width,
                 int height) {
  int y;
  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uv = src_uv + (height - 1) * src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }
  // Coalesce rows.
  if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_vu = 0;
  }

#if defined(HAS_SWAPUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SwapUVRow = SwapUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      SwapUVRow = SwapUVRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SWAPUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SwapUVRow = SwapUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SwapUVRow = SwapUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SwapUVRow = SwapUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SwapUVRow = SwapUVRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    SwapUVRow(src_uv, dst_vu, width);
    src_uv += src_stride_uv;
    dst_vu += dst_stride_vu;
  }
}

// Convert NV21 to NV12.
LIBYUV_API
int NV21ToNV12(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_vu,
               int src_stride_vu,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;

  if (!src_vu || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_vu = src_vu + (halfheight - 1) * src_stride_vu;
    src_stride_vu = -src_stride_vu;
  }

  SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
              halfheight);
  return 0;
}
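
// Example (illustrative; strides assume an even-width image where the Y and
// VU planes are both `width` bytes per row). Swapping chroma order converts
// NV21 to NV12; running the same call on the result converts it back:
//
//   NV21ToNV12(src_y, width, src_vu, width,
//              dst_y, width, dst_uv, width, width, height);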

// Detile a plane of data.
// Tile width is assumed to be 16.
// tile_height is 16 or 32 for MM21.
// src_stride_y is bytes per row of source, ignoring tiling. e.g. 640
// TODO: More detile row functions.

LIBYUV_API
void DetilePlane(const uint8_t* src_y,
                 int src_stride_y,
                 uint8_t* dst_y,
                 int dst_stride_y,
                 int width,
                 int height,
                 int tile_height) {
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
                    int width) = DetileRow_C;
  assert(tile_height > 0);
  assert(src_stride_y > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

#if defined(HAS_DETILEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    DetileRow = DetileRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      DetileRow = DetileRow_SSE2;
    }
  }
#endif
#if defined(HAS_DETILEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileRow = DetileRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileRow = DetileRow_NEON;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileRow(src_y, src_tile_stride, dst_y, width);
    dst_y += dst_stride_y;
    src_y += 16;
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_y = src_y - src_tile_stride + src_stride_y * tile_height;
    }
  }
}
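
// Example (illustrative): detile a 640x480 MM21 luma plane using 16x32
// tiles. Each iteration reads 16 bytes across the current row of tiles;
// after tile_height output rows the source pointer hops to the next row of
// tiles (src_stride_y * tile_height bytes further on):
//
//   DetilePlane(src_y, 640, dst_y, 640, 640, 480, /*tile_height=*/32);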

LIBYUV_API
void DetileSplitUVPlane(const uint8_t* src_uv,
                        int src_stride_uv,
                        uint8_t* dst_u,
                        int dst_stride_u,
                        uint8_t* dst_v,
                        int dst_stride_v,
                        int width,
                        int height,
                        int tile_height) {
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileSplitUVRow)(const uint8_t* src, ptrdiff_t src_tile_stride,
                           uint8_t* dst_u, uint8_t* dst_v, int width) =
      DetileSplitUVRow_C;
  assert(tile_height > 0);
  assert(src_stride_uv > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_stride_u = -dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_v = -dst_stride_v;
  }

#if defined(HAS_DETILESPLITUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    DetileSplitUVRow = DetileSplitUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      DetileSplitUVRow = DetileSplitUVRow_SSSE3;
    }
  }
#endif
#if defined(HAS_DETILESPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileSplitUVRow = DetileSplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileSplitUVRow = DetileSplitUVRow_NEON;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileSplitUVRow(src_uv, src_tile_stride, dst_u, dst_v, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += 16;
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_uv = src_uv - src_tile_stride + src_stride_uv * tile_height;
    }
  }
}

// Support function for RGB24 etc interleaved RGB channels.
// Width and height are plane sizes.
LIBYUV_API
void SplitRGBPlane(const uint8_t* src_rgb,
                   int src_stride_rgb,
                   uint8_t* dst_r,
                   int dst_stride_r,
                   uint8_t* dst_g,
                   int dst_stride_g,
                   uint8_t* dst_b,
                   int dst_stride_b,
                   int width,
                   int height) {
  int y;
  void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                      uint8_t* dst_b, int width) = SplitRGBRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_r = dst_r + (height - 1) * dst_stride_r;
    dst_g = dst_g + (height - 1) * dst_stride_g;
    dst_b = dst_b + (height - 1) * dst_stride_b;
    dst_stride_r = -dst_stride_r;
    dst_stride_g = -dst_stride_g;
    dst_stride_b = -dst_stride_b;
  }
  // Coalesce rows.
  if (src_stride_rgb == width * 3 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width) {
    width *= height;
    height = 1;
    src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
  }
#if defined(HAS_SPLITRGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitRGBRow = SplitRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitRGBRow = SplitRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Split a row of RGB into rows of R, G and B.
    SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    src_rgb += src_stride_rgb;
  }
}

LIBYUV_API
void MergeRGBPlane(const uint8_t* src_r,
                   int src_stride_r,
                   const uint8_t* src_g,
                   int src_stride_g,
                   const uint8_t* src_b,
                   int src_stride_b,
                   uint8_t* dst_rgb,
                   int dst_stride_rgb,
                   int width,
                   int height) {
  int y;
  void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                      const uint8_t* src_b, uint8_t* dst_rgb, int width) =
      MergeRGBRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
    dst_stride_rgb = -dst_stride_rgb;
  }
  // Coalesce rows.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_rgb == width * 3) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0;
  }
#if defined(HAS_MERGERGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    MergeRGBRow = MergeRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      MergeRGBRow = MergeRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_MERGERGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeRGBRow = MergeRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeRGBRow = MergeRGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge rows of R, G and B into a row of RGB.
    MergeRGBRow(src_r, src_g, src_b, dst_rgb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_rgb += dst_stride_rgb;
  }
}

LIBYUV_NOINLINE
void SplitARGBPlaneAlpha(const uint8_t* src_argb,
                         int src_stride_argb,
                         uint8_t* dst_r,
                         int dst_stride_r,
                         uint8_t* dst_g,
                         int dst_stride_g,
                         uint8_t* dst_b,
                         int dst_stride_b,
                         uint8_t* dst_a,
                         int dst_stride_a,
                         int width,
                         int height) {
  int y;
  void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                       uint8_t* dst_b, uint8_t* dst_a, int width) =
      SplitARGBRow_C;

  assert(height > 0);

  if (src_stride_argb == width * 4 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
        dst_stride_a = 0;
  }

#if defined(HAS_SPLITARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitARGBRow = SplitARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      SplitARGBRow = SplitARGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitARGBRow = SplitARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      SplitARGBRow = SplitARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitARGBRow = SplitARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitARGBRow = SplitARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitARGBRow = SplitARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitARGBRow = SplitARGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    dst_a += dst_stride_a;
    src_argb += src_stride_argb;
  }
}

LIBYUV_NOINLINE
void SplitARGBPlaneOpaque(const uint8_t* src_argb,
                          int src_stride_argb,
                          uint8_t* dst_r,
                          int dst_stride_r,
                          uint8_t* dst_g,
                          int dst_stride_g,
                          uint8_t* dst_b,
                          int dst_stride_b,
                          int width,
                          int height) {
  int y;
  void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                       uint8_t* dst_b, int width) = SplitXRGBRow_C;
  assert(height > 0);

  if (src_stride_argb == width * 4 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
  }

#if defined(HAS_SPLITXRGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitXRGBRow = SplitXRGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      SplitXRGBRow = SplitXRGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitXRGBRow = SplitXRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      SplitXRGBRow = SplitXRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitXRGBRow = SplitXRGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitXRGBRow = SplitXRGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitXRGBRow = SplitXRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitXRGBRow = SplitXRGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    src_argb += src_stride_argb;
  }
}

LIBYUV_API
void SplitARGBPlane(const uint8_t* src_argb,
                    int src_stride_argb,
                    uint8_t* dst_r,
                    int dst_stride_r,
                    uint8_t* dst_g,
                    int dst_stride_g,
                    uint8_t* dst_b,
                    int dst_stride_b,
                    uint8_t* dst_a,
                    int dst_stride_a,
                    int width,
                    int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_r = dst_r + (height - 1) * dst_stride_r;
    dst_g = dst_g + (height - 1) * dst_stride_g;
    dst_b = dst_b + (height - 1) * dst_stride_b;
    dst_a = dst_a + (height - 1) * dst_stride_a;
    dst_stride_r = -dst_stride_r;
    dst_stride_g = -dst_stride_g;
    dst_stride_b = -dst_stride_b;
    dst_stride_a = -dst_stride_a;
  }

  if (dst_a == NULL) {
    SplitARGBPlaneOpaque(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
                         dst_stride_g, dst_b, dst_stride_b, width, height);
  } else {
    SplitARGBPlaneAlpha(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
                        dst_stride_g, dst_b, dst_stride_b, dst_a, dst_stride_a,
                        width, height);
  }
}
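
// Example (illustrative): split interleaved ARGB into planar channels.
// Passing dst_a = NULL selects the opaque path, which skips the alpha bytes:
//
//   SplitARGBPlane(src_argb, width * 4, dst_r, width, dst_g, width,
//                  dst_b, width, /*dst_a=*/NULL, 0, width, height);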

LIBYUV_NOINLINE
void MergeARGBPlaneAlpha(const uint8_t* src_r,
                         int src_stride_r,
                         const uint8_t* src_g,
                         int src_stride_g,
                         const uint8_t* src_b,
                         int src_stride_b,
                         const uint8_t* src_a,
                         int src_stride_a,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height) {
  int y;
  void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                       const uint8_t* src_b, const uint8_t* src_a,
                       uint8_t* dst_argb, int width) = MergeARGBRow_C;

  assert(height > 0);

  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      src_stride_a == width && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = src_stride_a =
        dst_stride_argb = 0;
  }
#if defined(HAS_MERGEARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeARGBRow = MergeARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      MergeARGBRow = MergeARGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeARGBRow = MergeARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeARGBRow = MergeARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeARGBRow = MergeARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeARGBRow = MergeARGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    src_a += src_stride_a;
    dst_argb += dst_stride_argb;
  }
}

LIBYUV_NOINLINE
void MergeARGBPlaneOpaque(const uint8_t* src_r,
                          int src_stride_r,
                          const uint8_t* src_g,
                          int src_stride_g,
                          const uint8_t* src_b,
                          int src_stride_b,
                          uint8_t* dst_argb,
                          int dst_stride_argb,
                          int width,
                          int height) {
  int y;
  void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                       const uint8_t* src_b, uint8_t* dst_argb, int width) =
      MergeXRGBRow_C;

  assert(height > 0);

  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
  }
#if defined(HAS_MERGEXRGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeXRGBRow = MergeXRGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      MergeXRGBRow = MergeXRGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEXRGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXRGBRow = MergeXRGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeXRGBRow = MergeXRGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEXRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeXRGBRow = MergeXRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeXRGBRow = MergeXRGBRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_argb += dst_stride_argb;
  }
}

LIBYUV_API
void MergeARGBPlane(const uint8_t* src_r,
                    int src_stride_r,
                    const uint8_t* src_g,
                    int src_stride_g,
                    const uint8_t* src_b,
                    int src_stride_b,
                    const uint8_t* src_a,
                    int src_stride_a,
                    uint8_t* dst_argb,
                    int dst_stride_argb,
                    int width,
                    int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  if (src_a == NULL) {
    MergeARGBPlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
                         src_stride_b, dst_argb, dst_stride_argb, width,
                         height);
  } else {
    MergeARGBPlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
                        src_stride_b, src_a, src_stride_a, dst_argb,
                        dst_stride_argb, width, height);
  }
}

// TODO(yuan): Support 2 bit alpha channel.
LIBYUV_API
void MergeXR30Plane(const uint16_t* src_r,
                    int src_stride_r,
                    const uint16_t* src_g,
                    int src_stride_g,
                    const uint16_t* src_b,
                    int src_stride_b,
                    uint8_t* dst_ar30,
                    int dst_stride_ar30,
                    int width,
                    int height,
                    int depth) {
  int y;
  void (*MergeXR30Row)(const uint16_t* src_r, const uint16_t* src_g,
                       const uint16_t* src_b, uint8_t* dst_ar30, int depth,
                       int width) = MergeXR30Row_C;

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
  // Coalesce rows.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_ar30 == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_ar30 = 0;
  }
#if defined(HAS_MERGEXR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXR30Row = MergeXR30Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeXR30Row = MergeXR30Row_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEXR30ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (depth == 10) {
      MergeXR30Row = MergeXR30Row_10_Any_NEON;
      if (IS_ALIGNED(width, 8)) {
        MergeXR30Row = MergeXR30Row_10_NEON;
      }
    } else {
      MergeXR30Row = MergeXR30Row_Any_NEON;
      if (IS_ALIGNED(width, 8)) {
        MergeXR30Row = MergeXR30Row_NEON;
      }
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeXR30Row(src_r, src_g, src_b, dst_ar30, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_ar30 += dst_stride_ar30;
  }
}

LIBYUV_NOINLINE
static void MergeAR64PlaneAlpha(const uint16_t* src_r,
                                int src_stride_r,
                                const uint16_t* src_g,
                                int src_stride_g,
                                const uint16_t* src_b,
                                int src_stride_b,
                                const uint16_t* src_a,
                                int src_stride_a,
                                uint16_t* dst_ar64,
                                int dst_stride_ar64,
                                int width,
                                int height,
                                int depth) {
  int y;
  void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g,
                       const uint16_t* src_b, const uint16_t* src_a,
                       uint16_t* dst_argb, int depth, int width) =
      MergeAR64Row_C;

  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      src_stride_a == width && dst_stride_ar64 == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = src_stride_a =
        dst_stride_ar64 = 0;
  }
#if defined(HAS_MERGEAR64ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeAR64Row = MergeAR64Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeAR64Row = MergeAR64Row_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEAR64ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeAR64Row = MergeAR64Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      MergeAR64Row = MergeAR64Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    src_a += src_stride_a;
    dst_ar64 += dst_stride_ar64;
  }
}

LIBYUV_NOINLINE
static void MergeAR64PlaneOpaque(const uint16_t* src_r,
                                 int src_stride_r,
                                 const uint16_t* src_g,
                                 int src_stride_g,
                                 const uint16_t* src_b,
                                 int src_stride_b,
                                 uint16_t* dst_ar64,
                                 int dst_stride_ar64,
                                 int width,
                                 int height,
                                 int depth) {
  int y;
  void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g,
                       const uint16_t* src_b, uint16_t* dst_argb, int depth,
                       int width) = MergeXR64Row_C;

  // Coalesce rows.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_ar64 == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0;
  }
#if defined(HAS_MERGEXR64ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXR64Row = MergeXR64Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeXR64Row = MergeXR64Row_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEXR64ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeXR64Row = MergeXR64Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      MergeXR64Row = MergeXR64Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_ar64 += dst_stride_ar64;
  }
}

LIBYUV_API
void MergeAR64Plane(const uint16_t* src_r,
                    int src_stride_r,
                    const uint16_t* src_g,
                    int src_stride_g,
                    const uint16_t* src_b,
                    int src_stride_b,
                    const uint16_t* src_a,
                    int src_stride_a,
                    uint16_t* dst_ar64,
                    int dst_stride_ar64,
                    int width,
                    int height,
                    int depth) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_ar64 = dst_ar64 + (height - 1) * dst_stride_ar64;
    dst_stride_ar64 = -dst_stride_ar64;
  }

  if (src_a == NULL) {
    MergeAR64PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
                         src_stride_b, dst_ar64, dst_stride_ar64, width, height,
                         depth);
  } else {
    MergeAR64PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
                        src_stride_b, src_a, src_stride_a, dst_ar64,
                        dst_stride_ar64, width, height, depth);
  }
}
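
// Example (illustrative): pack 10 bit planar RGB into AR64 (4 channels of
// 16 bits; strides for 16 bit planes are in elements). With depth = 10 the
// row kernels scale each channel up to the full 16 bit range; src_a = NULL
// selects the opaque path, which is assumed to write fully opaque alpha:
//
//   MergeAR64Plane(src_r, width, src_g, width, src_b, width,
//                  /*src_a=*/NULL, 0, dst_ar64, width * 4, width, height,
//                  /*depth=*/10);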

LIBYUV_NOINLINE
static void MergeARGB16To8PlaneAlpha(const uint16_t* src_r,
                                     int src_stride_r,
                                     const uint16_t* src_g,
                                     int src_stride_g,
                                     const uint16_t* src_b,
                                     int src_stride_b,
                                     const uint16_t* src_a,
                                     int src_stride_a,
                                     uint8_t* dst_argb,
                                     int dst_stride_argb,
                                     int width,
                                     int height,
                                     int depth) {
  int y;
  void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
                            const uint16_t* src_b, const uint16_t* src_a,
                            uint8_t* dst_argb, int depth, int width) =
      MergeARGB16To8Row_C;

  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      src_stride_a == width && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = src_stride_a =
        dst_stride_argb = 0;
  }
#if defined(HAS_MERGEARGB16TO8ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeARGB16To8Row = MergeARGB16To8Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeARGB16To8Row = MergeARGB16To8Row_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEARGB16TO8ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeARGB16To8Row = MergeARGB16To8Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      MergeARGB16To8Row = MergeARGB16To8Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    src_a += src_stride_a;
    dst_argb += dst_stride_argb;
  }
}

LIBYUV_NOINLINE
static void MergeARGB16To8PlaneOpaque(const uint16_t* src_r,
                                      int src_stride_r,
                                      const uint16_t* src_g,
                                      int src_stride_g,
                                      const uint16_t* src_b,
                                      int src_stride_b,
                                      uint8_t* dst_argb,
                                      int dst_stride_argb,
                                      int width,
                                      int height,
                                      int depth) {
  int y;
  void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
                            const uint16_t* src_b, uint8_t* dst_argb, int depth,
                            int width) = MergeXRGB16To8Row_C;

  // Coalesce rows.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
  }
#if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXRGB16To8Row = MergeXRGB16To8Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeXRGB16To8Row = MergeXRGB16To8Row_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEXRGB16TO8ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeXRGB16To8Row = MergeXRGB16To8Row_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      MergeXRGB16To8Row = MergeXRGB16To8Row_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_argb += dst_stride_argb;
  }
}

LIBYUV_API
void MergeARGB16To8Plane(const uint16_t* src_r,
                         int src_stride_r,
                         const uint16_t* src_g,
                         int src_stride_g,
                         const uint16_t* src_b,
                         int src_stride_b,
                         const uint16_t* src_a,
                         int src_stride_a,
                         uint8_t* dst_argb,
                         int dst_stride_argb,
                         int width,
                         int height,
                         int depth) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  if (src_a == NULL) {
    MergeARGB16To8PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
                              src_stride_b, dst_argb, dst_stride_argb, width,
                              height, depth);
  } else {
    MergeARGB16To8PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
                             src_stride_b, src_a, src_stride_a, dst_argb,
                             dst_stride_argb, width, height, depth);
  }
}
1816
1817 // Convert YUY2 to I422.
1818 LIBYUV_API
YUY2ToI422(const uint8_t * src_yuy2,int src_stride_yuy2,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int width,int height)1819 int YUY2ToI422(const uint8_t* src_yuy2,
1820 int src_stride_yuy2,
1821 uint8_t* dst_y,
1822 int dst_stride_y,
1823 uint8_t* dst_u,
1824 int dst_stride_u,
1825 uint8_t* dst_v,
1826 int dst_stride_v,
1827 int width,
1828 int height) {
1829 int y;
1830 void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
1831 uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
1832 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
1833 YUY2ToYRow_C;
1834 if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
1835 return -1;
1836 }
1837 // Negative height means invert the image.
1838 if (height < 0) {
1839 height = -height;
1840 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
1841 src_stride_yuy2 = -src_stride_yuy2;
1842 }
1843 // Coalesce rows.
1844 if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
1845 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
1846 width * height <= 32768) {
1847 width *= height;
1848 height = 1;
1849 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
1850 }
1851 #if defined(HAS_YUY2TOYROW_SSE2)
1852 if (TestCpuFlag(kCpuHasSSE2)) {
1853 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
1854 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
1855 if (IS_ALIGNED(width, 16)) {
1856 YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
1857 YUY2ToYRow = YUY2ToYRow_SSE2;
1858 }
1859 }
1860 #endif
1861 #if defined(HAS_YUY2TOYROW_AVX2)
1862 if (TestCpuFlag(kCpuHasAVX2)) {
1863 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
1864 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
1865 if (IS_ALIGNED(width, 32)) {
1866 YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
1867 YUY2ToYRow = YUY2ToYRow_AVX2;
1868 }
1869 }
1870 #endif
1871 #if defined(HAS_YUY2TOYROW_NEON)
1872 if (TestCpuFlag(kCpuHasNEON)) {
1873 YUY2ToYRow = YUY2ToYRow_Any_NEON;
1874 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
1875 if (IS_ALIGNED(width, 16)) {
1876 YUY2ToYRow = YUY2ToYRow_NEON;
1877 YUY2ToUV422Row = YUY2ToUV422Row_NEON;
1878 }
1879 }
1880 #endif
1881 #if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
1882 if (TestCpuFlag(kCpuHasMSA)) {
1883 YUY2ToYRow = YUY2ToYRow_Any_MSA;
1884 YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
1885 if (IS_ALIGNED(width, 32)) {
1886 YUY2ToYRow = YUY2ToYRow_MSA;
1887 YUY2ToUV422Row = YUY2ToUV422Row_MSA;
1888 }
1889 }
1890 #endif
1891 #if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
1892 if (TestCpuFlag(kCpuHasLASX)) {
1893 YUY2ToYRow = YUY2ToYRow_Any_LASX;
1894 YUY2ToUV422Row = YUY2ToUV422Row_Any_LASX;
1895 if (IS_ALIGNED(width, 32)) {
1896 YUY2ToYRow = YUY2ToYRow_LASX;
1897 YUY2ToUV422Row = YUY2ToUV422Row_LASX;
1898 }
1899 }
1900 #endif
1901
1902 for (y = 0; y < height; ++y) {
1903 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
1904 YUY2ToYRow(src_yuy2, dst_y, width);
1905 src_yuy2 += src_stride_yuy2;
1906 dst_y += dst_stride_y;
1907 dst_u += dst_stride_u;
1908 dst_v += dst_stride_v;
1909 }
1910 return 0;
1911 }
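// Example usage (illustrative; buffers and the 64x64 size are assumptions).
// YUY2 packs 2 pixels into 4 bytes, so the source stride is width * 2 and
// the U/V planes are half width:
//   YUY2ToI422(yuy2, 64 * 2, y_plane, 64, u_plane, 32, v_plane, 32, 64, 64);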
1912
1913 // Convert UYVY to I422.
1914 LIBYUV_API
1915 int UYVYToI422(const uint8_t* src_uyvy,
1916 int src_stride_uyvy,
1917 uint8_t* dst_y,
1918 int dst_stride_y,
1919 uint8_t* dst_u,
1920 int dst_stride_u,
1921 uint8_t* dst_v,
1922 int dst_stride_v,
1923 int width,
1924 int height) {
1925 int y;
1926 void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
1927 uint8_t* dst_v, int width) = UYVYToUV422Row_C;
1928 void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
1929 UYVYToYRow_C;
1930 if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
1931 return -1;
1932 }
1933 // Negative height means invert the image.
1934 if (height < 0) {
1935 height = -height;
1936 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
1937 src_stride_uyvy = -src_stride_uyvy;
1938 }
1939 // Coalesce rows.
1940 if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
1941 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
1942 width * height <= 32768) {
1943 width *= height;
1944 height = 1;
1945 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
1946 }
1947 #if defined(HAS_UYVYTOYROW_SSE2)
1948 if (TestCpuFlag(kCpuHasSSE2)) {
1949 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
1950 UYVYToYRow = UYVYToYRow_Any_SSE2;
1951 if (IS_ALIGNED(width, 16)) {
1952 UYVYToUV422Row = UYVYToUV422Row_SSE2;
1953 UYVYToYRow = UYVYToYRow_SSE2;
1954 }
1955 }
1956 #endif
1957 #if defined(HAS_UYVYTOYROW_AVX2)
1958 if (TestCpuFlag(kCpuHasAVX2)) {
1959 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
1960 UYVYToYRow = UYVYToYRow_Any_AVX2;
1961 if (IS_ALIGNED(width, 32)) {
1962 UYVYToUV422Row = UYVYToUV422Row_AVX2;
1963 UYVYToYRow = UYVYToYRow_AVX2;
1964 }
1965 }
1966 #endif
1967 #if defined(HAS_UYVYTOYROW_NEON)
1968 if (TestCpuFlag(kCpuHasNEON)) {
1969 UYVYToYRow = UYVYToYRow_Any_NEON;
1970 UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
1971 if (IS_ALIGNED(width, 16)) {
1972 UYVYToYRow = UYVYToYRow_NEON;
1973 UYVYToUV422Row = UYVYToUV422Row_NEON;
1974 }
1975 }
1976 #endif
1977 #if defined(HAS_UYVYTOYROW_MSA) && defined(HAS_UYVYTOUV422ROW_MSA)
1978 if (TestCpuFlag(kCpuHasMSA)) {
1979 UYVYToYRow = UYVYToYRow_Any_MSA;
1980 UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
1981 if (IS_ALIGNED(width, 32)) {
1982 UYVYToYRow = UYVYToYRow_MSA;
1983 UYVYToUV422Row = UYVYToUV422Row_MSA;
1984 }
1985 }
1986 #endif
1987 #if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX)
1988 if (TestCpuFlag(kCpuHasLASX)) {
1989 UYVYToYRow = UYVYToYRow_Any_LASX;
1990 UYVYToUV422Row = UYVYToUV422Row_Any_LASX;
1991 if (IS_ALIGNED(width, 32)) {
1992 UYVYToYRow = UYVYToYRow_LASX;
1993 UYVYToUV422Row = UYVYToUV422Row_LASX;
1994 }
1995 }
1996 #endif
1997
1998 for (y = 0; y < height; ++y) {
1999 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
2000 UYVYToYRow(src_uyvy, dst_y, width);
2001 src_uyvy += src_stride_uyvy;
2002 dst_y += dst_stride_y;
2003 dst_u += dst_stride_u;
2004 dst_v += dst_stride_v;
2005 }
2006 return 0;
2007 }
2008
2009 // Convert YUY2 to Y.
2010 LIBYUV_API
2011 int YUY2ToY(const uint8_t* src_yuy2,
2012 int src_stride_yuy2,
2013 uint8_t* dst_y,
2014 int dst_stride_y,
2015 int width,
2016 int height) {
2017 int y;
2018 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
2019 YUY2ToYRow_C;
2020 if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
2021 return -1;
2022 }
2023 // Negative height means invert the image.
2024 if (height < 0) {
2025 height = -height;
2026 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
2027 src_stride_yuy2 = -src_stride_yuy2;
2028 }
2029 // Coalesce rows.
2030 if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
2031 width *= height;
2032 height = 1;
2033 src_stride_yuy2 = dst_stride_y = 0;
2034 }
2035 #if defined(HAS_YUY2TOYROW_SSE2)
2036 if (TestCpuFlag(kCpuHasSSE2)) {
2037 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
2038 if (IS_ALIGNED(width, 16)) {
2039 YUY2ToYRow = YUY2ToYRow_SSE2;
2040 }
2041 }
2042 #endif
2043 #if defined(HAS_YUY2TOYROW_AVX2)
2044 if (TestCpuFlag(kCpuHasAVX2)) {
2045 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
2046 if (IS_ALIGNED(width, 32)) {
2047 YUY2ToYRow = YUY2ToYRow_AVX2;
2048 }
2049 }
2050 #endif
2051 #if defined(HAS_YUY2TOYROW_NEON)
2052 if (TestCpuFlag(kCpuHasNEON)) {
2053 YUY2ToYRow = YUY2ToYRow_Any_NEON;
2054 if (IS_ALIGNED(width, 16)) {
2055 YUY2ToYRow = YUY2ToYRow_NEON;
2056 }
2057 }
2058 #endif
2059 #if defined(HAS_YUY2TOYROW_MSA)
2060 if (TestCpuFlag(kCpuHasMSA)) {
2061 YUY2ToYRow = YUY2ToYRow_Any_MSA;
2062 if (IS_ALIGNED(width, 32)) {
2063 YUY2ToYRow = YUY2ToYRow_MSA;
2064 }
2065 }
2066 #endif
2067
2068 for (y = 0; y < height; ++y) {
2069 YUY2ToYRow(src_yuy2, dst_y, width);
2070 src_yuy2 += src_stride_yuy2;
2071 dst_y += dst_stride_y;
2072 }
2073 return 0;
2074 }
2075
2076 // Convert UYVY to Y.
2077 LIBYUV_API
2078 int UYVYToY(const uint8_t* src_uyvy,
2079 int src_stride_uyvy,
2080 uint8_t* dst_y,
2081 int dst_stride_y,
2082 int width,
2083 int height) {
2084 int y;
2085 void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
2086 UYVYToYRow_C;
2087 if (!src_uyvy || !dst_y || width <= 0 || height == 0) {
2088 return -1;
2089 }
2090 // Negative height means invert the image.
2091 if (height < 0) {
2092 height = -height;
2093 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
2094 src_stride_uyvy = -src_stride_uyvy;
2095 }
2096 // Coalesce rows.
2097 if (src_stride_uyvy == width * 2 && dst_stride_y == width) {
2098 width *= height;
2099 height = 1;
2100 src_stride_uyvy = dst_stride_y = 0;
2101 }
2102 #if defined(HAS_UYVYTOYROW_SSE2)
2103 if (TestCpuFlag(kCpuHasSSE2)) {
2104 UYVYToYRow = UYVYToYRow_Any_SSE2;
2105 if (IS_ALIGNED(width, 16)) {
2106 UYVYToYRow = UYVYToYRow_SSE2;
2107 }
2108 }
2109 #endif
2110 #if defined(HAS_UYVYTOYROW_AVX2)
2111 if (TestCpuFlag(kCpuHasAVX2)) {
2112 UYVYToYRow = UYVYToYRow_Any_AVX2;
2113 if (IS_ALIGNED(width, 32)) {
2114 UYVYToYRow = UYVYToYRow_AVX2;
2115 }
2116 }
2117 #endif
2118 #if defined(HAS_UYVYTOYROW_NEON)
2119 if (TestCpuFlag(kCpuHasNEON)) {
2120 UYVYToYRow = UYVYToYRow_Any_NEON;
2121 if (IS_ALIGNED(width, 16)) {
2122 UYVYToYRow = UYVYToYRow_NEON;
2123 }
2124 }
2125 #endif
2126 #if defined(HAS_UYVYTOYROW_MSA)
2127 if (TestCpuFlag(kCpuHasMSA)) {
2128 UYVYToYRow = UYVYToYRow_Any_MSA;
2129 if (IS_ALIGNED(width, 32)) {
2130 UYVYToYRow = UYVYToYRow_MSA;
2131 }
2132 }
2133 #endif
2134
2135 for (y = 0; y < height; ++y) {
2136 UYVYToYRow(src_uyvy, dst_y, width);
2137 src_uyvy += src_stride_uyvy;
2138 dst_y += dst_stride_y;
2139 }
2140 return 0;
2141 }
2142
2143 // Mirror a plane of data.
2144 // See also I400Mirror.
2145 LIBYUV_API
2146 void MirrorPlane(const uint8_t* src_y,
2147 int src_stride_y,
2148 uint8_t* dst_y,
2149 int dst_stride_y,
2150 int width,
2151 int height) {
2152 int y;
2153 void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
2154 // Negative height means invert the image.
2155 if (height < 0) {
2156 height = -height;
2157 src_y = src_y + (height - 1) * src_stride_y;
2158 src_stride_y = -src_stride_y;
2159 }
2160 #if defined(HAS_MIRRORROW_NEON)
2161 if (TestCpuFlag(kCpuHasNEON)) {
2162 MirrorRow = MirrorRow_Any_NEON;
2163 if (IS_ALIGNED(width, 32)) {
2164 MirrorRow = MirrorRow_NEON;
2165 }
2166 }
2167 #endif
2168 #if defined(HAS_MIRRORROW_SSSE3)
2169 if (TestCpuFlag(kCpuHasSSSE3)) {
2170 MirrorRow = MirrorRow_Any_SSSE3;
2171 if (IS_ALIGNED(width, 16)) {
2172 MirrorRow = MirrorRow_SSSE3;
2173 }
2174 }
2175 #endif
2176 #if defined(HAS_MIRRORROW_AVX2)
2177 if (TestCpuFlag(kCpuHasAVX2)) {
2178 MirrorRow = MirrorRow_Any_AVX2;
2179 if (IS_ALIGNED(width, 32)) {
2180 MirrorRow = MirrorRow_AVX2;
2181 }
2182 }
2183 #endif
2184 #if defined(HAS_MIRRORROW_MSA)
2185 if (TestCpuFlag(kCpuHasMSA)) {
2186 MirrorRow = MirrorRow_Any_MSA;
2187 if (IS_ALIGNED(width, 64)) {
2188 MirrorRow = MirrorRow_MSA;
2189 }
2190 }
2191 #endif
2192 #if defined(HAS_MIRRORROW_LASX)
2193 if (TestCpuFlag(kCpuHasLASX)) {
2194 MirrorRow = MirrorRow_Any_LASX;
2195 if (IS_ALIGNED(width, 64)) {
2196 MirrorRow = MirrorRow_LASX;
2197 }
2198 }
2199 #endif
2200
2201 // Mirror plane
2202 for (y = 0; y < height; ++y) {
2203 MirrorRow(src_y, dst_y, width);
2204 src_y += src_stride_y;
2205 dst_y += dst_stride_y;
2206 }
2207 }
2208
2209 // Mirror a plane of UV data.
2210 LIBYUV_API
2211 void MirrorUVPlane(const uint8_t* src_uv,
2212 int src_stride_uv,
2213 uint8_t* dst_uv,
2214 int dst_stride_uv,
2215 int width,
2216 int height) {
2217 int y;
2218 void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst, int width) =
2219 MirrorUVRow_C;
2220 // Negative height means invert the image.
2221 if (height < 0) {
2222 height = -height;
2223 src_uv = src_uv + (height - 1) * src_stride_uv;
2224 src_stride_uv = -src_stride_uv;
2225 }
2226 #if defined(HAS_MIRRORUVROW_NEON)
2227 if (TestCpuFlag(kCpuHasNEON)) {
2228 MirrorUVRow = MirrorUVRow_Any_NEON;
2229 if (IS_ALIGNED(width, 32)) {
2230 MirrorUVRow = MirrorUVRow_NEON;
2231 }
2232 }
2233 #endif
2234 #if defined(HAS_MIRRORUVROW_SSSE3)
2235 if (TestCpuFlag(kCpuHasSSSE3)) {
2236 MirrorUVRow = MirrorUVRow_Any_SSSE3;
2237 if (IS_ALIGNED(width, 8)) {
2238 MirrorUVRow = MirrorUVRow_SSSE3;
2239 }
2240 }
2241 #endif
2242 #if defined(HAS_MIRRORUVROW_AVX2)
2243 if (TestCpuFlag(kCpuHasAVX2)) {
2244 MirrorUVRow = MirrorUVRow_Any_AVX2;
2245 if (IS_ALIGNED(width, 16)) {
2246 MirrorUVRow = MirrorUVRow_AVX2;
2247 }
2248 }
2249 #endif
2250 #if defined(HAS_MIRRORUVROW_MSA)
2251 if (TestCpuFlag(kCpuHasMSA)) {
2252 MirrorUVRow = MirrorUVRow_Any_MSA;
2253 if (IS_ALIGNED(width, 8)) {
2254 MirrorUVRow = MirrorUVRow_MSA;
2255 }
2256 }
2257 #endif
2258 #if defined(HAS_MIRRORUVROW_LASX)
2259 if (TestCpuFlag(kCpuHasLASX)) {
2260 MirrorUVRow = MirrorUVRow_Any_LASX;
2261 if (IS_ALIGNED(width, 16)) {
2262 MirrorUVRow = MirrorUVRow_LASX;
2263 }
2264 }
2265 #endif
2266
2267 // MirrorUV plane
2268 for (y = 0; y < height; ++y) {
2269 MirrorUVRow(src_uv, dst_uv, width);
2270 src_uv += src_stride_uv;
2271 dst_uv += dst_stride_uv;
2272 }
2273 }
2274
2275 // Mirror I400 with optional flipping
2276 LIBYUV_API
2277 int I400Mirror(const uint8_t* src_y,
2278 int src_stride_y,
2279 uint8_t* dst_y,
2280 int dst_stride_y,
2281 int width,
2282 int height) {
2283 if (!src_y || !dst_y || width <= 0 || height == 0) {
2284 return -1;
2285 }
2286 // Negative height means invert the image.
2287 if (height < 0) {
2288 height = -height;
2289 src_y = src_y + (height - 1) * src_stride_y;
2290 src_stride_y = -src_stride_y;
2291 }
2292
2293 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2294 return 0;
2295 }
2296
2297 // Mirror I420 with optional flipping
2298 LIBYUV_API
2299 int I420Mirror(const uint8_t* src_y,
2300 int src_stride_y,
2301 const uint8_t* src_u,
2302 int src_stride_u,
2303 const uint8_t* src_v,
2304 int src_stride_v,
2305 uint8_t* dst_y,
2306 int dst_stride_y,
2307 uint8_t* dst_u,
2308 int dst_stride_u,
2309 uint8_t* dst_v,
2310 int dst_stride_v,
2311 int width,
2312 int height) {
2313 int halfwidth = (width + 1) >> 1;
2314 int halfheight = (height + 1) >> 1;
2315
2316 if (!src_y || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
2317 height == 0) {
2318 return -1;
2319 }
2320
2321 // Negative height means invert the image.
2322 if (height < 0) {
2323 height = -height;
2324 halfheight = (height + 1) >> 1;
2325 src_y = src_y + (height - 1) * src_stride_y;
2326 src_u = src_u + (halfheight - 1) * src_stride_u;
2327 src_v = src_v + (halfheight - 1) * src_stride_v;
2328 src_stride_y = -src_stride_y;
2329 src_stride_u = -src_stride_u;
2330 src_stride_v = -src_stride_v;
2331 }
2332
2333 if (dst_y) {
2334 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2335 }
2336 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
2337 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
2338 return 0;
2339 }
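// Example usage (illustrative; plane pointers and the 64x64 size are
// assumptions). Chroma planes are mirrored at half width and half height:
//   I420Mirror(src_y, 64, src_u, 32, src_v, 32,
//              dst_y, 64, dst_u, 32, dst_v, 32, 64, 64);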
2340
2341 // NV12 mirror.
2342 LIBYUV_API
2343 int NV12Mirror(const uint8_t* src_y,
2344 int src_stride_y,
2345 const uint8_t* src_uv,
2346 int src_stride_uv,
2347 uint8_t* dst_y,
2348 int dst_stride_y,
2349 uint8_t* dst_uv,
2350 int dst_stride_uv,
2351 int width,
2352 int height) {
2353 int halfwidth = (width + 1) >> 1;
2354 int halfheight = (height + 1) >> 1;
2355
2356 if (!src_y || !src_uv || !dst_uv || width <= 0 || height == 0) {
2357 return -1;
2358 }
2359
2360 // Negative height means invert the image.
2361 if (height < 0) {
2362 height = -height;
2363 halfheight = (height + 1) >> 1;
2364 src_y = src_y + (height - 1) * src_stride_y;
2365 src_uv = src_uv + (halfheight - 1) * src_stride_uv;
2366 src_stride_y = -src_stride_y;
2367 src_stride_uv = -src_stride_uv;
2368 }
2369
2370 if (dst_y) {
2371 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2372 }
2373 MirrorUVPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth,
2374 halfheight);
2375 return 0;
2376 }
2377
2378 // ARGB mirror.
2379 LIBYUV_API
2380 int ARGBMirror(const uint8_t* src_argb,
2381 int src_stride_argb,
2382 uint8_t* dst_argb,
2383 int dst_stride_argb,
2384 int width,
2385 int height) {
2386 int y;
2387 void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
2388 ARGBMirrorRow_C;
2389 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2390 return -1;
2391 }
2392 // Negative height means invert the image.
2393 if (height < 0) {
2394 height = -height;
2395 src_argb = src_argb + (height - 1) * src_stride_argb;
2396 src_stride_argb = -src_stride_argb;
2397 }
2398 #if defined(HAS_ARGBMIRRORROW_NEON)
2399 if (TestCpuFlag(kCpuHasNEON)) {
2400 ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
2401 if (IS_ALIGNED(width, 8)) {
2402 ARGBMirrorRow = ARGBMirrorRow_NEON;
2403 }
2404 }
2405 #endif
2406 #if defined(HAS_ARGBMIRRORROW_SSE2)
2407 if (TestCpuFlag(kCpuHasSSE2)) {
2408 ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
2409 if (IS_ALIGNED(width, 4)) {
2410 ARGBMirrorRow = ARGBMirrorRow_SSE2;
2411 }
2412 }
2413 #endif
2414 #if defined(HAS_ARGBMIRRORROW_AVX2)
2415 if (TestCpuFlag(kCpuHasAVX2)) {
2416 ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
2417 if (IS_ALIGNED(width, 8)) {
2418 ARGBMirrorRow = ARGBMirrorRow_AVX2;
2419 }
2420 }
2421 #endif
2422 #if defined(HAS_ARGBMIRRORROW_MSA)
2423 if (TestCpuFlag(kCpuHasMSA)) {
2424 ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
2425 if (IS_ALIGNED(width, 16)) {
2426 ARGBMirrorRow = ARGBMirrorRow_MSA;
2427 }
2428 }
2429 #endif
2430 #if defined(HAS_ARGBMIRRORROW_LASX)
2431 if (TestCpuFlag(kCpuHasLASX)) {
2432 ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
2433 if (IS_ALIGNED(width, 16)) {
2434 ARGBMirrorRow = ARGBMirrorRow_LASX;
2435 }
2436 }
2437 #endif
2438
2439 // Mirror plane
2440 for (y = 0; y < height; ++y) {
2441 ARGBMirrorRow(src_argb, dst_argb, width);
2442 src_argb += src_stride_argb;
2443 dst_argb += dst_stride_argb;
2444 }
2445 return 0;
2446 }
2447
2448 // RGB24 mirror.
2449 LIBYUV_API
2450 int RGB24Mirror(const uint8_t* src_rgb24,
2451 int src_stride_rgb24,
2452 uint8_t* dst_rgb24,
2453 int dst_stride_rgb24,
2454 int width,
2455 int height) {
2456 int y;
2457 void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
2458 RGB24MirrorRow_C;
2459 if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) {
2460 return -1;
2461 }
2462 // Negative height means invert the image.
2463 if (height < 0) {
2464 height = -height;
2465 src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
2466 src_stride_rgb24 = -src_stride_rgb24;
2467 }
2468 #if defined(HAS_RGB24MIRRORROW_NEON)
2469 if (TestCpuFlag(kCpuHasNEON)) {
2470 RGB24MirrorRow = RGB24MirrorRow_Any_NEON;
2471 if (IS_ALIGNED(width, 16)) {
2472 RGB24MirrorRow = RGB24MirrorRow_NEON;
2473 }
2474 }
2475 #endif
2476 #if defined(HAS_RGB24MIRRORROW_SSSE3)
2477 if (TestCpuFlag(kCpuHasSSSE3)) {
2478 RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3;
2479 if (IS_ALIGNED(width, 16)) {
2480 RGB24MirrorRow = RGB24MirrorRow_SSSE3;
2481 }
2482 }
2483 #endif
2484
2485 // Mirror plane
2486 for (y = 0; y < height; ++y) {
2487 RGB24MirrorRow(src_rgb24, dst_rgb24, width);
2488 src_rgb24 += src_stride_rgb24;
2489 dst_rgb24 += dst_stride_rgb24;
2490 }
2491 return 0;
2492 }
2493
2494 // Get a blender that is optimized for the CPU and pixel count.
2495 // As there are 6 blenders to choose from, the caller should try to use
2496 // the same blend function for all pixels if possible.
2497 LIBYUV_API
2498 ARGBBlendRow GetARGBBlend() {
2499 void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
2500 uint8_t* dst_argb, int width) = ARGBBlendRow_C;
2501 #if defined(HAS_ARGBBLENDROW_SSSE3)
2502 if (TestCpuFlag(kCpuHasSSSE3)) {
2503 ARGBBlendRow = ARGBBlendRow_SSSE3;
2504 return ARGBBlendRow;
2505 }
2506 #endif
2507 #if defined(HAS_ARGBBLENDROW_NEON)
2508 if (TestCpuFlag(kCpuHasNEON)) {
2509 ARGBBlendRow = ARGBBlendRow_NEON;
2510 }
2511 #endif
2512 #if defined(HAS_ARGBBLENDROW_MSA)
2513 if (TestCpuFlag(kCpuHasMSA)) {
2514 ARGBBlendRow = ARGBBlendRow_MSA;
2515 }
2516 #endif
2517 #if defined(HAS_ARGBBLENDROW_LSX)
2518 if (TestCpuFlag(kCpuHasLSX)) {
2519 ARGBBlendRow = ARGBBlendRow_LSX;
2520 }
2521 #endif
2522 return ARGBBlendRow;
2523 }
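// Example usage (illustrative sketch; the row pointers and width are
// assumptions). Fetch the blender once and reuse it for all rows, as advised
// above:
//   ARGBBlendRow blender = GetARGBBlend();
//   blender(fg_row, bg_row, dst_row, width);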
2524
2525 // Alpha Blend 2 ARGB images and store to destination.
2526 LIBYUV_API
2527 int ARGBBlend(const uint8_t* src_argb0,
2528 int src_stride_argb0,
2529 const uint8_t* src_argb1,
2530 int src_stride_argb1,
2531 uint8_t* dst_argb,
2532 int dst_stride_argb,
2533 int width,
2534 int height) {
2535 int y;
2536 void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
2537 uint8_t* dst_argb, int width) = GetARGBBlend();
2538 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2539 return -1;
2540 }
2541 // Negative height means invert the image.
2542 if (height < 0) {
2543 height = -height;
2544 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2545 dst_stride_argb = -dst_stride_argb;
2546 }
2547 // Coalesce rows.
2548 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2549 dst_stride_argb == width * 4) {
2550 width *= height;
2551 height = 1;
2552 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2553 }
2554
2555 for (y = 0; y < height; ++y) {
2556 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
2557 src_argb0 += src_stride_argb0;
2558 src_argb1 += src_stride_argb1;
2559 dst_argb += dst_stride_argb;
2560 }
2561 return 0;
2562 }
2563
2564 // Alpha Blend plane and store to destination.
2565 LIBYUV_API
2566 int BlendPlane(const uint8_t* src_y0,
2567 int src_stride_y0,
2568 const uint8_t* src_y1,
2569 int src_stride_y1,
2570 const uint8_t* alpha,
2571 int alpha_stride,
2572 uint8_t* dst_y,
2573 int dst_stride_y,
2574 int width,
2575 int height) {
2576 int y;
2577 void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
2578 const uint8_t* alpha, uint8_t* dst, int width) =
2579 BlendPlaneRow_C;
2580 if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
2581 return -1;
2582 }
2583 // Negative height means invert the image.
2584 if (height < 0) {
2585 height = -height;
2586 dst_y = dst_y + (height - 1) * dst_stride_y;
2587 dst_stride_y = -dst_stride_y;
2588 }
2589
2590 // Coalesce rows for Y plane.
2591 if (src_stride_y0 == width && src_stride_y1 == width &&
2592 alpha_stride == width && dst_stride_y == width) {
2593 width *= height;
2594 height = 1;
2595 src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
2596 }
2597
2598 #if defined(HAS_BLENDPLANEROW_SSSE3)
2599 if (TestCpuFlag(kCpuHasSSSE3)) {
2600 BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
2601 if (IS_ALIGNED(width, 8)) {
2602 BlendPlaneRow = BlendPlaneRow_SSSE3;
2603 }
2604 }
2605 #endif
2606 #if defined(HAS_BLENDPLANEROW_AVX2)
2607 if (TestCpuFlag(kCpuHasAVX2)) {
2608 BlendPlaneRow = BlendPlaneRow_Any_AVX2;
2609 if (IS_ALIGNED(width, 32)) {
2610 BlendPlaneRow = BlendPlaneRow_AVX2;
2611 }
2612 }
2613 #endif
2614
2615 for (y = 0; y < height; ++y) {
2616 BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
2617 src_y0 += src_stride_y0;
2618 src_y1 += src_stride_y1;
2619 alpha += alpha_stride;
2620 dst_y += dst_stride_y;
2621 }
2622 return 0;
2623 }
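// Example usage (illustrative; buffers and the 64x64 size are assumptions):
// blend two luma planes using a third plane as per-pixel alpha:
//   BlendPlane(fg_y, 64, bg_y, 64, alpha, 64, dst_y, 64, 64, 64);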
2624
2625 #define MAXTWIDTH 2048
2626 // Alpha Blend YUV images and store to destination.
2627 LIBYUV_API
2628 int I420Blend(const uint8_t* src_y0,
2629 int src_stride_y0,
2630 const uint8_t* src_u0,
2631 int src_stride_u0,
2632 const uint8_t* src_v0,
2633 int src_stride_v0,
2634 const uint8_t* src_y1,
2635 int src_stride_y1,
2636 const uint8_t* src_u1,
2637 int src_stride_u1,
2638 const uint8_t* src_v1,
2639 int src_stride_v1,
2640 const uint8_t* alpha,
2641 int alpha_stride,
2642 uint8_t* dst_y,
2643 int dst_stride_y,
2644 uint8_t* dst_u,
2645 int dst_stride_u,
2646 uint8_t* dst_v,
2647 int dst_stride_v,
2648 int width,
2649 int height) {
2650 int y;
2651 // Half width/height for UV.
2652 int halfwidth = (width + 1) >> 1;
2653 void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
2654 const uint8_t* alpha, uint8_t* dst, int width) =
2655 BlendPlaneRow_C;
2656 void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
2657 uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
2658
2659 if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
2660 !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
2661 return -1;
2662 }
2663
2664 // Negative height means invert the image.
2665 if (height < 0) {
2666 height = -height;
2667 dst_y = dst_y + (height - 1) * dst_stride_y;
2668 dst_stride_y = -dst_stride_y;
2669 }
2670
2671 // Blend Y plane.
2672 BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
2673 dst_y, dst_stride_y, width, height);
2674
2675 #if defined(HAS_BLENDPLANEROW_SSSE3)
2676 if (TestCpuFlag(kCpuHasSSSE3)) {
2677 BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
2678 if (IS_ALIGNED(halfwidth, 8)) {
2679 BlendPlaneRow = BlendPlaneRow_SSSE3;
2680 }
2681 }
2682 #endif
2683 #if defined(HAS_BLENDPLANEROW_AVX2)
2684 if (TestCpuFlag(kCpuHasAVX2)) {
2685 BlendPlaneRow = BlendPlaneRow_Any_AVX2;
2686 if (IS_ALIGNED(halfwidth, 32)) {
2687 BlendPlaneRow = BlendPlaneRow_AVX2;
2688 }
2689 }
2690 #endif
2691 if (!IS_ALIGNED(width, 2)) {
2692 ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
2693 }
2694 #if defined(HAS_SCALEROWDOWN2_NEON)
2695 if (TestCpuFlag(kCpuHasNEON)) {
2696 ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
2697 if (IS_ALIGNED(width, 2)) {
2698 ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
2699 if (IS_ALIGNED(halfwidth, 16)) {
2700 ScaleRowDown2 = ScaleRowDown2Box_NEON;
2701 }
2702 }
2703 }
2704 #endif
2705 #if defined(HAS_SCALEROWDOWN2_SSSE3)
2706 if (TestCpuFlag(kCpuHasSSSE3)) {
2707 ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
2708 if (IS_ALIGNED(width, 2)) {
2709 ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
2710 if (IS_ALIGNED(halfwidth, 16)) {
2711 ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
2712 }
2713 }
2714 }
2715 #endif
2716 #if defined(HAS_SCALEROWDOWN2_AVX2)
2717 if (TestCpuFlag(kCpuHasAVX2)) {
2718 ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
2719 if (IS_ALIGNED(width, 2)) {
2720 ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
2721 if (IS_ALIGNED(halfwidth, 32)) {
2722 ScaleRowDown2 = ScaleRowDown2Box_AVX2;
2723 }
2724 }
2725 }
2726 #endif
2727
2728 // Row buffer for intermediate alpha pixels.
2729 align_buffer_64(halfalpha, halfwidth);
2730 for (y = 0; y < height; y += 2) {
2731 // The last row of an odd-height image uses 1 row of alpha instead of 2.
2732 if (y == (height - 1)) {
2733 alpha_stride = 0;
2734 }
2735 // Subsample 2 rows of alpha to half width and half height.
2736 ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
2737 alpha += alpha_stride * 2;
2738 BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
2739 BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
2740 src_u0 += src_stride_u0;
2741 src_u1 += src_stride_u1;
2742 dst_u += dst_stride_u;
2743 src_v0 += src_stride_v0;
2744 src_v1 += src_stride_v1;
2745 dst_v += dst_stride_v;
2746 }
2747 free_aligned_buffer_64(halfalpha);
2748 return 0;
2749 }
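// A sketch of the chroma alpha subsampling above, assuming ScaleRowDown2Box's
// usual rounded 2x2 box filter (row0/row1 are two adjacent alpha rows):
//   halfalpha[x] = (row0[2 * x] + row0[2 * x + 1] +
//                   row1[2 * x] + row1[2 * x + 1] + 2) >> 2;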
2750
2751 // Multiply 2 ARGB images and store to destination.
2752 LIBYUV_API
2753 int ARGBMultiply(const uint8_t* src_argb0,
2754 int src_stride_argb0,
2755 const uint8_t* src_argb1,
2756 int src_stride_argb1,
2757 uint8_t* dst_argb,
2758 int dst_stride_argb,
2759 int width,
2760 int height) {
2761 int y;
2762 void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
2763 uint8_t* dst, int width) = ARGBMultiplyRow_C;
2764 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2765 return -1;
2766 }
2767 // Negative height means invert the image.
2768 if (height < 0) {
2769 height = -height;
2770 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2771 dst_stride_argb = -dst_stride_argb;
2772 }
2773 // Coalesce rows.
2774 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2775 dst_stride_argb == width * 4) {
2776 width *= height;
2777 height = 1;
2778 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2779 }
2780 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
2781 if (TestCpuFlag(kCpuHasSSE2)) {
2782 ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
2783 if (IS_ALIGNED(width, 4)) {
2784 ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
2785 }
2786 }
2787 #endif
2788 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
2789 if (TestCpuFlag(kCpuHasAVX2)) {
2790 ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
2791 if (IS_ALIGNED(width, 8)) {
2792 ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
2793 }
2794 }
2795 #endif
2796 #if defined(HAS_ARGBMULTIPLYROW_NEON)
2797 if (TestCpuFlag(kCpuHasNEON)) {
2798 ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
2799 if (IS_ALIGNED(width, 8)) {
2800 ARGBMultiplyRow = ARGBMultiplyRow_NEON;
2801 }
2802 }
2803 #endif
2804 #if defined(HAS_ARGBMULTIPLYROW_MSA)
2805 if (TestCpuFlag(kCpuHasMSA)) {
2806 ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
2807 if (IS_ALIGNED(width, 4)) {
2808 ARGBMultiplyRow = ARGBMultiplyRow_MSA;
2809 }
2810 }
2811 #endif
2812 #if defined(HAS_ARGBMULTIPLYROW_LASX)
2813 if (TestCpuFlag(kCpuHasLASX)) {
2814 ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX;
2815 if (IS_ALIGNED(width, 8)) {
2816 ARGBMultiplyRow = ARGBMultiplyRow_LASX;
2817 }
2818 }
2819 #endif
2820
2821 // Multiply plane
2822 for (y = 0; y < height; ++y) {
2823 ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
2824 src_argb0 += src_stride_argb0;
2825 src_argb1 += src_stride_argb1;
2826 dst_argb += dst_stride_argb;
2827 }
2828 return 0;
2829 }
2830
2831 // Add 2 ARGB images and store to destination.
2832 LIBYUV_API
2833 int ARGBAdd(const uint8_t* src_argb0,
2834 int src_stride_argb0,
2835 const uint8_t* src_argb1,
2836 int src_stride_argb1,
2837 uint8_t* dst_argb,
2838 int dst_stride_argb,
2839 int width,
2840 int height) {
2841 int y;
2842 void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
2843 int width) = ARGBAddRow_C;
2844 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2845 return -1;
2846 }
2847 // Negative height means invert the image.
2848 if (height < 0) {
2849 height = -height;
2850 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2851 dst_stride_argb = -dst_stride_argb;
2852 }
2853 // Coalesce rows.
2854 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2855 dst_stride_argb == width * 4) {
2856 width *= height;
2857 height = 1;
2858 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2859 }
2865 #if defined(HAS_ARGBADDROW_SSE2)
2866 if (TestCpuFlag(kCpuHasSSE2)) {
2867 ARGBAddRow = ARGBAddRow_Any_SSE2;
2868 if (IS_ALIGNED(width, 4)) {
2869 ARGBAddRow = ARGBAddRow_SSE2;
2870 }
2871 }
2872 #endif
2873 #if defined(HAS_ARGBADDROW_AVX2)
2874 if (TestCpuFlag(kCpuHasAVX2)) {
2875 ARGBAddRow = ARGBAddRow_Any_AVX2;
2876 if (IS_ALIGNED(width, 8)) {
2877 ARGBAddRow = ARGBAddRow_AVX2;
2878 }
2879 }
2880 #endif
2881 #if defined(HAS_ARGBADDROW_NEON)
2882 if (TestCpuFlag(kCpuHasNEON)) {
2883 ARGBAddRow = ARGBAddRow_Any_NEON;
2884 if (IS_ALIGNED(width, 8)) {
2885 ARGBAddRow = ARGBAddRow_NEON;
2886 }
2887 }
2888 #endif
2889 #if defined(HAS_ARGBADDROW_MSA)
2890 if (TestCpuFlag(kCpuHasMSA)) {
2891 ARGBAddRow = ARGBAddRow_Any_MSA;
2892 if (IS_ALIGNED(width, 8)) {
2893 ARGBAddRow = ARGBAddRow_MSA;
2894 }
2895 }
2896 #endif
2897 #if defined(HAS_ARGBADDROW_LASX)
2898 if (TestCpuFlag(kCpuHasLASX)) {
2899 ARGBAddRow = ARGBAddRow_Any_LASX;
2900 if (IS_ALIGNED(width, 8)) {
2901 ARGBAddRow = ARGBAddRow_LASX;
2902 }
2903 }
2904 #endif
2905
2906 // Add plane
2907 for (y = 0; y < height; ++y) {
2908 ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
2909 src_argb0 += src_stride_argb0;
2910 src_argb1 += src_stride_argb1;
2911 dst_argb += dst_stride_argb;
2912 }
2913 return 0;
2914 }
2915
2916 // Subtract 2 ARGB images and store to destination.
2917 LIBYUV_API
2918 int ARGBSubtract(const uint8_t* src_argb0,
2919 int src_stride_argb0,
2920 const uint8_t* src_argb1,
2921 int src_stride_argb1,
2922 uint8_t* dst_argb,
2923 int dst_stride_argb,
2924 int width,
2925 int height) {
2926 int y;
2927 void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
2928 uint8_t* dst, int width) = ARGBSubtractRow_C;
2929 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2930 return -1;
2931 }
2932 // Negative height means invert the image.
2933 if (height < 0) {
2934 height = -height;
2935 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2936 dst_stride_argb = -dst_stride_argb;
2937 }
2938 // Coalesce rows.
2939 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2940 dst_stride_argb == width * 4) {
2941 width *= height;
2942 height = 1;
2943 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2944 }
2945 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
2946 if (TestCpuFlag(kCpuHasSSE2)) {
2947 ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
2948 if (IS_ALIGNED(width, 4)) {
2949 ARGBSubtractRow = ARGBSubtractRow_SSE2;
2950 }
2951 }
2952 #endif
2953 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
2954 if (TestCpuFlag(kCpuHasAVX2)) {
2955 ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
2956 if (IS_ALIGNED(width, 8)) {
2957 ARGBSubtractRow = ARGBSubtractRow_AVX2;
2958 }
2959 }
2960 #endif
2961 #if defined(HAS_ARGBSUBTRACTROW_NEON)
2962 if (TestCpuFlag(kCpuHasNEON)) {
2963 ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
2964 if (IS_ALIGNED(width, 8)) {
2965 ARGBSubtractRow = ARGBSubtractRow_NEON;
2966 }
2967 }
2968 #endif
2969 #if defined(HAS_ARGBSUBTRACTROW_MSA)
2970 if (TestCpuFlag(kCpuHasMSA)) {
2971 ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
2972 if (IS_ALIGNED(width, 8)) {
2973 ARGBSubtractRow = ARGBSubtractRow_MSA;
2974 }
2975 }
2976 #endif
2977 #if defined(HAS_ARGBSUBTRACTROW_LASX)
2978 if (TestCpuFlag(kCpuHasLASX)) {
2979 ARGBSubtractRow = ARGBSubtractRow_Any_LASX;
2980 if (IS_ALIGNED(width, 8)) {
2981 ARGBSubtractRow = ARGBSubtractRow_LASX;
2982 }
2983 }
2984 #endif
2985
2986 // Subtract plane
2987 for (y = 0; y < height; ++y) {
2988 ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
2989 src_argb0 += src_stride_argb0;
2990 src_argb1 += src_stride_argb1;
2991 dst_argb += dst_stride_argb;
2992 }
2993 return 0;
2994 }
2995
2996 // Convert RAW to RGB24.
2997 LIBYUV_API
2998 int RAWToRGB24(const uint8_t* src_raw,
2999 int src_stride_raw,
3000 uint8_t* dst_rgb24,
3001 int dst_stride_rgb24,
3002 int width,
3003 int height) {
3004 int y;
3005 void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
3006 RAWToRGB24Row_C;
3007 if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
3008 return -1;
3009 }
3010 // Negative height means invert the image.
3011 if (height < 0) {
3012 height = -height;
3013 src_raw = src_raw + (height - 1) * src_stride_raw;
3014 src_stride_raw = -src_stride_raw;
3015 }
3016 // Coalesce rows.
3017 if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
3018 width *= height;
3019 height = 1;
3020 src_stride_raw = dst_stride_rgb24 = 0;
3021 }
3022 #if defined(HAS_RAWTORGB24ROW_SSSE3)
3023 if (TestCpuFlag(kCpuHasSSSE3)) {
3024 RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
3025 if (IS_ALIGNED(width, 8)) {
3026 RAWToRGB24Row = RAWToRGB24Row_SSSE3;
3027 }
3028 }
3029 #endif
3030 #if defined(HAS_RAWTORGB24ROW_NEON)
3031 if (TestCpuFlag(kCpuHasNEON)) {
3032 RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
3033 if (IS_ALIGNED(width, 8)) {
3034 RAWToRGB24Row = RAWToRGB24Row_NEON;
3035 }
3036 }
3037 #endif
3038 #if defined(HAS_RAWTORGB24ROW_MSA)
3039 if (TestCpuFlag(kCpuHasMSA)) {
3040 RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
3041 if (IS_ALIGNED(width, 16)) {
3042 RAWToRGB24Row = RAWToRGB24Row_MSA;
3043 }
3044 }
3045 #endif
3046 #if defined(HAS_RAWTORGB24ROW_LSX)
3047 if (TestCpuFlag(kCpuHasLSX)) {
3048 RAWToRGB24Row = RAWToRGB24Row_Any_LSX;
3049 if (IS_ALIGNED(width, 16)) {
3050 RAWToRGB24Row = RAWToRGB24Row_LSX;
3051 }
3052 }
3053 #endif
3054
3055 for (y = 0; y < height; ++y) {
3056 RAWToRGB24Row(src_raw, dst_rgb24, width);
3057 src_raw += src_stride_raw;
3058 dst_rgb24 += dst_stride_rgb24;
3059 }
3060 return 0;
3061 }
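// Example usage (illustrative; buffers and the 64x64 size are assumptions).
// RAW stores bytes as R,G,B and RGB24 as B,G,R, so this is a per-pixel byte
// swap at 3 bytes per pixel:
//   RAWToRGB24(raw, 64 * 3, rgb24, 64 * 3, 64, 64);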
3062
3063 LIBYUV_API
3064 void SetPlane(uint8_t* dst_y,
3065 int dst_stride_y,
3066 int width,
3067 int height,
3068 uint32_t value) {
3069 int y;
3070 void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C;
3071
3072 if (width <= 0 || height == 0) {
3073 return;
3074 }
3075 if (height < 0) {
3076 height = -height;
3077 dst_y = dst_y + (height - 1) * dst_stride_y;
3078 dst_stride_y = -dst_stride_y;
3079 }
3080 // Coalesce rows.
3081 if (dst_stride_y == width) {
3082 width *= height;
3083 height = 1;
3084 dst_stride_y = 0;
3085 }
3086 #if defined(HAS_SETROW_NEON)
3087 if (TestCpuFlag(kCpuHasNEON)) {
3088 SetRow = SetRow_Any_NEON;
3089 if (IS_ALIGNED(width, 16)) {
3090 SetRow = SetRow_NEON;
3091 }
3092 }
3093 #endif
3094 #if defined(HAS_SETROW_X86)
3095 if (TestCpuFlag(kCpuHasX86)) {
3096 SetRow = SetRow_Any_X86;
3097 if (IS_ALIGNED(width, 4)) {
3098 SetRow = SetRow_X86;
3099 }
3100 }
3101 #endif
3102 #if defined(HAS_SETROW_ERMS)
3103 if (TestCpuFlag(kCpuHasERMS)) {
3104 SetRow = SetRow_ERMS;
3105 }
3106 #endif
3107 #if defined(HAS_SETROW_MSA)
3108 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) {
3109 SetRow = SetRow_MSA;
3110 }
3111 #endif
3112 #if defined(HAS_SETROW_LSX)
3113 if (TestCpuFlag(kCpuHasLSX)) {
3114 SetRow = SetRow_Any_LSX;
3115 if (IS_ALIGNED(width, 16)) {
3116 SetRow = SetRow_LSX;
3117 }
3118 }
3119 #endif
3120
3121 // Set plane
3122 for (y = 0; y < height; ++y) {
3123 SetRow(dst_y, value, width);
3124 dst_y += dst_stride_y;
3125 }
3126 }
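// Example usage (illustrative; plane pointers, strides and sizes are
// assumptions): fill the chroma planes of a 64x64 I420 image with 128 to
// neutralize color:
//   SetPlane(u_plane, 32, 32, 32, 128);
//   SetPlane(v_plane, 32, 32, 32, 128);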
3127
3128 // Draw a rectangle into I420
3129 LIBYUV_API
3130 int I420Rect(uint8_t* dst_y,
3131 int dst_stride_y,
3132 uint8_t* dst_u,
3133 int dst_stride_u,
3134 uint8_t* dst_v,
3135 int dst_stride_v,
3136 int x,
3137 int y,
3138 int width,
3139 int height,
3140 int value_y,
3141 int value_u,
3142 int value_v) {
3143 int halfwidth = (width + 1) >> 1;
3144 int halfheight = (height + 1) >> 1;
3145 uint8_t* start_y = dst_y + y * dst_stride_y + x;
3146 uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
3147 uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
3148
3149 if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
3150 y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
3151 value_v < 0 || value_v > 255) {
3152 return -1;
3153 }
3154
3155 SetPlane(start_y, dst_stride_y, width, height, value_y);
3156 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
3157 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
3158 return 0;
3159 }
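// Example usage (illustrative; plane pointers and strides are assumptions):
// draw a 16x16 video-range black rectangle (Y = 16, neutral chroma = 128) at
// (8, 8). Chroma writes start at (x / 2, y / 2) in the half-size planes:
//   I420Rect(y_plane, 64, u_plane, 32, v_plane, 32,
//            8, 8, 16, 16, 16, 128, 128);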
3160
3161 // Draw a rectangle into ARGB
3162 LIBYUV_API
3163 int ARGBRect(uint8_t* dst_argb,
3164 int dst_stride_argb,
3165 int dst_x,
3166 int dst_y,
3167 int width,
3168 int height,
3169 uint32_t value) {
3170 int y;
3171 void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) =
3172 ARGBSetRow_C;
3173 if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
3174 return -1;
3175 }
3176 if (height < 0) {
3177 height = -height;
3178 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3179 dst_stride_argb = -dst_stride_argb;
3180 }
3181 dst_argb += dst_y * dst_stride_argb + dst_x * 4;
3182 // Coalesce rows.
3183 if (dst_stride_argb == width * 4) {
3184 width *= height;
3185 height = 1;
3186 dst_stride_argb = 0;
3187 }
3188
3189 #if defined(HAS_ARGBSETROW_NEON)
3190 if (TestCpuFlag(kCpuHasNEON)) {
3191 ARGBSetRow = ARGBSetRow_Any_NEON;
3192 if (IS_ALIGNED(width, 4)) {
3193 ARGBSetRow = ARGBSetRow_NEON;
3194 }
3195 }
3196 #endif
3197 #if defined(HAS_ARGBSETROW_X86)
3198 if (TestCpuFlag(kCpuHasX86)) {
3199 ARGBSetRow = ARGBSetRow_X86;
3200 }
3201 #endif
3202 #if defined(HAS_ARGBSETROW_MSA)
3203 if (TestCpuFlag(kCpuHasMSA)) {
3204 ARGBSetRow = ARGBSetRow_Any_MSA;
3205 if (IS_ALIGNED(width, 4)) {
3206 ARGBSetRow = ARGBSetRow_MSA;
3207 }
3208 }
3209 #endif
3210 #if defined(HAS_ARGBSETROW_LSX)
3211 if (TestCpuFlag(kCpuHasLSX)) {
3212 ARGBSetRow = ARGBSetRow_Any_LSX;
3213 if (IS_ALIGNED(width, 4)) {
3214 ARGBSetRow = ARGBSetRow_LSX;
3215 }
3216 }
3217 #endif
3218
3219 // Set plane
3220 for (y = 0; y < height; ++y) {
3221 ARGBSetRow(dst_argb, value, width);
3222 dst_argb += dst_stride_argb;
3223 }
3224 return 0;
3225 }
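// Example usage (illustrative; the destination buffer and 64x64 size are
// assumptions). The 32-bit value is written verbatim as one pixel, so
// 0xFFFFFFFF fills an opaque white 16x8 rectangle at (4, 2):
//   ARGBRect(argb, 64 * 4, 4, 2, 16, 8, 0xFFFFFFFFu);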
3226
3227 // Convert unattenuated ARGB to preattenuated ARGB.
3228 // An unattenuated ARGB alpha blend uses the formula
3229 // p = a * f + (1 - a) * b
3230 // where
3231 // p is output pixel
3232 // f is foreground pixel
3233 // b is background pixel
3234 // a is alpha value from foreground pixel
3235 // A preattenuated ARGB alpha blend uses the formula
3236 // p = f + (1 - a) * b
3237 // where
3238 // f is foreground pixel premultiplied by alpha
3239
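// For example, attenuating a half-transparent white pixel (a sketch of the
// intended math; the row functions use fixed-point approximations of this):
//   f = (b, g, r, a) = (255, 255, 255, 128)
//   f' = (255 * 128 / 255, 255 * 128 / 255, 255 * 128 / 255, 128)
//      = (128, 128, 128, 128)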
3240 LIBYUV_API
3241 int ARGBAttenuate(const uint8_t* src_argb,
3242 int src_stride_argb,
3243 uint8_t* dst_argb,
3244 int dst_stride_argb,
3245 int width,
3246 int height) {
3247 int y;
3248 void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3249 int width) = ARGBAttenuateRow_C;
3250 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3251 return -1;
3252 }
3253 if (height < 0) {
3254 height = -height;
3255 src_argb = src_argb + (height - 1) * src_stride_argb;
3256 src_stride_argb = -src_stride_argb;
3257 }
3258 // Coalesce rows.
3259 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3260 width *= height;
3261 height = 1;
3262 src_stride_argb = dst_stride_argb = 0;
3263 }
3264 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
3265 if (TestCpuFlag(kCpuHasSSSE3)) {
3266 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
3267 if (IS_ALIGNED(width, 4)) {
3268 ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
3269 }
3270 }
3271 #endif
3272 #if defined(HAS_ARGBATTENUATEROW_AVX2)
3273 if (TestCpuFlag(kCpuHasAVX2)) {
3274 ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
3275 if (IS_ALIGNED(width, 8)) {
3276 ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
3277 }
3278 }
3279 #endif
3280 #if defined(HAS_ARGBATTENUATEROW_NEON)
3281 if (TestCpuFlag(kCpuHasNEON)) {
3282 ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
3283 if (IS_ALIGNED(width, 8)) {
3284 ARGBAttenuateRow = ARGBAttenuateRow_NEON;
3285 }
3286 }
3287 #endif
3288 #if defined(HAS_ARGBATTENUATEROW_MSA)
3289 if (TestCpuFlag(kCpuHasMSA)) {
3290 ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
3291 if (IS_ALIGNED(width, 8)) {
3292 ARGBAttenuateRow = ARGBAttenuateRow_MSA;
3293 }
3294 }
3295 #endif
3296 #if defined(HAS_ARGBATTENUATEROW_LASX)
3297 if (TestCpuFlag(kCpuHasLASX)) {
3298 ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX;
3299 if (IS_ALIGNED(width, 16)) {
3300 ARGBAttenuateRow = ARGBAttenuateRow_LASX;
3301 }
3302 }
3303 #endif
3304
3305 for (y = 0; y < height; ++y) {
3306 ARGBAttenuateRow(src_argb, dst_argb, width);
3307 src_argb += src_stride_argb;
3308 dst_argb += dst_stride_argb;
3309 }
3310 return 0;
3311 }
3312
3313 // Convert preattenuated ARGB to unattenuated ARGB.
3314 LIBYUV_API
3315 int ARGBUnattenuate(const uint8_t* src_argb,
3316 int src_stride_argb,
3317 uint8_t* dst_argb,
3318 int dst_stride_argb,
3319 int width,
3320 int height) {
3321 int y;
3322 void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3323 int width) = ARGBUnattenuateRow_C;
3324 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3325 return -1;
3326 }
3327 if (height < 0) {
3328 height = -height;
3329 src_argb = src_argb + (height - 1) * src_stride_argb;
3330 src_stride_argb = -src_stride_argb;
3331 }
3332 // Coalesce rows.
3333 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3334 width *= height;
3335 height = 1;
3336 src_stride_argb = dst_stride_argb = 0;
3337 }
3338 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
3339 if (TestCpuFlag(kCpuHasSSE2)) {
3340 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
3341 if (IS_ALIGNED(width, 4)) {
3342 ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
3343 }
3344 }
3345 #endif
3346 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
3347 if (TestCpuFlag(kCpuHasAVX2)) {
3348 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
3349 if (IS_ALIGNED(width, 8)) {
3350 ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
3351 }
3352 }
3353 #endif
3354 // TODO(fbarchard): Neon version.
3355
3356 for (y = 0; y < height; ++y) {
3357 ARGBUnattenuateRow(src_argb, dst_argb, width);
3358 src_argb += src_stride_argb;
3359 dst_argb += dst_stride_argb;
3360 }
3361 return 0;
3362 }
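// Note on round-tripping (a property of the integer math, not a library
// guarantee): attenuate followed by unattenuate is lossy. For a = 128, an
// attenuated channel of 100 unattenuates to about 100 * 255 / 128 = 199,
// and channels with a = 0 cannot be recovered at all.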
3363
3364 // Convert ARGB to grayscale ARGB.
3365 LIBYUV_API
3366 int ARGBGrayTo(const uint8_t* src_argb,
3367 int src_stride_argb,
3368 uint8_t* dst_argb,
3369 int dst_stride_argb,
3370 int width,
3371 int height) {
3372 int y;
3373 void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
3374 ARGBGrayRow_C;
3375 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3376 return -1;
3377 }
3378 if (height < 0) {
3379 height = -height;
3380 src_argb = src_argb + (height - 1) * src_stride_argb;
3381 src_stride_argb = -src_stride_argb;
3382 }
3383 // Coalesce rows.
3384 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3385 width *= height;
3386 height = 1;
3387 src_stride_argb = dst_stride_argb = 0;
3388 }
3389 #if defined(HAS_ARGBGRAYROW_SSSE3)
3390 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3391 ARGBGrayRow = ARGBGrayRow_SSSE3;
3392 }
3393 #endif
3394 #if defined(HAS_ARGBGRAYROW_NEON)
3395 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3396 ARGBGrayRow = ARGBGrayRow_NEON;
3397 }
3398 #endif
3399 #if defined(HAS_ARGBGRAYROW_MSA)
3400 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3401 ARGBGrayRow = ARGBGrayRow_MSA;
3402 }
3403 #endif
3404 #if defined(HAS_ARGBGRAYROW_LASX)
3405 if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3406 ARGBGrayRow = ARGBGrayRow_LASX;
3407 }
3408 #endif
3409
3410 for (y = 0; y < height; ++y) {
3411 ARGBGrayRow(src_argb, dst_argb, width);
3412 src_argb += src_stride_argb;
3413 dst_argb += dst_stride_argb;
3414 }
3415 return 0;
3416 }
3417
3418 // Make a rectangle of ARGB grayscale.
3419 LIBYUV_API
3420 int ARGBGray(uint8_t* dst_argb,
3421 int dst_stride_argb,
3422 int dst_x,
3423 int dst_y,
3424 int width,
3425 int height) {
3426 int y;
3427 void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
3428 ARGBGrayRow_C;
3429 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3430 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
3431 return -1;
3432 }
3433 // Coalesce rows.
3434 if (dst_stride_argb == width * 4) {
3435 width *= height;
3436 height = 1;
3437 dst_stride_argb = 0;
3438 }
3439 #if defined(HAS_ARGBGRAYROW_SSSE3)
3440 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3441 ARGBGrayRow = ARGBGrayRow_SSSE3;
3442 }
3443 #endif
3444 #if defined(HAS_ARGBGRAYROW_NEON)
3445 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3446 ARGBGrayRow = ARGBGrayRow_NEON;
3447 }
3448 #endif
3449 #if defined(HAS_ARGBGRAYROW_MSA)
3450 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3451 ARGBGrayRow = ARGBGrayRow_MSA;
3452 }
3453 #endif
3454 #if defined(HAS_ARGBGRAYROW_LASX)
3455 if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3456 ARGBGrayRow = ARGBGrayRow_LASX;
3457 }
3458 #endif
3459
3460 for (y = 0; y < height; ++y) {
3461 ARGBGrayRow(dst, dst, width);
3462 dst += dst_stride_argb;
3463 }
3464 return 0;
3465 }
3466
3467 // Make a rectangle of ARGB Sepia tone.
3468 LIBYUV_API
3469 int ARGBSepia(uint8_t* dst_argb,
3470 int dst_stride_argb,
3471 int dst_x,
3472 int dst_y,
3473 int width,
3474 int height) {
3475 int y;
3476 void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C;
3477 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3478 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
3479 return -1;
3480 }
3481 // Coalesce rows.
3482 if (dst_stride_argb == width * 4) {
3483 width *= height;
3484 height = 1;
3485 dst_stride_argb = 0;
3486 }
3487 #if defined(HAS_ARGBSEPIAROW_SSSE3)
3488 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3489 ARGBSepiaRow = ARGBSepiaRow_SSSE3;
3490 }
3491 #endif
3492 #if defined(HAS_ARGBSEPIAROW_NEON)
3493 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3494 ARGBSepiaRow = ARGBSepiaRow_NEON;
3495 }
3496 #endif
3497 #if defined(HAS_ARGBSEPIAROW_MSA)
3498 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3499 ARGBSepiaRow = ARGBSepiaRow_MSA;
3500 }
3501 #endif
3502 #if defined(HAS_ARGBSEPIAROW_LASX)
3503 if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3504 ARGBSepiaRow = ARGBSepiaRow_LASX;
3505 }
3506 #endif
3507
3508 for (y = 0; y < height; ++y) {
3509 ARGBSepiaRow(dst, width);
3510 dst += dst_stride_argb;
3511 }
3512 return 0;
3513 }
3514
3515 // Apply a 4x4 matrix to each ARGB pixel.
3516 // Note: normally used for shading, but can be used to swizzle or invert.
3517 LIBYUV_API
3518 int ARGBColorMatrix(const uint8_t* src_argb,
3519 int src_stride_argb,
3520 uint8_t* dst_argb,
3521 int dst_stride_argb,
3522 const int8_t* matrix_argb,
3523 int width,
3524 int height) {
3525 int y;
3526 void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3527 const int8_t* matrix_argb, int width) =
3528 ARGBColorMatrixRow_C;
3529 if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
3530 return -1;
3531 }
3532 if (height < 0) {
3533 height = -height;
3534 src_argb = src_argb + (height - 1) * src_stride_argb;
3535 src_stride_argb = -src_stride_argb;
3536 }
3537 // Coalesce rows.
3538 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3539 width *= height;
3540 height = 1;
3541 src_stride_argb = dst_stride_argb = 0;
3542 }
3543 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
3544 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3545 ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
3546 }
3547 #endif
3548 #if defined(HAS_ARGBCOLORMATRIXROW_NEON)
3549 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3550 ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
3551 }
3552 #endif
3553 #if defined(HAS_ARGBCOLORMATRIXROW_MSA)
3554 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3555 ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
3556 }
3557 #endif
3558 #if defined(HAS_ARGBCOLORMATRIXROW_LSX)
3559 if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3560 ARGBColorMatrixRow = ARGBColorMatrixRow_LSX;
3561 }
3562 #endif
3563 for (y = 0; y < height; ++y) {
3564 ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
3565 src_argb += src_stride_argb;
3566 dst_argb += dst_stride_argb;
3567 }
3568 return 0;
3569 }
3570
3571 // Apply a 4x3 matrix to each ARGB pixel.
3572 // Deprecated.
3573 LIBYUV_API
3574 int RGBColorMatrix(uint8_t* dst_argb,
3575 int dst_stride_argb,
3576 const int8_t* matrix_rgb,
3577 int dst_x,
3578 int dst_y,
3579 int width,
3580 int height) {
3581 SIMD_ALIGNED(int8_t matrix_argb[16]);
3582 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3583 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
3584 dst_y < 0) {
3585 return -1;
3586 }
3587
3588 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
3589 matrix_argb[0] = matrix_rgb[0] / 2;
3590 matrix_argb[1] = matrix_rgb[1] / 2;
3591 matrix_argb[2] = matrix_rgb[2] / 2;
3592 matrix_argb[3] = matrix_rgb[3] / 2;
3593 matrix_argb[4] = matrix_rgb[4] / 2;
3594 matrix_argb[5] = matrix_rgb[5] / 2;
3595 matrix_argb[6] = matrix_rgb[6] / 2;
3596 matrix_argb[7] = matrix_rgb[7] / 2;
3597 matrix_argb[8] = matrix_rgb[8] / 2;
3598 matrix_argb[9] = matrix_rgb[9] / 2;
3599 matrix_argb[10] = matrix_rgb[10] / 2;
3600 matrix_argb[11] = matrix_rgb[11] / 2;
3601 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
3602 matrix_argb[15] = 64; // 1.0
3603
3604 return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
3605 dst_stride_argb, &matrix_argb[0], width, height);
3606 }
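// Example usage (an illustrative sketch; the buffer, 64x64 size and row-major
// channel layout are assumptions). In the 4x3 7-bit format 1.0 would be 128,
// which does not fit in int8_t, so 127 (~0.99) is the closest usable value
// for a near-identity matrix:
//   static const int8_t kNearIdentity[12] = {
//       127, 0, 0, 0,  // first output channel
//       0, 127, 0, 0,  // second output channel
//       0, 0, 127, 0,  // third output channel
//   };
//   RGBColorMatrix(argb, 64 * 4, kNearIdentity, 0, 0, 64, 64);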
3607
3608 // Apply a color table to each ARGB pixel.
3609 // Table contains 256 ARGB values.
3610 LIBYUV_API
3611 int ARGBColorTable(uint8_t* dst_argb,
3612 int dst_stride_argb,
3613 const uint8_t* table_argb,
3614 int dst_x,
3615 int dst_y,
3616 int width,
3617 int height) {
3618 int y;
3619 void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
3620 int width) = ARGBColorTableRow_C;
3621 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3622 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
3623 dst_y < 0) {
3624 return -1;
3625 }
3626 // Coalesce rows.
3627 if (dst_stride_argb == width * 4) {
3628 width *= height;
3629 height = 1;
3630 dst_stride_argb = 0;
3631 }
3632 #if defined(HAS_ARGBCOLORTABLEROW_X86)
3633 if (TestCpuFlag(kCpuHasX86)) {
3634 ARGBColorTableRow = ARGBColorTableRow_X86;
3635 }
3636 #endif
3637 for (y = 0; y < height; ++y) {
3638 ARGBColorTableRow(dst, table_argb, width);
3639 dst += dst_stride_argb;
3640 }
3641 return 0;
3642 }
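// Example usage (illustrative; the table construction, buffer and 64x64 size
// are assumptions): a 256-entry table where entry i holds 255 - i in all four
// channels inverts the image in place:
//   uint8_t table[256 * 4];
//   int i;
//   for (i = 0; i < 256; ++i) {
//     table[i * 4 + 0] = table[i * 4 + 1] = table[i * 4 + 2] =
//         table[i * 4 + 3] = (uint8_t)(255 - i);
//   }
//   ARGBColorTable(argb, 64 * 4, table, 0, 0, 64, 64);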
3643
3644 // Apply a color table to each ARGB pixel but preserve destination alpha.
3645 // Table contains 256 ARGB values.
3646 LIBYUV_API
3647 int RGBColorTable(uint8_t* dst_argb,
3648 int dst_stride_argb,
3649 const uint8_t* table_argb,
3650 int dst_x,
3651 int dst_y,
3652 int width,
3653 int height) {
3654 int y;
3655 void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb,
3656 int width) = RGBColorTableRow_C;
3657 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3658 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
3659 dst_y < 0) {
3660 return -1;
3661 }
3662 // Coalesce rows.
3663 if (dst_stride_argb == width * 4) {
3664 width *= height;
3665 height = 1;
3666 dst_stride_argb = 0;
3667 }
3668 #if defined(HAS_RGBCOLORTABLEROW_X86)
3669 if (TestCpuFlag(kCpuHasX86)) {
3670 RGBColorTableRow = RGBColorTableRow_X86;
3671 }
3672 #endif
3673 for (y = 0; y < height; ++y) {
3674 RGBColorTableRow(dst, table_argb, width);
3675 dst += dst_stride_argb;
3676 }
3677 return 0;
3678 }
3679
3680 // ARGBQuantize is used to posterize art.
3681 // e.g. rgb / qvalue * qvalue + qvalue / 2
3682 // But the low level row functions implement it efficiently with 3
3683 // parameters, and could be used for other high level operations.
3684 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
3685 // where scale is 1 / interval_size as a fixed point value.
3686 // The divide is replaced with a fixed point multiply by the reciprocal.
3687 // Caveat - although SSE2 saturates, the C function does not and should be used
3688 // with care if doing anything but quantization.
3689 LIBYUV_API
3690 int ARGBQuantize(uint8_t* dst_argb,
3691 int dst_stride_argb,
3692 int scale,
3693 int interval_size,
3694 int interval_offset,
3695 int dst_x,
3696 int dst_y,
3697 int width,
3698 int height) {
3699 int y;
3700 void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size,
3701 int interval_offset, int width) = ARGBQuantizeRow_C;
3702 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3703 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
3704 interval_size < 1 || interval_size > 255) {
3705 return -1;
3706 }
3707 // Coalesce rows.
3708 if (dst_stride_argb == width * 4) {
3709 width *= height;
3710 height = 1;
3711 dst_stride_argb = 0;
3712 }
3713 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
3714 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
3715 ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
3716 }
3717 #endif
3718 #if defined(HAS_ARGBQUANTIZEROW_NEON)
3719 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3720 ARGBQuantizeRow = ARGBQuantizeRow_NEON;
3721 }
3722 #endif
3723 #if defined(HAS_ARGBQUANTIZEROW_MSA)
3724 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3725 ARGBQuantizeRow = ARGBQuantizeRow_MSA;
3726 }
3727 #endif
3728 #if defined(HAS_ARGBQUANTIZEROW_LSX)
3729 if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3730 ARGBQuantizeRow = ARGBQuantizeRow_LSX;
3731 }
3732 #endif
3733 for (y = 0; y < height; ++y) {
3734 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
3735 dst += dst_stride_argb;
3736 }
3737 return 0;
3738 }
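
// Worked example: to posterize to 8 levels per channel, pick
// interval_size = 32 and pass the fixed point reciprocal 65536 / 32 = 2048
// as scale, with interval_offset = 16 to center each bucket:
//
//   ARGBQuantize(dst_argb, dst_stride_argb, 65536 / 32 /* scale */,
//                32 /* interval_size */, 16 /* interval_offset */,
//                0, 0, width, height);
//
// A channel value of 100 then maps to (100 * 2048 >> 16) * 32 + 16 = 112.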
3739
3740 // Computes table of cumulative sum for image where the value is the sum
3741 // of all values above and to the left of the entry. Used by ARGBBlur.
3742 LIBYUV_API
3743 int ARGBComputeCumulativeSum(const uint8_t* src_argb,
3744 int src_stride_argb,
3745 int32_t* dst_cumsum,
3746 int dst_stride32_cumsum,
3747 int width,
3748 int height) {
3749 int y;
3750 void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
3751 const int32_t* previous_cumsum, int width) =
3752 ComputeCumulativeSumRow_C;
3753 int32_t* previous_cumsum = dst_cumsum;
3754 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
3755 return -1;
3756 }
3757 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
3758 if (TestCpuFlag(kCpuHasSSE2)) {
3759 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
3760 }
3761 #endif
3762
3763 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
3764 for (y = 0; y < height; ++y) {
3765 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
3766 previous_cumsum = dst_cumsum;
3767 dst_cumsum += dst_stride32_cumsum;
3768 src_argb += src_stride_argb;
3769 }
3770 return 0;
3771 }
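
// With the cumulative sum C in hand, the sum over any axis-aligned box of
// pixels follows from four lookups (the standard integral image identity,
// up to the usual inclusive/exclusive corner convention):
//
//   sum(x0..x1, y0..y1) = C[y1][x1] - C[y1][x0] - C[y0][x1] + C[y0][x0]
//
// ARGBBlur below uses this to average each box in constant time per pixel.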
3772
3773 // Blur ARGB image.
3774 // Caller should allocate CumulativeSum table of width * height * 16 bytes
3775 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
3776 // as the buffer is treated as circular.
3777 LIBYUV_API
3778 int ARGBBlur(const uint8_t* src_argb,
3779 int src_stride_argb,
3780 uint8_t* dst_argb,
3781 int dst_stride_argb,
3782 int32_t* dst_cumsum,
3783 int dst_stride32_cumsum,
3784 int width,
3785 int height,
3786 int radius) {
3787 int y;
3788 void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
3789 const int32_t* previous_cumsum, int width) =
3790 ComputeCumulativeSumRow_C;
3791 void (*CumulativeSumToAverageRow)(
3792 const int32_t* topleft, const int32_t* botleft, int width, int area,
3793 uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
3794 int32_t* cumsum_bot_row;
3795 int32_t* max_cumsum_bot_row;
3796 int32_t* cumsum_top_row;
3797
3798 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3799 return -1;
3800 }
3801 if (height < 0) {
3802 height = -height;
3803 src_argb = src_argb + (height - 1) * src_stride_argb;
3804 src_stride_argb = -src_stride_argb;
3805 }
3806 if (radius > height) {
3807 radius = height;
3808 }
3809 if (radius > (width / 2 - 1)) {
3810 radius = width / 2 - 1;
3811 }
3812 if (radius <= 0 || height <= 1) {
3813 return -1;
3814 }
3815 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
3816 if (TestCpuFlag(kCpuHasSSE2)) {
3817 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
3818 CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
3819 }
3820 #endif
3821 // Compute enough CumulativeSum for first row to be blurred. After this
3822 // one row of CumulativeSum is updated at a time.
3823 ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
3824 dst_stride32_cumsum, width, radius);
3825
3826 src_argb = src_argb + radius * src_stride_argb;
3827 cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
3828
3829 max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
3830 cumsum_top_row = &dst_cumsum[0];
3831
3832 for (y = 0; y < height; ++y) {
3833 int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
3834 int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
3835 int area = radius * (bot_y - top_y);
3836 int boxwidth = radius * 4;
3837 int x;
3838 int n;
3839
3840 // Increment cumsum_top_row pointer with circular buffer wrap around.
3841 if (top_y) {
3842 cumsum_top_row += dst_stride32_cumsum;
3843 if (cumsum_top_row >= max_cumsum_bot_row) {
3844 cumsum_top_row = dst_cumsum;
3845 }
3846 }
3847 // Increment cumsum_bot_row pointer with circular buffer wrap around and
3848 // then fill in a row of CumulativeSum.
3849 if ((y + radius) < height) {
3850 const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
3851 cumsum_bot_row += dst_stride32_cumsum;
3852 if (cumsum_bot_row >= max_cumsum_bot_row) {
3853 cumsum_bot_row = dst_cumsum;
3854 }
3855 ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
3856 width);
3857 src_argb += src_stride_argb;
3858 }
3859
3860 // Left clipped.
3861 for (x = 0; x < radius + 1; ++x) {
3862 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
3863 &dst_argb[x * 4], 1);
3864 area += (bot_y - top_y);
3865 boxwidth += 4;
3866 }
3867
3868 // Middle unclipped.
3869 n = (width - 1) - radius - x + 1;
3870 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
3871 &dst_argb[x * 4], n);
3872
3873 // Right clipped.
3874 for (x += n; x <= width - 1; ++x) {
3875 area -= (bot_y - top_y);
3876 boxwidth -= 4;
3877 CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
3878 cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
3879 area, &dst_argb[x * 4], 1);
3880 }
3881 dst_argb += dst_stride_argb;
3882 }
3883 return 0;
3884 }
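
// Usage sketch (illustrative): per the comment above, the caller allocates
// the cumulative sum buffer; radius * 2 + 2 rows suffice because the buffer
// is treated as circular. Using the same helpers as this file:
//
//   int radius = 5;
//   int stride32 = width * 4;  // 4 int32 per pixel.
//   align_buffer_64(cumsum, (radius * 2 + 2) * stride32 * 4 /* sizeof int32 */);
//   ARGBBlur(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
//            (int32_t*)cumsum, stride32, width, height, radius);
//   free_aligned_buffer_64(cumsum);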
3885
3886 // Multiply ARGB image by a specified ARGB value.
3887 LIBYUV_API
3888 int ARGBShade(const uint8_t* src_argb,
3889 int src_stride_argb,
3890 uint8_t* dst_argb,
3891 int dst_stride_argb,
3892 int width,
3893 int height,
3894 uint32_t value) {
3895 int y;
3896 void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
3897 uint32_t value) = ARGBShadeRow_C;
3898 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
3899 return -1;
3900 }
3901 if (height < 0) {
3902 height = -height;
3903 src_argb = src_argb + (height - 1) * src_stride_argb;
3904 src_stride_argb = -src_stride_argb;
3905 }
3906 // Coalesce rows.
3907 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3908 width *= height;
3909 height = 1;
3910 src_stride_argb = dst_stride_argb = 0;
3911 }
3912 #if defined(HAS_ARGBSHADEROW_SSE2)
3913 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
3914 ARGBShadeRow = ARGBShadeRow_SSE2;
3915 }
3916 #endif
3917 #if defined(HAS_ARGBSHADEROW_NEON)
3918 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3919 ARGBShadeRow = ARGBShadeRow_NEON;
3920 }
3921 #endif
3922 #if defined(HAS_ARGBSHADEROW_MSA)
3923 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
3924 ARGBShadeRow = ARGBShadeRow_MSA;
3925 }
3926 #endif
3927 #if defined(HAS_ARGBSHADEROW_LASX)
3928 if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) {
3929 ARGBShadeRow = ARGBShadeRow_LASX;
3930 }
3931 #endif
3932
3933 for (y = 0; y < height; ++y) {
3934 ARGBShadeRow(src_argb, dst_argb, width, value);
3935 src_argb += src_stride_argb;
3936 dst_argb += dst_stride_argb;
3937 }
3938 return 0;
3939 }
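
// Usage sketch (illustrative): value is packed ARGB, and assuming each byte
// scales its channel by roughly byte / 256, 0x80 halves a channel while 0xff
// leaves it (nearly) unchanged. E.g. darken RGB by half but keep alpha:
//
//   ARGBShade(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
//             width, height, 0xff808080);  // A=0xff, R=G=B=0x80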
3940
3941 // Interpolate 2 planes by specified amount (0 to 255).
3942 LIBYUV_API
3943 int InterpolatePlane(const uint8_t* src0,
3944 int src_stride0,
3945 const uint8_t* src1,
3946 int src_stride1,
3947 uint8_t* dst,
3948 int dst_stride,
3949 int width,
3950 int height,
3951 int interpolation) {
3952 int y;
3953 void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
3954 ptrdiff_t src_stride, int dst_width,
3955 int source_y_fraction) = InterpolateRow_C;
3956 if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
3957 return -1;
3958 }
3959 // Negative height means invert the image.
3960 if (height < 0) {
3961 height = -height;
3962 dst = dst + (height - 1) * dst_stride;
3963 dst_stride = -dst_stride;
3964 }
3965 // Coalesce rows.
3966 if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
3967 width *= height;
3968 height = 1;
3969 src_stride0 = src_stride1 = dst_stride = 0;
3970 }
3971 #if defined(HAS_INTERPOLATEROW_SSSE3)
3972 if (TestCpuFlag(kCpuHasSSSE3)) {
3973 InterpolateRow = InterpolateRow_Any_SSSE3;
3974 if (IS_ALIGNED(width, 16)) {
3975 InterpolateRow = InterpolateRow_SSSE3;
3976 }
3977 }
3978 #endif
3979 #if defined(HAS_INTERPOLATEROW_AVX2)
3980 if (TestCpuFlag(kCpuHasAVX2)) {
3981 InterpolateRow = InterpolateRow_Any_AVX2;
3982 if (IS_ALIGNED(width, 32)) {
3983 InterpolateRow = InterpolateRow_AVX2;
3984 }
3985 }
3986 #endif
3987 #if defined(HAS_INTERPOLATEROW_NEON)
3988 if (TestCpuFlag(kCpuHasNEON)) {
3989 InterpolateRow = InterpolateRow_Any_NEON;
3990 if (IS_ALIGNED(width, 16)) {
3991 InterpolateRow = InterpolateRow_NEON;
3992 }
3993 }
3994 #endif
3995 #if defined(HAS_INTERPOLATEROW_MSA)
3996 if (TestCpuFlag(kCpuHasMSA)) {
3997 InterpolateRow = InterpolateRow_Any_MSA;
3998 if (IS_ALIGNED(width, 32)) {
3999 InterpolateRow = InterpolateRow_MSA;
4000 }
4001 }
4002 #endif
4003 #if defined(HAS_INTERPOLATEROW_LSX)
4004 if (TestCpuFlag(kCpuHasLSX)) {
4005 InterpolateRow = InterpolateRow_Any_LSX;
4006 if (IS_ALIGNED(width, 32)) {
4007 InterpolateRow = InterpolateRow_LSX;
4008 }
4009 }
4010 #endif
4011
4012 for (y = 0; y < height; ++y) {
4013 InterpolateRow(dst, src0, src1 - src0, width, interpolation);
4014 src0 += src_stride0;
4015 src1 += src_stride1;
4016 dst += dst_stride;
4017 }
4018 return 0;
4019 }
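
// Usage sketch: interpolation is the blend fraction in 1/256 units, so 0
// returns src0, 128 is an even blend, and 255 is (nearly) src1. A 50/50
// crossfade of two Y planes:
//
//   InterpolatePlane(frame_a, stride_a, frame_b, stride_b, out, out_stride,
//                    width, height, 128);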
4020
4021 // Interpolate 2 planes by specified amount (0 to 255).
4022 LIBYUV_API
4023 int InterpolatePlane_16(const uint16_t* src0,
4024 int src_stride0,
4025 const uint16_t* src1,
4026 int src_stride1,
4027 uint16_t* dst,
4028 int dst_stride,
4029 int width,
4030 int height,
4031 int interpolation) {
4032 int y;
4033 void (*InterpolateRow_16)(uint16_t * dst_ptr, const uint16_t* src_ptr,
4034 ptrdiff_t src_stride, int dst_width,
4035 int source_y_fraction) = InterpolateRow_16_C;
4036 if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
4037 return -1;
4038 }
4039 // Negative height means invert the image.
4040 if (height < 0) {
4041 height = -height;
4042 dst = dst + (height - 1) * dst_stride;
4043 dst_stride = -dst_stride;
4044 }
4045 // Coalesce rows.
4046 if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
4047 width *= height;
4048 height = 1;
4049 src_stride0 = src_stride1 = dst_stride = 0;
4050 }
4051 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
4052 if (TestCpuFlag(kCpuHasSSSE3)) {
4053 InterpolateRow_16 = InterpolateRow_16_Any_SSSE3;
4054 if (IS_ALIGNED(width, 16)) {
4055 InterpolateRow_16 = InterpolateRow_16_SSSE3;
4056 }
4057 }
4058 #endif
4059 #if defined(HAS_INTERPOLATEROW_16_AVX2)
4060 if (TestCpuFlag(kCpuHasAVX2)) {
4061 InterpolateRow_16 = InterpolateRow_16_Any_AVX2;
4062 if (IS_ALIGNED(width, 32)) {
4063 InterpolateRow_16 = InterpolateRow_16_AVX2;
4064 }
4065 }
4066 #endif
4067 #if defined(HAS_INTERPOLATEROW_16_NEON)
4068 if (TestCpuFlag(kCpuHasNEON)) {
4069 InterpolateRow_16 = InterpolateRow_16_Any_NEON;
4070 if (IS_ALIGNED(width, 8)) {
4071 InterpolateRow_16 = InterpolateRow_16_NEON;
4072 }
4073 }
4074 #endif
4075 #if defined(HAS_INTERPOLATEROW_16_MSA)
4076 if (TestCpuFlag(kCpuHasMSA)) {
4077 InterpolateRow_16 = InterpolateRow_16_Any_MSA;
4078 if (IS_ALIGNED(width, 32)) {
4079 InterpolateRow_16 = InterpolateRow_16_MSA;
4080 }
4081 }
4082 #endif
4083 #if defined(HAS_INTERPOLATEROW_16_LSX)
4084 if (TestCpuFlag(kCpuHasLSX)) {
4085 InterpolateRow_16 = InterpolateRow_16_Any_LSX;
4086 if (IS_ALIGNED(width, 32)) {
4087 InterpolateRow_16 = InterpolateRow_16_LSX;
4088 }
4089 }
4090 #endif
4091
4092 for (y = 0; y < height; ++y) {
4093 InterpolateRow_16(dst, src0, src1 - src0, width, interpolation);
4094 src0 += src_stride0;
4095 src1 += src_stride1;
4096 dst += dst_stride;
4097 }
4098 return 0;
4099 }
4100
4101 // Interpolate 2 ARGB images by specified amount (0 to 255).
4102 LIBYUV_API
4103 int ARGBInterpolate(const uint8_t* src_argb0,
4104 int src_stride_argb0,
4105 const uint8_t* src_argb1,
4106 int src_stride_argb1,
4107 uint8_t* dst_argb,
4108 int dst_stride_argb,
4109 int width,
4110 int height,
4111 int interpolation) {
4112 return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
4113 src_stride_argb1, dst_argb, dst_stride_argb,
4114 width * 4, height, interpolation);
4115 }
4116
4117 // Interpolate 2 YUV images by specified amount (0 to 255).
4118 LIBYUV_API
4119 int I420Interpolate(const uint8_t* src0_y,
4120 int src0_stride_y,
4121 const uint8_t* src0_u,
4122 int src0_stride_u,
4123 const uint8_t* src0_v,
4124 int src0_stride_v,
4125 const uint8_t* src1_y,
4126 int src1_stride_y,
4127 const uint8_t* src1_u,
4128 int src1_stride_u,
4129 const uint8_t* src1_v,
4130 int src1_stride_v,
4131 uint8_t* dst_y,
4132 int dst_stride_y,
4133 uint8_t* dst_u,
4134 int dst_stride_u,
4135 uint8_t* dst_v,
4136 int dst_stride_v,
4137 int width,
4138 int height,
4139 int interpolation) {
4140 int halfwidth = (width + 1) >> 1;
4141 int halfheight = (height + 1) >> 1;
4142
4143 if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
4144 !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
4145 return -1;
4146 }
4147
4148 InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
4149 dst_stride_y, width, height, interpolation);
4150 InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
4151 dst_stride_u, halfwidth, halfheight, interpolation);
4152 InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
4153 dst_stride_v, halfwidth, halfheight, interpolation);
4154 return 0;
4155 }
4156
4157 // Shuffle ARGB channel order. e.g. BGRA to ARGB.
4158 LIBYUV_API
4159 int ARGBShuffle(const uint8_t* src_bgra,
4160 int src_stride_bgra,
4161 uint8_t* dst_argb,
4162 int dst_stride_argb,
4163 const uint8_t* shuffler,
4164 int width,
4165 int height) {
4166 int y;
4167 void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
4168 const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
4169 if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
4170 return -1;
4171 }
4172 // Negative height means invert the image.
4173 if (height < 0) {
4174 height = -height;
4175 src_bgra = src_bgra + (height - 1) * src_stride_bgra;
4176 src_stride_bgra = -src_stride_bgra;
4177 }
4178 // Coalesce rows.
4179 if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
4180 width *= height;
4181 height = 1;
4182 src_stride_bgra = dst_stride_argb = 0;
4183 }
4184 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
4185 if (TestCpuFlag(kCpuHasSSSE3)) {
4186 ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
4187 if (IS_ALIGNED(width, 8)) {
4188 ARGBShuffleRow = ARGBShuffleRow_SSSE3;
4189 }
4190 }
4191 #endif
4192 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
4193 if (TestCpuFlag(kCpuHasAVX2)) {
4194 ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
4195 if (IS_ALIGNED(width, 16)) {
4196 ARGBShuffleRow = ARGBShuffleRow_AVX2;
4197 }
4198 }
4199 #endif
4200 #if defined(HAS_ARGBSHUFFLEROW_NEON)
4201 if (TestCpuFlag(kCpuHasNEON)) {
4202 ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
4203 if (IS_ALIGNED(width, 4)) {
4204 ARGBShuffleRow = ARGBShuffleRow_NEON;
4205 }
4206 }
4207 #endif
4208 #if defined(HAS_ARGBSHUFFLEROW_MSA)
4209 if (TestCpuFlag(kCpuHasMSA)) {
4210 ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
4211 if (IS_ALIGNED(width, 8)) {
4212 ARGBShuffleRow = ARGBShuffleRow_MSA;
4213 }
4214 }
4215 #endif
4216 #if defined(HAS_ARGBSHUFFLEROW_LASX)
4217 if (TestCpuFlag(kCpuHasLASX)) {
4218 ARGBShuffleRow = ARGBShuffleRow_Any_LASX;
4219 if (IS_ALIGNED(width, 16)) {
4220 ARGBShuffleRow = ARGBShuffleRow_LASX;
4221 }
4222 }
4223 #endif
4224
4225 for (y = 0; y < height; ++y) {
4226 ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
4227 src_bgra += src_stride_bgra;
4228 dst_argb += dst_stride_argb;
4229 }
4230 return 0;
4231 }
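
// Usage sketch (illustrative): shuffler maps output byte i to input byte
// shuffler[i]. The SIMD paths read a full 16 byte mask covering 4 pixels, so
// the 4 byte pattern is repeated with +4 offsets. Swapping R and B:
//
//   static const uint8_t kShuffleSwapRB[16] = {2u,  1u,  0u,  3u,
//                                              6u,  5u,  4u,  7u,
//                                              10u, 9u,  8u,  11u,
//                                              14u, 13u, 12u, 15u};
//   ARGBShuffle(src_argb, src_stride, dst_abgr, dst_stride, kShuffleSwapRB,
//               width, height);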
4232
4233 // Shuffle AR64 channel order. e.g. AR64 to AB64.
4234 LIBYUV_API
4235 int AR64Shuffle(const uint16_t* src_ar64,
4236 int src_stride_ar64,
4237 uint16_t* dst_ar64,
4238 int dst_stride_ar64,
4239 const uint8_t* shuffler,
4240 int width,
4241 int height) {
4242 int y;
4243 void (*AR64ShuffleRow)(const uint8_t* src_ar64, uint8_t* dst_ar64,
4244 const uint8_t* shuffler, int width) = AR64ShuffleRow_C;
4245 if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0) {
4246 return -1;
4247 }
4248 // Negative height means invert the image.
4249 if (height < 0) {
4250 height = -height;
4251 src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
4252 src_stride_ar64 = -src_stride_ar64;
4253 }
4254 // Coalesce rows.
4255 if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4) {
4256 width *= height;
4257 height = 1;
4258 src_stride_ar64 = dst_stride_ar64 = 0;
4259 }
4260 // The ARGBShuffle assembly versions can be reused here since they are
4260 // implemented as plain byte shuffles.
4261 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
4262 if (TestCpuFlag(kCpuHasSSSE3)) {
4263 AR64ShuffleRow = ARGBShuffleRow_Any_SSSE3;
4264 if (IS_ALIGNED(width, 8)) {
4265 AR64ShuffleRow = ARGBShuffleRow_SSSE3;
4266 }
4267 }
4268 #endif
4269 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
4270 if (TestCpuFlag(kCpuHasAVX2)) {
4271 AR64ShuffleRow = ARGBShuffleRow_Any_AVX2;
4272 if (IS_ALIGNED(width, 16)) {
4273 AR64ShuffleRow = ARGBShuffleRow_AVX2;
4274 }
4275 }
4276 #endif
4277 #if defined(HAS_ARGBSHUFFLEROW_NEON)
4278 if (TestCpuFlag(kCpuHasNEON)) {
4279 AR64ShuffleRow = ARGBShuffleRow_Any_NEON;
4280 if (IS_ALIGNED(width, 4)) {
4281 AR64ShuffleRow = ARGBShuffleRow_NEON;
4282 }
4283 }
4284 #endif
4285
4286 for (y = 0; y < height; ++y) {
4287 AR64ShuffleRow((uint8_t*)(src_ar64), (uint8_t*)(dst_ar64), shuffler,
4288 width * 2);
4289 src_ar64 += src_stride_ar64;
4290 dst_ar64 += dst_stride_ar64;
4291 }
4292 return 0;
4293 }
4294
4295 // Gaussian blur a float plane using a 5x5 filter with
4296 // coefficients of 1, 4, 6, 4, 1.
4297 // Each destination pixel is a blur of the 5x5
4298 // pixels from the source.
4299 // Source edges are clamped.
4300 // The edge is 2 pixels on each side, and the interior is a multiple of 4.
4301 LIBYUV_API
4302 int GaussPlane_F32(const float* src,
4303 int src_stride,
4304 float* dst,
4305 int dst_stride,
4306 int width,
4307 int height) {
4308 int y;
4309 void (*GaussCol_F32)(const float* src0, const float* src1, const float* src2,
4310 const float* src3, const float* src4, float* dst,
4311 int width) = GaussCol_F32_C;
4312 void (*GaussRow_F32)(const float* src, float* dst, int width) =
4313 GaussRow_F32_C;
4314 if (!src || !dst || width <= 0 || height == 0) {
4315 return -1;
4316 }
4317 // Negative height means invert the image.
4318 if (height < 0) {
4319 height = -height;
4320 src = src + (height - 1) * src_stride;
4321 src_stride = -src_stride;
4322 }
4323
4324 #if defined(HAS_GAUSSCOL_F32_NEON)
4325 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
4326 GaussCol_F32 = GaussCol_F32_NEON;
4327 }
4328 #endif
4329 #if defined(HAS_GAUSSROW_F32_NEON)
4330 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
4331 GaussRow_F32 = GaussRow_F32_NEON;
4332 }
4333 #endif
4334 {
4335 // 2 pixels on each side, but aligned out to 16 bytes.
4336 align_buffer_64(rowbuf, (4 + width + 4) * 4);
4337 memset(rowbuf, 0, 16);
4338 memset(rowbuf + (4 + width) * 4, 0, 16);
4339 float* row = (float*)(rowbuf + 16);
4340 const float* src0 = src;
4341 const float* src1 = src;
4342 const float* src2 = src;
4343 const float* src3 = src2 + ((height > 1) ? src_stride : 0);
4344 const float* src4 = src3 + ((height > 2) ? src_stride : 0);
4345
4346 for (y = 0; y < height; ++y) {
4347 GaussCol_F32(src0, src1, src2, src3, src4, row, width);
4348
4349 // Extrude edge by 2 floats
4350 row[-2] = row[-1] = row[0];
4351 row[width + 1] = row[width] = row[width - 1];
4352
4353 GaussRow_F32(row - 2, dst, width);
4354
4355 src0 = src1;
4356 src1 = src2;
4357 src2 = src3;
4358 src3 = src4;
4359 if ((y + 2) < (height - 1)) {
4360 src4 += src_stride;
4361 }
4362 dst += dst_stride;
4363 }
4364 free_aligned_buffer_64(rowbuf);
4365 }
4366 return 0;
4367 }
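
// For reference, the effective 5x5 kernel is the outer product of
// [1 4 6 4 1] with itself; its entries sum to 16 * 16 = 256, so with the
// 1/256 normalization (assumed to be folded into the row/column helpers)
// overall brightness is preserved:
//
//   [ 1  4  6  4  1]
//   [ 4 16 24 16  4]
//   [ 6 24 36 24  6]  * (1/256)
//   [ 4 16 24 16  4]
//   [ 1  4  6  4  1]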
4368
4369 // Shared helper for the Sobel ARGB effects below.
4370 static int ARGBSobelize(const uint8_t* src_argb,
4371 int src_stride_argb,
4372 uint8_t* dst_argb,
4373 int dst_stride_argb,
4374 int width,
4375 int height,
4376 void (*SobelRow)(const uint8_t* src_sobelx,
4377 const uint8_t* src_sobely,
4378 uint8_t* dst,
4379 int width)) {
4380 int y;
4381 void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
4382 ARGBToYJRow_C;
4383 void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
4384 uint8_t* dst_sobely, int width) = SobelYRow_C;
4385 void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
4386 const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
4387 SobelXRow_C;
4388 const int kEdge = 16; // Extra pixels at start of row for extrude/align.
4389 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
4390 return -1;
4391 }
4392 // Negative height means invert the image.
4393 if (height < 0) {
4394 height = -height;
4395 src_argb = src_argb + (height - 1) * src_stride_argb;
4396 src_stride_argb = -src_stride_argb;
4397 }
4398
4399 #if defined(HAS_ARGBTOYJROW_SSSE3)
4400 if (TestCpuFlag(kCpuHasSSSE3)) {
4401 ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
4402 if (IS_ALIGNED(width, 16)) {
4403 ARGBToYJRow = ARGBToYJRow_SSSE3;
4404 }
4405 }
4406 #endif
4407 #if defined(HAS_ARGBTOYJROW_AVX2)
4408 if (TestCpuFlag(kCpuHasAVX2)) {
4409 ARGBToYJRow = ARGBToYJRow_Any_AVX2;
4410 if (IS_ALIGNED(width, 32)) {
4411 ARGBToYJRow = ARGBToYJRow_AVX2;
4412 }
4413 }
4414 #endif
4415 #if defined(HAS_ARGBTOYJROW_NEON)
4416 if (TestCpuFlag(kCpuHasNEON)) {
4417 ARGBToYJRow = ARGBToYJRow_Any_NEON;
4418 if (IS_ALIGNED(width, 16)) {
4419 ARGBToYJRow = ARGBToYJRow_NEON;
4420 }
4421 }
4422 #endif
4423 #if defined(HAS_ARGBTOYJROW_MSA)
4424 if (TestCpuFlag(kCpuHasMSA)) {
4425 ARGBToYJRow = ARGBToYJRow_Any_MSA;
4426 if (IS_ALIGNED(width, 16)) {
4427 ARGBToYJRow = ARGBToYJRow_MSA;
4428 }
4429 }
4430 #endif
4431 #if defined(HAS_ARGBTOYJROW_LSX)
4432 if (TestCpuFlag(kCpuHasLSX)) {
4433 ARGBToYJRow = ARGBToYJRow_Any_LSX;
4434 if (IS_ALIGNED(width, 16)) {
4435 ARGBToYJRow = ARGBToYJRow_LSX;
4436 }
4437 }
4438 #endif
4439 #if defined(HAS_ARGBTOYJROW_LASX)
4440 if (TestCpuFlag(kCpuHasLASX)) {
4441 ARGBToYJRow = ARGBToYJRow_Any_LASX;
4442 if (IS_ALIGNED(width, 32)) {
4443 ARGBToYJRow = ARGBToYJRow_LASX;
4444 }
4445 }
4446 #endif
4447
4448 #if defined(HAS_SOBELYROW_SSE2)
4449 if (TestCpuFlag(kCpuHasSSE2)) {
4450 SobelYRow = SobelYRow_SSE2;
4451 }
4452 #endif
4453 #if defined(HAS_SOBELYROW_NEON)
4454 if (TestCpuFlag(kCpuHasNEON)) {
4455 SobelYRow = SobelYRow_NEON;
4456 }
4457 #endif
4458 #if defined(HAS_SOBELYROW_MSA)
4459 if (TestCpuFlag(kCpuHasMSA)) {
4460 SobelYRow = SobelYRow_MSA;
4461 }
4462 #endif
4463 #if defined(HAS_SOBELXROW_SSE2)
4464 if (TestCpuFlag(kCpuHasSSE2)) {
4465 SobelXRow = SobelXRow_SSE2;
4466 }
4467 #endif
4468 #if defined(HAS_SOBELXROW_NEON)
4469 if (TestCpuFlag(kCpuHasNEON)) {
4470 SobelXRow = SobelXRow_NEON;
4471 }
4472 #endif
4473 #if defined(HAS_SOBELXROW_MSA)
4474 if (TestCpuFlag(kCpuHasMSA)) {
4475 SobelXRow = SobelXRow_MSA;
4476 }
4477 #endif
4478 {
4479 // 3 rows with edges before/after.
4480 const int kRowSize = (width + kEdge + 31) & ~31;
4481 align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
4482 uint8_t* row_sobelx = rows;
4483 uint8_t* row_sobely = rows + kRowSize;
4484 uint8_t* row_y = rows + kRowSize * 2;
4485
4486 // Convert first row.
4487 uint8_t* row_y0 = row_y + kEdge;
4488 uint8_t* row_y1 = row_y0 + kRowSize;
4489 uint8_t* row_y2 = row_y1 + kRowSize;
4490 ARGBToYJRow(src_argb, row_y0, width);
4491 row_y0[-1] = row_y0[0];
4492 memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
4493 ARGBToYJRow(src_argb, row_y1, width);
4494 row_y1[-1] = row_y1[0];
4495 memset(row_y1 + width, row_y1[width - 1], 16);
4496 memset(row_y2 + width, 0, 16);
4497
4498 for (y = 0; y < height; ++y) {
4499 // Convert next row of ARGB to G.
4500 if (y < (height - 1)) {
4501 src_argb += src_stride_argb;
4502 }
4503 ARGBToYJRow(src_argb, row_y2, width);
4504 row_y2[-1] = row_y2[0];
4505 row_y2[width] = row_y2[width - 1];
4506
4507 SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
4508 SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
4509 SobelRow(row_sobelx, row_sobely, dst_argb, width);
4510
4511 // Cycle thru circular queue of 3 row_y buffers.
4512 {
4513 uint8_t* row_yt = row_y0;
4514 row_y0 = row_y1;
4515 row_y1 = row_y2;
4516 row_y2 = row_yt;
4517 }
4518
4519 dst_argb += dst_stride_argb;
4520 }
4521 free_aligned_buffer_64(rows);
4522 }
4523 return 0;
4524 }
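
// For reference, SobelXRow and SobelYRow compute |Gx| and |Gy| of the classic
// 3x3 Sobel operators over the luma produced by ARGBToYJRow:
//
//   Gx = [-1  0  1]        Gy = [-1 -2 -1]
//        [-2  0  2]             [ 0  0  0]
//        [-1  0  1]             [ 1  2  1]
//
// and the SobelRow passed in combines the two magnitudes (clamped to 255)
// into the output pixels.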
4525
4526 // Sobel ARGB effect.
4527 LIBYUV_API
4528 int ARGBSobel(const uint8_t* src_argb,
4529 int src_stride_argb,
4530 uint8_t* dst_argb,
4531 int dst_stride_argb,
4532 int width,
4533 int height) {
4534 void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
4535 uint8_t* dst_argb, int width) = SobelRow_C;
4536 #if defined(HAS_SOBELROW_SSE2)
4537 if (TestCpuFlag(kCpuHasSSE2)) {
4538 SobelRow = SobelRow_Any_SSE2;
4539 if (IS_ALIGNED(width, 16)) {
4540 SobelRow = SobelRow_SSE2;
4541 }
4542 }
4543 #endif
4544 #if defined(HAS_SOBELROW_NEON)
4545 if (TestCpuFlag(kCpuHasNEON)) {
4546 SobelRow = SobelRow_Any_NEON;
4547 if (IS_ALIGNED(width, 8)) {
4548 SobelRow = SobelRow_NEON;
4549 }
4550 }
4551 #endif
4552 #if defined(HAS_SOBELROW_MSA)
4553 if (TestCpuFlag(kCpuHasMSA)) {
4554 SobelRow = SobelRow_Any_MSA;
4555 if (IS_ALIGNED(width, 16)) {
4556 SobelRow = SobelRow_MSA;
4557 }
4558 }
4559 #endif
4560 #if defined(HAS_SOBELROW_LSX)
4561 if (TestCpuFlag(kCpuHasLSX)) {
4562 SobelRow = SobelRow_Any_LSX;
4563 if (IS_ALIGNED(width, 16)) {
4564 SobelRow = SobelRow_LSX;
4565 }
4566 }
4567 #endif
4568 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
4569 width, height, SobelRow);
4570 }
4571
4572 // Sobel ARGB effect with planar output.
4573 LIBYUV_API
4574 int ARGBSobelToPlane(const uint8_t* src_argb,
4575 int src_stride_argb,
4576 uint8_t* dst_y,
4577 int dst_stride_y,
4578 int width,
4579 int height) {
4580 void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
4581 uint8_t* dst_, int width) = SobelToPlaneRow_C;
4582 #if defined(HAS_SOBELTOPLANEROW_SSE2)
4583 if (TestCpuFlag(kCpuHasSSE2)) {
4584 SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
4585 if (IS_ALIGNED(width, 16)) {
4586 SobelToPlaneRow = SobelToPlaneRow_SSE2;
4587 }
4588 }
4589 #endif
4590 #if defined(HAS_SOBELTOPLANEROW_NEON)
4591 if (TestCpuFlag(kCpuHasNEON)) {
4592 SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
4593 if (IS_ALIGNED(width, 16)) {
4594 SobelToPlaneRow = SobelToPlaneRow_NEON;
4595 }
4596 }
4597 #endif
4598 #if defined(HAS_SOBELTOPLANEROW_MSA)
4599 if (TestCpuFlag(kCpuHasMSA)) {
4600 SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
4601 if (IS_ALIGNED(width, 32)) {
4602 SobelToPlaneRow = SobelToPlaneRow_MSA;
4603 }
4604 }
4605 #endif
4606 #if defined(HAS_SOBELTOPLANEROW_LSX)
4607 if (TestCpuFlag(kCpuHasLSX)) {
4608 SobelToPlaneRow = SobelToPlaneRow_Any_LSX;
4609 if (IS_ALIGNED(width, 32)) {
4610 SobelToPlaneRow = SobelToPlaneRow_LSX;
4611 }
4612 }
4613 #endif
4614 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
4615 height, SobelToPlaneRow);
4616 }
4617
4618 // SobelXY ARGB effect.
4619 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
4620 LIBYUV_API
4621 int ARGBSobelXY(const uint8_t* src_argb,
4622 int src_stride_argb,
4623 uint8_t* dst_argb,
4624 int dst_stride_argb,
4625 int width,
4626 int height) {
4627 void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
4628 uint8_t* dst_argb, int width) = SobelXYRow_C;
4629 #if defined(HAS_SOBELXYROW_SSE2)
4630 if (TestCpuFlag(kCpuHasSSE2)) {
4631 SobelXYRow = SobelXYRow_Any_SSE2;
4632 if (IS_ALIGNED(width, 16)) {
4633 SobelXYRow = SobelXYRow_SSE2;
4634 }
4635 }
4636 #endif
4637 #if defined(HAS_SOBELXYROW_NEON)
4638 if (TestCpuFlag(kCpuHasNEON)) {
4639 SobelXYRow = SobelXYRow_Any_NEON;
4640 if (IS_ALIGNED(width, 8)) {
4641 SobelXYRow = SobelXYRow_NEON;
4642 }
4643 }
4644 #endif
4645 #if defined(HAS_SOBELXYROW_MSA)
4646 if (TestCpuFlag(kCpuHasMSA)) {
4647 SobelXYRow = SobelXYRow_Any_MSA;
4648 if (IS_ALIGNED(width, 16)) {
4649 SobelXYRow = SobelXYRow_MSA;
4650 }
4651 }
4652 #endif
4653 #if defined(HAS_SOBELXYROW_LSX)
4654 if (TestCpuFlag(kCpuHasLSX)) {
4655 SobelXYRow = SobelXYRow_Any_LSX;
4656 if (IS_ALIGNED(width, 16)) {
4657 SobelXYRow = SobelXYRow_LSX;
4658 }
4659 }
4660 #endif
4661 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
4662 width, height, SobelXYRow);
4663 }
4664
4665 // Apply a 4x4 polynomial to each ARGB pixel.
4666 LIBYUV_API
4667 int ARGBPolynomial(const uint8_t* src_argb,
4668 int src_stride_argb,
4669 uint8_t* dst_argb,
4670 int dst_stride_argb,
4671 const float* poly,
4672 int width,
4673 int height) {
4674 int y;
4675 void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb,
4676 const float* poly, int width) = ARGBPolynomialRow_C;
4677 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
4678 return -1;
4679 }
4680 // Negative height means invert the image.
4681 if (height < 0) {
4682 height = -height;
4683 src_argb = src_argb + (height - 1) * src_stride_argb;
4684 src_stride_argb = -src_stride_argb;
4685 }
4686 // Coalesce rows.
4687 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
4688 width *= height;
4689 height = 1;
4690 src_stride_argb = dst_stride_argb = 0;
4691 }
4692 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
4693 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
4694 ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
4695 }
4696 #endif
4697 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
4698 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
4699 IS_ALIGNED(width, 2)) {
4700 ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
4701 }
4702 #endif
4703
4704 for (y = 0; y < height; ++y) {
4705 ARGBPolynomialRow(src_argb, dst_argb, poly, width);
4706 src_argb += src_stride_argb;
4707 dst_argb += dst_stride_argb;
4708 }
4709 return 0;
4710 }
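
// Usage sketch (illustrative): poly is assumed to hold 16 floats grouped by
// power: poly[c], poly[c + 4], poly[c + 8], poly[c + 12] are the constant,
// linear, quadratic and cubic coefficients for channel c (B, G, R, A).
// Under that layout, the identity transform is:
//
//   static const float kIdentity[16] = {
//       0.f, 0.f, 0.f, 0.f,   // constant terms
//       1.f, 1.f, 1.f, 1.f,   // linear terms
//       0.f, 0.f, 0.f, 0.f,   // quadratic terms
//       0.f, 0.f, 0.f, 0.f};  // cubic terms
//   ARGBPolynomial(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
//                  kIdentity, width, height);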
4711
4712 // Convert plane of 16 bit shorts to half floats.
4713 // Source values are multiplied by scale before storing as half float.
4714 LIBYUV_API
4715 int HalfFloatPlane(const uint16_t* src_y,
4716 int src_stride_y,
4717 uint16_t* dst_y,
4718 int dst_stride_y,
4719 float scale,
4720 int width,
4721 int height) {
4722 int y;
4723 void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
4724 int width) = HalfFloatRow_C;
4725 if (!src_y || !dst_y || width <= 0 || height == 0) {
4726 return -1;
4727 }
4728 src_stride_y >>= 1;
4729 dst_stride_y >>= 1;
4730 // Negative height means invert the image.
4731 if (height < 0) {
4732 height = -height;
4733 src_y = src_y + (height - 1) * src_stride_y;
4734 src_stride_y = -src_stride_y;
4735 }
4736 // Coalesce rows.
4737 if (src_stride_y == width && dst_stride_y == width) {
4738 width *= height;
4739 height = 1;
4740 src_stride_y = dst_stride_y = 0;
4741 }
4742 #if defined(HAS_HALFFLOATROW_SSE2)
4743 if (TestCpuFlag(kCpuHasSSE2)) {
4744 HalfFloatRow = HalfFloatRow_Any_SSE2;
4745 if (IS_ALIGNED(width, 8)) {
4746 HalfFloatRow = HalfFloatRow_SSE2;
4747 }
4748 }
4749 #endif
4750 #if defined(HAS_HALFFLOATROW_AVX2)
4751 if (TestCpuFlag(kCpuHasAVX2)) {
4752 HalfFloatRow = HalfFloatRow_Any_AVX2;
4753 if (IS_ALIGNED(width, 16)) {
4754 HalfFloatRow = HalfFloatRow_AVX2;
4755 }
4756 }
4757 #endif
4758 #if defined(HAS_HALFFLOATROW_F16C)
4759 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
4760 HalfFloatRow =
4761 (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
4762 if (IS_ALIGNED(width, 16)) {
4763 HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
4764 }
4765 }
4766 #endif
4767 #if defined(HAS_HALFFLOATROW_NEON)
4768 if (TestCpuFlag(kCpuHasNEON)) {
4769 HalfFloatRow =
4770 (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
4771 if (IS_ALIGNED(width, 8)) {
4772 HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
4773 }
4774 }
4775 #endif
4776 #if defined(HAS_HALFFLOATROW_MSA)
4777 if (TestCpuFlag(kCpuHasMSA)) {
4778 HalfFloatRow = HalfFloatRow_Any_MSA;
4779 if (IS_ALIGNED(width, 32)) {
4780 HalfFloatRow = HalfFloatRow_MSA;
4781 }
4782 }
4783 #endif
4784 #if defined(HAS_HALFFLOATROW_LSX)
4785 if (TestCpuFlag(kCpuHasLSX)) {
4786 HalfFloatRow = HalfFloatRow_Any_LSX;
4787 if (IS_ALIGNED(width, 32)) {
4788 HalfFloatRow = HalfFloatRow_LSX;
4789 }
4790 }
4791 #endif
4792
4793 for (y = 0; y < height; ++y) {
4794 HalfFloatRow(src_y, dst_y, scale, width);
4795 src_y += src_stride_y;
4796 dst_y += dst_stride_y;
4797 }
4798 return 0;
4799 }
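
// Usage sketch: strides are in bytes (the function shifts them down to
// elements internally), and scale == 1.0f selects the faster no-multiply row
// variants. Normalizing 10 bit samples (0..1023) into 0.0..1.0:
//
//   HalfFloatPlane(src_y, src_stride_bytes, dst_y, dst_stride_bytes,
//                  1.0f / 1023.0f, width, height);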
4800
4801 // Convert a buffer of bytes to floats, scaling the values as they are stored.
4802 LIBYUV_API
4803 int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) {
4804 void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale,
4805 int width) = ByteToFloatRow_C;
4806 if (!src_y || !dst_y || width <= 0) {
4807 return -1;
4808 }
4809 #if defined(HAS_BYTETOFLOATROW_NEON)
4810 if (TestCpuFlag(kCpuHasNEON)) {
4811 ByteToFloatRow = ByteToFloatRow_Any_NEON;
4812 if (IS_ALIGNED(width, 8)) {
4813 ByteToFloatRow = ByteToFloatRow_NEON;
4814 }
4815 }
4816 #endif
4817
4818 ByteToFloatRow(src_y, dst_y, scale, width);
4819 return 0;
4820 }
4821
4822 // Apply a lumacolortable to each ARGB pixel.
4823 LIBYUV_API
4824 int ARGBLumaColorTable(const uint8_t* src_argb,
4825 int src_stride_argb,
4826 uint8_t* dst_argb,
4827 int dst_stride_argb,
4828 const uint8_t* luma,
4829 int width,
4830 int height) {
4831 int y;
4832 void (*ARGBLumaColorTableRow)(
4833 const uint8_t* src_argb, uint8_t* dst_argb, int width,
4834 const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
4835 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
4836 return -1;
4837 }
4838 // Negative height means invert the image.
4839 if (height < 0) {
4840 height = -height;
4841 src_argb = src_argb + (height - 1) * src_stride_argb;
4842 src_stride_argb = -src_stride_argb;
4843 }
4844 // Coalesce rows.
4845 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
4846 width *= height;
4847 height = 1;
4848 src_stride_argb = dst_stride_argb = 0;
4849 }
4850 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
4851 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
4852 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
4853 }
4854 #endif
4855
4856 for (y = 0; y < height; ++y) {
4857 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
4858 src_argb += src_stride_argb;
4859 dst_argb += dst_stride_argb;
4860 }
4861 return 0;
4862 }
4863
4864 // Copy Alpha from one ARGB image to another.
4865 LIBYUV_API
4866 int ARGBCopyAlpha(const uint8_t* src_argb,
4867 int src_stride_argb,
4868 uint8_t* dst_argb,
4869 int dst_stride_argb,
4870 int width,
4871 int height) {
4872 int y;
4873 void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
4874 int width) = ARGBCopyAlphaRow_C;
4875 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
4876 return -1;
4877 }
4878 // Negative height means invert the image.
4879 if (height < 0) {
4880 height = -height;
4881 src_argb = src_argb + (height - 1) * src_stride_argb;
4882 src_stride_argb = -src_stride_argb;
4883 }
4884 // Coalesce rows.
4885 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
4886 width *= height;
4887 height = 1;
4888 src_stride_argb = dst_stride_argb = 0;
4889 }
4890 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
4891 if (TestCpuFlag(kCpuHasSSE2)) {
4892 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
4893 if (IS_ALIGNED(width, 8)) {
4894 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
4895 }
4896 }
4897 #endif
4898 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
4899 if (TestCpuFlag(kCpuHasAVX2)) {
4900 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
4901 if (IS_ALIGNED(width, 16)) {
4902 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
4903 }
4904 }
4905 #endif
4906
4907 for (y = 0; y < height; ++y) {
4908 ARGBCopyAlphaRow(src_argb, dst_argb, width);
4909 src_argb += src_stride_argb;
4910 dst_argb += dst_stride_argb;
4911 }
4912 return 0;
4913 }
4914
4915 // Extract just the alpha channel from ARGB.
4916 LIBYUV_API
4917 int ARGBExtractAlpha(const uint8_t* src_argb,
4918 int src_stride_argb,
4919 uint8_t* dst_a,
4920 int dst_stride_a,
4921 int width,
4922 int height) {
4923 if (!src_argb || !dst_a || width <= 0 || height == 0) {
4924 return -1;
4925 }
4926 // Negative height means invert the image.
4927 if (height < 0) {
4928 height = -height;
4929 src_argb += (height - 1) * src_stride_argb;
4930 src_stride_argb = -src_stride_argb;
4931 }
4932 // Coalesce rows.
4933 if (src_stride_argb == width * 4 && dst_stride_a == width) {
4934 width *= height;
4935 height = 1;
4936 src_stride_argb = dst_stride_a = 0;
4937 }
4938 void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
4939 int width) = ARGBExtractAlphaRow_C;
4940 #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
4941 if (TestCpuFlag(kCpuHasSSE2)) {
4942 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
4943 : ARGBExtractAlphaRow_Any_SSE2;
4944 }
4945 #endif
4946 #if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
4947 if (TestCpuFlag(kCpuHasAVX2)) {
4948 ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
4949 : ARGBExtractAlphaRow_Any_AVX2;
4950 }
4951 #endif
4952 #if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
4953 if (TestCpuFlag(kCpuHasNEON)) {
4954 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
4955 : ARGBExtractAlphaRow_Any_NEON;
4956 }
4957 #endif
4958 #if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
4959 if (TestCpuFlag(kCpuHasMSA)) {
4960 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA
4961 : ARGBExtractAlphaRow_Any_MSA;
4962 }
4963 #endif
4964 #if defined(HAS_ARGBEXTRACTALPHAROW_LSX)
4965 if (TestCpuFlag(kCpuHasLSX)) {
4966 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX
4967 : ARGBExtractAlphaRow_Any_LSX;
4968 }
4969 #endif
4970
4971 for (int y = 0; y < height; ++y) {
4972 ARGBExtractAlphaRow(src_argb, dst_a, width);
4973 src_argb += src_stride_argb;
4974 dst_a += dst_stride_a;
4975 }
4976 return 0;
4977 }
4978
4979 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
4980 LIBYUV_API
4981 int ARGBCopyYToAlpha(const uint8_t* src_y,
4982 int src_stride_y,
4983 uint8_t* dst_argb,
4984 int dst_stride_argb,
4985 int width,
4986 int height) {
4987 int y;
4988 void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
4989 int width) = ARGBCopyYToAlphaRow_C;
4990 if (!src_y || !dst_argb || width <= 0 || height == 0) {
4991 return -1;
4992 }
4993 // Negative height means invert the image.
4994 if (height < 0) {
4995 height = -height;
4996 src_y = src_y + (height - 1) * src_stride_y;
4997 src_stride_y = -src_stride_y;
4998 }
4999 // Coalesce rows.
5000 if (src_stride_y == width && dst_stride_argb == width * 4) {
5001 width *= height;
5002 height = 1;
5003 src_stride_y = dst_stride_argb = 0;
5004 }
5005 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
5006 if (TestCpuFlag(kCpuHasSSE2)) {
5007 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
5008 if (IS_ALIGNED(width, 8)) {
5009 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
5010 }
5011 }
5012 #endif
5013 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
5014 if (TestCpuFlag(kCpuHasAVX2)) {
5015 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
5016 if (IS_ALIGNED(width, 16)) {
5017 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
5018 }
5019 }
5020 #endif
5021
5022 for (y = 0; y < height; ++y) {
5023 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
5024 src_y += src_stride_y;
5025 dst_argb += dst_stride_argb;
5026 }
5027 return 0;
5028 }
5029
5030 // TODO(fbarchard): Consider that if width is even, the Y channel can be
5031 // split directly. A SplitUVRow_Odd function could copy the remaining chroma.
5032
5033 LIBYUV_API
5034 int YUY2ToNV12(const uint8_t* src_yuy2,
5035 int src_stride_yuy2,
5036 uint8_t* dst_y,
5037 int dst_stride_y,
5038 uint8_t* dst_uv,
5039 int dst_stride_uv,
5040 int width,
5041 int height) {
5042 int y;
5043 int halfwidth = (width + 1) >> 1;
5044 void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
5045 int width) = SplitUVRow_C;
5046 void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
5047 ptrdiff_t src_stride, int dst_width,
5048 int source_y_fraction) = InterpolateRow_C;
5049
5050 if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
5051 return -1;
5052 }
5053
5054 // Negative height means invert the image.
5055 if (height < 0) {
5056 height = -height;
5057 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
5058 src_stride_yuy2 = -src_stride_yuy2;
5059 }
5060 #if defined(HAS_SPLITUVROW_SSE2)
5061 if (TestCpuFlag(kCpuHasSSE2)) {
5062 SplitUVRow = SplitUVRow_Any_SSE2;
5063 if (IS_ALIGNED(width, 16)) {
5064 SplitUVRow = SplitUVRow_SSE2;
5065 }
5066 }
5067 #endif
5068 #if defined(HAS_SPLITUVROW_AVX2)
5069 if (TestCpuFlag(kCpuHasAVX2)) {
5070 SplitUVRow = SplitUVRow_Any_AVX2;
5071 if (IS_ALIGNED(width, 32)) {
5072 SplitUVRow = SplitUVRow_AVX2;
5073 }
5074 }
5075 #endif
5076 #if defined(HAS_SPLITUVROW_NEON)
5077 if (TestCpuFlag(kCpuHasNEON)) {
5078 SplitUVRow = SplitUVRow_Any_NEON;
5079 if (IS_ALIGNED(width, 16)) {
5080 SplitUVRow = SplitUVRow_NEON;
5081 }
5082 }
5083 #endif
5084 #if defined(HAS_SPLITUVROW_MSA)
5085 if (TestCpuFlag(kCpuHasMSA)) {
5086 SplitUVRow = SplitUVRow_Any_MSA;
5087 if (IS_ALIGNED(width, 32)) {
5088 SplitUVRow = SplitUVRow_MSA;
5089 }
5090 }
5091 #endif
5092 #if defined(HAS_SPLITUVROW_LSX)
5093 if (TestCpuFlag(kCpuHasLSX)) {
5094 SplitUVRow = SplitUVRow_Any_LSX;
5095 if (IS_ALIGNED(width, 32)) {
5096 SplitUVRow = SplitUVRow_LSX;
5097 }
5098 }
5099 #endif
5100 #if defined(HAS_INTERPOLATEROW_SSSE3)
5101 if (TestCpuFlag(kCpuHasSSSE3)) {
5102 InterpolateRow = InterpolateRow_Any_SSSE3;
5103 if (IS_ALIGNED(width, 16)) {
5104 InterpolateRow = InterpolateRow_SSSE3;
5105 }
5106 }
5107 #endif
5108 #if defined(HAS_INTERPOLATEROW_AVX2)
5109 if (TestCpuFlag(kCpuHasAVX2)) {
5110 InterpolateRow = InterpolateRow_Any_AVX2;
5111 if (IS_ALIGNED(width, 32)) {
5112 InterpolateRow = InterpolateRow_AVX2;
5113 }
5114 }
5115 #endif
5116 #if defined(HAS_INTERPOLATEROW_NEON)
5117 if (TestCpuFlag(kCpuHasNEON)) {
5118 InterpolateRow = InterpolateRow_Any_NEON;
5119 if (IS_ALIGNED(width, 16)) {
5120 InterpolateRow = InterpolateRow_NEON;
5121 }
5122 }
5123 #endif
5124 #if defined(HAS_INTERPOLATEROW_MSA)
5125 if (TestCpuFlag(kCpuHasMSA)) {
5126 InterpolateRow = InterpolateRow_Any_MSA;
5127 if (IS_ALIGNED(width, 32)) {
5128 InterpolateRow = InterpolateRow_MSA;
5129 }
5130 }
5131 #endif
5132 #if defined(HAS_INTERPOLATEROW_LSX)
5133 if (TestCpuFlag(kCpuHasLSX)) {
5134 InterpolateRow = InterpolateRow_Any_LSX;
5135 if (IS_ALIGNED(width, 32)) {
5136 InterpolateRow = InterpolateRow_LSX;
5137 }
5138 }
5139 #endif
5140
5141 {
5142 int awidth = halfwidth * 2;
5143 // row of y and 2 rows of uv
5144 align_buffer_64(rows, awidth * 3);
5145
5146 for (y = 0; y < height - 1; y += 2) {
5147 // Split Y from UV.
5148 SplitUVRow(src_yuy2, rows, rows + awidth, awidth);
5149 memcpy(dst_y, rows, width);
5150 SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth);
5151 memcpy(dst_y + dst_stride_y, rows, width);
5152 InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
5153 src_yuy2 += src_stride_yuy2 * 2;
5154 dst_y += dst_stride_y * 2;
5155 dst_uv += dst_stride_uv;
5156 }
5157 if (height & 1) {
5158 // Split Y from UV.
5159 SplitUVRow(src_yuy2, rows, dst_uv, awidth);
5160 memcpy(dst_y, rows, width);
5161 }
5162 free_aligned_buffer_64(rows);
5163 }
5164 return 0;
5165 }
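
// Usage sketch (illustrative): NV12 needs a full resolution Y plane plus a
// half height interleaved UV plane of 2 bytes per rounded-up half width:
//
//   int halfwidth = (width + 1) / 2;
//   int halfheight = (height + 1) / 2;
//   uint8_t* dst_y = (uint8_t*)malloc((size_t)width * height);
//   uint8_t* dst_uv = (uint8_t*)malloc((size_t)halfwidth * 2 * halfheight);
//   YUY2ToNV12(src_yuy2, width * 2, dst_y, width, dst_uv, halfwidth * 2,
//              width, height);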
5166
5167 LIBYUV_API
5168 int UYVYToNV12(const uint8_t* src_uyvy,
5169 int src_stride_uyvy,
5170 uint8_t* dst_y,
5171 int dst_stride_y,
5172 uint8_t* dst_uv,
5173 int dst_stride_uv,
5174 int width,
5175 int height) {
5176 int y;
5177 int halfwidth = (width + 1) >> 1;
5178 void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
5179 int width) = SplitUVRow_C;
5180 void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
5181 ptrdiff_t src_stride, int dst_width,
5182 int source_y_fraction) = InterpolateRow_C;
5183
5184 if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
5185 return -1;
5186 }
5187
5188 // Negative height means invert the image.
5189 if (height < 0) {
5190 height = -height;
5191 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
5192 src_stride_uyvy = -src_stride_uyvy;
5193 }
5194 #if defined(HAS_SPLITUVROW_SSE2)
5195 if (TestCpuFlag(kCpuHasSSE2)) {
5196 SplitUVRow = SplitUVRow_Any_SSE2;
5197 if (IS_ALIGNED(width, 16)) {
5198 SplitUVRow = SplitUVRow_SSE2;
5199 }
5200 }
5201 #endif
5202 #if defined(HAS_SPLITUVROW_AVX2)
5203 if (TestCpuFlag(kCpuHasAVX2)) {
5204 SplitUVRow = SplitUVRow_Any_AVX2;
5205 if (IS_ALIGNED(width, 32)) {
5206 SplitUVRow = SplitUVRow_AVX2;
5207 }
5208 }
5209 #endif
5210 #if defined(HAS_SPLITUVROW_NEON)
5211 if (TestCpuFlag(kCpuHasNEON)) {
5212 SplitUVRow = SplitUVRow_Any_NEON;
5213 if (IS_ALIGNED(width, 16)) {
5214 SplitUVRow = SplitUVRow_NEON;
5215 }
5216 }
5217 #endif
5218 #if defined(HAS_SPLITUVROW_MSA)
5219 if (TestCpuFlag(kCpuHasMSA)) {
5220 SplitUVRow = SplitUVRow_Any_MSA;
5221 if (IS_ALIGNED(width, 32)) {
5222 SplitUVRow = SplitUVRow_MSA;
5223 }
5224 }
5225 #endif
5226 #if defined(HAS_SPLITUVROW_LSX)
5227 if (TestCpuFlag(kCpuHasLSX)) {
5228 SplitUVRow = SplitUVRow_Any_LSX;
5229 if (IS_ALIGNED(width, 32)) {
5230 SplitUVRow = SplitUVRow_LSX;
5231 }
5232 }
5233 #endif
5234 #if defined(HAS_INTERPOLATEROW_SSSE3)
5235 if (TestCpuFlag(kCpuHasSSSE3)) {
5236 InterpolateRow = InterpolateRow_Any_SSSE3;
5237 if (IS_ALIGNED(width, 16)) {
5238 InterpolateRow = InterpolateRow_SSSE3;
5239 }
5240 }
5241 #endif
5242 #if defined(HAS_INTERPOLATEROW_AVX2)
5243 if (TestCpuFlag(kCpuHasAVX2)) {
5244 InterpolateRow = InterpolateRow_Any_AVX2;
5245 if (IS_ALIGNED(width, 32)) {
5246 InterpolateRow = InterpolateRow_AVX2;
5247 }
5248 }
5249 #endif
5250 #if defined(HAS_INTERPOLATEROW_NEON)
5251 if (TestCpuFlag(kCpuHasNEON)) {
5252 InterpolateRow = InterpolateRow_Any_NEON;
5253 if (IS_ALIGNED(width, 16)) {
5254 InterpolateRow = InterpolateRow_NEON;
5255 }
5256 }
5257 #endif
5258 #if defined(HAS_INTERPOLATEROW_MSA)
5259 if (TestCpuFlag(kCpuHasMSA)) {
5260 InterpolateRow = InterpolateRow_Any_MSA;
5261 if (IS_ALIGNED(width, 32)) {
5262 InterpolateRow = InterpolateRow_MSA;
5263 }
5264 }
5265 #endif
5266 #if defined(HAS_INTERPOLATEROW_LSX)
5267 if (TestCpuFlag(kCpuHasLSX)) {
5268 InterpolateRow = InterpolateRow_Any_LSX;
5269 if (IS_ALIGNED(width, 32)) {
5270 InterpolateRow = InterpolateRow_LSX;
5271 }
5272 }
5273 #endif
5274
5275 {
5276 int awidth = halfwidth * 2;
5277 // row of y and 2 rows of uv
5278 align_buffer_64(rows, awidth * 3);
5279
5280 for (y = 0; y < height - 1; y += 2) {
5281 // Split Y from UV.
5282 SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
5283 memcpy(dst_y, rows, width);
5284 SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
5285 memcpy(dst_y + dst_stride_y, rows, width);
5286 InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
5287 src_uyvy += src_stride_uyvy * 2;
5288 dst_y += dst_stride_y * 2;
5289 dst_uv += dst_stride_uv;
5290 }
5291 if (height & 1) {
5292 // Split Y from UV.
5293 SplitUVRow(src_uyvy, dst_uv, rows, awidth);
5294 memcpy(dst_y, rows, width);
5295 }
5296 free_aligned_buffer_64(rows);
5297 }
5298 return 0;
5299 }
5300
5301 // width and height are the source plane size, allowing odd sizes to be handled.
5302 LIBYUV_API
5303 void HalfMergeUVPlane(const uint8_t* src_u,
5304 int src_stride_u,
5305 const uint8_t* src_v,
5306 int src_stride_v,
5307 uint8_t* dst_uv,
5308 int dst_stride_uv,
5309 int width,
5310 int height) {
5311 int y;
5312 void (*HalfMergeUVRow)(const uint8_t* src_u, int src_stride_u,
5313 const uint8_t* src_v, int src_stride_v,
5314 uint8_t* dst_uv, int width) = HalfMergeUVRow_C;
5315
5316 // Negative height means invert the image.
5317 if (height < 0) {
5318 height = -height;
5319 src_u = src_u + (height - 1) * src_stride_u;
5320 src_v = src_v + (height - 1) * src_stride_v;
5321 src_stride_u = -src_stride_u;
5322 src_stride_v = -src_stride_v;
5323 }
5324 #if defined(HAS_HALFMERGEUVROW_NEON)
5325 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
5326 HalfMergeUVRow = HalfMergeUVRow_NEON;
5327 }
5328 #endif
5329 #if defined(HAS_HALFMERGEUVROW_SSSE3)
5330 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
5331 HalfMergeUVRow = HalfMergeUVRow_SSSE3;
5332 }
5333 #endif
5334 #if defined(HAS_HALFMERGEUVROW_AVX2)
5335 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
5336 HalfMergeUVRow = HalfMergeUVRow_AVX2;
5337 }
5338 #endif
5339 for (y = 0; y < height - 1; y += 2) {
5340 // Merge a row of U and V into a row of UV.
5341 HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);
5342 src_u += src_stride_u * 2;
5343 src_v += src_stride_v * 2;
5344 dst_uv += dst_stride_uv;
5345 }
5346 if (height & 1) {
5347 HalfMergeUVRow(src_u, 0, src_v, 0, dst_uv, width);
5348 }
5349 }
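
// Usage sketch (illustrative): given full resolution U and V planes (as in
// I444), this produces the half resolution interleaved UV plane of NV12
// directly:
//
//   HalfMergeUVPlane(src_u, src_stride_u, src_v, src_stride_v, dst_uv,
//                    ((width + 1) / 2) * 2, width, height);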
5350
5351 #ifdef __cplusplus
5352 } // extern "C"
5353 } // namespace libyuv
5354 #endif
5355