/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/planar_functions.h"

#include <assert.h>
#include <string.h>  // for memset()

#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/row.h"
#include "libyuv/scale_row.h"  // for ScaleRowDown2

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Copy a plane of data
LIBYUV_API
void CopyPlane(const uint8_t* src_y,
               int src_stride_y,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  int y;
  void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
  // Nothing to do.
  if (src_y == dst_y && src_stride_y == dst_stride_y) {
    return;
  }

#if defined(HAS_COPYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
  }
#endif
#if defined(HAS_COPYROW_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
  }
#endif
#if defined(HAS_COPYROW_ERMS)
  if (TestCpuFlag(kCpuHasERMS)) {
    CopyRow = CopyRow_ERMS;
  }
#endif
#if defined(HAS_COPYROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
  }
#endif
#if defined(HAS_COPYROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    CopyRow = CopyRow_RVV;
  }
#endif

  // Copy plane
  for (y = 0; y < height; ++y) {
    CopyRow(src_y, dst_y, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
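
// Illustrative usage sketch (not part of the library; buffer names are
// hypothetical): copy a tightly packed 640x480 8-bit Y plane. Because both
// strides equal the width, the rows are coalesced into one long row before
// copying. Passing height = -480 would instead write dst bottom-up,
// inverting the image vertically.
//
//   uint8_t src[640 * 480];
//   uint8_t dst[640 * 480];
//   CopyPlane(src, 640, dst, 640, 640, 480);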

LIBYUV_API
void CopyPlane_16(const uint16_t* src_y,
                  int src_stride_y,
                  uint16_t* dst_y,
                  int dst_stride_y,
                  int width,
                  int height) {
  CopyPlane((const uint8_t*)src_y, src_stride_y * 2, (uint8_t*)dst_y,
            dst_stride_y * 2, width * 2, height);
}

// Convert a plane of 16 bit data to 8 bit
LIBYUV_API
void Convert16To8Plane(const uint16_t* src_y,
                       int src_stride_y,
                       uint8_t* dst_y,
                       int dst_stride_y,
                       int scale,  // 16384 for 10 bits
                       int width,
                       int height) {
  int y;
  void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale,
                          int width) = Convert16To8Row_C;

  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_CONVERT16TO8ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    Convert16To8Row = Convert16To8Row_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      Convert16To8Row = Convert16To8Row_NEON;
    }
  }
#endif
#if defined(HAS_CONVERT16TO8ROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    Convert16To8Row = Convert16To8Row_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      Convert16To8Row = Convert16To8Row_SSSE3;
    }
  }
#endif
#if defined(HAS_CONVERT16TO8ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    Convert16To8Row = Convert16To8Row_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      Convert16To8Row = Convert16To8Row_AVX2;
    }
  }
#endif

  // Convert plane
  for (y = 0; y < height; ++y) {
    Convert16To8Row(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
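
// Illustrative sketch (hypothetical buffers): narrow a 10-bit plane held in
// 16-bit samples down to 8 bits. With scale = 16384 each sample is
// effectively shifted right by 2, mapping 0..1023 onto 0..255.
//
//   uint16_t src10[640 * 480];
//   uint8_t dst8[640 * 480];
//   Convert16To8Plane(src10, 640, dst8, 640, 16384, 640, 480);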

// Convert a plane of 8 bit data to 16 bit
LIBYUV_API
void Convert8To16Plane(const uint8_t* src_y,
                       int src_stride_y,
                       uint16_t* dst_y,
                       int dst_stride_y,
                       int scale,  // 1024 for 10 bits
                       int width,
                       int height) {
  int y;
  void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale,
                          int width) = Convert8To16Row_C;

  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }
#if defined(HAS_CONVERT8TO16ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    Convert8To16Row = Convert8To16Row_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      Convert8To16Row = Convert8To16Row_SSE2;
    }
  }
#endif
#if defined(HAS_CONVERT8TO16ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    Convert8To16Row = Convert8To16Row_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      Convert8To16Row = Convert8To16Row_AVX2;
    }
  }
#endif

  // Convert plane
  for (y = 0; y < height; ++y) {
    Convert8To16Row(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
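
// Illustrative sketch (hypothetical buffers): widen an 8-bit plane into
// 16-bit storage. Per the parameter comment above, scale = 1024 targets
// 10-bit output.
//
//   uint8_t src8[640 * 480];
//   uint16_t dst10[640 * 480];
//   Convert8To16Plane(src8, 640, dst10, 640, 1024, 640, 480);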

// Copy I422.
LIBYUV_API
int I422Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_u,
             int src_stride_u,
             const uint8_t* src_v,
             int src_stride_v,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_u,
             int dst_stride_u,
             uint8_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  int halfwidth = (width + 1) >> 1;

  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
  return 0;
}

// Copy I444.
LIBYUV_API
int I444Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_u,
             int src_stride_u,
             const uint8_t* src_v,
             int src_stride_v,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_u,
             int dst_stride_u,
             uint8_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
  CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
  return 0;
}

// Copy I210.
LIBYUV_API
int I210Copy(const uint16_t* src_y,
             int src_stride_y,
             const uint16_t* src_u,
             int src_stride_u,
             const uint16_t* src_v,
             int src_stride_v,
             uint16_t* dst_y,
             int dst_stride_y,
             uint16_t* dst_u,
             int dst_stride_u,
             uint16_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  int halfwidth = (width + 1) >> 1;

  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  // Copy UV planes.
  CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
  CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
  return 0;
}

// Copy I410.
LIBYUV_API
int I410Copy(const uint16_t* src_y,
             int src_stride_y,
             const uint16_t* src_u,
             int src_stride_u,
             const uint16_t* src_v,
             int src_stride_v,
             uint16_t* dst_y,
             int dst_stride_y,
             uint16_t* dst_u,
             int dst_stride_u,
             uint16_t* dst_v,
             int dst_stride_v,
             int width,
             int height) {
  if ((!src_y && dst_y) || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
      height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_y = -src_stride_y;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }

  if (dst_y) {
    CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
  CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
  CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
  return 0;
}

// Copy I400.
LIBYUV_API
int I400ToI400(const uint8_t* src_y,
               int src_stride_y,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

// Convert I420 to I400.
LIBYUV_API
int I420ToI400(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_u,
               int src_stride_u,
               const uint8_t* src_v,
               int src_stride_v,
               uint8_t* dst_y,
               int dst_stride_y,
               int width,
               int height) {
  (void)src_u;
  (void)src_stride_u;
  (void)src_v;
  (void)src_stride_v;
  if (!src_y || !dst_y || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }

  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  return 0;
}

// Copy NV12. Supports inverting.
LIBYUV_API
int NV12Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_uv,
             int src_stride_uv,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_uv,
             int dst_stride_uv,
             int width,
             int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;

  if (!src_y || !dst_y || !src_uv || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_y = src_y + (height - 1) * src_stride_y;
    src_uv = src_uv + (halfheight - 1) * src_stride_uv;
    src_stride_y = -src_stride_y;
    src_stride_uv = -src_stride_uv;
  }
  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth * 2,
            halfheight);
  return 0;
}
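
// Illustrative sketch (hypothetical buffers): copy a 640x480 NV12 frame.
// The Y plane is width x height; the interleaved UV plane is
// ((width + 1) / 2) * 2 bytes wide and (height + 1) / 2 rows tall, which is
// why NV12Copy copies halfwidth * 2 columns and halfheight rows.
//
//   uint8_t src_y[640 * 480], src_uv[640 * 240];
//   uint8_t dst_y[640 * 480], dst_uv[640 * 240];
//   NV12Copy(src_y, 640, src_uv, 640, dst_y, 640, dst_uv, 640, 640, 480);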

// Copy NV21. Supports inverting.
LIBYUV_API
int NV21Copy(const uint8_t* src_y,
             int src_stride_y,
             const uint8_t* src_vu,
             int src_stride_vu,
             uint8_t* dst_y,
             int dst_stride_y,
             uint8_t* dst_vu,
             int dst_stride_vu,
             int width,
             int height) {
  return NV12Copy(src_y, src_stride_y, src_vu, src_stride_vu, dst_y,
                  dst_stride_y, dst_vu, dst_stride_vu, width, height);
}

// Support function for NV12 etc UV channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
void SplitUVPlane(const uint8_t* src_uv,
                  int src_stride_uv,
                  uint8_t* dst_u,
                  int dst_stride_u,
                  uint8_t* dst_v,
                  int dst_stride_v,
                  int width,
                  int height) {
  int y;
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
  // Coalesce rows.
  if (src_stride_uv == width * 2 && dst_stride_u == width &&
      dst_stride_v == width) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    SplitUVRow = SplitUVRow_Any_LSX;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_LSX;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    SplitUVRow = SplitUVRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    // Copy a row of UV.
    SplitUVRow(src_uv, dst_u, dst_v, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += src_stride_uv;
  }
}
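
// Illustrative sketch (hypothetical buffers): split the interleaved UV plane
// of a 640x480 NV12 frame into separate U and V planes, as when converting
// NV12 to I420. Width and height here are the chroma plane dimensions (half
// the luma size).
//
//   uint8_t src_uv[640 * 240];
//   uint8_t dst_u[320 * 240], dst_v[320 * 240];
//   SplitUVPlane(src_uv, 640, dst_u, 320, dst_v, 320, 320, 240);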

LIBYUV_API
void MergeUVPlane(const uint8_t* src_u,
                  int src_stride_u,
                  const uint8_t* src_v,
                  int src_stride_v,
                  uint8_t* dst_uv,
                  int dst_stride_uv,
                  int width,
                  int height) {
  int y;
  void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
                     uint8_t* dst_uv, int width) = MergeUVRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_uv = dst_uv + (height - 1) * dst_stride_uv;
    dst_stride_uv = -dst_stride_uv;
  }
  // Coalesce rows.
  if (src_stride_u == width && src_stride_v == width &&
      dst_stride_uv == width * 2) {
    width *= height;
    height = 1;
    src_stride_u = src_stride_v = dst_stride_uv = 0;
  }
#if defined(HAS_MERGEUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeUVRow = MergeUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeUVRow = MergeUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_AVX512BW)
  if (TestCpuFlag(kCpuHasAVX512BW)) {
    MergeUVRow = MergeUVRow_Any_AVX512BW;
    if (IS_ALIGNED(width, 32)) {
      MergeUVRow = MergeUVRow_AVX512BW;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeUVRow = MergeUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    MergeUVRow = MergeUVRow_Any_MSA;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_MSA;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    MergeUVRow = MergeUVRow_Any_LSX;
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_LSX;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    MergeUVRow = MergeUVRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge a row of U and V into a row of UV.
    MergeUVRow(src_u, src_v, dst_uv, width);
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uv += dst_stride_uv;
  }
}
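
// Illustrative sketch (hypothetical buffers): the inverse of SplitUVPlane,
// interleaving separate U and V planes into an NV12-style UV plane (e.g.
// I420 to NV12).
//
//   uint8_t src_u[320 * 240], src_v[320 * 240];
//   uint8_t dst_uv[640 * 240];
//   MergeUVPlane(src_u, 320, src_v, 320, dst_uv, 640, 320, 240);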

// Support function for P010 etc UV channels.
// Width and height are plane sizes (typically half pixel width).
LIBYUV_API
void SplitUVPlane_16(const uint16_t* src_uv,
                     int src_stride_uv,
                     uint16_t* dst_u,
                     int dst_stride_u,
                     uint16_t* dst_v,
                     int dst_stride_v,
                     int width,
                     int height,
                     int depth) {
  int y;
  void (*SplitUVRow_16)(const uint16_t* src_uv, uint16_t* dst_u,
                        uint16_t* dst_v, int depth, int width) =
      SplitUVRow_16_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_u = -dst_stride_u;
    dst_stride_v = -dst_stride_v;
  }
  // Coalesce rows.
  if (src_stride_uv == width * 2 && dst_stride_u == width &&
      dst_stride_v == width) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
  }
#if defined(HAS_SPLITUVROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow_16 = SplitUVRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow_16 = SplitUVRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow_16 = SplitUVRow_16_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      SplitUVRow_16 = SplitUVRow_16_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Copy a row of UV.
    SplitUVRow_16(src_uv, dst_u, dst_v, depth, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += src_stride_uv;
  }
}

LIBYUV_API
void MergeUVPlane_16(const uint16_t* src_u,
                     int src_stride_u,
                     const uint16_t* src_v,
                     int src_stride_v,
                     uint16_t* dst_uv,
                     int dst_stride_uv,
                     int width,
                     int height,
                     int depth) {
  int y;
  void (*MergeUVRow_16)(const uint16_t* src_u, const uint16_t* src_v,
                        uint16_t* dst_uv, int depth, int width) =
      MergeUVRow_16_C;
  assert(depth >= 8);
  assert(depth <= 16);
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_uv = dst_uv + (height - 1) * dst_stride_uv;
    dst_stride_uv = -dst_stride_uv;
  }
  // Coalesce rows.
  if (src_stride_u == width && src_stride_v == width &&
      dst_stride_uv == width * 2) {
    width *= height;
    height = 1;
    src_stride_u = src_stride_v = dst_stride_uv = 0;
  }
#if defined(HAS_MERGEUVROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeUVRow_16 = MergeUVRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 8)) {
      MergeUVRow_16 = MergeUVRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEUVROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeUVRow_16 = MergeUVRow_16_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      MergeUVRow_16 = MergeUVRow_16_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge a row of U and V into a row of UV.
    MergeUVRow_16(src_u, src_v, dst_uv, depth, width);
    src_u += src_stride_u;
    src_v += src_stride_v;
    dst_uv += dst_stride_uv;
  }
}

// Convert plane from lsb to msb
LIBYUV_API
void ConvertToMSBPlane_16(const uint16_t* src_y,
                          int src_stride_y,
                          uint16_t* dst_y,
                          int dst_stride_y,
                          int width,
                          int height,
                          int depth) {
  int y;
  int scale = 1 << (16 - depth);
  void (*MultiplyRow_16)(const uint16_t* src_y, uint16_t* dst_y, int scale,
                         int width) = MultiplyRow_16_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }

#if defined(HAS_MULTIPLYROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MultiplyRow_16 = MultiplyRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      MultiplyRow_16 = MultiplyRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_MULTIPLYROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MultiplyRow_16 = MultiplyRow_16_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MultiplyRow_16 = MultiplyRow_16_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MultiplyRow_16(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}
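
// Illustrative sketch (hypothetical buffers): convert a 10-bit plane stored
// in the low bits of each 16-bit sample (lsb form, as in I010-style data) to
// msb form (as in P010-style data). With depth = 10 the multiplier is
// 1 << (16 - 10) = 64, shifting each sample up by 6 bits.
//
//   uint16_t src_lsb[640 * 480];
//   uint16_t dst_msb[640 * 480];
//   ConvertToMSBPlane_16(src_lsb, 640, dst_msb, 640, 640, 480, 10);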

// Convert plane from msb to lsb
LIBYUV_API
void ConvertToLSBPlane_16(const uint16_t* src_y,
                          int src_stride_y,
                          uint16_t* dst_y,
                          int dst_stride_y,
                          int width,
                          int height,
                          int depth) {
  int y;
  int scale = 1 << depth;
  void (*DivideRow)(const uint16_t* src_y, uint16_t* dst_y, int scale,
                    int width) = DivideRow_16_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }
  // Coalesce rows.
  if (src_stride_y == width && dst_stride_y == width) {
    width *= height;
    height = 1;
    src_stride_y = dst_stride_y = 0;
  }

#if defined(HAS_DIVIDEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    DivideRow = DivideRow_16_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      DivideRow = DivideRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_DIVIDEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DivideRow = DivideRow_16_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DivideRow = DivideRow_16_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    DivideRow(src_y, dst_y, scale, width);
    src_y += src_stride_y;
    dst_y += dst_stride_y;
  }
}

// Swap U and V channels in interleaved UV plane.
LIBYUV_API
void SwapUVPlane(const uint8_t* src_uv,
                 int src_stride_uv,
                 uint8_t* dst_vu,
                 int dst_stride_vu,
                 int width,
                 int height) {
  int y;
  void (*SwapUVRow)(const uint8_t* src_uv, uint8_t* dst_vu, int width) =
      SwapUVRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uv = src_uv + (height - 1) * src_stride_uv;
    src_stride_uv = -src_stride_uv;
  }
  // Coalesce rows.
  if (src_stride_uv == width * 2 && dst_stride_vu == width * 2) {
    width *= height;
    height = 1;
    src_stride_uv = dst_stride_vu = 0;
  }

#if defined(HAS_SWAPUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SwapUVRow = SwapUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      SwapUVRow = SwapUVRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SWAPUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SwapUVRow = SwapUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SwapUVRow = SwapUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SWAPUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SwapUVRow = SwapUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SwapUVRow = SwapUVRow_NEON;
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    SwapUVRow(src_uv, dst_vu, width);
    src_uv += src_stride_uv;
    dst_vu += dst_stride_vu;
  }
}
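
// Illustrative sketch (hypothetical buffers): swap the byte order of each
// UV pair, converting the chroma plane of NV12 to NV21 (or back). Width is
// in UV pairs, so it is half the luma width.
//
//   uint8_t src_uv[640 * 240];
//   uint8_t dst_vu[640 * 240];
//   SwapUVPlane(src_uv, 640, dst_vu, 640, 320, 240);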

// Convert NV21 to NV12.
LIBYUV_API
int NV21ToNV12(const uint8_t* src_y,
               int src_stride_y,
               const uint8_t* src_vu,
               int src_stride_vu,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;

  if (!src_vu || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  if (dst_y) {
    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
    src_vu = src_vu + (halfheight - 1) * src_stride_vu;
    src_stride_vu = -src_stride_vu;
  }

  SwapUVPlane(src_vu, src_stride_vu, dst_uv, dst_stride_uv, halfwidth,
              halfheight);
  return 0;
}

// Test if tile_height is a power of 2 (16 or 32)
#define IS_POWEROFTWO(x) (!((x) & ((x)-1)))

// Detile a plane of data.
// Tile width is assumed to be 16.
// tile_height is 16 or 32 for MM21.
// src_stride_y is bytes per row of the source, ignoring tiling, e.g. 640.
// TODO: More detile row functions.
LIBYUV_API
int DetilePlane(const uint8_t* src_y,
                int src_stride_y,
                uint8_t* dst_y,
                int dst_stride_y,
                int width,
                int height,
                int tile_height) {
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileRow)(const uint8_t* src, ptrdiff_t src_tile_stride, uint8_t* dst,
                    int width) = DetileRow_C;
  if (!src_y || !dst_y || width <= 0 || height == 0 ||
      !IS_POWEROFTWO(tile_height)) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

#if defined(HAS_DETILEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    DetileRow = DetileRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      DetileRow = DetileRow_SSE2;
    }
  }
#endif
#if defined(HAS_DETILEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileRow = DetileRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileRow = DetileRow_NEON;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileRow(src_y, src_tile_stride, dst_y, width);
    dst_y += dst_stride_y;
    src_y += 16;
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_y = src_y - src_tile_stride + src_stride_y * tile_height;
    }
  }
  return 0;
}
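
// Illustrative sketch (hypothetical buffers): linearize the Y plane of a
// 640x480 MM21 frame using 16x32 tiles. Each output row consumes 16 bytes
// per tile column; after tile_height rows the source pointer advances to
// the next row of tiles.
//
//   uint8_t tiled_y[640 * 480];
//   uint8_t linear_y[640 * 480];
//   DetilePlane(tiled_y, 640, linear_y, 640, 640, 480, 32);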

// Convert a plane of 16 bit tiles of 16 x H to linear.
// Tile width is assumed to be 16.
// tile_height is 16 or 32 for MT2T.
LIBYUV_API
int DetilePlane_16(const uint16_t* src_y,
                   int src_stride_y,
                   uint16_t* dst_y,
                   int dst_stride_y,
                   int width,
                   int height,
                   int tile_height) {
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileRow_16)(const uint16_t* src, ptrdiff_t src_tile_stride,
                       uint16_t* dst, int width) = DetileRow_16_C;
  if (!src_y || !dst_y || width <= 0 || height == 0 ||
      !IS_POWEROFTWO(tile_height)) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_y = dst_y + (height - 1) * dst_stride_y;
    dst_stride_y = -dst_stride_y;
  }

#if defined(HAS_DETILEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    DetileRow_16 = DetileRow_16_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      DetileRow_16 = DetileRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_DETILEROW_16_AVX)
  if (TestCpuFlag(kCpuHasAVX)) {
    DetileRow_16 = DetileRow_16_Any_AVX;
    if (IS_ALIGNED(width, 16)) {
      DetileRow_16 = DetileRow_16_AVX;
    }
  }
#endif
#if defined(HAS_DETILEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileRow_16 = DetileRow_16_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileRow_16 = DetileRow_16_NEON;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileRow_16(src_y, src_tile_stride, dst_y, width);
    dst_y += dst_stride_y;
    src_y += 16;
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_y = src_y - src_tile_stride + src_stride_y * tile_height;
    }
  }
  return 0;
}

LIBYUV_API
void DetileSplitUVPlane(const uint8_t* src_uv,
                        int src_stride_uv,
                        uint8_t* dst_u,
                        int dst_stride_u,
                        uint8_t* dst_v,
                        int dst_stride_v,
                        int width,
                        int height,
                        int tile_height) {
  const ptrdiff_t src_tile_stride = 16 * tile_height;
  int y;
  void (*DetileSplitUVRow)(const uint8_t* src, ptrdiff_t src_tile_stride,
                           uint8_t* dst_u, uint8_t* dst_v, int width) =
      DetileSplitUVRow_C;
  assert(src_stride_uv > 0);
  assert(tile_height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_u = dst_u + (height - 1) * dst_stride_u;
    dst_stride_u = -dst_stride_u;
    dst_v = dst_v + (height - 1) * dst_stride_v;
    dst_stride_v = -dst_stride_v;
  }

#if defined(HAS_DETILESPLITUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    DetileSplitUVRow = DetileSplitUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      DetileSplitUVRow = DetileSplitUVRow_SSSE3;
    }
  }
#endif
#if defined(HAS_DETILESPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileSplitUVRow = DetileSplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileSplitUVRow = DetileSplitUVRow_NEON;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileSplitUVRow(src_uv, src_tile_stride, dst_u, dst_v, width);
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
    src_uv += 16;
    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_uv = src_uv - src_tile_stride + src_stride_uv * tile_height;
    }
  }
}

LIBYUV_API
void DetileToYUY2(const uint8_t* src_y,
                  int src_stride_y,
                  const uint8_t* src_uv,
                  int src_stride_uv,
                  uint8_t* dst_yuy2,
                  int dst_stride_yuy2,
                  int width,
                  int height,
                  int tile_height) {
  const ptrdiff_t src_y_tile_stride = 16 * tile_height;
  const ptrdiff_t src_uv_tile_stride = src_y_tile_stride / 2;
  int y;
  void (*DetileToYUY2)(const uint8_t* src_y, ptrdiff_t src_y_tile_stride,
                       const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride,
                       uint8_t* dst_yuy2, int width) = DetileToYUY2_C;
  assert(src_stride_y > 0);
  assert(src_stride_uv > 0);
  assert(tile_height > 0);

  if (width <= 0 || height == 0 || tile_height <= 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2;
    dst_stride_yuy2 = -dst_stride_yuy2;
  }

#if defined(HAS_DETILETOYUY2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    DetileToYUY2 = DetileToYUY2_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      DetileToYUY2 = DetileToYUY2_NEON;
    }
  }
#endif

#if defined(HAS_DETILETOYUY2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    DetileToYUY2 = DetileToYUY2_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      DetileToYUY2 = DetileToYUY2_SSE2;
    }
  }
#endif

  // Detile plane
  for (y = 0; y < height; ++y) {
    DetileToYUY2(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2,
                 width);
    dst_yuy2 += dst_stride_yuy2;
    src_y += 16;

    if (y & 0x1)
      src_uv += 16;

    // Advance to next row of tiles.
    if ((y & (tile_height - 1)) == (tile_height - 1)) {
      src_y = src_y - src_y_tile_stride + src_stride_y * tile_height;
      src_uv = src_uv - src_uv_tile_stride + src_stride_uv * (tile_height / 2);
    }
  }
}

// Support function for splitting interleaved RGB channels (e.g. RGB24).
// Width and height are the full plane size in pixels.
LIBYUV_API
void SplitRGBPlane(const uint8_t* src_rgb,
                   int src_stride_rgb,
                   uint8_t* dst_r,
                   int dst_stride_r,
                   uint8_t* dst_g,
                   int dst_stride_g,
                   uint8_t* dst_b,
                   int dst_stride_b,
                   int width,
                   int height) {
  int y;
  void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                      uint8_t* dst_b, int width) = SplitRGBRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_r = dst_r + (height - 1) * dst_stride_r;
    dst_g = dst_g + (height - 1) * dst_stride_g;
    dst_b = dst_b + (height - 1) * dst_stride_b;
    dst_stride_r = -dst_stride_r;
    dst_stride_g = -dst_stride_g;
    dst_stride_b = -dst_stride_b;
  }
  // Coalesce rows.
  if (src_stride_rgb == width * 3 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width) {
    width *= height;
    height = 1;
    src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
  }
#if defined(HAS_SPLITRGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitRGBRow = SplitRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitRGBRow = SplitRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitRGBRow = SplitRGBRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITRGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    SplitRGBRow = SplitRGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    // Split a row of RGB into rows of R, G and B.
    SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    src_rgb += src_stride_rgb;
  }
}
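
// Illustrative sketch (hypothetical buffers): split a packed
// 3-bytes-per-pixel RGB plane (e.g. RGB24) into planar R, G and B.
//
//   uint8_t rgb[640 * 480 * 3];
//   uint8_t r[640 * 480], g[640 * 480], b[640 * 480];
//   SplitRGBPlane(rgb, 640 * 3, r, 640, g, 640, b, 640, 640, 480);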

LIBYUV_API
void MergeRGBPlane(const uint8_t* src_r,
                   int src_stride_r,
                   const uint8_t* src_g,
                   int src_stride_g,
                   const uint8_t* src_b,
                   int src_stride_b,
                   uint8_t* dst_rgb,
                   int dst_stride_rgb,
                   int width,
                   int height) {
  int y;
  void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                      const uint8_t* src_b, uint8_t* dst_rgb, int width) =
      MergeRGBRow_C;
  if (width <= 0 || height == 0) {
    return;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
    dst_stride_rgb = -dst_stride_rgb;
  }
  // Coalesce rows.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_rgb == width * 3) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0;
  }
#if defined(HAS_MERGERGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    MergeRGBRow = MergeRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      MergeRGBRow = MergeRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_MERGERGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeRGBRow = MergeRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeRGBRow = MergeRGBRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGERGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    MergeRGBRow = MergeRGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    // Merge a row of R, G and B into a row of RGB.
    MergeRGBRow(src_r, src_g, src_b, dst_rgb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_rgb += dst_stride_rgb;
  }
}

LIBYUV_NOINLINE
static void SplitARGBPlaneAlpha(const uint8_t* src_argb,
                                int src_stride_argb,
                                uint8_t* dst_r,
                                int dst_stride_r,
                                uint8_t* dst_g,
                                int dst_stride_g,
                                uint8_t* dst_b,
                                int dst_stride_b,
                                uint8_t* dst_a,
                                int dst_stride_a,
                                int width,
                                int height) {
  int y;
  void (*SplitARGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                       uint8_t* dst_b, uint8_t* dst_a, int width) =
      SplitARGBRow_C;

  assert(height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  if (src_stride_argb == width * 4 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width && dst_stride_a == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b =
        dst_stride_a = 0;
  }

#if defined(HAS_SPLITARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitARGBRow = SplitARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      SplitARGBRow = SplitARGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitARGBRow = SplitARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      SplitARGBRow = SplitARGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitARGBRow = SplitARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitARGBRow = SplitARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitARGBRow = SplitARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitARGBRow = SplitARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITARGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    SplitARGBRow = SplitARGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    SplitARGBRow(src_argb, dst_r, dst_g, dst_b, dst_a, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    dst_a += dst_stride_a;
    src_argb += src_stride_argb;
  }
}

LIBYUV_NOINLINE
static void SplitARGBPlaneOpaque(const uint8_t* src_argb,
                                 int src_stride_argb,
                                 uint8_t* dst_r,
                                 int dst_stride_r,
                                 uint8_t* dst_g,
                                 int dst_stride_g,
                                 uint8_t* dst_b,
                                 int dst_stride_b,
                                 int width,
                                 int height) {
  int y;
  void (*SplitXRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g,
                       uint8_t* dst_b, int width) = SplitXRGBRow_C;
  assert(height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  if (src_stride_argb == width * 4 && dst_stride_r == width &&
      dst_stride_g == width && dst_stride_b == width) {
    width *= height;
    height = 1;
    src_stride_argb = dst_stride_r = dst_stride_g = dst_stride_b = 0;
  }

#if defined(HAS_SPLITXRGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitXRGBRow = SplitXRGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      SplitXRGBRow = SplitXRGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    SplitXRGBRow = SplitXRGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
      SplitXRGBRow = SplitXRGBRow_SSSE3;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitXRGBRow = SplitXRGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      SplitXRGBRow = SplitXRGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitXRGBRow = SplitXRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitXRGBRow = SplitXRGBRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITXRGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    SplitXRGBRow = SplitXRGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    SplitXRGBRow(src_argb, dst_r, dst_g, dst_b, width);
    dst_r += dst_stride_r;
    dst_g += dst_stride_g;
    dst_b += dst_stride_b;
    src_argb += src_stride_argb;
  }
}

LIBYUV_API
void SplitARGBPlane(const uint8_t* src_argb,
                    int src_stride_argb,
                    uint8_t* dst_r,
                    int dst_stride_r,
                    uint8_t* dst_g,
                    int dst_stride_g,
                    uint8_t* dst_b,
                    int dst_stride_b,
                    uint8_t* dst_a,
                    int dst_stride_a,
                    int width,
                    int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_r = dst_r + (height - 1) * dst_stride_r;
    dst_g = dst_g + (height - 1) * dst_stride_g;
    dst_b = dst_b + (height - 1) * dst_stride_b;
    dst_a = dst_a + (height - 1) * dst_stride_a;
    dst_stride_r = -dst_stride_r;
    dst_stride_g = -dst_stride_g;
    dst_stride_b = -dst_stride_b;
    dst_stride_a = -dst_stride_a;
  }

  if (dst_a == NULL) {
    SplitARGBPlaneOpaque(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
                         dst_stride_g, dst_b, dst_stride_b, width, height);
  } else {
    SplitARGBPlaneAlpha(src_argb, src_stride_argb, dst_r, dst_stride_r, dst_g,
                        dst_stride_g, dst_b, dst_stride_b, dst_a, dst_stride_a,
                        width, height);
  }
}
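
// Illustrative sketch (hypothetical buffers): split a packed ARGB plane into
// planar channels. Passing NULL for dst_a selects the opaque path, which
// skips extracting the alpha channel.
//
//   uint8_t argb[640 * 480 * 4];
//   uint8_t r[640 * 480], g[640 * 480], b[640 * 480], a[640 * 480];
//   SplitARGBPlane(argb, 640 * 4, r, 640, g, 640, b, 640, a, 640, 640, 480);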

LIBYUV_NOINLINE
static void MergeARGBPlaneAlpha(const uint8_t* src_r,
                                int src_stride_r,
                                const uint8_t* src_g,
                                int src_stride_g,
                                const uint8_t* src_b,
                                int src_stride_b,
                                const uint8_t* src_a,
                                int src_stride_a,
                                uint8_t* dst_argb,
                                int dst_stride_argb,
                                int width,
                                int height) {
  int y;
  void (*MergeARGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                       const uint8_t* src_b, const uint8_t* src_a,
                       uint8_t* dst_argb, int width) = MergeARGBRow_C;

  assert(height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      src_stride_a == width && dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = src_stride_a =
        dst_stride_argb = 0;
  }
#if defined(HAS_MERGEARGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeARGBRow = MergeARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      MergeARGBRow = MergeARGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeARGBRow = MergeARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeARGBRow = MergeARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeARGBRow = MergeARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeARGBRow = MergeARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEARGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    MergeARGBRow = MergeARGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeARGBRow(src_r, src_g, src_b, src_a, dst_argb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    src_a += src_stride_a;
    dst_argb += dst_stride_argb;
  }
}

LIBYUV_NOINLINE
static void MergeARGBPlaneOpaque(const uint8_t* src_r,
                                 int src_stride_r,
                                 const uint8_t* src_g,
                                 int src_stride_g,
                                 const uint8_t* src_b,
                                 int src_stride_b,
                                 uint8_t* dst_argb,
                                 int dst_stride_argb,
                                 int width,
                                 int height) {
  int y;
  void (*MergeXRGBRow)(const uint8_t* src_r, const uint8_t* src_g,
                       const uint8_t* src_b, uint8_t* dst_argb, int width) =
      MergeXRGBRow_C;

  assert(height > 0);

  if (width <= 0 || height == 0) {
    return;
  }
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_argb == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
  }
#if defined(HAS_MERGEXRGBROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeXRGBRow = MergeXRGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      MergeXRGBRow = MergeXRGBRow_SSE2;
    }
  }
#endif
#if defined(HAS_MERGEXRGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXRGBRow = MergeXRGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeXRGBRow = MergeXRGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEXRGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    MergeXRGBRow = MergeXRGBRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      MergeXRGBRow = MergeXRGBRow_NEON;
    }
  }
#endif
#if defined(HAS_MERGEXRGBROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    MergeXRGBRow = MergeXRGBRow_RVV;
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeXRGBRow(src_r, src_g, src_b, dst_argb, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_argb += dst_stride_argb;
  }
}

LIBYUV_API
void MergeARGBPlane(const uint8_t* src_r,
                    int src_stride_r,
                    const uint8_t* src_g,
                    int src_stride_g,
                    const uint8_t* src_b,
                    int src_stride_b,
                    const uint8_t* src_a,
                    int src_stride_a,
                    uint8_t* dst_argb,
                    int dst_stride_argb,
                    int width,
                    int height) {
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
    dst_stride_argb = -dst_stride_argb;
  }

  if (src_a == NULL) {
    MergeARGBPlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
                         src_stride_b, dst_argb, dst_stride_argb, width,
                         height);
  } else {
    MergeARGBPlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
                        src_stride_b, src_a, src_stride_a, dst_argb,
                        dst_stride_argb, width, height);
  }
}

// TODO(yuan): Support 2 bit alpha channel.
LIBYUV_API
void MergeXR30Plane(const uint16_t* src_r,
                    int src_stride_r,
                    const uint16_t* src_g,
                    int src_stride_g,
                    const uint16_t* src_b,
                    int src_stride_b,
                    uint8_t* dst_ar30,
                    int dst_stride_ar30,
                    int width,
                    int height,
                    int depth) {
  int y;
  void (*MergeXR30Row)(const uint16_t* src_r, const uint16_t* src_g,
                       const uint16_t* src_b, uint8_t* dst_ar30, int depth,
                       int width) = MergeXR30Row_C;

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30;
    dst_stride_ar30 = -dst_stride_ar30;
  }
  // Coalesce rows.
  if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
      dst_stride_ar30 == width * 4) {
    width *= height;
    height = 1;
    src_stride_r = src_stride_g = src_stride_b = dst_stride_ar30 = 0;
  }
#if defined(HAS_MERGEXR30ROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    MergeXR30Row = MergeXR30Row_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      MergeXR30Row = MergeXR30Row_AVX2;
    }
  }
#endif
#if defined(HAS_MERGEXR30ROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    if (depth == 10) {
      MergeXR30Row = MergeXR30Row_10_Any_NEON;
      if (IS_ALIGNED(width, 8)) {
        MergeXR30Row = MergeXR30Row_10_NEON;
      }
    } else {
      MergeXR30Row = MergeXR30Row_Any_NEON;
      if (IS_ALIGNED(width, 8)) {
        MergeXR30Row = MergeXR30Row_NEON;
      }
    }
  }
#endif

  for (y = 0; y < height; ++y) {
    MergeXR30Row(src_r, src_g, src_b, dst_ar30, depth, width);
    src_r += src_stride_r;
    src_g += src_stride_g;
    src_b += src_stride_b;
    dst_ar30 += dst_stride_ar30;
  }
}
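
// Illustrative sketch (hypothetical buffers): pack planar 10-bit R, G and B
// (held in 16-bit storage) into the 2:10:10:10 AR30 format, 4 bytes per
// pixel. There is no alpha input; per the TODO above, a 2-bit alpha source
// plane is not yet supported, so the alpha bits are filled by the row
// function.
//
//   uint16_t r[640 * 480], g[640 * 480], b[640 * 480];
//   uint8_t ar30[640 * 480 * 4];
//   MergeXR30Plane(r, 640, g, 640, b, 640, ar30, 640 * 4, 640, 480, 10);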
1784
1785 LIBYUV_NOINLINE
MergeAR64PlaneAlpha(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,const uint16_t * src_a,int src_stride_a,uint16_t * dst_ar64,int dst_stride_ar64,int width,int height,int depth)1786 static void MergeAR64PlaneAlpha(const uint16_t* src_r,
1787 int src_stride_r,
1788 const uint16_t* src_g,
1789 int src_stride_g,
1790 const uint16_t* src_b,
1791 int src_stride_b,
1792 const uint16_t* src_a,
1793 int src_stride_a,
1794 uint16_t* dst_ar64,
1795 int dst_stride_ar64,
1796 int width,
1797 int height,
1798 int depth) {
1799 int y;
1800 void (*MergeAR64Row)(const uint16_t* src_r, const uint16_t* src_g,
1801 const uint16_t* src_b, const uint16_t* src_a,
1802 uint16_t* dst_argb, int depth, int width) =
1803 MergeAR64Row_C;
1804
1805 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1806 src_stride_a == width && dst_stride_ar64 == width * 4) {
1807 width *= height;
1808 height = 1;
1809 src_stride_r = src_stride_g = src_stride_b = src_stride_a =
1810 dst_stride_ar64 = 0;
1811 }
1812 #if defined(HAS_MERGEAR64ROW_AVX2)
1813 if (TestCpuFlag(kCpuHasAVX2)) {
1814 MergeAR64Row = MergeAR64Row_Any_AVX2;
1815 if (IS_ALIGNED(width, 16)) {
1816 MergeAR64Row = MergeAR64Row_AVX2;
1817 }
1818 }
1819 #endif
1820 #if defined(HAS_MERGEAR64ROW_NEON)
1821 if (TestCpuFlag(kCpuHasNEON)) {
1822 MergeAR64Row = MergeAR64Row_Any_NEON;
1823 if (IS_ALIGNED(width, 8)) {
1824 MergeAR64Row = MergeAR64Row_NEON;
1825 }
1826 }
1827 #endif
1828
1829 for (y = 0; y < height; ++y) {
1830 MergeAR64Row(src_r, src_g, src_b, src_a, dst_ar64, depth, width);
1831 src_r += src_stride_r;
1832 src_g += src_stride_g;
1833 src_b += src_stride_b;
1834 src_a += src_stride_a;
1835 dst_ar64 += dst_stride_ar64;
1836 }
1837 }
1838
1839 LIBYUV_NOINLINE
MergeAR64PlaneOpaque(const uint16_t * src_r,int src_stride_r,const uint16_t * src_g,int src_stride_g,const uint16_t * src_b,int src_stride_b,uint16_t * dst_ar64,int dst_stride_ar64,int width,int height,int depth)1840 static void MergeAR64PlaneOpaque(const uint16_t* src_r,
1841 int src_stride_r,
1842 const uint16_t* src_g,
1843 int src_stride_g,
1844 const uint16_t* src_b,
1845 int src_stride_b,
1846 uint16_t* dst_ar64,
1847 int dst_stride_ar64,
1848 int width,
1849 int height,
1850 int depth) {
1851 int y;
1852 void (*MergeXR64Row)(const uint16_t* src_r, const uint16_t* src_g,
1853 const uint16_t* src_b, uint16_t* dst_argb, int depth,
1854 int width) = MergeXR64Row_C;
1855
1856 // Coalesce rows.
1857 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1858 dst_stride_ar64 == width * 4) {
1859 width *= height;
1860 height = 1;
1861 src_stride_r = src_stride_g = src_stride_b = dst_stride_ar64 = 0;
1862 }
1863 #if defined(HAS_MERGEXR64ROW_AVX2)
1864 if (TestCpuFlag(kCpuHasAVX2)) {
1865 MergeXR64Row = MergeXR64Row_Any_AVX2;
1866 if (IS_ALIGNED(width, 16)) {
1867 MergeXR64Row = MergeXR64Row_AVX2;
1868 }
1869 }
1870 #endif
1871 #if defined(HAS_MERGEXR64ROW_NEON)
1872 if (TestCpuFlag(kCpuHasNEON)) {
1873 MergeXR64Row = MergeXR64Row_Any_NEON;
1874 if (IS_ALIGNED(width, 8)) {
1875 MergeXR64Row = MergeXR64Row_NEON;
1876 }
1877 }
1878 #endif
1879
1880 for (y = 0; y < height; ++y) {
1881 MergeXR64Row(src_r, src_g, src_b, dst_ar64, depth, width);
1882 src_r += src_stride_r;
1883 src_g += src_stride_g;
1884 src_b += src_stride_b;
1885 dst_ar64 += dst_stride_ar64;
1886 }
1887 }
1888
1889 LIBYUV_API
1890 void MergeAR64Plane(const uint16_t* src_r,
1891 int src_stride_r,
1892 const uint16_t* src_g,
1893 int src_stride_g,
1894 const uint16_t* src_b,
1895 int src_stride_b,
1896 const uint16_t* src_a,
1897 int src_stride_a,
1898 uint16_t* dst_ar64,
1899 int dst_stride_ar64,
1900 int width,
1901 int height,
1902 int depth) {
1903 // Negative height means invert the image.
1904 if (height < 0) {
1905 height = -height;
1906 dst_ar64 = dst_ar64 + (height - 1) * dst_stride_ar64;
1907 dst_stride_ar64 = -dst_stride_ar64;
1908 }
1909
1910 if (src_a == NULL) {
1911 MergeAR64PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
1912 src_stride_b, dst_ar64, dst_stride_ar64, width, height,
1913 depth);
1914 } else {
1915 MergeAR64PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
1916 src_stride_b, src_a, src_stride_a, dst_ar64,
1917 dst_stride_ar64, width, height, depth);
1918 }
1919 }
1920
1921 LIBYUV_NOINLINE
1922 static void MergeARGB16To8PlaneAlpha(const uint16_t* src_r,
1923 int src_stride_r,
1924 const uint16_t* src_g,
1925 int src_stride_g,
1926 const uint16_t* src_b,
1927 int src_stride_b,
1928 const uint16_t* src_a,
1929 int src_stride_a,
1930 uint8_t* dst_argb,
1931 int dst_stride_argb,
1932 int width,
1933 int height,
1934 int depth) {
1935 int y;
1936 void (*MergeARGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
1937 const uint16_t* src_b, const uint16_t* src_a,
1938 uint8_t* dst_argb, int depth, int width) =
1939 MergeARGB16To8Row_C;
1940
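// Coalesce rows.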
1941 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1942 src_stride_a == width && dst_stride_argb == width * 4) {
1943 width *= height;
1944 height = 1;
1945 src_stride_r = src_stride_g = src_stride_b = src_stride_a =
1946 dst_stride_argb = 0;
1947 }
1948 #if defined(HAS_MERGEARGB16TO8ROW_AVX2)
1949 if (TestCpuFlag(kCpuHasAVX2)) {
1950 MergeARGB16To8Row = MergeARGB16To8Row_Any_AVX2;
1951 if (IS_ALIGNED(width, 16)) {
1952 MergeARGB16To8Row = MergeARGB16To8Row_AVX2;
1953 }
1954 }
1955 #endif
1956 #if defined(HAS_MERGEARGB16TO8ROW_NEON)
1957 if (TestCpuFlag(kCpuHasNEON)) {
1958 MergeARGB16To8Row = MergeARGB16To8Row_Any_NEON;
1959 if (IS_ALIGNED(width, 8)) {
1960 MergeARGB16To8Row = MergeARGB16To8Row_NEON;
1961 }
1962 }
1963 #endif
1964
1965 for (y = 0; y < height; ++y) {
1966 MergeARGB16To8Row(src_r, src_g, src_b, src_a, dst_argb, depth, width);
1967 src_r += src_stride_r;
1968 src_g += src_stride_g;
1969 src_b += src_stride_b;
1970 src_a += src_stride_a;
1971 dst_argb += dst_stride_argb;
1972 }
1973 }
1974
1975 LIBYUV_NOINLINE
1976 static void MergeARGB16To8PlaneOpaque(const uint16_t* src_r,
1977 int src_stride_r,
1978 const uint16_t* src_g,
1979 int src_stride_g,
1980 const uint16_t* src_b,
1981 int src_stride_b,
1982 uint8_t* dst_argb,
1983 int dst_stride_argb,
1984 int width,
1985 int height,
1986 int depth) {
1987 int y;
1988 void (*MergeXRGB16To8Row)(const uint16_t* src_r, const uint16_t* src_g,
1989 const uint16_t* src_b, uint8_t* dst_argb, int depth,
1990 int width) = MergeXRGB16To8Row_C;
1991
1992 // Coalesce rows.
1993 if (src_stride_r == width && src_stride_g == width && src_stride_b == width &&
1994 dst_stride_argb == width * 4) {
1995 width *= height;
1996 height = 1;
1997 src_stride_r = src_stride_g = src_stride_b = dst_stride_argb = 0;
1998 }
1999 #if defined(HAS_MERGEXRGB16TO8ROW_AVX2)
2000 if (TestCpuFlag(kCpuHasAVX2)) {
2001 MergeXRGB16To8Row = MergeXRGB16To8Row_Any_AVX2;
2002 if (IS_ALIGNED(width, 16)) {
2003 MergeXRGB16To8Row = MergeXRGB16To8Row_AVX2;
2004 }
2005 }
2006 #endif
2007 #if defined(HAS_MERGEXRGB16TO8ROW_NEON)
2008 if (TestCpuFlag(kCpuHasNEON)) {
2009 MergeXRGB16To8Row = MergeXRGB16To8Row_Any_NEON;
2010 if (IS_ALIGNED(width, 8)) {
2011 MergeXRGB16To8Row = MergeXRGB16To8Row_NEON;
2012 }
2013 }
2014 #endif
2015
2016 for (y = 0; y < height; ++y) {
2017 MergeXRGB16To8Row(src_r, src_g, src_b, dst_argb, depth, width);
2018 src_r += src_stride_r;
2019 src_g += src_stride_g;
2020 src_b += src_stride_b;
2021 dst_argb += dst_stride_argb;
2022 }
2023 }
2024
2025 LIBYUV_API
2026 void MergeARGB16To8Plane(const uint16_t* src_r,
2027 int src_stride_r,
2028 const uint16_t* src_g,
2029 int src_stride_g,
2030 const uint16_t* src_b,
2031 int src_stride_b,
2032 const uint16_t* src_a,
2033 int src_stride_a,
2034 uint8_t* dst_argb,
2035 int dst_stride_argb,
2036 int width,
2037 int height,
2038 int depth) {
2039 // Negative height means invert the image.
2040 if (height < 0) {
2041 height = -height;
2042 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2043 dst_stride_argb = -dst_stride_argb;
2044 }
2045
2046 if (src_a == NULL) {
2047 MergeARGB16To8PlaneOpaque(src_r, src_stride_r, src_g, src_stride_g, src_b,
2048 src_stride_b, dst_argb, dst_stride_argb, width,
2049 height, depth);
2050 } else {
2051 MergeARGB16To8PlaneAlpha(src_r, src_stride_r, src_g, src_stride_g, src_b,
2052 src_stride_b, src_a, src_stride_a, dst_argb,
2053 dst_stride_argb, width, height, depth);
2054 }
2055 }
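// Usage sketch (illustrative; parameter names are hypothetical): merging
// 10-bit planar RGB into 8-bit ARGB could look like
//   MergeARGB16To8Plane(src_r, stride_r, src_g, stride_g, src_b, stride_b,
//                       NULL, 0, dst_argb, dst_stride, width, height, 10);
// Passing a NULL alpha plane selects the opaque path above.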
2056
2057 // Convert YUY2 to I422.
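// YUY2 is packed 4:2:2: bytes Y0 U0 Y1 V0 describe 2 pixels (2 bytes/pixel).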
2058 LIBYUV_API
2059 int YUY2ToI422(const uint8_t* src_yuy2,
2060 int src_stride_yuy2,
2061 uint8_t* dst_y,
2062 int dst_stride_y,
2063 uint8_t* dst_u,
2064 int dst_stride_u,
2065 uint8_t* dst_v,
2066 int dst_stride_v,
2067 int width,
2068 int height) {
2069 int y;
2070 void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u,
2071 uint8_t* dst_v, int width) = YUY2ToUV422Row_C;
2072 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
2073 YUY2ToYRow_C;
2074 if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
2075 return -1;
2076 }
2077 // Negative height means invert the image.
2078 if (height < 0) {
2079 height = -height;
2080 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
2081 src_stride_yuy2 = -src_stride_yuy2;
2082 }
2083 // Coalesce rows.
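// Note: coalescing is only applied when width * height <= 32768; presumably
// this keeps the combined row width within what the row functions handle
// (assumption; the cap is in the source without a stated rationale).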
2084 if (src_stride_yuy2 == width * 2 && dst_stride_y == width &&
2085 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
2086 width * height <= 32768) {
2087 width *= height;
2088 height = 1;
2089 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
2090 }
2091 #if defined(HAS_YUY2TOYROW_SSE2)
2092 if (TestCpuFlag(kCpuHasSSE2)) {
2093 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
2094 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
2095 if (IS_ALIGNED(width, 16)) {
2096 YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
2097 YUY2ToYRow = YUY2ToYRow_SSE2;
2098 }
2099 }
2100 #endif
2101 #if defined(HAS_YUY2TOYROW_AVX2)
2102 if (TestCpuFlag(kCpuHasAVX2)) {
2103 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
2104 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
2105 if (IS_ALIGNED(width, 32)) {
2106 YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
2107 YUY2ToYRow = YUY2ToYRow_AVX2;
2108 }
2109 }
2110 #endif
2111 #if defined(HAS_YUY2TOYROW_NEON)
2112 if (TestCpuFlag(kCpuHasNEON)) {
2113 YUY2ToYRow = YUY2ToYRow_Any_NEON;
2114 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
2115 if (IS_ALIGNED(width, 16)) {
2116 YUY2ToYRow = YUY2ToYRow_NEON;
2117 YUY2ToUV422Row = YUY2ToUV422Row_NEON;
2118 }
2119 }
2120 #endif
2121 #if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
2122 if (TestCpuFlag(kCpuHasMSA)) {
2123 YUY2ToYRow = YUY2ToYRow_Any_MSA;
2124 YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA;
2125 if (IS_ALIGNED(width, 32)) {
2126 YUY2ToYRow = YUY2ToYRow_MSA;
2127 YUY2ToUV422Row = YUY2ToUV422Row_MSA;
2128 }
2129 }
2130 #endif
2131 #if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
2132 if (TestCpuFlag(kCpuHasLSX)) {
2133 YUY2ToYRow = YUY2ToYRow_Any_LSX;
2134 YUY2ToUV422Row = YUY2ToUV422Row_Any_LSX;
2135 if (IS_ALIGNED(width, 16)) {
2136 YUY2ToYRow = YUY2ToYRow_LSX;
2137 YUY2ToUV422Row = YUY2ToUV422Row_LSX;
2138 }
2139 }
2140 #endif
2141 #if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
2142 if (TestCpuFlag(kCpuHasLASX)) {
2143 YUY2ToYRow = YUY2ToYRow_Any_LASX;
2144 YUY2ToUV422Row = YUY2ToUV422Row_Any_LASX;
2145 if (IS_ALIGNED(width, 32)) {
2146 YUY2ToYRow = YUY2ToYRow_LASX;
2147 YUY2ToUV422Row = YUY2ToUV422Row_LASX;
2148 }
2149 }
2150 #endif
2151
2152 for (y = 0; y < height; ++y) {
2153 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
2154 YUY2ToYRow(src_yuy2, dst_y, width);
2155 src_yuy2 += src_stride_yuy2;
2156 dst_y += dst_stride_y;
2157 dst_u += dst_stride_u;
2158 dst_v += dst_stride_v;
2159 }
2160 return 0;
2161 }
2162
2163 // Convert UYVY to I422.
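// UYVY is the byte-swapped variant of YUY2: U0 Y0 V0 Y1 per 2 pixels.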
2164 LIBYUV_API
2165 int UYVYToI422(const uint8_t* src_uyvy,
2166 int src_stride_uyvy,
2167 uint8_t* dst_y,
2168 int dst_stride_y,
2169 uint8_t* dst_u,
2170 int dst_stride_u,
2171 uint8_t* dst_v,
2172 int dst_stride_v,
2173 int width,
2174 int height) {
2175 int y;
2176 void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u,
2177 uint8_t* dst_v, int width) = UYVYToUV422Row_C;
2178 void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
2179 UYVYToYRow_C;
2180 if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
2181 return -1;
2182 }
2183 // Negative height means invert the image.
2184 if (height < 0) {
2185 height = -height;
2186 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
2187 src_stride_uyvy = -src_stride_uyvy;
2188 }
2189 // Coalesce rows.
2190 if (src_stride_uyvy == width * 2 && dst_stride_y == width &&
2191 dst_stride_u * 2 == width && dst_stride_v * 2 == width &&
2192 width * height <= 32768) {
2193 width *= height;
2194 height = 1;
2195 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
2196 }
2197 #if defined(HAS_UYVYTOYROW_SSE2)
2198 if (TestCpuFlag(kCpuHasSSE2)) {
2199 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
2200 UYVYToYRow = UYVYToYRow_Any_SSE2;
2201 if (IS_ALIGNED(width, 16)) {
2202 UYVYToUV422Row = UYVYToUV422Row_SSE2;
2203 UYVYToYRow = UYVYToYRow_SSE2;
2204 }
2205 }
2206 #endif
2207 #if defined(HAS_UYVYTOYROW_AVX2)
2208 if (TestCpuFlag(kCpuHasAVX2)) {
2209 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
2210 UYVYToYRow = UYVYToYRow_Any_AVX2;
2211 if (IS_ALIGNED(width, 32)) {
2212 UYVYToUV422Row = UYVYToUV422Row_AVX2;
2213 UYVYToYRow = UYVYToYRow_AVX2;
2214 }
2215 }
2216 #endif
2217 #if defined(HAS_UYVYTOYROW_NEON)
2218 if (TestCpuFlag(kCpuHasNEON)) {
2219 UYVYToYRow = UYVYToYRow_Any_NEON;
2220 UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
2221 if (IS_ALIGNED(width, 16)) {
2222 UYVYToYRow = UYVYToYRow_NEON;
2223 UYVYToUV422Row = UYVYToUV422Row_NEON;
2224 }
2225 }
2226 #endif
2227 #if defined(HAS_UYVYTOYROW_MSA) && defined(HAS_UYVYTOUV422ROW_MSA)
2228 if (TestCpuFlag(kCpuHasMSA)) {
2229 UYVYToYRow = UYVYToYRow_Any_MSA;
2230 UYVYToUV422Row = UYVYToUV422Row_Any_MSA;
2231 if (IS_ALIGNED(width, 32)) {
2232 UYVYToYRow = UYVYToYRow_MSA;
2233 UYVYToUV422Row = UYVYToUV422Row_MSA;
2234 }
2235 }
2236 #endif
2237 #if defined(HAS_UYVYTOYROW_LSX) && defined(HAS_UYVYTOUV422ROW_LSX)
2238 if (TestCpuFlag(kCpuHasLSX)) {
2239 UYVYToYRow = UYVYToYRow_Any_LSX;
2240 UYVYToUV422Row = UYVYToUV422Row_Any_LSX;
2241 if (IS_ALIGNED(width, 16)) {
2242 UYVYToYRow = UYVYToYRow_LSX;
2243 UYVYToUV422Row = UYVYToUV422Row_LSX;
2244 }
2245 }
2246 #endif
2247 #if defined(HAS_UYVYTOYROW_LASX) && defined(HAS_UYVYTOUV422ROW_LASX)
2248 if (TestCpuFlag(kCpuHasLASX)) {
2249 UYVYToYRow = UYVYToYRow_Any_LASX;
2250 UYVYToUV422Row = UYVYToUV422Row_Any_LASX;
2251 if (IS_ALIGNED(width, 32)) {
2252 UYVYToYRow = UYVYToYRow_LASX;
2253 UYVYToUV422Row = UYVYToUV422Row_LASX;
2254 }
2255 }
2256 #endif
2257
2258 for (y = 0; y < height; ++y) {
2259 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
2260 UYVYToYRow(src_uyvy, dst_y, width);
2261 src_uyvy += src_stride_uyvy;
2262 dst_y += dst_stride_y;
2263 dst_u += dst_stride_u;
2264 dst_v += dst_stride_v;
2265 }
2266 return 0;
2267 }
2268
2269 // Convert YUY2 to Y.
2270 LIBYUV_API
2271 int YUY2ToY(const uint8_t* src_yuy2,
2272 int src_stride_yuy2,
2273 uint8_t* dst_y,
2274 int dst_stride_y,
2275 int width,
2276 int height) {
2277 int y;
2278 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
2279 YUY2ToYRow_C;
2280 if (!src_yuy2 || !dst_y || width <= 0 || height == 0) {
2281 return -1;
2282 }
2283 // Negative height means invert the image.
2284 if (height < 0) {
2285 height = -height;
2286 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
2287 src_stride_yuy2 = -src_stride_yuy2;
2288 }
2289 // Coalesce rows.
2290 if (src_stride_yuy2 == width * 2 && dst_stride_y == width) {
2291 width *= height;
2292 height = 1;
2293 src_stride_yuy2 = dst_stride_y = 0;
2294 }
2295 #if defined(HAS_YUY2TOYROW_SSE2)
2296 if (TestCpuFlag(kCpuHasSSE2)) {
2297 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
2298 if (IS_ALIGNED(width, 16)) {
2299 YUY2ToYRow = YUY2ToYRow_SSE2;
2300 }
2301 }
2302 #endif
2303 #if defined(HAS_YUY2TOYROW_AVX2)
2304 if (TestCpuFlag(kCpuHasAVX2)) {
2305 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
2306 if (IS_ALIGNED(width, 32)) {
2307 YUY2ToYRow = YUY2ToYRow_AVX2;
2308 }
2309 }
2310 #endif
2311 #if defined(HAS_YUY2TOYROW_NEON)
2312 if (TestCpuFlag(kCpuHasNEON)) {
2313 YUY2ToYRow = YUY2ToYRow_Any_NEON;
2314 if (IS_ALIGNED(width, 16)) {
2315 YUY2ToYRow = YUY2ToYRow_NEON;
2316 }
2317 }
2318 #endif
2319 #if defined(HAS_YUY2TOYROW_MSA)
2320 if (TestCpuFlag(kCpuHasMSA)) {
2321 YUY2ToYRow = YUY2ToYRow_Any_MSA;
2322 if (IS_ALIGNED(width, 32)) {
2323 YUY2ToYRow = YUY2ToYRow_MSA;
2324 }
2325 }
2326 #endif
2327
2328 for (y = 0; y < height; ++y) {
2329 YUY2ToYRow(src_yuy2, dst_y, width);
2330 src_yuy2 += src_stride_yuy2;
2331 dst_y += dst_stride_y;
2332 }
2333 return 0;
2334 }
2335
2336 // Convert UYVY to Y.
2337 LIBYUV_API
2338 int UYVYToY(const uint8_t* src_uyvy,
2339 int src_stride_uyvy,
2340 uint8_t* dst_y,
2341 int dst_stride_y,
2342 int width,
2343 int height) {
2344 int y;
2345 void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) =
2346 UYVYToYRow_C;
2347 if (!src_uyvy || !dst_y || width <= 0 || height == 0) {
2348 return -1;
2349 }
2350 // Negative height means invert the image.
2351 if (height < 0) {
2352 height = -height;
2353 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
2354 src_stride_uyvy = -src_stride_uyvy;
2355 }
2356 // Coalesce rows.
2357 if (src_stride_uyvy == width * 2 && dst_stride_y == width) {
2358 width *= height;
2359 height = 1;
2360 src_stride_uyvy = dst_stride_y = 0;
2361 }
2362 #if defined(HAS_UYVYTOYROW_SSE2)
2363 if (TestCpuFlag(kCpuHasSSE2)) {
2364 UYVYToYRow = UYVYToYRow_Any_SSE2;
2365 if (IS_ALIGNED(width, 16)) {
2366 UYVYToYRow = UYVYToYRow_SSE2;
2367 }
2368 }
2369 #endif
2370 #if defined(HAS_UYVYTOYROW_AVX2)
2371 if (TestCpuFlag(kCpuHasAVX2)) {
2372 UYVYToYRow = UYVYToYRow_Any_AVX2;
2373 if (IS_ALIGNED(width, 32)) {
2374 UYVYToYRow = UYVYToYRow_AVX2;
2375 }
2376 }
2377 #endif
2378 #if defined(HAS_UYVYTOYROW_NEON)
2379 if (TestCpuFlag(kCpuHasNEON)) {
2380 UYVYToYRow = UYVYToYRow_Any_NEON;
2381 if (IS_ALIGNED(width, 16)) {
2382 UYVYToYRow = UYVYToYRow_NEON;
2383 }
2384 }
2385 #endif
2386 #if defined(HAS_UYVYTOYROW_MSA)
2387 if (TestCpuFlag(kCpuHasMSA)) {
2388 UYVYToYRow = UYVYToYRow_Any_MSA;
2389 if (IS_ALIGNED(width, 32)) {
2390 UYVYToYRow = UYVYToYRow_MSA;
2391 }
2392 }
2393 #endif
2394 #if defined(HAS_UYVYTOYROW_LSX)
2395 if (TestCpuFlag(kCpuHasLSX)) {
2396 UYVYToYRow = UYVYToYRow_Any_LSX;
2397 if (IS_ALIGNED(width, 16)) {
2398 UYVYToYRow = UYVYToYRow_LSX;
2399 }
2400 }
2401 #endif
2402
2403 for (y = 0; y < height; ++y) {
2404 UYVYToYRow(src_uyvy, dst_y, width);
2405 src_uyvy += src_stride_uyvy;
2406 dst_y += dst_stride_y;
2407 }
2408 return 0;
2409 }
2410
2411 // Mirror a plane of data.
2412 // See also I400Mirror.
2413 LIBYUV_API
2414 void MirrorPlane(const uint8_t* src_y,
2415 int src_stride_y,
2416 uint8_t* dst_y,
2417 int dst_stride_y,
2418 int width,
2419 int height) {
2420 int y;
2421 void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
2422 // Negative height means invert the image.
2423 if (height < 0) {
2424 height = -height;
2425 src_y = src_y + (height - 1) * src_stride_y;
2426 src_stride_y = -src_stride_y;
2427 }
2428 #if defined(HAS_MIRRORROW_NEON)
2429 if (TestCpuFlag(kCpuHasNEON)) {
2430 MirrorRow = MirrorRow_Any_NEON;
2431 if (IS_ALIGNED(width, 32)) {
2432 MirrorRow = MirrorRow_NEON;
2433 }
2434 }
2435 #endif
2436 #if defined(HAS_MIRRORROW_SSSE3)
2437 if (TestCpuFlag(kCpuHasSSSE3)) {
2438 MirrorRow = MirrorRow_Any_SSSE3;
2439 if (IS_ALIGNED(width, 16)) {
2440 MirrorRow = MirrorRow_SSSE3;
2441 }
2442 }
2443 #endif
2444 #if defined(HAS_MIRRORROW_AVX2)
2445 if (TestCpuFlag(kCpuHasAVX2)) {
2446 MirrorRow = MirrorRow_Any_AVX2;
2447 if (IS_ALIGNED(width, 32)) {
2448 MirrorRow = MirrorRow_AVX2;
2449 }
2450 }
2451 #endif
2452 #if defined(HAS_MIRRORROW_MSA)
2453 if (TestCpuFlag(kCpuHasMSA)) {
2454 MirrorRow = MirrorRow_Any_MSA;
2455 if (IS_ALIGNED(width, 64)) {
2456 MirrorRow = MirrorRow_MSA;
2457 }
2458 }
2459 #endif
2460 #if defined(HAS_MIRRORROW_LSX)
2461 if (TestCpuFlag(kCpuHasLSX)) {
2462 MirrorRow = MirrorRow_Any_LSX;
2463 if (IS_ALIGNED(width, 32)) {
2464 MirrorRow = MirrorRow_LSX;
2465 }
2466 }
2467 #endif
2468 #if defined(HAS_MIRRORROW_LASX)
2469 if (TestCpuFlag(kCpuHasLASX)) {
2470 MirrorRow = MirrorRow_Any_LASX;
2471 if (IS_ALIGNED(width, 64)) {
2472 MirrorRow = MirrorRow_LASX;
2473 }
2474 }
2475 #endif
2476
2477 // Mirror plane
2478 for (y = 0; y < height; ++y) {
2479 MirrorRow(src_y, dst_y, width);
2480 src_y += src_stride_y;
2481 dst_y += dst_stride_y;
2482 }
2483 }
2484
2485 // Mirror a plane of UV data.
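// Interleaved UV pairs are mirrored as 2-byte units so each U stays paired
// with its V.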
2486 LIBYUV_API
2487 void MirrorUVPlane(const uint8_t* src_uv,
2488 int src_stride_uv,
2489 uint8_t* dst_uv,
2490 int dst_stride_uv,
2491 int width,
2492 int height) {
2493 int y;
2494 void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst, int width) =
2495 MirrorUVRow_C;
2496 // Negative height means invert the image.
2497 if (height < 0) {
2498 height = -height;
2499 src_uv = src_uv + (height - 1) * src_stride_uv;
2500 src_stride_uv = -src_stride_uv;
2501 }
2502 #if defined(HAS_MIRRORUVROW_NEON)
2503 if (TestCpuFlag(kCpuHasNEON)) {
2504 MirrorUVRow = MirrorUVRow_Any_NEON;
2505 if (IS_ALIGNED(width, 32)) {
2506 MirrorUVRow = MirrorUVRow_NEON;
2507 }
2508 }
2509 #endif
2510 #if defined(HAS_MIRRORUVROW_SSSE3)
2511 if (TestCpuFlag(kCpuHasSSSE3)) {
2512 MirrorUVRow = MirrorUVRow_Any_SSSE3;
2513 if (IS_ALIGNED(width, 8)) {
2514 MirrorUVRow = MirrorUVRow_SSSE3;
2515 }
2516 }
2517 #endif
2518 #if defined(HAS_MIRRORUVROW_AVX2)
2519 if (TestCpuFlag(kCpuHasAVX2)) {
2520 MirrorUVRow = MirrorUVRow_Any_AVX2;
2521 if (IS_ALIGNED(width, 16)) {
2522 MirrorUVRow = MirrorUVRow_AVX2;
2523 }
2524 }
2525 #endif
2526 #if defined(HAS_MIRRORUVROW_MSA)
2527 if (TestCpuFlag(kCpuHasMSA)) {
2528 MirrorUVRow = MirrorUVRow_Any_MSA;
2529 if (IS_ALIGNED(width, 8)) {
2530 MirrorUVRow = MirrorUVRow_MSA;
2531 }
2532 }
2533 #endif
2534 #if defined(HAS_MIRRORUVROW_LSX)
2535 if (TestCpuFlag(kCpuHasLSX)) {
2536 MirrorUVRow = MirrorUVRow_Any_LSX;
2537 if (IS_ALIGNED(width, 8)) {
2538 MirrorUVRow = MirrorUVRow_LSX;
2539 }
2540 }
2541 #endif
2542 #if defined(HAS_MIRRORUVROW_LASX)
2543 if (TestCpuFlag(kCpuHasLASX)) {
2544 MirrorUVRow = MirrorUVRow_Any_LASX;
2545 if (IS_ALIGNED(width, 16)) {
2546 MirrorUVRow = MirrorUVRow_LASX;
2547 }
2548 }
2549 #endif
2550
2551 // MirrorUV plane
2552 for (y = 0; y < height; ++y) {
2553 MirrorUVRow(src_uv, dst_uv, width);
2554 src_uv += src_stride_uv;
2555 dst_uv += dst_stride_uv;
2556 }
2557 }
2558
2559 // Mirror I400 with optional flipping
2560 LIBYUV_API
2561 int I400Mirror(const uint8_t* src_y,
2562 int src_stride_y,
2563 uint8_t* dst_y,
2564 int dst_stride_y,
2565 int width,
2566 int height) {
2567 if (!src_y || !dst_y || width <= 0 || height == 0) {
2568 return -1;
2569 }
2570 // Negative height means invert the image.
2571 if (height < 0) {
2572 height = -height;
2573 src_y = src_y + (height - 1) * src_stride_y;
2574 src_stride_y = -src_stride_y;
2575 }
2576
2577 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2578 return 0;
2579 }
2580
2581 // Mirror I420 with optional flipping
2582 LIBYUV_API
2583 int I420Mirror(const uint8_t* src_y,
2584 int src_stride_y,
2585 const uint8_t* src_u,
2586 int src_stride_u,
2587 const uint8_t* src_v,
2588 int src_stride_v,
2589 uint8_t* dst_y,
2590 int dst_stride_y,
2591 uint8_t* dst_u,
2592 int dst_stride_u,
2593 uint8_t* dst_v,
2594 int dst_stride_v,
2595 int width,
2596 int height) {
2597 int halfwidth = (width + 1) >> 1;
2598 int halfheight = (height + 1) >> 1;
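// Chroma dimensions round up so odd-sized luma planes are fully covered.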
2599
2600 if (!src_y || !src_u || !src_v || !dst_u || !dst_v || width <= 0 ||
2601 height == 0) {
2602 return -1;
2603 }
2604
2605 // Negative height means invert the image.
2606 if (height < 0) {
2607 height = -height;
2608 halfheight = (height + 1) >> 1;
2609 src_y = src_y + (height - 1) * src_stride_y;
2610 src_u = src_u + (halfheight - 1) * src_stride_u;
2611 src_v = src_v + (halfheight - 1) * src_stride_v;
2612 src_stride_y = -src_stride_y;
2613 src_stride_u = -src_stride_u;
2614 src_stride_v = -src_stride_v;
2615 }
2616
2617 if (dst_y) {
2618 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2619 }
2620 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
2621 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
2622 return 0;
2623 }
2624
2625 // NV12 mirror.
2626 LIBYUV_API
2627 int NV12Mirror(const uint8_t* src_y,
2628 int src_stride_y,
2629 const uint8_t* src_uv,
2630 int src_stride_uv,
2631 uint8_t* dst_y,
2632 int dst_stride_y,
2633 uint8_t* dst_uv,
2634 int dst_stride_uv,
2635 int width,
2636 int height) {
2637 int halfwidth = (width + 1) >> 1;
2638 int halfheight = (height + 1) >> 1;
2639
2640 if (!src_y || !src_uv || !dst_uv || width <= 0 || height == 0) {
2641 return -1;
2642 }
2643
2644 // Negative height means invert the image.
2645 if (height < 0) {
2646 height = -height;
2647 halfheight = (height + 1) >> 1;
2648 src_y = src_y + (height - 1) * src_stride_y;
2649 src_uv = src_uv + (halfheight - 1) * src_stride_uv;
2650 src_stride_y = -src_stride_y;
2651 src_stride_uv = -src_stride_uv;
2652 }
2653
2654 if (dst_y) {
2655 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
2656 }
2657 MirrorUVPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv, halfwidth,
2658 halfheight);
2659 return 0;
2660 }
2661
2662 // ARGB mirror.
2663 LIBYUV_API
2664 int ARGBMirror(const uint8_t* src_argb,
2665 int src_stride_argb,
2666 uint8_t* dst_argb,
2667 int dst_stride_argb,
2668 int width,
2669 int height) {
2670 int y;
2671 void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
2672 ARGBMirrorRow_C;
2673 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2674 return -1;
2675 }
2676 // Negative height means invert the image.
2677 if (height < 0) {
2678 height = -height;
2679 src_argb = src_argb + (height - 1) * src_stride_argb;
2680 src_stride_argb = -src_stride_argb;
2681 }
2682 #if defined(HAS_ARGBMIRRORROW_NEON)
2683 if (TestCpuFlag(kCpuHasNEON)) {
2684 ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
2685 if (IS_ALIGNED(width, 8)) {
2686 ARGBMirrorRow = ARGBMirrorRow_NEON;
2687 }
2688 }
2689 #endif
2690 #if defined(HAS_ARGBMIRRORROW_SSE2)
2691 if (TestCpuFlag(kCpuHasSSE2)) {
2692 ARGBMirrorRow = ARGBMirrorRow_Any_SSE2;
2693 if (IS_ALIGNED(width, 4)) {
2694 ARGBMirrorRow = ARGBMirrorRow_SSE2;
2695 }
2696 }
2697 #endif
2698 #if defined(HAS_ARGBMIRRORROW_AVX2)
2699 if (TestCpuFlag(kCpuHasAVX2)) {
2700 ARGBMirrorRow = ARGBMirrorRow_Any_AVX2;
2701 if (IS_ALIGNED(width, 8)) {
2702 ARGBMirrorRow = ARGBMirrorRow_AVX2;
2703 }
2704 }
2705 #endif
2706 #if defined(HAS_ARGBMIRRORROW_MSA)
2707 if (TestCpuFlag(kCpuHasMSA)) {
2708 ARGBMirrorRow = ARGBMirrorRow_Any_MSA;
2709 if (IS_ALIGNED(width, 16)) {
2710 ARGBMirrorRow = ARGBMirrorRow_MSA;
2711 }
2712 }
2713 #endif
2714 #if defined(HAS_ARGBMIRRORROW_LSX)
2715 if (TestCpuFlag(kCpuHasLSX)) {
2716 ARGBMirrorRow = ARGBMirrorRow_Any_LSX;
2717 if (IS_ALIGNED(width, 8)) {
2718 ARGBMirrorRow = ARGBMirrorRow_LSX;
2719 }
2720 }
2721 #endif
2722 #if defined(HAS_ARGBMIRRORROW_LASX)
2723 if (TestCpuFlag(kCpuHasLASX)) {
2724 ARGBMirrorRow = ARGBMirrorRow_Any_LASX;
2725 if (IS_ALIGNED(width, 16)) {
2726 ARGBMirrorRow = ARGBMirrorRow_LASX;
2727 }
2728 }
2729 #endif
2730
2731 // Mirror plane
2732 for (y = 0; y < height; ++y) {
2733 ARGBMirrorRow(src_argb, dst_argb, width);
2734 src_argb += src_stride_argb;
2735 dst_argb += dst_stride_argb;
2736 }
2737 return 0;
2738 }
2739
2740 // RGB24 mirror.
2741 LIBYUV_API
2742 int RGB24Mirror(const uint8_t* src_rgb24,
2743 int src_stride_rgb24,
2744 uint8_t* dst_rgb24,
2745 int dst_stride_rgb24,
2746 int width,
2747 int height) {
2748 int y;
2749 void (*RGB24MirrorRow)(const uint8_t* src, uint8_t* dst, int width) =
2750 RGB24MirrorRow_C;
2751 if (!src_rgb24 || !dst_rgb24 || width <= 0 || height == 0) {
2752 return -1;
2753 }
2754 // Negative height means invert the image.
2755 if (height < 0) {
2756 height = -height;
2757 src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
2758 src_stride_rgb24 = -src_stride_rgb24;
2759 }
2760 #if defined(HAS_RGB24MIRRORROW_NEON)
2761 if (TestCpuFlag(kCpuHasNEON)) {
2762 RGB24MirrorRow = RGB24MirrorRow_Any_NEON;
2763 if (IS_ALIGNED(width, 16)) {
2764 RGB24MirrorRow = RGB24MirrorRow_NEON;
2765 }
2766 }
2767 #endif
2768 #if defined(HAS_RGB24MIRRORROW_SSSE3)
2769 if (TestCpuFlag(kCpuHasSSSE3)) {
2770 RGB24MirrorRow = RGB24MirrorRow_Any_SSSE3;
2771 if (IS_ALIGNED(width, 16)) {
2772 RGB24MirrorRow = RGB24MirrorRow_SSSE3;
2773 }
2774 }
2775 #endif
2776
2777 // Mirror plane
2778 for (y = 0; y < height; ++y) {
2779 RGB24MirrorRow(src_rgb24, dst_rgb24, width);
2780 src_rgb24 += src_stride_rgb24;
2781 dst_rgb24 += dst_stride_rgb24;
2782 }
2783 return 0;
2784 }
2785
2786 // Alpha Blend 2 ARGB images and store to destination.
2787 LIBYUV_API
2788 int ARGBBlend(const uint8_t* src_argb0,
2789 int src_stride_argb0,
2790 const uint8_t* src_argb1,
2791 int src_stride_argb1,
2792 uint8_t* dst_argb,
2793 int dst_stride_argb,
2794 int width,
2795 int height) {
2796 int y;
2797 void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
2798 uint8_t* dst_argb, int width) = ARGBBlendRow_C;
2799 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
2800 return -1;
2801 }
2802 // Negative height means invert the image.
2803 if (height < 0) {
2804 height = -height;
2805 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
2806 dst_stride_argb = -dst_stride_argb;
2807 }
2808 // Coalesce rows.
2809 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
2810 dst_stride_argb == width * 4) {
2811 width *= height;
2812 height = 1;
2813 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
2814 }
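// The blend rows below handle arbitrary widths internally, which is
// presumably why no _Any/aligned split is used here (assumption).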
2815 #if defined(HAS_ARGBBLENDROW_SSSE3)
2816 if (TestCpuFlag(kCpuHasSSSE3)) {
2817 ARGBBlendRow = ARGBBlendRow_SSSE3;
2818 }
2819 #endif
2820 #if defined(HAS_ARGBBLENDROW_NEON)
2821 if (TestCpuFlag(kCpuHasNEON)) {
2822 ARGBBlendRow = ARGBBlendRow_NEON;
2823 }
2824 #endif
2825 #if defined(HAS_ARGBBLENDROW_MSA)
2826 if (TestCpuFlag(kCpuHasMSA)) {
2827 ARGBBlendRow = ARGBBlendRow_MSA;
2828 }
2829 #endif
2830 #if defined(HAS_ARGBBLENDROW_LSX)
2831 if (TestCpuFlag(kCpuHasLSX)) {
2832 ARGBBlendRow = ARGBBlendRow_LSX;
2833 }
2834 #endif
2835 #if defined(HAS_ARGBBLENDROW_RVV)
2836 if (TestCpuFlag(kCpuHasRVV)) {
2837 ARGBBlendRow = ARGBBlendRow_RVV;
2838 }
2839 #endif
2840 for (y = 0; y < height; ++y) {
2841 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
2842 src_argb0 += src_stride_argb0;
2843 src_argb1 += src_stride_argb1;
2844 dst_argb += dst_stride_argb;
2845 }
2846 return 0;
2847 }
2848
2849 // Alpha Blend plane and store to destination.
2850 LIBYUV_API
2851 int BlendPlane(const uint8_t* src_y0,
2852 int src_stride_y0,
2853 const uint8_t* src_y1,
2854 int src_stride_y1,
2855 const uint8_t* alpha,
2856 int alpha_stride,
2857 uint8_t* dst_y,
2858 int dst_stride_y,
2859 int width,
2860 int height) {
2861 int y;
2862 void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
2863 const uint8_t* alpha, uint8_t* dst, int width) =
2864 BlendPlaneRow_C;
2865 if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) {
2866 return -1;
2867 }
2868 // Negative height means invert the image.
2869 if (height < 0) {
2870 height = -height;
2871 dst_y = dst_y + (height - 1) * dst_stride_y;
2872 dst_stride_y = -dst_stride_y;
2873 }
2874
2875 // Coalesce rows for Y plane.
2876 if (src_stride_y0 == width && src_stride_y1 == width &&
2877 alpha_stride == width && dst_stride_y == width) {
2878 width *= height;
2879 height = 1;
2880 src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0;
2881 }
2882
2883 #if defined(HAS_BLENDPLANEROW_SSSE3)
2884 if (TestCpuFlag(kCpuHasSSSE3)) {
2885 BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
2886 if (IS_ALIGNED(width, 8)) {
2887 BlendPlaneRow = BlendPlaneRow_SSSE3;
2888 }
2889 }
2890 #endif
2891 #if defined(HAS_BLENDPLANEROW_AVX2)
2892 if (TestCpuFlag(kCpuHasAVX2)) {
2893 BlendPlaneRow = BlendPlaneRow_Any_AVX2;
2894 if (IS_ALIGNED(width, 32)) {
2895 BlendPlaneRow = BlendPlaneRow_AVX2;
2896 }
2897 }
2898 #endif
2899 #if defined(HAS_BLENDPLANEROW_RVV)
2900 if (TestCpuFlag(kCpuHasRVV)) {
2901 BlendPlaneRow = BlendPlaneRow_RVV;
2902 }
2903 #endif
2904
2905 for (y = 0; y < height; ++y) {
2906 BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
2907 src_y0 += src_stride_y0;
2908 src_y1 += src_stride_y1;
2909 alpha += alpha_stride;
2910 dst_y += dst_stride_y;
2911 }
2912 return 0;
2913 }
2914
2915 #define MAXTWIDTH 2048
2916 // Alpha Blend YUV images and store to destination.
2917 LIBYUV_API
2918 int I420Blend(const uint8_t* src_y0,
2919 int src_stride_y0,
2920 const uint8_t* src_u0,
2921 int src_stride_u0,
2922 const uint8_t* src_v0,
2923 int src_stride_v0,
2924 const uint8_t* src_y1,
2925 int src_stride_y1,
2926 const uint8_t* src_u1,
2927 int src_stride_u1,
2928 const uint8_t* src_v1,
2929 int src_stride_v1,
2930 const uint8_t* alpha,
2931 int alpha_stride,
2932 uint8_t* dst_y,
2933 int dst_stride_y,
2934 uint8_t* dst_u,
2935 int dst_stride_u,
2936 uint8_t* dst_v,
2937 int dst_stride_v,
2938 int width,
2939 int height) {
2940 int y;
2941 // Half width/height for UV.
2942 int halfwidth = (width + 1) >> 1;
2943 void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1,
2944 const uint8_t* alpha, uint8_t* dst, int width) =
2945 BlendPlaneRow_C;
2946 void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
2947 uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C;
2948
2949 if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 ||
2950 !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
2951 return -1;
2952 }
2953
2954 // Negative height means invert the image.
2955 if (height < 0) {
2956 height = -height;
2957 dst_y = dst_y + (height - 1) * dst_stride_y;
2958 dst_stride_y = -dst_stride_y;
2959 }
2960
2961 // Blend Y plane.
2962 BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride,
2963 dst_y, dst_stride_y, width, height);
2964
2965 #if defined(HAS_BLENDPLANEROW_SSSE3)
2966 if (TestCpuFlag(kCpuHasSSSE3)) {
2967 BlendPlaneRow = BlendPlaneRow_Any_SSSE3;
2968 if (IS_ALIGNED(halfwidth, 8)) {
2969 BlendPlaneRow = BlendPlaneRow_SSSE3;
2970 }
2971 }
2972 #endif
2973 #if defined(HAS_BLENDPLANEROW_AVX2)
2974 if (TestCpuFlag(kCpuHasAVX2)) {
2975 BlendPlaneRow = BlendPlaneRow_Any_AVX2;
2976 if (IS_ALIGNED(halfwidth, 32)) {
2977 BlendPlaneRow = BlendPlaneRow_AVX2;
2978 }
2979 }
2980 #endif
2981 #if defined(HAS_BLENDPLANEROW_RVV)
2982 if (TestCpuFlag(kCpuHasRVV)) {
2983 BlendPlaneRow = BlendPlaneRow_RVV;
2984 }
2985 #endif
2986 if (!IS_ALIGNED(width, 2)) {
2987 ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
2988 }
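// Odd widths select the _Odd box-filter variants, which handle the final
// source column that has no horizontal pair.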
2989 #if defined(HAS_SCALEROWDOWN2_NEON)
2990 if (TestCpuFlag(kCpuHasNEON)) {
2991 ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON;
2992 if (IS_ALIGNED(width, 2)) {
2993 ScaleRowDown2 = ScaleRowDown2Box_Any_NEON;
2994 if (IS_ALIGNED(halfwidth, 16)) {
2995 ScaleRowDown2 = ScaleRowDown2Box_NEON;
2996 }
2997 }
2998 }
2999 #endif
3000 #if defined(HAS_SCALEROWDOWN2_SSSE3)
3001 if (TestCpuFlag(kCpuHasSSSE3)) {
3002 ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3;
3003 if (IS_ALIGNED(width, 2)) {
3004 ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3;
3005 if (IS_ALIGNED(halfwidth, 16)) {
3006 ScaleRowDown2 = ScaleRowDown2Box_SSSE3;
3007 }
3008 }
3009 }
3010 #endif
3011 #if defined(HAS_SCALEROWDOWN2_AVX2)
3012 if (TestCpuFlag(kCpuHasAVX2)) {
3013 ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2;
3014 if (IS_ALIGNED(width, 2)) {
3015 ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2;
3016 if (IS_ALIGNED(halfwidth, 32)) {
3017 ScaleRowDown2 = ScaleRowDown2Box_AVX2;
3018 }
3019 }
3020 }
3021 #endif
3022 #if defined(HAS_SCALEROWDOWN2_RVV)
3023 if (TestCpuFlag(kCpuHasRVV)) {
3024 ScaleRowDown2 = ScaleRowDown2Box_RVV;
3025 }
3026 #endif
3027
3028 // Row buffer for intermediate alpha pixels.
3029 align_buffer_64(halfalpha, halfwidth);
3030 if (!halfalpha)
3031 return 1;
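// Blend U and V at half resolution: each pass box-filters two rows of
// alpha down to one half-width row, then blends one row of U and one of V.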
3032 for (y = 0; y < height; y += 2) {
3033 // The last row of an odd-height image uses 1 row of alpha instead of 2.
3034 if (y == (height - 1)) {
3035 alpha_stride = 0;
3036 }
3037 // Subsample 2 rows of alpha to half width and half height.
3038 ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth);
3039 alpha += alpha_stride * 2;
3040 BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth);
3041 BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth);
3042 src_u0 += src_stride_u0;
3043 src_u1 += src_stride_u1;
3044 dst_u += dst_stride_u;
3045 src_v0 += src_stride_v0;
3046 src_v1 += src_stride_v1;
3047 dst_v += dst_stride_v;
3048 }
3049 free_aligned_buffer_64(halfalpha);
3050 return 0;
3051 }
3052
3053 // Multiply 2 ARGB images and store to destination.
3054 LIBYUV_API
3055 int ARGBMultiply(const uint8_t* src_argb0,
3056 int src_stride_argb0,
3057 const uint8_t* src_argb1,
3058 int src_stride_argb1,
3059 uint8_t* dst_argb,
3060 int dst_stride_argb,
3061 int width,
3062 int height) {
3063 int y;
3064 void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1,
3065 uint8_t* dst, int width) = ARGBMultiplyRow_C;
3066 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
3067 return -1;
3068 }
3069 // Negative height means invert the image.
3070 if (height < 0) {
3071 height = -height;
3072 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3073 dst_stride_argb = -dst_stride_argb;
3074 }
3075 // Coalesce rows.
3076 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
3077 dst_stride_argb == width * 4) {
3078 width *= height;
3079 height = 1;
3080 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
3081 }
3082 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
3083 if (TestCpuFlag(kCpuHasSSE2)) {
3084 ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
3085 if (IS_ALIGNED(width, 4)) {
3086 ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
3087 }
3088 }
3089 #endif
3090 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
3091 if (TestCpuFlag(kCpuHasAVX2)) {
3092 ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
3093 if (IS_ALIGNED(width, 8)) {
3094 ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
3095 }
3096 }
3097 #endif
3098 #if defined(HAS_ARGBMULTIPLYROW_NEON)
3099 if (TestCpuFlag(kCpuHasNEON)) {
3100 ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
3101 if (IS_ALIGNED(width, 8)) {
3102 ARGBMultiplyRow = ARGBMultiplyRow_NEON;
3103 }
3104 }
3105 #endif
3106 #if defined(HAS_ARGBMULTIPLYROW_MSA)
3107 if (TestCpuFlag(kCpuHasMSA)) {
3108 ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA;
3109 if (IS_ALIGNED(width, 4)) {
3110 ARGBMultiplyRow = ARGBMultiplyRow_MSA;
3111 }
3112 }
3113 #endif
3114 #if defined(HAS_ARGBMULTIPLYROW_LSX)
3115 if (TestCpuFlag(kCpuHasLSX)) {
3116 ARGBMultiplyRow = ARGBMultiplyRow_Any_LSX;
3117 if (IS_ALIGNED(width, 4)) {
3118 ARGBMultiplyRow = ARGBMultiplyRow_LSX;
3119 }
3120 }
3121 #endif
3122 #if defined(HAS_ARGBMULTIPLYROW_LASX)
3123 if (TestCpuFlag(kCpuHasLASX)) {
3124 ARGBMultiplyRow = ARGBMultiplyRow_Any_LASX;
3125 if (IS_ALIGNED(width, 8)) {
3126 ARGBMultiplyRow = ARGBMultiplyRow_LASX;
3127 }
3128 }
3129 #endif
3130
3131 // Multiply plane
3132 for (y = 0; y < height; ++y) {
3133 ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
3134 src_argb0 += src_stride_argb0;
3135 src_argb1 += src_stride_argb1;
3136 dst_argb += dst_stride_argb;
3137 }
3138 return 0;
3139 }
3140
3141 // Add 2 ARGB images and store to destination.
3142 LIBYUV_API
3143 int ARGBAdd(const uint8_t* src_argb0,
3144 int src_stride_argb0,
3145 const uint8_t* src_argb1,
3146 int src_stride_argb1,
3147 uint8_t* dst_argb,
3148 int dst_stride_argb,
3149 int width,
3150 int height) {
3151 int y;
3152 void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst,
3153 int width) = ARGBAddRow_C;
3154 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
3155 return -1;
3156 }
3157 // Negative height means invert the image.
3158 if (height < 0) {
3159 height = -height;
3160 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3161 dst_stride_argb = -dst_stride_argb;
3162 }
3163 // Coalesce rows.
3164 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
3165 dst_stride_argb == width * 4) {
3166 width *= height;
3167 height = 1;
3168 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
3169 }
3175 #if defined(HAS_ARGBADDROW_SSE2)
3176 if (TestCpuFlag(kCpuHasSSE2)) {
3177 ARGBAddRow = ARGBAddRow_Any_SSE2;
3178 if (IS_ALIGNED(width, 4)) {
3179 ARGBAddRow = ARGBAddRow_SSE2;
3180 }
3181 }
3182 #endif
3183 #if defined(HAS_ARGBADDROW_AVX2)
3184 if (TestCpuFlag(kCpuHasAVX2)) {
3185 ARGBAddRow = ARGBAddRow_Any_AVX2;
3186 if (IS_ALIGNED(width, 8)) {
3187 ARGBAddRow = ARGBAddRow_AVX2;
3188 }
3189 }
3190 #endif
3191 #if defined(HAS_ARGBADDROW_NEON)
3192 if (TestCpuFlag(kCpuHasNEON)) {
3193 ARGBAddRow = ARGBAddRow_Any_NEON;
3194 if (IS_ALIGNED(width, 8)) {
3195 ARGBAddRow = ARGBAddRow_NEON;
3196 }
3197 }
3198 #endif
3199 #if defined(HAS_ARGBADDROW_MSA)
3200 if (TestCpuFlag(kCpuHasMSA)) {
3201 ARGBAddRow = ARGBAddRow_Any_MSA;
3202 if (IS_ALIGNED(width, 8)) {
3203 ARGBAddRow = ARGBAddRow_MSA;
3204 }
3205 }
3206 #endif
3207 #if defined(HAS_ARGBADDROW_LSX)
3208 if (TestCpuFlag(kCpuHasLSX)) {
3209 ARGBAddRow = ARGBAddRow_Any_LSX;
3210 if (IS_ALIGNED(width, 4)) {
3211 ARGBAddRow = ARGBAddRow_LSX;
3212 }
3213 }
3214 #endif
3215 #if defined(HAS_ARGBADDROW_LASX)
3216 if (TestCpuFlag(kCpuHasLASX)) {
3217 ARGBAddRow = ARGBAddRow_Any_LASX;
3218 if (IS_ALIGNED(width, 8)) {
3219 ARGBAddRow = ARGBAddRow_LASX;
3220 }
3221 }
3222 #endif
3223
3224 // Add plane
3225 for (y = 0; y < height; ++y) {
3226 ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
3227 src_argb0 += src_stride_argb0;
3228 src_argb1 += src_stride_argb1;
3229 dst_argb += dst_stride_argb;
3230 }
3231 return 0;
3232 }
3233
3234 // Subtract 2 ARGB images and store to destination.
3235 LIBYUV_API
3236 int ARGBSubtract(const uint8_t* src_argb0,
3237 int src_stride_argb0,
3238 const uint8_t* src_argb1,
3239 int src_stride_argb1,
3240 uint8_t* dst_argb,
3241 int dst_stride_argb,
3242 int width,
3243 int height) {
3244 int y;
3245 void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1,
3246 uint8_t* dst, int width) = ARGBSubtractRow_C;
3247 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
3248 return -1;
3249 }
3250 // Negative height means invert the image.
3251 if (height < 0) {
3252 height = -height;
3253 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3254 dst_stride_argb = -dst_stride_argb;
3255 }
3256 // Coalesce rows.
3257 if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
3258 dst_stride_argb == width * 4) {
3259 width *= height;
3260 height = 1;
3261 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
3262 }
3263 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
3264 if (TestCpuFlag(kCpuHasSSE2)) {
3265 ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
3266 if (IS_ALIGNED(width, 4)) {
3267 ARGBSubtractRow = ARGBSubtractRow_SSE2;
3268 }
3269 }
3270 #endif
3271 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
3272 if (TestCpuFlag(kCpuHasAVX2)) {
3273 ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
3274 if (IS_ALIGNED(width, 8)) {
3275 ARGBSubtractRow = ARGBSubtractRow_AVX2;
3276 }
3277 }
3278 #endif
3279 #if defined(HAS_ARGBSUBTRACTROW_NEON)
3280 if (TestCpuFlag(kCpuHasNEON)) {
3281 ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
3282 if (IS_ALIGNED(width, 8)) {
3283 ARGBSubtractRow = ARGBSubtractRow_NEON;
3284 }
3285 }
3286 #endif
3287 #if defined(HAS_ARGBSUBTRACTROW_MSA)
3288 if (TestCpuFlag(kCpuHasMSA)) {
3289 ARGBSubtractRow = ARGBSubtractRow_Any_MSA;
3290 if (IS_ALIGNED(width, 8)) {
3291 ARGBSubtractRow = ARGBSubtractRow_MSA;
3292 }
3293 }
3294 #endif
3295 #if defined(HAS_ARGBSUBTRACTROW_LSX)
3296 if (TestCpuFlag(kCpuHasLSX)) {
3297 ARGBSubtractRow = ARGBSubtractRow_Any_LSX;
3298 if (IS_ALIGNED(width, 4)) {
3299 ARGBSubtractRow = ARGBSubtractRow_LSX;
3300 }
3301 }
3302 #endif
3303 #if defined(HAS_ARGBSUBTRACTROW_LASX)
3304 if (TestCpuFlag(kCpuHasLASX)) {
3305 ARGBSubtractRow = ARGBSubtractRow_Any_LASX;
3306 if (IS_ALIGNED(width, 8)) {
3307 ARGBSubtractRow = ARGBSubtractRow_LASX;
3308 }
3309 }
3310 #endif
3311
3312 // Subtract plane
3313 for (y = 0; y < height; ++y) {
3314 ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
3315 src_argb0 += src_stride_argb0;
3316 src_argb1 += src_stride_argb1;
3317 dst_argb += dst_stride_argb;
3318 }
3319 return 0;
3320 }
3321
3322 // Convert RAW to RGB24.
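// In libyuv fourcc terms, RGB24 is B,G,R in memory and RAW is R,G,B, so
// this conversion swaps the channel order.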
3323 LIBYUV_API
3324 int RAWToRGB24(const uint8_t* src_raw,
3325 int src_stride_raw,
3326 uint8_t* dst_rgb24,
3327 int dst_stride_rgb24,
3328 int width,
3329 int height) {
3330 int y;
3331 void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) =
3332 RAWToRGB24Row_C;
3333 if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) {
3334 return -1;
3335 }
3336 // Negative height means invert the image.
3337 if (height < 0) {
3338 height = -height;
3339 src_raw = src_raw + (height - 1) * src_stride_raw;
3340 src_stride_raw = -src_stride_raw;
3341 }
3342 // Coalesce rows.
3343 if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) {
3344 width *= height;
3345 height = 1;
3346 src_stride_raw = dst_stride_rgb24 = 0;
3347 }
3348 #if defined(HAS_RAWTORGB24ROW_SSSE3)
3349 if (TestCpuFlag(kCpuHasSSSE3)) {
3350 RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3;
3351 if (IS_ALIGNED(width, 8)) {
3352 RAWToRGB24Row = RAWToRGB24Row_SSSE3;
3353 }
3354 }
3355 #endif
3356 #if defined(HAS_RAWTORGB24ROW_NEON)
3357 if (TestCpuFlag(kCpuHasNEON)) {
3358 RAWToRGB24Row = RAWToRGB24Row_Any_NEON;
3359 if (IS_ALIGNED(width, 8)) {
3360 RAWToRGB24Row = RAWToRGB24Row_NEON;
3361 }
3362 }
3363 #endif
3364 #if defined(HAS_RAWTORGB24ROW_MSA)
3365 if (TestCpuFlag(kCpuHasMSA)) {
3366 RAWToRGB24Row = RAWToRGB24Row_Any_MSA;
3367 if (IS_ALIGNED(width, 16)) {
3368 RAWToRGB24Row = RAWToRGB24Row_MSA;
3369 }
3370 }
3371 #endif
3372 #if defined(HAS_RAWTORGB24ROW_LSX)
3373 if (TestCpuFlag(kCpuHasLSX)) {
3374 RAWToRGB24Row = RAWToRGB24Row_Any_LSX;
3375 if (IS_ALIGNED(width, 16)) {
3376 RAWToRGB24Row = RAWToRGB24Row_LSX;
3377 }
3378 }
3379 #endif
3380 #if defined(HAS_RAWTORGB24ROW_RVV)
3381 if (TestCpuFlag(kCpuHasRVV)) {
3382 RAWToRGB24Row = RAWToRGB24Row_RVV;
3383 }
3384 #endif
3385
3386 for (y = 0; y < height; ++y) {
3387 RAWToRGB24Row(src_raw, dst_rgb24, width);
3388 src_raw += src_stride_raw;
3389 dst_rgb24 += dst_stride_rgb24;
3390 }
3391 return 0;
3392 }
3393
3394 // TODO(fbarchard): Consider uint8_t value
3395 LIBYUV_API
3396 void SetPlane(uint8_t* dst_y,
3397 int dst_stride_y,
3398 int width,
3399 int height,
3400 uint32_t value) {
3401 int y;
3402 void (*SetRow)(uint8_t* dst, uint8_t value, int width) = SetRow_C;
3403
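// Note: only the low byte of |value| is used; see the cast at the SetRow
// call below.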
3404 if (width <= 0 || height == 0) {
3405 return;
3406 }
3407 if (height < 0) {
3408 height = -height;
3409 dst_y = dst_y + (height - 1) * dst_stride_y;
3410 dst_stride_y = -dst_stride_y;
3411 }
3412 // Coalesce rows.
3413 if (dst_stride_y == width) {
3414 width *= height;
3415 height = 1;
3416 dst_stride_y = 0;
3417 }
3418 #if defined(HAS_SETROW_NEON)
3419 if (TestCpuFlag(kCpuHasNEON)) {
3420 SetRow = SetRow_Any_NEON;
3421 if (IS_ALIGNED(width, 16)) {
3422 SetRow = SetRow_NEON;
3423 }
3424 }
3425 #endif
3426 #if defined(HAS_SETROW_X86)
3427 if (TestCpuFlag(kCpuHasX86)) {
3428 SetRow = SetRow_Any_X86;
3429 if (IS_ALIGNED(width, 4)) {
3430 SetRow = SetRow_X86;
3431 }
3432 }
3433 #endif
3434 #if defined(HAS_SETROW_ERMS)
3435 if (TestCpuFlag(kCpuHasERMS)) {
3436 SetRow = SetRow_ERMS;
3437 }
3438 #endif
3439 #if defined(HAS_SETROW_MSA)
3440 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) {
3441 SetRow = SetRow_MSA;
3442 }
3443 #endif
3444 #if defined(HAS_SETROW_LSX)
3445 if (TestCpuFlag(kCpuHasLSX)) {
3446 SetRow = SetRow_Any_LSX;
3447 if (IS_ALIGNED(width, 16)) {
3448 SetRow = SetRow_LSX;
3449 }
3450 }
3451 #endif
3452
3453 // Set plane
3454 for (y = 0; y < height; ++y) {
3455 SetRow(dst_y, (uint8_t)value, width);
3456 dst_y += dst_stride_y;
3457 }
3458 }
3459
3460 // Draw a rectangle into I420
3461 LIBYUV_API
3462 int I420Rect(uint8_t* dst_y,
3463 int dst_stride_y,
3464 uint8_t* dst_u,
3465 int dst_stride_u,
3466 uint8_t* dst_v,
3467 int dst_stride_v,
3468 int x,
3469 int y,
3470 int width,
3471 int height,
3472 int value_y,
3473 int value_u,
3474 int value_v) {
3475 int halfwidth = (width + 1) >> 1;
3476 int halfheight = (height + 1) >> 1;
3477 uint8_t* start_y = dst_y + y * dst_stride_y + x;
3478 uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
3479 uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
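// U and V rectangles start at half the x/y offsets because I420 chroma is
// subsampled 2x2.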
3480
3481 if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 ||
3482 y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 ||
3483 value_v < 0 || value_v > 255) {
3484 return -1;
3485 }
3486
3487 SetPlane(start_y, dst_stride_y, width, height, value_y);
3488 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
3489 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
3490 return 0;
3491 }
3492
3493 // Draw a rectangle into ARGB
3494 LIBYUV_API
3495 int ARGBRect(uint8_t* dst_argb,
3496 int dst_stride_argb,
3497 int dst_x,
3498 int dst_y,
3499 int width,
3500 int height,
3501 uint32_t value) {
3502 int y;
3503 void (*ARGBSetRow)(uint8_t* dst_argb, uint32_t value, int width) =
3504 ARGBSetRow_C;
3505 if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) {
3506 return -1;
3507 }
3508 if (height < 0) {
3509 height = -height;
3510 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
3511 dst_stride_argb = -dst_stride_argb;
3512 }
3513 dst_argb += dst_y * dst_stride_argb + dst_x * 4;
3514 // Coalesce rows.
3515 if (dst_stride_argb == width * 4) {
3516 width *= height;
3517 height = 1;
3518 dst_stride_argb = 0;
3519 }
3520
3521 #if defined(HAS_ARGBSETROW_NEON)
3522 if (TestCpuFlag(kCpuHasNEON)) {
3523 ARGBSetRow = ARGBSetRow_Any_NEON;
3524 if (IS_ALIGNED(width, 4)) {
3525 ARGBSetRow = ARGBSetRow_NEON;
3526 }
3527 }
3528 #endif
3529 #if defined(HAS_ARGBSETROW_X86)
3530 if (TestCpuFlag(kCpuHasX86)) {
3531 ARGBSetRow = ARGBSetRow_X86;
3532 }
3533 #endif
3534 #if defined(HAS_ARGBSETROW_MSA)
3535 if (TestCpuFlag(kCpuHasMSA)) {
3536 ARGBSetRow = ARGBSetRow_Any_MSA;
3537 if (IS_ALIGNED(width, 4)) {
3538 ARGBSetRow = ARGBSetRow_MSA;
3539 }
3540 }
3541 #endif
3542 #if defined(HAS_ARGBSETROW_LSX)
3543 if (TestCpuFlag(kCpuHasLSX)) {
3544 ARGBSetRow = ARGBSetRow_Any_LSX;
3545 if (IS_ALIGNED(width, 4)) {
3546 ARGBSetRow = ARGBSetRow_LSX;
3547 }
3548 }
3549 #endif
3550
3551 // Set plane
3552 for (y = 0; y < height; ++y) {
3553 ARGBSetRow(dst_argb, value, width);
3554 dst_argb += dst_stride_argb;
3555 }
3556 return 0;
3557 }
3558
3559 // Convert unattenuated ARGB to preattenuated ARGB.
3560 // An unattenuated ARGB alpha blend uses the formula
3561 // p = a * f + (1 - a) * b
3562 // where
3563 // p is output pixel
3564 // f is foreground pixel
3565 // b is background pixel
3566 // a is alpha value from foreground pixel
3567 // A preattenuated ARGB alpha blend uses the formula
3568 // p = f + (1 - a) * b
3569 // where
3570 // f is foreground pixel premultiplied by alpha
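//
// Worked example (illustrative): with a = 0.5, f = 255 and b = 0, the
// unattenuated blend gives p = 0.5 * 255 + 0.5 * 0 = 127.5, while the
// preattenuated form stores f = 127.5 up front so p = f + 0.5 * 0 = 127.5.
// Attenuation performs that multiply once per pixel instead of once per
// blend.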
3571
3572 LIBYUV_API
3573 int ARGBAttenuate(const uint8_t* src_argb,
3574 int src_stride_argb,
3575 uint8_t* dst_argb,
3576 int dst_stride_argb,
3577 int width,
3578 int height) {
3579 int y;
3580 void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3581 int width) = ARGBAttenuateRow_C;
3582 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3583 return -1;
3584 }
3585 if (height < 0) {
3586 height = -height;
3587 src_argb = src_argb + (height - 1) * src_stride_argb;
3588 src_stride_argb = -src_stride_argb;
3589 }
3590 // Coalesce rows.
3591 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3592 width *= height;
3593 height = 1;
3594 src_stride_argb = dst_stride_argb = 0;
3595 }
3596 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
3597 if (TestCpuFlag(kCpuHasSSSE3)) {
3598 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
3599 if (IS_ALIGNED(width, 4)) {
3600 ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
3601 }
3602 }
3603 #endif
3604 #if defined(HAS_ARGBATTENUATEROW_AVX2)
3605 if (TestCpuFlag(kCpuHasAVX2)) {
3606 ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
3607 if (IS_ALIGNED(width, 8)) {
3608 ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
3609 }
3610 }
3611 #endif
3612 #if defined(HAS_ARGBATTENUATEROW_NEON)
3613 if (TestCpuFlag(kCpuHasNEON)) {
3614 ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
3615 if (IS_ALIGNED(width, 8)) {
3616 ARGBAttenuateRow = ARGBAttenuateRow_NEON;
3617 }
3618 }
3619 #endif
3620 #if defined(HAS_ARGBATTENUATEROW_MSA)
3621 if (TestCpuFlag(kCpuHasMSA)) {
3622 ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA;
3623 if (IS_ALIGNED(width, 8)) {
3624 ARGBAttenuateRow = ARGBAttenuateRow_MSA;
3625 }
3626 }
3627 #endif
3628 #if defined(HAS_ARGBATTENUATEROW_LSX)
3629 if (TestCpuFlag(kCpuHasLSX)) {
3630 ARGBAttenuateRow = ARGBAttenuateRow_Any_LSX;
3631 if (IS_ALIGNED(width, 8)) {
3632 ARGBAttenuateRow = ARGBAttenuateRow_LSX;
3633 }
3634 }
3635 #endif
3636 #if defined(HAS_ARGBATTENUATEROW_LASX)
3637 if (TestCpuFlag(kCpuHasLASX)) {
3638 ARGBAttenuateRow = ARGBAttenuateRow_Any_LASX;
3639 if (IS_ALIGNED(width, 16)) {
3640 ARGBAttenuateRow = ARGBAttenuateRow_LASX;
3641 }
3642 }
3643 #endif
3644 #if defined(HAS_ARGBATTENUATEROW_RVV)
3645 if (TestCpuFlag(kCpuHasRVV)) {
3646 ARGBAttenuateRow = ARGBAttenuateRow_RVV;
3647 }
3648 #endif
3649
3650 for (y = 0; y < height; ++y) {
3651 ARGBAttenuateRow(src_argb, dst_argb, width);
3652 src_argb += src_stride_argb;
3653 dst_argb += dst_stride_argb;
3654 }
3655 return 0;
3656 }
3657
3658 // Convert preattenuated ARGB to unattenuated ARGB.
3659 LIBYUV_API
3660 int ARGBUnattenuate(const uint8_t* src_argb,
3661 int src_stride_argb,
3662 uint8_t* dst_argb,
3663 int dst_stride_argb,
3664 int width,
3665 int height) {
3666 int y;
3667 void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3668 int width) = ARGBUnattenuateRow_C;
3669 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3670 return -1;
3671 }
3672 if (height < 0) {
3673 height = -height;
3674 src_argb = src_argb + (height - 1) * src_stride_argb;
3675 src_stride_argb = -src_stride_argb;
3676 }
3677 // Coalesce rows.
3678 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3679 width *= height;
3680 height = 1;
3681 src_stride_argb = dst_stride_argb = 0;
3682 }
3683 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
3684 if (TestCpuFlag(kCpuHasSSE2)) {
3685 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
3686 if (IS_ALIGNED(width, 4)) {
3687 ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
3688 }
3689 }
3690 #endif
3691 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
3692 if (TestCpuFlag(kCpuHasAVX2)) {
3693 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
3694 if (IS_ALIGNED(width, 8)) {
3695 ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
3696 }
3697 }
3698 #endif
3699 // TODO(fbarchard): Neon version.
3700
3701 for (y = 0; y < height; ++y) {
3702 ARGBUnattenuateRow(src_argb, dst_argb, width);
3703 src_argb += src_stride_argb;
3704 dst_argb += dst_stride_argb;
3705 }
3706 return 0;
3707 }
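
// Illustrative usage sketch (not part of the library): premultiply an ARGB
// buffer and then undo it with the pair above. The round trip can be lossy
// for small alpha values because the premultiplied channels are quantized
// to 8 bits. Names below are hypothetical.
static int ExampleAttenuateRoundTrip(const uint8_t* src_argb, int src_stride,
                                     uint8_t* tmp_argb, int tmp_stride,
                                     uint8_t* dst_argb, int dst_stride,
                                     int width, int height) {
  // Premultiply: each color channel becomes roughly channel * alpha / 255.
  if (ARGBAttenuate(src_argb, src_stride, tmp_argb, tmp_stride, width,
                    height)) {
    return -1;
  }
  // Unpremultiply: divide the channels back out by alpha.
  return ARGBUnattenuate(tmp_argb, tmp_stride, dst_argb, dst_stride, width,
                         height);
}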
3708
3709 // Convert ARGB to Grayed ARGB.
3710 LIBYUV_API
3711 int ARGBGrayTo(const uint8_t* src_argb,
3712 int src_stride_argb,
3713 uint8_t* dst_argb,
3714 int dst_stride_argb,
3715 int width,
3716 int height) {
3717 int y;
3718 void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
3719 ARGBGrayRow_C;
3720 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
3721 return -1;
3722 }
3723 if (height < 0) {
3724 height = -height;
3725 src_argb = src_argb + (height - 1) * src_stride_argb;
3726 src_stride_argb = -src_stride_argb;
3727 }
3728 // Coalesce rows.
3729 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3730 width *= height;
3731 height = 1;
3732 src_stride_argb = dst_stride_argb = 0;
3733 }
3734 #if defined(HAS_ARGBGRAYROW_SSSE3)
3735 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3736 ARGBGrayRow = ARGBGrayRow_SSSE3;
3737 }
3738 #endif
3739 #if defined(HAS_ARGBGRAYROW_NEON)
3740 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3741 ARGBGrayRow = ARGBGrayRow_NEON;
3742 }
3743 #endif
3744 #if defined(HAS_ARGBGRAYROW_MSA)
3745 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3746 ARGBGrayRow = ARGBGrayRow_MSA;
3747 }
3748 #endif
3749 #if defined(HAS_ARGBGRAYROW_LSX)
3750 if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3751 ARGBGrayRow = ARGBGrayRow_LSX;
3752 }
3753 #endif
3754 #if defined(HAS_ARGBGRAYROW_LASX)
3755 if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3756 ARGBGrayRow = ARGBGrayRow_LASX;
3757 }
3758 #endif
3759
3760 for (y = 0; y < height; ++y) {
3761 ARGBGrayRow(src_argb, dst_argb, width);
3762 src_argb += src_stride_argb;
3763 dst_argb += dst_stride_argb;
3764 }
3765 return 0;
3766 }
3767
3768 // Make a rectangle of ARGB gray scale.
3769 LIBYUV_API
3770 int ARGBGray(uint8_t* dst_argb,
3771 int dst_stride_argb,
3772 int dst_x,
3773 int dst_y,
3774 int width,
3775 int height) {
3776 int y;
3777 void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
3778 ARGBGrayRow_C;
3779 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3780 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
3781 return -1;
3782 }
3783 // Coalesce rows.
3784 if (dst_stride_argb == width * 4) {
3785 width *= height;
3786 height = 1;
3787 dst_stride_argb = 0;
3788 }
3789 #if defined(HAS_ARGBGRAYROW_SSSE3)
3790 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3791 ARGBGrayRow = ARGBGrayRow_SSSE3;
3792 }
3793 #endif
3794 #if defined(HAS_ARGBGRAYROW_NEON)
3795 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3796 ARGBGrayRow = ARGBGrayRow_NEON;
3797 }
3798 #endif
3799 #if defined(HAS_ARGBGRAYROW_MSA)
3800 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3801 ARGBGrayRow = ARGBGrayRow_MSA;
3802 }
3803 #endif
3804 #if defined(HAS_ARGBGRAYROW_LSX)
3805 if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3806 ARGBGrayRow = ARGBGrayRow_LSX;
3807 }
3808 #endif
3809 #if defined(HAS_ARGBGRAYROW_LASX)
3810 if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3811 ARGBGrayRow = ARGBGrayRow_LASX;
3812 }
3813 #endif
3814
3815 for (y = 0; y < height; ++y) {
3816 ARGBGrayRow(dst, dst, width);
3817 dst += dst_stride_argb;
3818 }
3819 return 0;
3820 }
3821
3822 // Make a rectangle of ARGB Sepia tone.
3823 LIBYUV_API
3824 int ARGBSepia(uint8_t* dst_argb,
3825 int dst_stride_argb,
3826 int dst_x,
3827 int dst_y,
3828 int width,
3829 int height) {
3830 int y;
3831 void (*ARGBSepiaRow)(uint8_t* dst_argb, int width) = ARGBSepiaRow_C;
3832 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3833 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
3834 return -1;
3835 }
3836 // Coalesce rows.
3837 if (dst_stride_argb == width * 4) {
3838 width *= height;
3839 height = 1;
3840 dst_stride_argb = 0;
3841 }
3842 #if defined(HAS_ARGBSEPIAROW_SSSE3)
3843 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3844 ARGBSepiaRow = ARGBSepiaRow_SSSE3;
3845 }
3846 #endif
3847 #if defined(HAS_ARGBSEPIAROW_NEON)
3848 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3849 ARGBSepiaRow = ARGBSepiaRow_NEON;
3850 }
3851 #endif
3852 #if defined(HAS_ARGBSEPIAROW_MSA)
3853 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3854 ARGBSepiaRow = ARGBSepiaRow_MSA;
3855 }
3856 #endif
3857 #if defined(HAS_ARGBSEPIAROW_LSX)
3858 if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3859 ARGBSepiaRow = ARGBSepiaRow_LSX;
3860 }
3861 #endif
3862 #if defined(HAS_ARGBSEPIAROW_LASX)
3863 if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 16)) {
3864 ARGBSepiaRow = ARGBSepiaRow_LASX;
3865 }
3866 #endif
3867
3868 for (y = 0; y < height; ++y) {
3869 ARGBSepiaRow(dst, width);
3870 dst += dst_stride_argb;
3871 }
3872 return 0;
3873 }
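
// Illustrative sketch: apply the sepia effect in place to a 100x100 region
// whose top left corner is at (16, 8) within the destination image. Names
// are hypothetical.
static int ExampleSepiaRegion(uint8_t* frame, int frame_stride) {
  return ARGBSepia(frame, frame_stride, /*dst_x=*/16, /*dst_y=*/8,
                   /*width=*/100, /*height=*/100);
}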
3874
3875 // Apply a 4x4 matrix to each ARGB pixel.
3876 // Note: Normally for shading, but can be used to swizzle or invert.
3877 LIBYUV_API
3878 int ARGBColorMatrix(const uint8_t* src_argb,
3879 int src_stride_argb,
3880 uint8_t* dst_argb,
3881 int dst_stride_argb,
3882 const int8_t* matrix_argb,
3883 int width,
3884 int height) {
3885 int y;
3886 void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb,
3887 const int8_t* matrix_argb, int width) =
3888 ARGBColorMatrixRow_C;
3889 if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
3890 return -1;
3891 }
3892 if (height < 0) {
3893 height = -height;
3894 src_argb = src_argb + (height - 1) * src_stride_argb;
3895 src_stride_argb = -src_stride_argb;
3896 }
3897 // Coalesce rows.
3898 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
3899 width *= height;
3900 height = 1;
3901 src_stride_argb = dst_stride_argb = 0;
3902 }
3903 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
3904 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) {
3905 ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
3906 }
3907 #endif
3908 #if defined(HAS_ARGBCOLORMATRIXROW_NEON)
3909 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
3910 ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
3911 }
3912 #endif
3913 #if defined(HAS_ARGBCOLORMATRIXROW_MSA)
3914 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
3915 ARGBColorMatrixRow = ARGBColorMatrixRow_MSA;
3916 }
3917 #endif
3918 #if defined(HAS_ARGBCOLORMATRIXROW_LSX)
3919 if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
3920 ARGBColorMatrixRow = ARGBColorMatrixRow_LSX;
3921 }
3922 #endif
3923 for (y = 0; y < height; ++y) {
3924 ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
3925 src_argb += src_stride_argb;
3926 dst_argb += dst_stride_argb;
3927 }
3928 return 0;
3929 }
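
// Illustrative sketch: an identity matrix for ARGBColorMatrix. Coefficients
// are signed fixed point where 64 represents 1.0 (the row functions shift
// right by 6; see also RGBColorMatrix below, which stores 64 for 1.0). Each
// group of 4 coefficients weights the B, G, R, A inputs to produce one
// output channel, in B, G, R, A order - treat that layout as an assumption
// based on the C row implementation.
static const int8_t kExampleIdentityMatrix[16] = {
    64, 0, 0, 0,  // dst B = 1.0 * src B
    0, 64, 0, 0,  // dst G = 1.0 * src G
    0, 0, 64, 0,  // dst R = 1.0 * src R
    0, 0, 0, 64,  // dst A = 1.0 * src A
};
// ARGBColorMatrix(src, src_stride, dst, dst_stride, kExampleIdentityMatrix,
//                 width, height) then copies src to dst unchanged.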
3930
3931 // Apply a 4x3 matrix to each ARGB pixel.
3932 // Deprecated.
3933 LIBYUV_API
3934 int RGBColorMatrix(uint8_t* dst_argb,
3935 int dst_stride_argb,
3936 const int8_t* matrix_rgb,
3937 int dst_x,
3938 int dst_y,
3939 int width,
3940 int height) {
3941 SIMD_ALIGNED(int8_t matrix_argb[16]);
3942 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3943 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 ||
3944 dst_y < 0) {
3945 return -1;
3946 }
3947
3948 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
3949 matrix_argb[0] = matrix_rgb[0] / 2;
3950 matrix_argb[1] = matrix_rgb[1] / 2;
3951 matrix_argb[2] = matrix_rgb[2] / 2;
3952 matrix_argb[3] = matrix_rgb[3] / 2;
3953 matrix_argb[4] = matrix_rgb[4] / 2;
3954 matrix_argb[5] = matrix_rgb[5] / 2;
3955 matrix_argb[6] = matrix_rgb[6] / 2;
3956 matrix_argb[7] = matrix_rgb[7] / 2;
3957 matrix_argb[8] = matrix_rgb[8] / 2;
3958 matrix_argb[9] = matrix_rgb[9] / 2;
3959 matrix_argb[10] = matrix_rgb[10] / 2;
3960 matrix_argb[11] = matrix_rgb[11] / 2;
3961 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
3962 matrix_argb[15] = 64; // 1.0
3963
3964 return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst,
3965 dst_stride_argb, &matrix_argb[0], width, height);
3966 }
3967
3968 // Apply a color table to each ARGB pixel.
3969 // Table contains 256 ARGB values.
3970 LIBYUV_API
3971 int ARGBColorTable(uint8_t* dst_argb,
3972 int dst_stride_argb,
3973 const uint8_t* table_argb,
3974 int dst_x,
3975 int dst_y,
3976 int width,
3977 int height) {
3978 int y;
3979 void (*ARGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
3980 int width) = ARGBColorTableRow_C;
3981 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
3982 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
3983 dst_y < 0) {
3984 return -1;
3985 }
3986 // Coalesce rows.
3987 if (dst_stride_argb == width * 4) {
3988 width *= height;
3989 height = 1;
3990 dst_stride_argb = 0;
3991 }
3992 #if defined(HAS_ARGBCOLORTABLEROW_X86)
3993 if (TestCpuFlag(kCpuHasX86)) {
3994 ARGBColorTableRow = ARGBColorTableRow_X86;
3995 }
3996 #endif
3997 for (y = 0; y < height; ++y) {
3998 ARGBColorTableRow(dst, table_argb, width);
3999 dst += dst_stride_argb;
4000 }
4001 return 0;
4002 }
4003
4004 // Apply a color table to each ARGB pixel but preserve destination alpha.
4005 // Table contains 256 ARGB values.
4006 LIBYUV_API
4007 int RGBColorTable(uint8_t* dst_argb,
4008 int dst_stride_argb,
4009 const uint8_t* table_argb,
4010 int dst_x,
4011 int dst_y,
4012 int width,
4013 int height) {
4014 int y;
4015 void (*RGBColorTableRow)(uint8_t* dst_argb, const uint8_t* table_argb,
4016 int width) = RGBColorTableRow_C;
4017 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
4018 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 ||
4019 dst_y < 0) {
4020 return -1;
4021 }
4022 // Coalesce rows.
4023 if (dst_stride_argb == width * 4) {
4024 width *= height;
4025 height = 1;
4026 dst_stride_argb = 0;
4027 }
4028 #if defined(HAS_RGBCOLORTABLEROW_X86)
4029 if (TestCpuFlag(kCpuHasX86)) {
4030 RGBColorTableRow = RGBColorTableRow_X86;
4031 }
4032 #endif
4033 for (y = 0; y < height; ++y) {
4034 RGBColorTableRow(dst, table_argb, width);
4035 dst += dst_stride_argb;
4036 }
4037 return 0;
4038 }
4039
4040 // ARGBQuantize is used to posterize art.
4041 // e.g. rgb / qvalue * qvalue + qvalue / 2
4042 // But the low level rows implement it efficiently with 3 parameters, and
4043 // could be used for other high level operations.
4044 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
4045 // where scale is 1 / interval_size as a 16.16 fixed point value.
4046 // The divide is replaced with a multiply by the fixed point reciprocal.
4047 // Caveat - although SSE2 saturates, the C function does not and should be
4048 // used with care if doing anything but quantization.
4049 LIBYUV_API
4050 int ARGBQuantize(uint8_t* dst_argb,
4051 int dst_stride_argb,
4052 int scale,
4053 int interval_size,
4054 int interval_offset,
4055 int dst_x,
4056 int dst_y,
4057 int width,
4058 int height) {
4059 int y;
4060 void (*ARGBQuantizeRow)(uint8_t* dst_argb, int scale, int interval_size,
4061 int interval_offset, int width) = ARGBQuantizeRow_C;
4062 uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
4063 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
4064 interval_size < 1 || interval_size > 255) {
4065 return -1;
4066 }
4067 // Coalesce rows.
4068 if (dst_stride_argb == width * 4) {
4069 width *= height;
4070 height = 1;
4071 dst_stride_argb = 0;
4072 }
4073 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
4074 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
4075 ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
4076 }
4077 #endif
4078 #if defined(HAS_ARGBQUANTIZEROW_NEON)
4079 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
4080 ARGBQuantizeRow = ARGBQuantizeRow_NEON;
4081 }
4082 #endif
4083 #if defined(HAS_ARGBQUANTIZEROW_MSA)
4084 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) {
4085 ARGBQuantizeRow = ARGBQuantizeRow_MSA;
4086 }
4087 #endif
4088 #if defined(HAS_ARGBQUANTIZEROW_LSX)
4089 if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 8)) {
4090 ARGBQuantizeRow = ARGBQuantizeRow_LSX;
4091 }
4092 #endif
4093 for (y = 0; y < height; ++y) {
4094 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
4095 dst += dst_stride_argb;
4096 }
4097 return 0;
4098 }
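
// Illustrative sketch: posterize to 8 levels per channel using the formula
// documented above. scale is the 16.16 fixed point reciprocal of
// interval_size, so 65536 / 32 = 2048 here. Names are hypothetical.
static int ExamplePosterize8(uint8_t* frame, int frame_stride, int width,
                             int height) {
  const int interval_size = 32;                   // 256 / 8 levels.
  const int scale = 65536 / interval_size;        // 1/32 in 16.16.
  const int interval_offset = interval_size / 2;  // Center of each bucket.
  return ARGBQuantize(frame, frame_stride, scale, interval_size,
                      interval_offset, 0, 0, width, height);
}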
4099
4100 // Computes a table of cumulative sums for an image, where each entry is the
4101 // sum of all values above and to the left of the entry. Used by ARGBBlur.
4102 LIBYUV_API
4103 int ARGBComputeCumulativeSum(const uint8_t* src_argb,
4104 int src_stride_argb,
4105 int32_t* dst_cumsum,
4106 int dst_stride32_cumsum,
4107 int width,
4108 int height) {
4109 int y;
4110 void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
4111 const int32_t* previous_cumsum, int width) =
4112 ComputeCumulativeSumRow_C;
4113 int32_t* previous_cumsum = dst_cumsum;
4114 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
4115 return -1;
4116 }
4117 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
4118 if (TestCpuFlag(kCpuHasSSE2)) {
4119 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
4120 }
4121 #endif
4122
4123 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
4124 for (y = 0; y < height; ++y) {
4125 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
4126 previous_cumsum = dst_cumsum;
4127 dst_cumsum += dst_stride32_cumsum;
4128 src_argb += src_stride_argb;
4129 }
4130 return 0;
4131 }
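
// Illustrative sketch: once the table is built, the sum of any axis-aligned
// rectangle takes four lookups (the classic summed-area-table identity).
// Based on the C row implementation, entry (x, y) holds the inclusive sum
// over [0..x] x [0..y] for each of the 4 channels; treat that as an
// assumption. For brevity this assumes x0 > 0 and y0 > 0.
static int32_t ExampleRectSum(const int32_t* cumsum,
                              int stride32,  // dst_stride32_cumsum.
                              int x0, int y0, int x1, int y1, int channel) {
  const int32_t br = cumsum[y1 * stride32 + x1 * 4 + channel];
  const int32_t bl = cumsum[y1 * stride32 + (x0 - 1) * 4 + channel];
  const int32_t tr = cumsum[(y0 - 1) * stride32 + x1 * 4 + channel];
  const int32_t tl = cumsum[(y0 - 1) * stride32 + (x0 - 1) * 4 + channel];
  return br - bl - tr + tl;
}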
4132
4133 // Blur ARGB image.
4134 // Caller should allocate a CumulativeSum table of width * height * 16 bytes
4135 // aligned to a 16 byte boundary. The height can be radius * 2 + 2 rows to
4136 // save memory, as the buffer is treated as circular.
4137 LIBYUV_API
4138 int ARGBBlur(const uint8_t* src_argb,
4139 int src_stride_argb,
4140 uint8_t* dst_argb,
4141 int dst_stride_argb,
4142 int32_t* dst_cumsum,
4143 int dst_stride32_cumsum,
4144 int width,
4145 int height,
4146 int radius) {
4147 int y;
4148 void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum,
4149 const int32_t* previous_cumsum, int width) =
4150 ComputeCumulativeSumRow_C;
4151 void (*CumulativeSumToAverageRow)(
4152 const int32_t* topleft, const int32_t* botleft, int width, int area,
4153 uint8_t* dst, int count) = CumulativeSumToAverageRow_C;
4154 int32_t* cumsum_bot_row;
4155 int32_t* max_cumsum_bot_row;
4156 int32_t* cumsum_top_row;
4157
4158 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
4159 return -1;
4160 }
4161 if (height < 0) {
4162 height = -height;
4163 src_argb = src_argb + (height - 1) * src_stride_argb;
4164 src_stride_argb = -src_stride_argb;
4165 }
4166 if (radius > height) {
4167 radius = height;
4168 }
4169 if (radius > (width / 2 - 1)) {
4170 radius = width / 2 - 1;
4171 }
4172 if (radius <= 0 || height <= 1) {
4173 return -1;
4174 }
4175 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
4176 if (TestCpuFlag(kCpuHasSSE2)) {
4177 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
4178 CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
4179 }
4180 #endif
4181 // Compute enough CumulativeSum for first row to be blurred. After this
4182 // one row of CumulativeSum is updated at a time.
4183 ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum,
4184 dst_stride32_cumsum, width, radius);
4185
4186 src_argb = src_argb + radius * src_stride_argb;
4187 cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
4188
4189 max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
4190 cumsum_top_row = &dst_cumsum[0];
4191
4192 for (y = 0; y < height; ++y) {
4193 int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
4194 int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
4195 int area = radius * (bot_y - top_y);
4196 int boxwidth = radius * 4;
4197 int x;
4198 int n;
4199
4200 // Increment cumsum_top_row pointer with circular buffer wrap around.
4201 if (top_y) {
4202 cumsum_top_row += dst_stride32_cumsum;
4203 if (cumsum_top_row >= max_cumsum_bot_row) {
4204 cumsum_top_row = dst_cumsum;
4205 }
4206 }
4207 // Increment cumsum_bot_row pointer with circular buffer wrap around and
4208 // then fill in a row of CumulativeSum.
4209 if ((y + radius) < height) {
4210 const int32_t* prev_cumsum_bot_row = cumsum_bot_row;
4211 cumsum_bot_row += dst_stride32_cumsum;
4212 if (cumsum_bot_row >= max_cumsum_bot_row) {
4213 cumsum_bot_row = dst_cumsum;
4214 }
4215 ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
4216 width);
4217 src_argb += src_stride_argb;
4218 }
4219
4220 // Left clipped.
4221 for (x = 0; x < radius + 1; ++x) {
4222 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
4223 &dst_argb[x * 4], 1);
4224 area += (bot_y - top_y);
4225 boxwidth += 4;
4226 }
4227
4228 // Middle unclipped.
4229 n = (width - 1) - radius - x + 1;
4230 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area,
4231 &dst_argb[x * 4], n);
4232
4233 // Right clipped.
4234 for (x += n; x <= width - 1; ++x) {
4235 area -= (bot_y - top_y);
4236 boxwidth -= 4;
4237 CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
4238 cumsum_bot_row + (x - radius - 1) * 4, boxwidth,
4239 area, &dst_argb[x * 4], 1);
4240 }
4241 dst_argb += dst_stride_argb;
4242 }
4243 return 0;
4244 }
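
// Illustrative sketch: driving ARGBBlur with a full-height CumulativeSum
// buffer (the simple variant; per the comment above, radius * 2 + 2 rows
// also suffice because the buffer is circular). dst_stride32_cumsum is in
// int32 units, i.e. width * 4. Uses align_buffer_64 from row.h as elsewhere
// in this file; names are hypothetical.
static int ExampleBlur(const uint8_t* src, int src_stride, uint8_t* dst,
                       int dst_stride, int width, int height, int radius) {
  int r;
  align_buffer_64(cumsum, (size_t)width * height * 4 * sizeof(int32_t));
  if (!cumsum) {
    return -1;
  }
  r = ARGBBlur(src, src_stride, dst, dst_stride, (int32_t*)cumsum, width * 4,
               width, height, radius);
  free_aligned_buffer_64(cumsum);
  return r;
}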
4245
4246 // Multiply ARGB image by a specified ARGB value.
4247 LIBYUV_API
4248 int ARGBShade(const uint8_t* src_argb,
4249 int src_stride_argb,
4250 uint8_t* dst_argb,
4251 int dst_stride_argb,
4252 int width,
4253 int height,
4254 uint32_t value) {
4255 int y;
4256 void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width,
4257 uint32_t value) = ARGBShadeRow_C;
4258 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
4259 return -1;
4260 }
4261 if (height < 0) {
4262 height = -height;
4263 src_argb = src_argb + (height - 1) * src_stride_argb;
4264 src_stride_argb = -src_stride_argb;
4265 }
4266 // Coalesce rows.
4267 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
4268 width *= height;
4269 height = 1;
4270 src_stride_argb = dst_stride_argb = 0;
4271 }
4272 #if defined(HAS_ARGBSHADEROW_SSE2)
4273 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) {
4274 ARGBShadeRow = ARGBShadeRow_SSE2;
4275 }
4276 #endif
4277 #if defined(HAS_ARGBSHADEROW_NEON)
4278 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
4279 ARGBShadeRow = ARGBShadeRow_NEON;
4280 }
4281 #endif
4282 #if defined(HAS_ARGBSHADEROW_MSA)
4283 if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) {
4284 ARGBShadeRow = ARGBShadeRow_MSA;
4285 }
4286 #endif
4287 #if defined(HAS_ARGBSHADEROW_LSX)
4288 if (TestCpuFlag(kCpuHasLSX) && IS_ALIGNED(width, 4)) {
4289 ARGBShadeRow = ARGBShadeRow_LSX;
4290 }
4291 #endif
4292 #if defined(HAS_ARGBSHADEROW_LASX)
4293 if (TestCpuFlag(kCpuHasLASX) && IS_ALIGNED(width, 8)) {
4294 ARGBShadeRow = ARGBShadeRow_LASX;
4295 }
4296 #endif
4297
4298 for (y = 0; y < height; ++y) {
4299 ARGBShadeRow(src_argb, dst_argb, width, value);
4300 src_argb += src_stride_argb;
4301 dst_argb += dst_stride_argb;
4302 }
4303 return 0;
4304 }
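
// Illustrative sketch: darken the color channels by roughly half while
// leaving alpha (nearly) untouched. Each channel is multiplied by the
// matching byte of |value| and divided by 256, so 0x80 scales by 128/256
// and 0xFF by 255/256. The packing assumed here is
// value = (A << 24) | (R << 16) | (G << 8) | B; treat that as an assumption
// based on the C row implementation.
static int ExampleHalfShade(const uint8_t* src, int src_stride, uint8_t* dst,
                            int dst_stride, int width, int height) {
  return ARGBShade(src, src_stride, dst, dst_stride, width, height,
                   0xFF808080u);
}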
4305
4306 // Interpolate 2 planes by specified amount (0 to 255).
4307 LIBYUV_API
4308 int InterpolatePlane(const uint8_t* src0,
4309 int src_stride0,
4310 const uint8_t* src1,
4311 int src_stride1,
4312 uint8_t* dst,
4313 int dst_stride,
4314 int width,
4315 int height,
4316 int interpolation) {
4317 int y;
4318 void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
4319 ptrdiff_t src_stride, int dst_width,
4320 int source_y_fraction) = InterpolateRow_C;
4321 if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
4322 return -1;
4323 }
4324 // Negative height means invert the image.
4325 if (height < 0) {
4326 height = -height;
4327 dst = dst + (height - 1) * dst_stride;
4328 dst_stride = -dst_stride;
4329 }
4330 // Coalesce rows.
4331 if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
4332 width *= height;
4333 height = 1;
4334 src_stride0 = src_stride1 = dst_stride = 0;
4335 }
4336 #if defined(HAS_INTERPOLATEROW_SSSE3)
4337 if (TestCpuFlag(kCpuHasSSSE3)) {
4338 InterpolateRow = InterpolateRow_Any_SSSE3;
4339 if (IS_ALIGNED(width, 16)) {
4340 InterpolateRow = InterpolateRow_SSSE3;
4341 }
4342 }
4343 #endif
4344 #if defined(HAS_INTERPOLATEROW_AVX2)
4345 if (TestCpuFlag(kCpuHasAVX2)) {
4346 InterpolateRow = InterpolateRow_Any_AVX2;
4347 if (IS_ALIGNED(width, 32)) {
4348 InterpolateRow = InterpolateRow_AVX2;
4349 }
4350 }
4351 #endif
4352 #if defined(HAS_INTERPOLATEROW_NEON)
4353 if (TestCpuFlag(kCpuHasNEON)) {
4354 InterpolateRow = InterpolateRow_Any_NEON;
4355 if (IS_ALIGNED(width, 16)) {
4356 InterpolateRow = InterpolateRow_NEON;
4357 }
4358 }
4359 #endif
4360 #if defined(HAS_INTERPOLATEROW_MSA)
4361 if (TestCpuFlag(kCpuHasMSA)) {
4362 InterpolateRow = InterpolateRow_Any_MSA;
4363 if (IS_ALIGNED(width, 32)) {
4364 InterpolateRow = InterpolateRow_MSA;
4365 }
4366 }
4367 #endif
4368 #if defined(HAS_INTERPOLATEROW_LSX)
4369 if (TestCpuFlag(kCpuHasLSX)) {
4370 InterpolateRow = InterpolateRow_Any_LSX;
4371 if (IS_ALIGNED(width, 32)) {
4372 InterpolateRow = InterpolateRow_LSX;
4373 }
4374 }
4375 #endif
4376 #if defined(HAS_INTERPOLATEROW_RVV)
4377 if (TestCpuFlag(kCpuHasRVV)) {
4378 InterpolateRow = InterpolateRow_RVV;
4379 }
4380 #endif
4381
4382 for (y = 0; y < height; ++y) {
4383 InterpolateRow(dst, src0, src1 - src0, width, interpolation);
4384 src0 += src_stride0;
4385 src1 += src_stride1;
4386 dst += dst_stride;
4387 }
4388 return 0;
4389 }
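
// Illustrative sketch: a 50/50 cross-fade of two planes. interpolation is a
// fraction in [0, 255]: 0 returns src0, 255 returns (very nearly) src1, and
// 128 is an even blend. Names are hypothetical.
static int ExampleCrossFade(const uint8_t* a, int a_stride, const uint8_t* b,
                            int b_stride, uint8_t* dst, int dst_stride,
                            int width, int height) {
  return InterpolatePlane(a, a_stride, b, b_stride, dst, dst_stride, width,
                          height, 128);
}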
4390
4391 // Interpolate 2 planes by specified amount (0 to 255).
4392 LIBYUV_API
4393 int InterpolatePlane_16(const uint16_t* src0,
4394 int src_stride0,
4395 const uint16_t* src1,
4396 int src_stride1,
4397 uint16_t* dst,
4398 int dst_stride,
4399 int width,
4400 int height,
4401 int interpolation) {
4402 int y;
4403 void (*InterpolateRow_16)(uint16_t* dst_ptr, const uint16_t* src_ptr,
4404 ptrdiff_t src_stride, int dst_width,
4405 int source_y_fraction) = InterpolateRow_16_C;
4406 if (!src0 || !src1 || !dst || width <= 0 || height == 0) {
4407 return -1;
4408 }
4409 // Negative height means invert the image.
4410 if (height < 0) {
4411 height = -height;
4412 dst = dst + (height - 1) * dst_stride;
4413 dst_stride = -dst_stride;
4414 }
4415 // Coalesce rows.
4416 if (src_stride0 == width && src_stride1 == width && dst_stride == width) {
4417 width *= height;
4418 height = 1;
4419 src_stride0 = src_stride1 = dst_stride = 0;
4420 }
4421 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
4422 if (TestCpuFlag(kCpuHasSSSE3)) {
4423 InterpolateRow_16 = InterpolateRow_16_Any_SSSE3;
4424 if (IS_ALIGNED(width, 16)) {
4425 InterpolateRow_16 = InterpolateRow_16_SSSE3;
4426 }
4427 }
4428 #endif
4429 #if defined(HAS_INTERPOLATEROW_16_AVX2)
4430 if (TestCpuFlag(kCpuHasAVX2)) {
4431 InterpolateRow_16 = InterpolateRow_16_Any_AVX2;
4432 if (IS_ALIGNED(width, 32)) {
4433 InterpolateRow_16 = InterpolateRow_16_AVX2;
4434 }
4435 }
4436 #endif
4437 #if defined(HAS_INTERPOLATEROW_16_NEON)
4438 if (TestCpuFlag(kCpuHasNEON)) {
4439 InterpolateRow_16 = InterpolateRow_16_Any_NEON;
4440 if (IS_ALIGNED(width, 8)) {
4441 InterpolateRow_16 = InterpolateRow_16_NEON;
4442 }
4443 }
4444 #endif
4445 #if defined(HAS_INTERPOLATEROW_16_MSA)
4446 if (TestCpuFlag(kCpuHasMSA)) {
4447 InterpolateRow_16 = InterpolateRow_16_Any_MSA;
4448 if (IS_ALIGNED(width, 32)) {
4449 InterpolateRow_16 = InterpolateRow_16_MSA;
4450 }
4451 }
4452 #endif
4453 #if defined(HAS_INTERPOLATEROW_16_LSX)
4454 if (TestCpuFlag(kCpuHasLSX)) {
4455 InterpolateRow_16 = InterpolateRow_16_Any_LSX;
4456 if (IS_ALIGNED(width, 32)) {
4457 InterpolateRow_16 = InterpolateRow_16_LSX;
4458 }
4459 }
4460 #endif
4461
4462 for (y = 0; y < height; ++y) {
4463 InterpolateRow_16(dst, src0, src1 - src0, width, interpolation);
4464 src0 += src_stride0;
4465 src1 += src_stride1;
4466 dst += dst_stride;
4467 }
4468 return 0;
4469 }
4470
4471 // Interpolate 2 ARGB images by specified amount (0 to 255).
4472 LIBYUV_API
4473 int ARGBInterpolate(const uint8_t* src_argb0,
4474 int src_stride_argb0,
4475 const uint8_t* src_argb1,
4476 int src_stride_argb1,
4477 uint8_t* dst_argb,
4478 int dst_stride_argb,
4479 int width,
4480 int height,
4481 int interpolation) {
4482 return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1,
4483 src_stride_argb1, dst_argb, dst_stride_argb,
4484 width * 4, height, interpolation);
4485 }
4486
4487 // Interpolate 2 YUV images by specified amount (0 to 255).
4488 LIBYUV_API
4489 int I420Interpolate(const uint8_t* src0_y,
4490 int src0_stride_y,
4491 const uint8_t* src0_u,
4492 int src0_stride_u,
4493 const uint8_t* src0_v,
4494 int src0_stride_v,
4495 const uint8_t* src1_y,
4496 int src1_stride_y,
4497 const uint8_t* src1_u,
4498 int src1_stride_u,
4499 const uint8_t* src1_v,
4500 int src1_stride_v,
4501 uint8_t* dst_y,
4502 int dst_stride_y,
4503 uint8_t* dst_u,
4504 int dst_stride_u,
4505 uint8_t* dst_v,
4506 int dst_stride_v,
4507 int width,
4508 int height,
4509 int interpolation) {
4510 int halfwidth = (width + 1) >> 1;
4511 int halfheight = (height + 1) >> 1;
4512
4513 if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v ||
4514 !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
4515 return -1;
4516 }
4517
4518 InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y,
4519 dst_stride_y, width, height, interpolation);
4520 InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u,
4521 dst_stride_u, halfwidth, halfheight, interpolation);
4522 InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v,
4523 dst_stride_v, halfwidth, halfheight, interpolation);
4524 return 0;
4525 }
4526
4527 // Shuffle ARGB channel order. e.g. BGRA to ARGB.
4528 LIBYUV_API
4529 int ARGBShuffle(const uint8_t* src_bgra,
4530 int src_stride_bgra,
4531 uint8_t* dst_argb,
4532 int dst_stride_argb,
4533 const uint8_t* shuffler,
4534 int width,
4535 int height) {
4536 int y;
4537 void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb,
4538 const uint8_t* shuffler, int width) = ARGBShuffleRow_C;
4539 if (!src_bgra || !dst_argb || width <= 0 || height == 0) {
4540 return -1;
4541 }
4542 // Negative height means invert the image.
4543 if (height < 0) {
4544 height = -height;
4545 src_bgra = src_bgra + (height - 1) * src_stride_bgra;
4546 src_stride_bgra = -src_stride_bgra;
4547 }
4548 // Coalesce rows.
4549 if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) {
4550 width *= height;
4551 height = 1;
4552 src_stride_bgra = dst_stride_argb = 0;
4553 }
4554 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
4555 if (TestCpuFlag(kCpuHasSSSE3)) {
4556 ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
4557 if (IS_ALIGNED(width, 8)) {
4558 ARGBShuffleRow = ARGBShuffleRow_SSSE3;
4559 }
4560 }
4561 #endif
4562 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
4563 if (TestCpuFlag(kCpuHasAVX2)) {
4564 ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
4565 if (IS_ALIGNED(width, 16)) {
4566 ARGBShuffleRow = ARGBShuffleRow_AVX2;
4567 }
4568 }
4569 #endif
4570 #if defined(HAS_ARGBSHUFFLEROW_NEON)
4571 if (TestCpuFlag(kCpuHasNEON)) {
4572 ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
4573 if (IS_ALIGNED(width, 4)) {
4574 ARGBShuffleRow = ARGBShuffleRow_NEON;
4575 }
4576 }
4577 #endif
4578 #if defined(HAS_ARGBSHUFFLEROW_MSA)
4579 if (TestCpuFlag(kCpuHasMSA)) {
4580 ARGBShuffleRow = ARGBShuffleRow_Any_MSA;
4581 if (IS_ALIGNED(width, 8)) {
4582 ARGBShuffleRow = ARGBShuffleRow_MSA;
4583 }
4584 }
4585 #endif
4586 #if defined(HAS_ARGBSHUFFLEROW_LSX)
4587 if (TestCpuFlag(kCpuHasLSX)) {
4588 ARGBShuffleRow = ARGBShuffleRow_Any_LSX;
4589 if (IS_ALIGNED(width, 8)) {
4590 ARGBShuffleRow = ARGBShuffleRow_LSX;
4591 }
4592 }
4593 #endif
4594 #if defined(HAS_ARGBSHUFFLEROW_LASX)
4595 if (TestCpuFlag(kCpuHasLASX)) {
4596 ARGBShuffleRow = ARGBShuffleRow_Any_LASX;
4597 if (IS_ALIGNED(width, 16)) {
4598 ARGBShuffleRow = ARGBShuffleRow_LASX;
4599 }
4600 }
4601 #endif
4602
4603 for (y = 0; y < height; ++y) {
4604 ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
4605 src_bgra += src_stride_bgra;
4606 dst_argb += dst_stride_argb;
4607 }
4608 return 0;
4609 }
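
// Illustrative sketch: a shuffler for BGRA -> ARGB. The shuffler is a
// 16 byte map (4 pixels worth) of source byte indices; {3, 2, 1, 0}
// reverses the channel order within each 4 byte pixel. The same values
// appear as kShuffleMaskBGRAToARGB in libyuv's converters.
static const uint8_t kExampleShuffleBGRAToARGB[16] = {
    3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u};
// ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb,
//             kExampleShuffleBGRAToARGB, width, height);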
4610
4611 // Shuffle AR64 channel order. e.g. AR64 to AB64.
4612 LIBYUV_API
4613 int AR64Shuffle(const uint16_t* src_ar64,
4614 int src_stride_ar64,
4615 uint16_t* dst_ar64,
4616 int dst_stride_ar64,
4617 const uint8_t* shuffler,
4618 int width,
4619 int height) {
4620 int y;
4621 void (*AR64ShuffleRow)(const uint8_t* src_ar64, uint8_t* dst_ar64,
4622 const uint8_t* shuffler, int width) = AR64ShuffleRow_C;
4623 if (!src_ar64 || !dst_ar64 || width <= 0 || height == 0) {
4624 return -1;
4625 }
4626 // Negative height means invert the image.
4627 if (height < 0) {
4628 height = -height;
4629 src_ar64 = src_ar64 + (height - 1) * src_stride_ar64;
4630 src_stride_ar64 = -src_stride_ar64;
4631 }
4632 // Coalesce rows.
4633 if (src_stride_ar64 == width * 4 && dst_stride_ar64 == width * 4) {
4634 width *= height;
4635 height = 1;
4636 src_stride_ar64 = dst_stride_ar64 = 0;
4637 }
4638 // The ARGB shuffle assembly versions can be reused since they are plain byte shuffles.
4639 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
4640 if (TestCpuFlag(kCpuHasSSSE3)) {
4641 AR64ShuffleRow = ARGBShuffleRow_Any_SSSE3;
4642 if (IS_ALIGNED(width, 8)) {
4643 AR64ShuffleRow = ARGBShuffleRow_SSSE3;
4644 }
4645 }
4646 #endif
4647 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
4648 if (TestCpuFlag(kCpuHasAVX2)) {
4649 AR64ShuffleRow = ARGBShuffleRow_Any_AVX2;
4650 if (IS_ALIGNED(width, 16)) {
4651 AR64ShuffleRow = ARGBShuffleRow_AVX2;
4652 }
4653 }
4654 #endif
4655 #if defined(HAS_ARGBSHUFFLEROW_NEON)
4656 if (TestCpuFlag(kCpuHasNEON)) {
4657 AR64ShuffleRow = ARGBShuffleRow_Any_NEON;
4658 if (IS_ALIGNED(width, 4)) {
4659 AR64ShuffleRow = ARGBShuffleRow_NEON;
4660 }
4661 }
4662 #endif
4663
4664 for (y = 0; y < height; ++y) {
4665 AR64ShuffleRow((uint8_t*)(src_ar64), (uint8_t*)(dst_ar64), shuffler,
4666 width * 2);
4667 src_ar64 += src_stride_ar64;
4668 dst_ar64 += dst_stride_ar64;
4669 }
4670 return 0;
4671 }
4672
4673 // Gaussian blur a float plane using a 5x5 filter with
4674 // coefficients of 1, 4, 6, 4, 1 in each dimension.
4675 // Each destination pixel is a blur of the 5x5
4676 // pixels from the source.
4677 // Source edges are clamped.
4678 // Edge is 2 pixels on each side, and interior is multiple of 4.
4679 LIBYUV_API
4680 int GaussPlane_F32(const float* src,
4681 int src_stride,
4682 float* dst,
4683 int dst_stride,
4684 int width,
4685 int height) {
4686 int y;
4687 void (*GaussCol_F32)(const float* src0, const float* src1, const float* src2,
4688 const float* src3, const float* src4, float* dst,
4689 int width) = GaussCol_F32_C;
4690 void (*GaussRow_F32)(const float* src, float* dst, int width) =
4691 GaussRow_F32_C;
4692 if (!src || !dst || width <= 0 || height == 0) {
4693 return -1;
4694 }
4695 // Negative height means invert the image.
4696 if (height < 0) {
4697 height = -height;
4698 src = src + (height - 1) * src_stride;
4699 src_stride = -src_stride;
4700 }
4701
4702 #if defined(HAS_GAUSSCOL_F32_NEON)
4703 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
4704 GaussCol_F32 = GaussCol_F32_NEON;
4705 }
4706 #endif
4707 #if defined(HAS_GAUSSROW_F32_NEON)
4708 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
4709 GaussRow_F32 = GaussRow_F32_NEON;
4710 }
4711 #endif
4712 {
4713 // 2 pixels on each side, but aligned out to 16 bytes.
4714 align_buffer_64(rowbuf, (4 + width + 4) * 4);
4715 if (!rowbuf)
4716 return 1;
4717 memset(rowbuf, 0, 16);
4718 memset(rowbuf + (4 + width) * 4, 0, 16);
4719 float* row = (float*)(rowbuf + 16);
4720 const float* src0 = src;
4721 const float* src1 = src;
4722 const float* src2 = src;
4723 const float* src3 = src2 + ((height > 1) ? src_stride : 0);
4724 const float* src4 = src3 + ((height > 2) ? src_stride : 0);
4725
4726 for (y = 0; y < height; ++y) {
4727 GaussCol_F32(src0, src1, src2, src3, src4, row, width);
4728
4729 // Extrude edge by 2 floats
4730 row[-2] = row[-1] = row[0];
4731 row[width + 1] = row[width] = row[width - 1];
4732
4733 GaussRow_F32(row - 2, dst, width);
4734
4735 src0 = src1;
4736 src1 = src2;
4737 src2 = src3;
4738 src3 = src4;
4739 if ((y + 2) < (height - 1)) {
4740 src4 += src_stride;
4741 }
4742 dst += dst_stride;
4743 }
4744 free_aligned_buffer_64(rowbuf);
4745 }
4746 return 0;
4747 }
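
// For reference, the separable filter above expands to the 5x5 outer
// product of [1 4 6 4 1] with itself: each 1-D pass sums to 16, so the full
// 2-D kernel sums to 16 * 16 = 256 (e.g. the center weight is 6 * 6 = 36).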
4748
4749 // Sobel ARGB effect.
4750 static int ARGBSobelize(const uint8_t* src_argb,
4751 int src_stride_argb,
4752 uint8_t* dst_argb,
4753 int dst_stride_argb,
4754 int width,
4755 int height,
4756 void (*SobelRow)(const uint8_t* src_sobelx,
4757 const uint8_t* src_sobely,
4758 uint8_t* dst,
4759 int width)) {
4760 int y;
4761 void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) =
4762 ARGBToYJRow_C;
4763 void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1,
4764 uint8_t* dst_sobely, int width) = SobelYRow_C;
4765 void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1,
4766 const uint8_t* src_y2, uint8_t* dst_sobely, int width) =
4767 SobelXRow_C;
4768 const int kEdge = 16; // Extra pixels at start of row for extrude/align.
4769 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
4770 return -1;
4771 }
4772 // Negative height means invert the image.
4773 if (height < 0) {
4774 height = -height;
4775 src_argb = src_argb + (height - 1) * src_stride_argb;
4776 src_stride_argb = -src_stride_argb;
4777 }
4778
4779 #if defined(HAS_ARGBTOYJROW_SSSE3)
4780 if (TestCpuFlag(kCpuHasSSSE3)) {
4781 ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
4782 if (IS_ALIGNED(width, 16)) {
4783 ARGBToYJRow = ARGBToYJRow_SSSE3;
4784 }
4785 }
4786 #endif
4787 #if defined(HAS_ARGBTOYJROW_AVX2)
4788 if (TestCpuFlag(kCpuHasAVX2)) {
4789 ARGBToYJRow = ARGBToYJRow_Any_AVX2;
4790 if (IS_ALIGNED(width, 32)) {
4791 ARGBToYJRow = ARGBToYJRow_AVX2;
4792 }
4793 }
4794 #endif
4795 #if defined(HAS_ARGBTOYJROW_NEON)
4796 if (TestCpuFlag(kCpuHasNEON)) {
4797 ARGBToYJRow = ARGBToYJRow_Any_NEON;
4798 if (IS_ALIGNED(width, 16)) {
4799 ARGBToYJRow = ARGBToYJRow_NEON;
4800 }
4801 }
4802 #endif
4803 #if defined(HAS_ARGBTOYJROW_MSA)
4804 if (TestCpuFlag(kCpuHasMSA)) {
4805 ARGBToYJRow = ARGBToYJRow_Any_MSA;
4806 if (IS_ALIGNED(width, 16)) {
4807 ARGBToYJRow = ARGBToYJRow_MSA;
4808 }
4809 }
4810 #endif
4811 #if defined(HAS_ARGBTOYJROW_LSX)
4812 if (TestCpuFlag(kCpuHasLSX)) {
4813 ARGBToYJRow = ARGBToYJRow_Any_LSX;
4814 if (IS_ALIGNED(width, 16)) {
4815 ARGBToYJRow = ARGBToYJRow_LSX;
4816 }
4817 }
4818 #endif
4819 #if defined(HAS_ARGBTOYJROW_LASX)
4820 if (TestCpuFlag(kCpuHasLASX)) {
4821 ARGBToYJRow = ARGBToYJRow_Any_LASX;
4822 if (IS_ALIGNED(width, 32)) {
4823 ARGBToYJRow = ARGBToYJRow_LASX;
4824 }
4825 }
4826 #endif
4827 #if defined(HAS_ARGBTOYJROW_RVV)
4828 if (TestCpuFlag(kCpuHasRVV)) {
4829 ARGBToYJRow = ARGBToYJRow_RVV;
4830 }
4831 #endif
4832
4833 #if defined(HAS_SOBELYROW_SSE2)
4834 if (TestCpuFlag(kCpuHasSSE2)) {
4835 SobelYRow = SobelYRow_SSE2;
4836 }
4837 #endif
4838 #if defined(HAS_SOBELYROW_NEON)
4839 if (TestCpuFlag(kCpuHasNEON)) {
4840 SobelYRow = SobelYRow_NEON;
4841 }
4842 #endif
4843 #if defined(HAS_SOBELYROW_MSA)
4844 if (TestCpuFlag(kCpuHasMSA)) {
4845 SobelYRow = SobelYRow_MSA;
4846 }
4847 #endif
4848 #if defined(HAS_SOBELXROW_SSE2)
4849 if (TestCpuFlag(kCpuHasSSE2)) {
4850 SobelXRow = SobelXRow_SSE2;
4851 }
4852 #endif
4853 #if defined(HAS_SOBELXROW_NEON)
4854 if (TestCpuFlag(kCpuHasNEON)) {
4855 SobelXRow = SobelXRow_NEON;
4856 }
4857 #endif
4858 #if defined(HAS_SOBELXROW_MSA)
4859 if (TestCpuFlag(kCpuHasMSA)) {
4860 SobelXRow = SobelXRow_MSA;
4861 }
4862 #endif
4863 {
4864 // 3 rows with edges before/after.
4865 const int row_size = (width + kEdge + 31) & ~31;
4866 align_buffer_64(rows, row_size * 2 + (kEdge + row_size * 3 + kEdge));
4867 uint8_t* row_sobelx = rows;
4868 uint8_t* row_sobely = rows + row_size;
4869 uint8_t* row_y = rows + row_size * 2;
4870
4871 // Convert first row.
4872 uint8_t* row_y0 = row_y + kEdge;
4873 uint8_t* row_y1 = row_y0 + row_size;
4874 uint8_t* row_y2 = row_y1 + row_size;
4875 if (!rows)
4876 return 1;
4877 ARGBToYJRow(src_argb, row_y0, width);
4878 row_y0[-1] = row_y0[0];
4879 memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
4880 ARGBToYJRow(src_argb, row_y1, width);
4881 row_y1[-1] = row_y1[0];
4882 memset(row_y1 + width, row_y1[width - 1], 16);
4883 memset(row_y2 + width, 0, 16);
4884
4885 for (y = 0; y < height; ++y) {
4886 // Convert next row of ARGB to G.
4887 if (y < (height - 1)) {
4888 src_argb += src_stride_argb;
4889 }
4890 ARGBToYJRow(src_argb, row_y2, width);
4891 row_y2[-1] = row_y2[0];
4892 row_y2[width] = row_y2[width - 1];
4893
4894 SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
4895 SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
4896 SobelRow(row_sobelx, row_sobely, dst_argb, width);
4897
4898 // Cycle thru circular queue of 3 row_y buffers.
4899 {
4900 uint8_t* row_yt = row_y0;
4901 row_y0 = row_y1;
4902 row_y1 = row_y2;
4903 row_y2 = row_yt;
4904 }
4905
4906 dst_argb += dst_stride_argb;
4907 }
4908 free_aligned_buffer_64(rows);
4909 }
4910 return 0;
4911 }
4912
4913 // Sobel ARGB effect.
4914 LIBYUV_API
4915 int ARGBSobel(const uint8_t* src_argb,
4916 int src_stride_argb,
4917 uint8_t* dst_argb,
4918 int dst_stride_argb,
4919 int width,
4920 int height) {
4921 void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
4922 uint8_t* dst_argb, int width) = SobelRow_C;
4923 #if defined(HAS_SOBELROW_SSE2)
4924 if (TestCpuFlag(kCpuHasSSE2)) {
4925 SobelRow = SobelRow_Any_SSE2;
4926 if (IS_ALIGNED(width, 16)) {
4927 SobelRow = SobelRow_SSE2;
4928 }
4929 }
4930 #endif
4931 #if defined(HAS_SOBELROW_NEON)
4932 if (TestCpuFlag(kCpuHasNEON)) {
4933 SobelRow = SobelRow_Any_NEON;
4934 if (IS_ALIGNED(width, 8)) {
4935 SobelRow = SobelRow_NEON;
4936 }
4937 }
4938 #endif
4939 #if defined(HAS_SOBELROW_MSA)
4940 if (TestCpuFlag(kCpuHasMSA)) {
4941 SobelRow = SobelRow_Any_MSA;
4942 if (IS_ALIGNED(width, 16)) {
4943 SobelRow = SobelRow_MSA;
4944 }
4945 }
4946 #endif
4947 #if defined(HAS_SOBELROW_LSX)
4948 if (TestCpuFlag(kCpuHasLSX)) {
4949 SobelRow = SobelRow_Any_LSX;
4950 if (IS_ALIGNED(width, 16)) {
4951 SobelRow = SobelRow_LSX;
4952 }
4953 }
4954 #endif
4955 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
4956 width, height, SobelRow);
4957 }
4958
4959 // Sobel ARGB effect with planar output.
4960 LIBYUV_API
4961 int ARGBSobelToPlane(const uint8_t* src_argb,
4962 int src_stride_argb,
4963 uint8_t* dst_y,
4964 int dst_stride_y,
4965 int width,
4966 int height) {
4967 void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
4968 uint8_t* dst_, int width) = SobelToPlaneRow_C;
4969 #if defined(HAS_SOBELTOPLANEROW_SSE2)
4970 if (TestCpuFlag(kCpuHasSSE2)) {
4971 SobelToPlaneRow = SobelToPlaneRow_Any_SSE2;
4972 if (IS_ALIGNED(width, 16)) {
4973 SobelToPlaneRow = SobelToPlaneRow_SSE2;
4974 }
4975 }
4976 #endif
4977 #if defined(HAS_SOBELTOPLANEROW_NEON)
4978 if (TestCpuFlag(kCpuHasNEON)) {
4979 SobelToPlaneRow = SobelToPlaneRow_Any_NEON;
4980 if (IS_ALIGNED(width, 16)) {
4981 SobelToPlaneRow = SobelToPlaneRow_NEON;
4982 }
4983 }
4984 #endif
4985 #if defined(HAS_SOBELTOPLANEROW_MSA)
4986 if (TestCpuFlag(kCpuHasMSA)) {
4987 SobelToPlaneRow = SobelToPlaneRow_Any_MSA;
4988 if (IS_ALIGNED(width, 32)) {
4989 SobelToPlaneRow = SobelToPlaneRow_MSA;
4990 }
4991 }
4992 #endif
4993 #if defined(HAS_SOBELTOPLANEROW_LSX)
4994 if (TestCpuFlag(kCpuHasLSX)) {
4995 SobelToPlaneRow = SobelToPlaneRow_Any_LSX;
4996 if (IS_ALIGNED(width, 32)) {
4997 SobelToPlaneRow = SobelToPlaneRow_LSX;
4998 }
4999 }
5000 #endif
5001 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width,
5002 height, SobelToPlaneRow);
5003 }
5004
5005 // SobelXY ARGB effect.
5006 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
5007 LIBYUV_API
5008 int ARGBSobelXY(const uint8_t* src_argb,
5009 int src_stride_argb,
5010 uint8_t* dst_argb,
5011 int dst_stride_argb,
5012 int width,
5013 int height) {
5014 void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely,
5015 uint8_t* dst_argb, int width) = SobelXYRow_C;
5016 #if defined(HAS_SOBELXYROW_SSE2)
5017 if (TestCpuFlag(kCpuHasSSE2)) {
5018 SobelXYRow = SobelXYRow_Any_SSE2;
5019 if (IS_ALIGNED(width, 16)) {
5020 SobelXYRow = SobelXYRow_SSE2;
5021 }
5022 }
5023 #endif
5024 #if defined(HAS_SOBELXYROW_NEON)
5025 if (TestCpuFlag(kCpuHasNEON)) {
5026 SobelXYRow = SobelXYRow_Any_NEON;
5027 if (IS_ALIGNED(width, 8)) {
5028 SobelXYRow = SobelXYRow_NEON;
5029 }
5030 }
5031 #endif
5032 #if defined(HAS_SOBELXYROW_MSA)
5033 if (TestCpuFlag(kCpuHasMSA)) {
5034 SobelXYRow = SobelXYRow_Any_MSA;
5035 if (IS_ALIGNED(width, 16)) {
5036 SobelXYRow = SobelXYRow_MSA;
5037 }
5038 }
5039 #endif
5040 #if defined(HAS_SOBELXYROW_LSX)
5041 if (TestCpuFlag(kCpuHasLSX)) {
5042 SobelXYRow = SobelXYRow_Any_LSX;
5043 if (IS_ALIGNED(width, 16)) {
5044 SobelXYRow = SobelXYRow_LSX;
5045 }
5046 }
5047 #endif
5048 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
5049 width, height, SobelXYRow);
5050 }
5051
5052 // Apply a 4x4 polynomial to each ARGB pixel.
5053 LIBYUV_API
5054 int ARGBPolynomial(const uint8_t* src_argb,
5055 int src_stride_argb,
5056 uint8_t* dst_argb,
5057 int dst_stride_argb,
5058 const float* poly,
5059 int width,
5060 int height) {
5061 int y;
5062 void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb,
5063 const float* poly, int width) = ARGBPolynomialRow_C;
5064 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
5065 return -1;
5066 }
5067 // Negative height means invert the image.
5068 if (height < 0) {
5069 height = -height;
5070 src_argb = src_argb + (height - 1) * src_stride_argb;
5071 src_stride_argb = -src_stride_argb;
5072 }
5073 // Coalesce rows.
5074 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
5075 width *= height;
5076 height = 1;
5077 src_stride_argb = dst_stride_argb = 0;
5078 }
5079 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
5080 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
5081 ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
5082 }
5083 #endif
5084 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
5085 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
5086 IS_ALIGNED(width, 2)) {
5087 ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
5088 }
5089 #endif
5090
5091 for (y = 0; y < height; ++y) {
5092 ARGBPolynomialRow(src_argb, dst_argb, poly, width);
5093 src_argb += src_stride_argb;
5094 dst_argb += dst_stride_argb;
5095 }
5096 return 0;
5097 }
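
// Illustrative sketch: coefficient layout for ARGBPolynomial. Based on the
// C row implementation, the 16 floats are column-wise: poly[0..3] hold the
// constant terms for the 4 channels, poly[4..7] the linear terms,
// poly[8..11] the quadratic terms and poly[12..15] the cubic terms - treat
// that layout as an assumption. The identity polynomial p(v) = v is then:
static const float kExampleIdentityPoly[16] = {
    0.f, 0.f, 0.f, 0.f,  // constant terms
    1.f, 1.f, 1.f, 1.f,  // linear terms
    0.f, 0.f, 0.f, 0.f,  // quadratic terms
    0.f, 0.f, 0.f, 0.f,  // cubic terms
};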
5098
5099 // Convert plane of 16 bit shorts to half floats.
5100 // Source values are multiplied by scale before storing as half float.
5101 LIBYUV_API
5102 int HalfFloatPlane(const uint16_t* src_y,
5103 int src_stride_y,
5104 uint16_t* dst_y,
5105 int dst_stride_y,
5106 float scale,
5107 int width,
5108 int height) {
5109 int y;
5110 void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale,
5111 int width) = HalfFloatRow_C;
5112 if (!src_y || !dst_y || width <= 0 || height == 0) {
5113 return -1;
5114 }
5115 src_stride_y >>= 1;
5116 dst_stride_y >>= 1;
5117 // Negative height means invert the image.
5118 if (height < 0) {
5119 height = -height;
5120 src_y = src_y + (height - 1) * src_stride_y;
5121 src_stride_y = -src_stride_y;
5122 }
5123 // Coalesce rows.
5124 if (src_stride_y == width && dst_stride_y == width) {
5125 width *= height;
5126 height = 1;
5127 src_stride_y = dst_stride_y = 0;
5128 }
5129 #if defined(HAS_HALFFLOATROW_SSE2)
5130 if (TestCpuFlag(kCpuHasSSE2)) {
5131 HalfFloatRow = HalfFloatRow_Any_SSE2;
5132 if (IS_ALIGNED(width, 8)) {
5133 HalfFloatRow = HalfFloatRow_SSE2;
5134 }
5135 }
5136 #endif
5137 #if defined(HAS_HALFFLOATROW_AVX2)
5138 if (TestCpuFlag(kCpuHasAVX2)) {
5139 HalfFloatRow = HalfFloatRow_Any_AVX2;
5140 if (IS_ALIGNED(width, 16)) {
5141 HalfFloatRow = HalfFloatRow_AVX2;
5142 }
5143 }
5144 #endif
5145 #if defined(HAS_HALFFLOATROW_F16C)
5146 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) {
5147 HalfFloatRow =
5148 (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C;
5149 if (IS_ALIGNED(width, 16)) {
5150 HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_F16C : HalfFloatRow_F16C;
5151 }
5152 }
5153 #endif
5154 #if defined(HAS_HALFFLOATROW_NEON)
5155 if (TestCpuFlag(kCpuHasNEON)) {
5156 HalfFloatRow =
5157 (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON;
5158 if (IS_ALIGNED(width, 8)) {
5159 HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON;
5160 }
5161 }
5162 #endif
5163 #if defined(HAS_HALFFLOATROW_MSA)
5164 if (TestCpuFlag(kCpuHasMSA)) {
5165 HalfFloatRow = HalfFloatRow_Any_MSA;
5166 if (IS_ALIGNED(width, 32)) {
5167 HalfFloatRow = HalfFloatRow_MSA;
5168 }
5169 }
5170 #endif
5171 #if defined(HAS_HALFFLOATROW_LSX)
5172 if (TestCpuFlag(kCpuHasLSX)) {
5173 HalfFloatRow = HalfFloatRow_Any_LSX;
5174 if (IS_ALIGNED(width, 32)) {
5175 HalfFloatRow = HalfFloatRow_LSX;
5176 }
5177 }
5178 #endif
5179
5180 for (y = 0; y < height; ++y) {
5181 HalfFloatRow(src_y, dst_y, scale, width);
5182 src_y += src_stride_y;
5183 dst_y += dst_stride_y;
5184 }
5185 return 0;
5186 }
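
// Illustrative sketch: convert 10 bit video samples (0..1023 in 16 bit
// shorts) to half floats normalized to [0.0, 1.0] by passing
// scale = 1.0f / 1023.0f. Strides are in bytes, as with the 8 bit planes.
static int ExampleTenBitToHalf(const uint16_t* src, int src_stride_bytes,
                               uint16_t* dst, int dst_stride_bytes,
                               int width, int height) {
  return HalfFloatPlane(src, src_stride_bytes, dst, dst_stride_bytes,
                        1.0f / 1023.0f, width, height);
}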
5187
5188 // Convert a buffer of bytes to floats, scale the values and store as floats.
5189 LIBYUV_API
5190 int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) {
5191 void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale,
5192 int width) = ByteToFloatRow_C;
5193 if (!src_y || !dst_y || width <= 0) {
5194 return -1;
5195 }
5196 #if defined(HAS_BYTETOFLOATROW_NEON)
5197 if (TestCpuFlag(kCpuHasNEON)) {
5198 ByteToFloatRow = ByteToFloatRow_Any_NEON;
5199 if (IS_ALIGNED(width, 8)) {
5200 ByteToFloatRow = ByteToFloatRow_NEON;
5201 }
5202 }
5203 #endif
5204
5205 ByteToFloatRow(src_y, dst_y, scale, width);
5206 return 0;
5207 }
5208
5209 // Apply a luma color table to each ARGB pixel.
5210 LIBYUV_API
5211 int ARGBLumaColorTable(const uint8_t* src_argb,
5212 int src_stride_argb,
5213 uint8_t* dst_argb,
5214 int dst_stride_argb,
5215 const uint8_t* luma,
5216 int width,
5217 int height) {
5218 int y;
5219 void (*ARGBLumaColorTableRow)(
5220 const uint8_t* src_argb, uint8_t* dst_argb, int width,
5221 const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C;
5222 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
5223 return -1;
5224 }
5225 // Negative height means invert the image.
5226 if (height < 0) {
5227 height = -height;
5228 src_argb = src_argb + (height - 1) * src_stride_argb;
5229 src_stride_argb = -src_stride_argb;
5230 }
5231 // Coalesce rows.
5232 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
5233 width *= height;
5234 height = 1;
5235 src_stride_argb = dst_stride_argb = 0;
5236 }
5237 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
5238 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
5239 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
5240 }
5241 #endif
5242
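// Note: the packed constant 0x00264b0f below appears to encode the luma
// weights used to index the table, one byte per channel: B = 0x0f (15),
// G = 0x4b (75), R = 0x26 (38). They sum to 128 and approximate the BT.601
// full range weights (0.114, 0.587, 0.299) scaled by 128 - treat this
// reading as an assumption.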
5243 for (y = 0; y < height; ++y) {
5244 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
5245 src_argb += src_stride_argb;
5246 dst_argb += dst_stride_argb;
5247 }
5248 return 0;
5249 }
5250
5251 // Copy Alpha from one ARGB image to another.
5252 LIBYUV_API
5253 int ARGBCopyAlpha(const uint8_t* src_argb,
5254 int src_stride_argb,
5255 uint8_t* dst_argb,
5256 int dst_stride_argb,
5257 int width,
5258 int height) {
5259 int y;
5260 void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb,
5261 int width) = ARGBCopyAlphaRow_C;
5262 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
5263 return -1;
5264 }
5265 // Negative height means invert the image.
5266 if (height < 0) {
5267 height = -height;
5268 src_argb = src_argb + (height - 1) * src_stride_argb;
5269 src_stride_argb = -src_stride_argb;
5270 }
5271 // Coalesce rows.
5272 if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) {
5273 width *= height;
5274 height = 1;
5275 src_stride_argb = dst_stride_argb = 0;
5276 }
5277 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
5278 if (TestCpuFlag(kCpuHasSSE2)) {
5279 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2;
5280 if (IS_ALIGNED(width, 8)) {
5281 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
5282 }
5283 }
5284 #endif
5285 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
5286 if (TestCpuFlag(kCpuHasAVX2)) {
5287 ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2;
5288 if (IS_ALIGNED(width, 16)) {
5289 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
5290 }
5291 }
5292 #endif
5293
5294 for (y = 0; y < height; ++y) {
5295 ARGBCopyAlphaRow(src_argb, dst_argb, width);
5296 src_argb += src_stride_argb;
5297 dst_argb += dst_stride_argb;
5298 }
5299 return 0;
5300 }
5301
5302 // Extract just the alpha channel from ARGB.
5303 LIBYUV_API
5304 int ARGBExtractAlpha(const uint8_t* src_argb,
5305 int src_stride_argb,
5306 uint8_t* dst_a,
5307 int dst_stride_a,
5308 int width,
5309 int height) {
5310 if (!src_argb || !dst_a || width <= 0 || height == 0) {
5311 return -1;
5312 }
5313 // Negative height means invert the image.
5314 if (height < 0) {
5315 height = -height;
5316 src_argb += (height - 1) * src_stride_argb;
5317 src_stride_argb = -src_stride_argb;
5318 }
5319 // Coalesce rows.
5320 if (src_stride_argb == width * 4 && dst_stride_a == width) {
5321 width *= height;
5322 height = 1;
5323 src_stride_argb = dst_stride_a = 0;
5324 }
5325 void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a,
5326 int width) = ARGBExtractAlphaRow_C;
5327 #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2)
5328 if (TestCpuFlag(kCpuHasSSE2)) {
5329 ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2
5330 : ARGBExtractAlphaRow_Any_SSE2;
5331 }
5332 #endif
5333 #if defined(HAS_ARGBEXTRACTALPHAROW_AVX2)
5334 if (TestCpuFlag(kCpuHasAVX2)) {
5335 ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2
5336 : ARGBExtractAlphaRow_Any_AVX2;
5337 }
5338 #endif
5339 #if defined(HAS_ARGBEXTRACTALPHAROW_NEON)
5340 if (TestCpuFlag(kCpuHasNEON)) {
5341 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON
5342 : ARGBExtractAlphaRow_Any_NEON;
5343 }
5344 #endif
5345 #if defined(HAS_ARGBEXTRACTALPHAROW_MSA)
5346 if (TestCpuFlag(kCpuHasMSA)) {
5347 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA
5348 : ARGBExtractAlphaRow_Any_MSA;
5349 }
5350 #endif
5351 #if defined(HAS_ARGBEXTRACTALPHAROW_LSX)
5352 if (TestCpuFlag(kCpuHasLSX)) {
5353 ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_LSX
5354 : ARGBExtractAlphaRow_Any_LSX;
5355 }
5356 #endif
5357 #if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
5358 if (TestCpuFlag(kCpuHasRVV)) {
5359 ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
5360 }
5361 #endif
5362
5363 for (int y = 0; y < height; ++y) {
5364 ARGBExtractAlphaRow(src_argb, dst_a, width);
5365 src_argb += src_stride_argb;
5366 dst_a += dst_stride_a;
5367 }
5368 return 0;
5369 }
5370
5371 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
5372 LIBYUV_API
5373 int ARGBCopyYToAlpha(const uint8_t* src_y,
5374 int src_stride_y,
5375 uint8_t* dst_argb,
5376 int dst_stride_argb,
5377 int width,
5378 int height) {
5379 int y;
5380 void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb,
5381 int width) = ARGBCopyYToAlphaRow_C;
5382 if (!src_y || !dst_argb || width <= 0 || height == 0) {
5383 return -1;
5384 }
5385 // Negative height means invert the image.
5386 if (height < 0) {
5387 height = -height;
5388 src_y = src_y + (height - 1) * src_stride_y;
5389 src_stride_y = -src_stride_y;
5390 }
5391 // Coalesce rows.
5392 if (src_stride_y == width && dst_stride_argb == width * 4) {
5393 width *= height;
5394 height = 1;
5395 src_stride_y = dst_stride_argb = 0;
5396 }
5397 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
5398 if (TestCpuFlag(kCpuHasSSE2)) {
5399 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2;
5400 if (IS_ALIGNED(width, 8)) {
5401 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
5402 }
5403 }
5404 #endif
5405 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
5406 if (TestCpuFlag(kCpuHasAVX2)) {
5407 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2;
5408 if (IS_ALIGNED(width, 16)) {
5409 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
5410 }
5411 }
5412 #endif
5413 #if defined(HAS_ARGBCOPYYTOALPHAROW_RVV)
5414 if (TestCpuFlag(kCpuHasRVV)) {
5415 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_RVV;
5416 }
5417 #endif
5418
5419 for (y = 0; y < height; ++y) {
5420 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
5421 src_y += src_stride_y;
5422 dst_argb += dst_stride_argb;
5423 }
5424 return 0;
5425 }
5426
5427 LIBYUV_API
5428 int YUY2ToNV12(const uint8_t* src_yuy2,
5429 int src_stride_yuy2,
5430 uint8_t* dst_y,
5431 int dst_stride_y,
5432 uint8_t* dst_uv,
5433 int dst_stride_uv,
5434 int width,
5435 int height) {
5436 int y;
5437 void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) =
5438 YUY2ToYRow_C;
5439 void (*YUY2ToNVUVRow)(const uint8_t* src_yuy2, int stride_yuy2,
5440 uint8_t* dst_uv, int width) = YUY2ToNVUVRow_C;
5441 if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) {
5442 return -1;
5443 }
5444
5445 // Negative height means invert the image.
5446 if (height < 0) {
5447 height = -height;
5448 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
5449 src_stride_yuy2 = -src_stride_yuy2;
5450 }
5451 #if defined(HAS_YUY2TOYROW_SSE2)
5452 if (TestCpuFlag(kCpuHasSSE2)) {
5453 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
5454 if (IS_ALIGNED(width, 16)) {
5455 YUY2ToYRow = YUY2ToYRow_SSE2;
5456 }
5457 }
5458 #endif
5459 #if defined(HAS_YUY2TOYROW_AVX2)
5460 if (TestCpuFlag(kCpuHasAVX2)) {
5461 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
5462 if (IS_ALIGNED(width, 32)) {
5463 YUY2ToYRow = YUY2ToYRow_AVX2;
5464 }
5465 }
5466 #endif
5467 #if defined(HAS_YUY2TOYROW_NEON)
5468 if (TestCpuFlag(kCpuHasNEON)) {
5469 YUY2ToYRow = YUY2ToYRow_Any_NEON;
5470 if (IS_ALIGNED(width, 16)) {
5471 YUY2ToYRow = YUY2ToYRow_NEON;
5472 }
5473 }
5474 #endif
5475 #if defined(HAS_YUY2TOYROW_MSA) && defined(HAS_YUY2TOUV422ROW_MSA)
5476 if (TestCpuFlag(kCpuHasMSA)) {
5477 YUY2ToYRow = YUY2ToYRow_Any_MSA;
5478 if (IS_ALIGNED(width, 32)) {
5479 YUY2ToYRow = YUY2ToYRow_MSA;
5480 }
5481 }
5482 #endif
5483 #if defined(HAS_YUY2TOYROW_LSX) && defined(HAS_YUY2TOUV422ROW_LSX)
5484 if (TestCpuFlag(kCpuHasLSX)) {
5485 YUY2ToYRow = YUY2ToYRow_Any_LSX;
5486 if (IS_ALIGNED(width, 16)) {
5487 YUY2ToYRow = YUY2ToYRow_LSX;
5488 }
5489 }
5490 #endif
5491 #if defined(HAS_YUY2TOYROW_LASX) && defined(HAS_YUY2TOUV422ROW_LASX)
5492 if (TestCpuFlag(kCpuHasLASX)) {
5493 YUY2ToYRow = YUY2ToYRow_Any_LASX;
5494 if (IS_ALIGNED(width, 32)) {
5495 YUY2ToYRow = YUY2ToYRow_LASX;
5496 }
5497 }
5498 #endif
5499
5500 #if defined(HAS_YUY2TONVUVROW_SSE2)
5501 if (TestCpuFlag(kCpuHasSSE2)) {
5502 YUY2ToNVUVRow = YUY2ToNVUVRow_Any_SSE2;
5503 if (IS_ALIGNED(width, 16)) {
5504 YUY2ToNVUVRow = YUY2ToNVUVRow_SSE2;
5505 }
5506 }
5507 #endif
5508 #if defined(HAS_YUY2TONVUVROW_AVX2)
5509 if (TestCpuFlag(kCpuHasAVX2)) {
5510 YUY2ToNVUVRow = YUY2ToNVUVRow_Any_AVX2;
5511 if (IS_ALIGNED(width, 32)) {
5512 YUY2ToNVUVRow = YUY2ToNVUVRow_AVX2;
5513 }
5514 }
5515 #endif
5516 #if defined(HAS_YUY2TONVUVROW_NEON)
5517 if (TestCpuFlag(kCpuHasNEON)) {
5518 YUY2ToNVUVRow = YUY2ToNVUVRow_Any_NEON;
5519 if (IS_ALIGNED(width, 16)) {
5520 YUY2ToNVUVRow = YUY2ToNVUVRow_NEON;
5521 }
5522 }
5523 #endif
5524
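  // Process 2 rows at a time: NV12 chroma is subsampled vertically, so each
  // output UV row is computed from a pair of source YUY2 rows.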
  for (y = 0; y < height - 1; y += 2) {
    YUY2ToYRow(src_yuy2, dst_y, width);
    YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width);
    YUY2ToNVUVRow(src_yuy2, src_stride_yuy2, dst_uv, width);
    src_yuy2 += src_stride_yuy2 * 2;
    dst_y += dst_stride_y * 2;
    dst_uv += dst_stride_uv;
  }
  if (height & 1) {
    YUY2ToYRow(src_yuy2, dst_y, width);
    YUY2ToNVUVRow(src_yuy2, 0, dst_uv, width);
  }
  return 0;
}
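
// Example (illustrative sketch, not part of the library): convert a 64x64
// YUY2 image to NV12. Buffer names and sizes are hypothetical; YUY2 packs
// 2 pixels into 4 bytes, and NV12 stores one interleaved UV pair per 2x2
// block of pixels.
//   uint8_t yuy2[64 * 64 * 2];
//   uint8_t nv12_y[64 * 64];
//   uint8_t nv12_uv[64 * 32];  // 32 rows of interleaved UV.
//   YUY2ToNV12(yuy2, 64 * 2, nv12_y, 64, nv12_uv, 64, 64, 64);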

LIBYUV_API
int UYVYToNV12(const uint8_t* src_uyvy,
               int src_stride_uyvy,
               uint8_t* dst_y,
               int dst_stride_y,
               uint8_t* dst_uv,
               int dst_stride_uv,
               int width,
               int height) {
  int y;
  int halfwidth = (width + 1) >> 1;
  void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v,
                     int width) = SplitUVRow_C;
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;

  if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) {
    return -1;
  }

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
    src_stride_uyvy = -src_stride_uyvy;
  }
#if defined(HAS_SPLITUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_SSE2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      SplitUVRow = SplitUVRow_NEON;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    SplitUVRow = SplitUVRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_MSA;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    SplitUVRow = SplitUVRow_Any_LSX;
    if (IS_ALIGNED(width, 32)) {
      SplitUVRow = SplitUVRow_LSX;
    }
  }
#endif
#if defined(HAS_SPLITUVROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    SplitUVRow = SplitUVRow_RVV;
  }
#endif

#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    InterpolateRow = InterpolateRow_Any_LSX;
    if (IS_ALIGNED(width, 32)) {
      InterpolateRow = InterpolateRow_LSX;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    InterpolateRow = InterpolateRow_RVV;
  }
#endif

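  // There is no direct UYVY-to-NVUV row function, so each pair of source
  // rows is split into Y and UV planes in a scratch buffer, then the two UV
  // rows are averaged (50/50 interpolation) into one NV12 UV row.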
  {
    int awidth = halfwidth * 2;
    // 1 row of Y (reused) and 2 rows of UV.
    align_buffer_64(rows, awidth * 3);
    if (!rows)
      return 1;

    for (y = 0; y < height - 1; y += 2) {
      // Split Y from UV.
      SplitUVRow(src_uyvy, rows + awidth, rows, awidth);
      memcpy(dst_y, rows, width);
      SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth);
      memcpy(dst_y + dst_stride_y, rows, width);
      InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128);
      src_uyvy += src_stride_uyvy * 2;
      dst_y += dst_stride_y * 2;
      dst_uv += dst_stride_uv;
    }
    if (height & 1) {
      // Split Y from UV.
      SplitUVRow(src_uyvy, dst_uv, rows, awidth);
      memcpy(dst_y, rows, width);
    }
    free_aligned_buffer_64(rows);
  }
  return 0;
}
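
// Example (illustrative sketch, not part of the library): convert a 64x64
// UYVY image to NV12. Buffer names and sizes are hypothetical.
//   uint8_t uyvy[64 * 64 * 2];
//   uint8_t nv12_y[64 * 64];
//   uint8_t nv12_uv[64 * 32];  // 32 rows of interleaved UV.
//   UYVYToNV12(uyvy, 64 * 2, nv12_y, 64, nv12_uv, 64, 64, 64);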

// width and height are the source plane size, allowing odd sizes to be
// handled.
LIBYUV_API
void HalfMergeUVPlane(const uint8_t* src_u,
                      int src_stride_u,
                      const uint8_t* src_v,
                      int src_stride_v,
                      uint8_t* dst_uv,
                      int dst_stride_uv,
                      int width,
                      int height) {
  int y;
  void (*HalfMergeUVRow)(const uint8_t* src_u, int src_stride_u,
                         const uint8_t* src_v, int src_stride_v,
                         uint8_t* dst_uv, int width) = HalfMergeUVRow_C;

  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_u = src_u + (height - 1) * src_stride_u;
    src_v = src_v + (height - 1) * src_stride_v;
    src_stride_u = -src_stride_u;
    src_stride_v = -src_stride_v;
  }
#if defined(HAS_HALFMERGEUVROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
    HalfMergeUVRow = HalfMergeUVRow_NEON;
  }
#endif
#if defined(HAS_HALFMERGEUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
    HalfMergeUVRow = HalfMergeUVRow_SSSE3;
  }
#endif
#if defined(HAS_HALFMERGEUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
    HalfMergeUVRow = HalfMergeUVRow_AVX2;
  }
#endif

  for (y = 0; y < height - 1; y += 2) {
    // Merge 2 rows of U and V into a row of UV.
    HalfMergeUVRow(src_u, src_stride_u, src_v, src_stride_v, dst_uv, width);
    src_u += src_stride_u * 2;
    src_v += src_stride_v * 2;
    dst_uv += dst_stride_uv;
  }
  if (height & 1) {
    HalfMergeUVRow(src_u, 0, src_v, 0, dst_uv, width);
  }
}
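
// Example (illustrative sketch, not part of the library): downsample full
// resolution 64x64 U and V planes (as in I444) into a single interleaved
// half resolution UV plane (as in NV12). Buffer names and sizes are
// hypothetical.
//   uint8_t u_plane[64 * 64];
//   uint8_t v_plane[64 * 64];
//   uint8_t uv_half[32 * 32 * 2];
//   HalfMergeUVPlane(u_plane, 64, v_plane, 64, uv_half, 64, 64, 64);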

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif