1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/planar_functions.h"
12
13 #include <string.h> // for memset()
14
15 #include "libyuv/cpu_id.h"
16 #ifdef HAVE_JPEG
17 #include "libyuv/mjpeg_decoder.h"
18 #endif
19 #include "libyuv/row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
26 // Copy a plane of data
27 LIBYUV_API
CopyPlane(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)28 void CopyPlane(const uint8* src_y, int src_stride_y,
29 uint8* dst_y, int dst_stride_y,
30 int width, int height) {
31 int y;
32 void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
33 // Coalesce rows.
34 if (src_stride_y == width &&
35 dst_stride_y == width) {
36 width *= height;
37 height = 1;
38 src_stride_y = dst_stride_y = 0;
39 }
40 #if defined(HAS_COPYROW_X86)
41 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
42 CopyRow = CopyRow_X86;
43 }
44 #endif
45 #if defined(HAS_COPYROW_SSE2)
46 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
47 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
48 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
49 CopyRow = CopyRow_SSE2;
50 }
51 #endif
52 #if defined(HAS_COPYROW_ERMS)
53 if (TestCpuFlag(kCpuHasERMS)) {
54 CopyRow = CopyRow_ERMS;
55 }
56 #endif
57 #if defined(HAS_COPYROW_NEON)
58 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
59 CopyRow = CopyRow_NEON;
60 }
61 #endif
62 #if defined(HAS_COPYROW_MIPS)
63 if (TestCpuFlag(kCpuHasMIPS)) {
64 CopyRow = CopyRow_MIPS;
65 }
66 #endif
67
68 // Copy plane
69 for (y = 0; y < height; ++y) {
70 CopyRow(src_y, dst_y, width);
71 src_y += src_stride_y;
72 dst_y += dst_stride_y;
73 }
74 }
75
76 LIBYUV_API
CopyPlane_16(const uint16 * src_y,int src_stride_y,uint16 * dst_y,int dst_stride_y,int width,int height)77 void CopyPlane_16(const uint16* src_y, int src_stride_y,
78 uint16* dst_y, int dst_stride_y,
79 int width, int height) {
80 int y;
81 void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C;
82 // Coalesce rows.
83 if (src_stride_y == width &&
84 dst_stride_y == width) {
85 width *= height;
86 height = 1;
87 src_stride_y = dst_stride_y = 0;
88 }
89 #if defined(HAS_COPYROW_16_X86)
90 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
91 CopyRow = CopyRow_16_X86;
92 }
93 #endif
94 #if defined(HAS_COPYROW_16_SSE2)
95 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
96 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
97 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
98 CopyRow = CopyRow_16_SSE2;
99 }
100 #endif
101 #if defined(HAS_COPYROW_16_ERMS)
102 if (TestCpuFlag(kCpuHasERMS)) {
103 CopyRow = CopyRow_16_ERMS;
104 }
105 #endif
106 #if defined(HAS_COPYROW_16_NEON)
107 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
108 CopyRow = CopyRow_16_NEON;
109 }
110 #endif
111 #if defined(HAS_COPYROW_16_MIPS)
112 if (TestCpuFlag(kCpuHasMIPS)) {
113 CopyRow = CopyRow_16_MIPS;
114 }
115 #endif
116
117 // Copy plane
118 for (y = 0; y < height; ++y) {
119 CopyRow(src_y, dst_y, width);
120 src_y += src_stride_y;
121 dst_y += dst_stride_y;
122 }
123 }
124
125 // Copy I422.
126 LIBYUV_API
I422Copy(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)127 int I422Copy(const uint8* src_y, int src_stride_y,
128 const uint8* src_u, int src_stride_u,
129 const uint8* src_v, int src_stride_v,
130 uint8* dst_y, int dst_stride_y,
131 uint8* dst_u, int dst_stride_u,
132 uint8* dst_v, int dst_stride_v,
133 int width, int height) {
134 int halfwidth = (width + 1) >> 1;
135 if (!src_y || !src_u || !src_v ||
136 !dst_y || !dst_u || !dst_v ||
137 width <= 0 || height == 0) {
138 return -1;
139 }
140 // Negative height means invert the image.
141 if (height < 0) {
142 height = -height;
143 src_y = src_y + (height - 1) * src_stride_y;
144 src_u = src_u + (height - 1) * src_stride_u;
145 src_v = src_v + (height - 1) * src_stride_v;
146 src_stride_y = -src_stride_y;
147 src_stride_u = -src_stride_u;
148 src_stride_v = -src_stride_v;
149 }
150 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
151 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height);
152 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height);
153 return 0;
154 }
155
156 // Copy I444.
157 LIBYUV_API
I444Copy(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)158 int I444Copy(const uint8* src_y, int src_stride_y,
159 const uint8* src_u, int src_stride_u,
160 const uint8* src_v, int src_stride_v,
161 uint8* dst_y, int dst_stride_y,
162 uint8* dst_u, int dst_stride_u,
163 uint8* dst_v, int dst_stride_v,
164 int width, int height) {
165 if (!src_y || !src_u || !src_v ||
166 !dst_y || !dst_u || !dst_v ||
167 width <= 0 || height == 0) {
168 return -1;
169 }
170 // Negative height means invert the image.
171 if (height < 0) {
172 height = -height;
173 src_y = src_y + (height - 1) * src_stride_y;
174 src_u = src_u + (height - 1) * src_stride_u;
175 src_v = src_v + (height - 1) * src_stride_v;
176 src_stride_y = -src_stride_y;
177 src_stride_u = -src_stride_u;
178 src_stride_v = -src_stride_v;
179 }
180
181 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
182 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height);
183 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height);
184 return 0;
185 }
186
187 // Copy I400.
188 LIBYUV_API
I400ToI400(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)189 int I400ToI400(const uint8* src_y, int src_stride_y,
190 uint8* dst_y, int dst_stride_y,
191 int width, int height) {
192 if (!src_y || !dst_y || width <= 0 || height == 0) {
193 return -1;
194 }
195 // Negative height means invert the image.
196 if (height < 0) {
197 height = -height;
198 src_y = src_y + (height - 1) * src_stride_y;
199 src_stride_y = -src_stride_y;
200 }
201 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
202 return 0;
203 }
204
205 // Convert I420 to I400.
206 LIBYUV_API
I420ToI400(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,int width,int height)207 int I420ToI400(const uint8* src_y, int src_stride_y,
208 const uint8* src_u, int src_stride_u,
209 const uint8* src_v, int src_stride_v,
210 uint8* dst_y, int dst_stride_y,
211 int width, int height) {
212 if (!src_y || !dst_y || width <= 0 || height == 0) {
213 return -1;
214 }
215 // Negative height means invert the image.
216 if (height < 0) {
217 height = -height;
218 src_y = src_y + (height - 1) * src_stride_y;
219 src_stride_y = -src_stride_y;
220 }
221 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
222 return 0;
223 }
224
225 // Mirror a plane of data.
MirrorPlane(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)226 void MirrorPlane(const uint8* src_y, int src_stride_y,
227 uint8* dst_y, int dst_stride_y,
228 int width, int height) {
229 int y;
230 void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
231 // Negative height means invert the image.
232 if (height < 0) {
233 height = -height;
234 src_y = src_y + (height - 1) * src_stride_y;
235 src_stride_y = -src_stride_y;
236 }
237 #if defined(HAS_MIRRORROW_NEON)
238 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
239 MirrorRow = MirrorRow_NEON;
240 }
241 #endif
242 #if defined(HAS_MIRRORROW_SSE2)
243 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) {
244 MirrorRow = MirrorRow_SSE2;
245 }
246 #endif
247 #if defined(HAS_MIRRORROW_SSSE3)
248 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
249 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
250 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
251 MirrorRow = MirrorRow_SSSE3;
252 }
253 #endif
254 #if defined(HAS_MIRRORROW_AVX2)
255 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
256 MirrorRow = MirrorRow_AVX2;
257 }
258 #endif
259
260 // Mirror plane
261 for (y = 0; y < height; ++y) {
262 MirrorRow(src_y, dst_y, width);
263 src_y += src_stride_y;
264 dst_y += dst_stride_y;
265 }
266 }
267
268 // Convert YUY2 to I422.
269 LIBYUV_API
YUY2ToI422(const uint8 * src_yuy2,int src_stride_yuy2,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)270 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2,
271 uint8* dst_y, int dst_stride_y,
272 uint8* dst_u, int dst_stride_u,
273 uint8* dst_v, int dst_stride_v,
274 int width, int height) {
275 int y;
276 void (*YUY2ToUV422Row)(const uint8* src_yuy2,
277 uint8* dst_u, uint8* dst_v, int pix) =
278 YUY2ToUV422Row_C;
279 void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
280 YUY2ToYRow_C;
281 // Negative height means invert the image.
282 if (height < 0) {
283 height = -height;
284 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
285 src_stride_yuy2 = -src_stride_yuy2;
286 }
287 // Coalesce rows.
288 if (src_stride_yuy2 == width * 2 &&
289 dst_stride_y == width &&
290 dst_stride_u * 2 == width &&
291 dst_stride_v * 2 == width) {
292 width *= height;
293 height = 1;
294 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0;
295 }
296 #if defined(HAS_YUY2TOYROW_SSE2)
297 if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
298 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
299 YUY2ToYRow = YUY2ToYRow_Any_SSE2;
300 if (IS_ALIGNED(width, 16)) {
301 YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
302 YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
303 if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
304 YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
305 if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
306 YUY2ToYRow = YUY2ToYRow_SSE2;
307 }
308 }
309 }
310 }
311 #endif
312 #if defined(HAS_YUY2TOYROW_AVX2)
313 if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
314 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
315 YUY2ToYRow = YUY2ToYRow_Any_AVX2;
316 if (IS_ALIGNED(width, 32)) {
317 YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
318 YUY2ToYRow = YUY2ToYRow_AVX2;
319 }
320 }
321 #endif
322 #if defined(HAS_YUY2TOYROW_NEON)
323 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
324 YUY2ToYRow = YUY2ToYRow_Any_NEON;
325 if (width >= 16) {
326 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
327 }
328 if (IS_ALIGNED(width, 16)) {
329 YUY2ToYRow = YUY2ToYRow_NEON;
330 YUY2ToUV422Row = YUY2ToUV422Row_NEON;
331 }
332 }
333 #endif
334
335 for (y = 0; y < height; ++y) {
336 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
337 YUY2ToYRow(src_yuy2, dst_y, width);
338 src_yuy2 += src_stride_yuy2;
339 dst_y += dst_stride_y;
340 dst_u += dst_stride_u;
341 dst_v += dst_stride_v;
342 }
343 return 0;
344 }
345
346 // Convert UYVY to I422.
347 LIBYUV_API
UYVYToI422(const uint8 * src_uyvy,int src_stride_uyvy,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)348 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy,
349 uint8* dst_y, int dst_stride_y,
350 uint8* dst_u, int dst_stride_u,
351 uint8* dst_v, int dst_stride_v,
352 int width, int height) {
353 int y;
354 void (*UYVYToUV422Row)(const uint8* src_uyvy,
355 uint8* dst_u, uint8* dst_v, int pix) =
356 UYVYToUV422Row_C;
357 void (*UYVYToYRow)(const uint8* src_uyvy,
358 uint8* dst_y, int pix) = UYVYToYRow_C;
359 // Negative height means invert the image.
360 if (height < 0) {
361 height = -height;
362 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
363 src_stride_uyvy = -src_stride_uyvy;
364 }
365 // Coalesce rows.
366 if (src_stride_uyvy == width * 2 &&
367 dst_stride_y == width &&
368 dst_stride_u * 2 == width &&
369 dst_stride_v * 2 == width) {
370 width *= height;
371 height = 1;
372 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0;
373 }
374 #if defined(HAS_UYVYTOYROW_SSE2)
375 if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
376 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2;
377 UYVYToYRow = UYVYToYRow_Any_SSE2;
378 if (IS_ALIGNED(width, 16)) {
379 UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2;
380 UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
381 if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
382 UYVYToUV422Row = UYVYToUV422Row_SSE2;
383 if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
384 UYVYToYRow = UYVYToYRow_SSE2;
385 }
386 }
387 }
388 }
389 #endif
390 #if defined(HAS_UYVYTOYROW_AVX2)
391 if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
392 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2;
393 UYVYToYRow = UYVYToYRow_Any_AVX2;
394 if (IS_ALIGNED(width, 32)) {
395 UYVYToUV422Row = UYVYToUV422Row_AVX2;
396 UYVYToYRow = UYVYToYRow_AVX2;
397 }
398 }
399 #endif
400 #if defined(HAS_UYVYTOYROW_NEON)
401 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
402 UYVYToYRow = UYVYToYRow_Any_NEON;
403 if (width >= 16) {
404 UYVYToUV422Row = UYVYToUV422Row_Any_NEON;
405 }
406 if (IS_ALIGNED(width, 16)) {
407 UYVYToYRow = UYVYToYRow_NEON;
408 UYVYToUV422Row = UYVYToUV422Row_NEON;
409 }
410 }
411 #endif
412
413 for (y = 0; y < height; ++y) {
414 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width);
415 UYVYToYRow(src_uyvy, dst_y, width);
416 src_uyvy += src_stride_uyvy;
417 dst_y += dst_stride_y;
418 dst_u += dst_stride_u;
419 dst_v += dst_stride_v;
420 }
421 return 0;
422 }
423
424 // Mirror I400 with optional flipping
425 LIBYUV_API
I400Mirror(const uint8 * src_y,int src_stride_y,uint8 * dst_y,int dst_stride_y,int width,int height)426 int I400Mirror(const uint8* src_y, int src_stride_y,
427 uint8* dst_y, int dst_stride_y,
428 int width, int height) {
429 if (!src_y || !dst_y ||
430 width <= 0 || height == 0) {
431 return -1;
432 }
433 // Negative height means invert the image.
434 if (height < 0) {
435 height = -height;
436 src_y = src_y + (height - 1) * src_stride_y;
437 src_stride_y = -src_stride_y;
438 }
439
440 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
441 return 0;
442 }
443
444 // Mirror I420 with optional flipping
445 LIBYUV_API
I420Mirror(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int width,int height)446 int I420Mirror(const uint8* src_y, int src_stride_y,
447 const uint8* src_u, int src_stride_u,
448 const uint8* src_v, int src_stride_v,
449 uint8* dst_y, int dst_stride_y,
450 uint8* dst_u, int dst_stride_u,
451 uint8* dst_v, int dst_stride_v,
452 int width, int height) {
453 int halfwidth = (width + 1) >> 1;
454 int halfheight = (height + 1) >> 1;
455 if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v ||
456 width <= 0 || height == 0) {
457 return -1;
458 }
459 // Negative height means invert the image.
460 if (height < 0) {
461 height = -height;
462 halfheight = (height + 1) >> 1;
463 src_y = src_y + (height - 1) * src_stride_y;
464 src_u = src_u + (halfheight - 1) * src_stride_u;
465 src_v = src_v + (halfheight - 1) * src_stride_v;
466 src_stride_y = -src_stride_y;
467 src_stride_u = -src_stride_u;
468 src_stride_v = -src_stride_v;
469 }
470
471 if (dst_y) {
472 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
473 }
474 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
475 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
476 return 0;
477 }
478
479 // ARGB mirror.
480 LIBYUV_API
ARGBMirror(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)481 int ARGBMirror(const uint8* src_argb, int src_stride_argb,
482 uint8* dst_argb, int dst_stride_argb,
483 int width, int height) {
484 int y;
485 void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
486 ARGBMirrorRow_C;
487 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
488 return -1;
489 }
490 // Negative height means invert the image.
491 if (height < 0) {
492 height = -height;
493 src_argb = src_argb + (height - 1) * src_stride_argb;
494 src_stride_argb = -src_stride_argb;
495 }
496
497 #if defined(HAS_ARGBMIRRORROW_SSSE3)
498 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
499 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
500 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
501 ARGBMirrorRow = ARGBMirrorRow_SSSE3;
502 }
503 #endif
504 #if defined(HAS_ARGBMIRRORROW_AVX2)
505 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
506 ARGBMirrorRow = ARGBMirrorRow_AVX2;
507 }
508 #endif
509 #if defined(HAS_ARGBMIRRORROW_NEON)
510 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
511 ARGBMirrorRow = ARGBMirrorRow_NEON;
512 }
513 #endif
514
515 // Mirror plane
516 for (y = 0; y < height; ++y) {
517 ARGBMirrorRow(src_argb, dst_argb, width);
518 src_argb += src_stride_argb;
519 dst_argb += dst_stride_argb;
520 }
521 return 0;
522 }
523
524 // Get a blender that optimized for the CPU, alignment and pixel count.
525 // As there are 6 blenders to choose from, the caller should try to use
526 // the same blend function for all pixels if possible.
527 LIBYUV_API
GetARGBBlend()528 ARGBBlendRow GetARGBBlend() {
529 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
530 uint8* dst_argb, int width) = ARGBBlendRow_C;
531 #if defined(HAS_ARGBBLENDROW_SSSE3)
532 if (TestCpuFlag(kCpuHasSSSE3)) {
533 ARGBBlendRow = ARGBBlendRow_SSSE3;
534 return ARGBBlendRow;
535 }
536 #endif
537 #if defined(HAS_ARGBBLENDROW_SSE2)
538 if (TestCpuFlag(kCpuHasSSE2)) {
539 ARGBBlendRow = ARGBBlendRow_SSE2;
540 }
541 #endif
542 #if defined(HAS_ARGBBLENDROW_NEON)
543 if (TestCpuFlag(kCpuHasNEON)) {
544 ARGBBlendRow = ARGBBlendRow_NEON;
545 }
546 #endif
547 return ARGBBlendRow;
548 }
549
550 // Alpha Blend 2 ARGB images and store to destination.
551 LIBYUV_API
ARGBBlend(const uint8 * src_argb0,int src_stride_argb0,const uint8 * src_argb1,int src_stride_argb1,uint8 * dst_argb,int dst_stride_argb,int width,int height)552 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
553 const uint8* src_argb1, int src_stride_argb1,
554 uint8* dst_argb, int dst_stride_argb,
555 int width, int height) {
556 int y;
557 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1,
558 uint8* dst_argb, int width) = GetARGBBlend();
559 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
560 return -1;
561 }
562 // Negative height means invert the image.
563 if (height < 0) {
564 height = -height;
565 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
566 dst_stride_argb = -dst_stride_argb;
567 }
568 // Coalesce rows.
569 if (src_stride_argb0 == width * 4 &&
570 src_stride_argb1 == width * 4 &&
571 dst_stride_argb == width * 4) {
572 width *= height;
573 height = 1;
574 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
575 }
576
577 for (y = 0; y < height; ++y) {
578 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
579 src_argb0 += src_stride_argb0;
580 src_argb1 += src_stride_argb1;
581 dst_argb += dst_stride_argb;
582 }
583 return 0;
584 }
585
586 // Multiply 2 ARGB images and store to destination.
587 LIBYUV_API
ARGBMultiply(const uint8 * src_argb0,int src_stride_argb0,const uint8 * src_argb1,int src_stride_argb1,uint8 * dst_argb,int dst_stride_argb,int width,int height)588 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
589 const uint8* src_argb1, int src_stride_argb1,
590 uint8* dst_argb, int dst_stride_argb,
591 int width, int height) {
592 int y;
593 void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst,
594 int width) = ARGBMultiplyRow_C;
595 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
596 return -1;
597 }
598 // Negative height means invert the image.
599 if (height < 0) {
600 height = -height;
601 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
602 dst_stride_argb = -dst_stride_argb;
603 }
604 // Coalesce rows.
605 if (src_stride_argb0 == width * 4 &&
606 src_stride_argb1 == width * 4 &&
607 dst_stride_argb == width * 4) {
608 width *= height;
609 height = 1;
610 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
611 }
612 #if defined(HAS_ARGBMULTIPLYROW_SSE2)
613 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
614 ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2;
615 if (IS_ALIGNED(width, 4)) {
616 ARGBMultiplyRow = ARGBMultiplyRow_SSE2;
617 }
618 }
619 #endif
620 #if defined(HAS_ARGBMULTIPLYROW_AVX2)
621 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
622 ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2;
623 if (IS_ALIGNED(width, 8)) {
624 ARGBMultiplyRow = ARGBMultiplyRow_AVX2;
625 }
626 }
627 #endif
628 #if defined(HAS_ARGBMULTIPLYROW_NEON)
629 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
630 ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON;
631 if (IS_ALIGNED(width, 8)) {
632 ARGBMultiplyRow = ARGBMultiplyRow_NEON;
633 }
634 }
635 #endif
636
637 // Multiply plane
638 for (y = 0; y < height; ++y) {
639 ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width);
640 src_argb0 += src_stride_argb0;
641 src_argb1 += src_stride_argb1;
642 dst_argb += dst_stride_argb;
643 }
644 return 0;
645 }
646
647 // Add 2 ARGB images and store to destination.
648 LIBYUV_API
ARGBAdd(const uint8 * src_argb0,int src_stride_argb0,const uint8 * src_argb1,int src_stride_argb1,uint8 * dst_argb,int dst_stride_argb,int width,int height)649 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0,
650 const uint8* src_argb1, int src_stride_argb1,
651 uint8* dst_argb, int dst_stride_argb,
652 int width, int height) {
653 int y;
654 void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst,
655 int width) = ARGBAddRow_C;
656 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
657 return -1;
658 }
659 // Negative height means invert the image.
660 if (height < 0) {
661 height = -height;
662 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
663 dst_stride_argb = -dst_stride_argb;
664 }
665 // Coalesce rows.
666 if (src_stride_argb0 == width * 4 &&
667 src_stride_argb1 == width * 4 &&
668 dst_stride_argb == width * 4) {
669 width *= height;
670 height = 1;
671 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
672 }
673 #if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER)
674 if (TestCpuFlag(kCpuHasSSE2)) {
675 ARGBAddRow = ARGBAddRow_SSE2;
676 }
677 #endif
678 #if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER)
679 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
680 ARGBAddRow = ARGBAddRow_Any_SSE2;
681 if (IS_ALIGNED(width, 4)) {
682 ARGBAddRow = ARGBAddRow_SSE2;
683 }
684 }
685 #endif
686 #if defined(HAS_ARGBADDROW_AVX2)
687 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
688 ARGBAddRow = ARGBAddRow_Any_AVX2;
689 if (IS_ALIGNED(width, 8)) {
690 ARGBAddRow = ARGBAddRow_AVX2;
691 }
692 }
693 #endif
694 #if defined(HAS_ARGBADDROW_NEON)
695 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
696 ARGBAddRow = ARGBAddRow_Any_NEON;
697 if (IS_ALIGNED(width, 8)) {
698 ARGBAddRow = ARGBAddRow_NEON;
699 }
700 }
701 #endif
702
703 // Add plane
704 for (y = 0; y < height; ++y) {
705 ARGBAddRow(src_argb0, src_argb1, dst_argb, width);
706 src_argb0 += src_stride_argb0;
707 src_argb1 += src_stride_argb1;
708 dst_argb += dst_stride_argb;
709 }
710 return 0;
711 }
712
713 // Subtract 2 ARGB images and store to destination.
714 LIBYUV_API
ARGBSubtract(const uint8 * src_argb0,int src_stride_argb0,const uint8 * src_argb1,int src_stride_argb1,uint8 * dst_argb,int dst_stride_argb,int width,int height)715 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0,
716 const uint8* src_argb1, int src_stride_argb1,
717 uint8* dst_argb, int dst_stride_argb,
718 int width, int height) {
719 int y;
720 void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst,
721 int width) = ARGBSubtractRow_C;
722 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
723 return -1;
724 }
725 // Negative height means invert the image.
726 if (height < 0) {
727 height = -height;
728 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
729 dst_stride_argb = -dst_stride_argb;
730 }
731 // Coalesce rows.
732 if (src_stride_argb0 == width * 4 &&
733 src_stride_argb1 == width * 4 &&
734 dst_stride_argb == width * 4) {
735 width *= height;
736 height = 1;
737 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
738 }
739 #if defined(HAS_ARGBSUBTRACTROW_SSE2)
740 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
741 ARGBSubtractRow = ARGBSubtractRow_Any_SSE2;
742 if (IS_ALIGNED(width, 4)) {
743 ARGBSubtractRow = ARGBSubtractRow_SSE2;
744 }
745 }
746 #endif
747 #if defined(HAS_ARGBSUBTRACTROW_AVX2)
748 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
749 ARGBSubtractRow = ARGBSubtractRow_Any_AVX2;
750 if (IS_ALIGNED(width, 8)) {
751 ARGBSubtractRow = ARGBSubtractRow_AVX2;
752 }
753 }
754 #endif
755 #if defined(HAS_ARGBSUBTRACTROW_NEON)
756 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
757 ARGBSubtractRow = ARGBSubtractRow_Any_NEON;
758 if (IS_ALIGNED(width, 8)) {
759 ARGBSubtractRow = ARGBSubtractRow_NEON;
760 }
761 }
762 #endif
763
764 // Subtract plane
765 for (y = 0; y < height; ++y) {
766 ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width);
767 src_argb0 += src_stride_argb0;
768 src_argb1 += src_stride_argb1;
769 dst_argb += dst_stride_argb;
770 }
771 return 0;
772 }
773
774 // Convert I422 to BGRA.
775 LIBYUV_API
I422ToBGRA(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_bgra,int dst_stride_bgra,int width,int height)776 int I422ToBGRA(const uint8* src_y, int src_stride_y,
777 const uint8* src_u, int src_stride_u,
778 const uint8* src_v, int src_stride_v,
779 uint8* dst_bgra, int dst_stride_bgra,
780 int width, int height) {
781 int y;
782 void (*I422ToBGRARow)(const uint8* y_buf,
783 const uint8* u_buf,
784 const uint8* v_buf,
785 uint8* rgb_buf,
786 int width) = I422ToBGRARow_C;
787 if (!src_y || !src_u || !src_v ||
788 !dst_bgra ||
789 width <= 0 || height == 0) {
790 return -1;
791 }
792 // Negative height means invert the image.
793 if (height < 0) {
794 height = -height;
795 dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
796 dst_stride_bgra = -dst_stride_bgra;
797 }
798 // Coalesce rows.
799 if (src_stride_y == width &&
800 src_stride_u * 2 == width &&
801 src_stride_v * 2 == width &&
802 dst_stride_bgra == width * 4) {
803 width *= height;
804 height = 1;
805 src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0;
806 }
807 #if defined(HAS_I422TOBGRAROW_NEON)
808 if (TestCpuFlag(kCpuHasNEON)) {
809 I422ToBGRARow = I422ToBGRARow_Any_NEON;
810 if (IS_ALIGNED(width, 16)) {
811 I422ToBGRARow = I422ToBGRARow_NEON;
812 }
813 }
814 #elif defined(HAS_I422TOBGRAROW_SSSE3)
815 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
816 I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
817 if (IS_ALIGNED(width, 8)) {
818 I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3;
819 if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) {
820 I422ToBGRARow = I422ToBGRARow_SSSE3;
821 }
822 }
823 }
824 #elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
825 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
826 IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
827 IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
828 IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
829 IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
830 I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
831 }
832 #endif
833
834 for (y = 0; y < height; ++y) {
835 I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
836 dst_bgra += dst_stride_bgra;
837 src_y += src_stride_y;
838 src_u += src_stride_u;
839 src_v += src_stride_v;
840 }
841 return 0;
842 }
843
844 // Convert I422 to ABGR.
845 LIBYUV_API
I422ToABGR(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_abgr,int dst_stride_abgr,int width,int height)846 int I422ToABGR(const uint8* src_y, int src_stride_y,
847 const uint8* src_u, int src_stride_u,
848 const uint8* src_v, int src_stride_v,
849 uint8* dst_abgr, int dst_stride_abgr,
850 int width, int height) {
851 int y;
852 void (*I422ToABGRRow)(const uint8* y_buf,
853 const uint8* u_buf,
854 const uint8* v_buf,
855 uint8* rgb_buf,
856 int width) = I422ToABGRRow_C;
857 if (!src_y || !src_u || !src_v ||
858 !dst_abgr ||
859 width <= 0 || height == 0) {
860 return -1;
861 }
862 // Negative height means invert the image.
863 if (height < 0) {
864 height = -height;
865 dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
866 dst_stride_abgr = -dst_stride_abgr;
867 }
868 // Coalesce rows.
869 if (src_stride_y == width &&
870 src_stride_u * 2 == width &&
871 src_stride_v * 2 == width &&
872 dst_stride_abgr == width * 4) {
873 width *= height;
874 height = 1;
875 src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0;
876 }
877 #if defined(HAS_I422TOABGRROW_NEON)
878 if (TestCpuFlag(kCpuHasNEON)) {
879 I422ToABGRRow = I422ToABGRRow_Any_NEON;
880 if (IS_ALIGNED(width, 16)) {
881 I422ToABGRRow = I422ToABGRRow_NEON;
882 }
883 }
884 #elif defined(HAS_I422TOABGRROW_SSSE3)
885 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
886 I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
887 if (IS_ALIGNED(width, 8)) {
888 I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3;
889 if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) {
890 I422ToABGRRow = I422ToABGRRow_SSSE3;
891 }
892 }
893 }
894 #endif
895
896 for (y = 0; y < height; ++y) {
897 I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
898 dst_abgr += dst_stride_abgr;
899 src_y += src_stride_y;
900 src_u += src_stride_u;
901 src_v += src_stride_v;
902 }
903 return 0;
904 }
905
906 // Convert I422 to RGBA.
907 LIBYUV_API
I422ToRGBA(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint8 * dst_rgba,int dst_stride_rgba,int width,int height)908 int I422ToRGBA(const uint8* src_y, int src_stride_y,
909 const uint8* src_u, int src_stride_u,
910 const uint8* src_v, int src_stride_v,
911 uint8* dst_rgba, int dst_stride_rgba,
912 int width, int height) {
913 int y;
914 void (*I422ToRGBARow)(const uint8* y_buf,
915 const uint8* u_buf,
916 const uint8* v_buf,
917 uint8* rgb_buf,
918 int width) = I422ToRGBARow_C;
919 if (!src_y || !src_u || !src_v ||
920 !dst_rgba ||
921 width <= 0 || height == 0) {
922 return -1;
923 }
924 // Negative height means invert the image.
925 if (height < 0) {
926 height = -height;
927 dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba;
928 dst_stride_rgba = -dst_stride_rgba;
929 }
930 // Coalesce rows.
931 if (src_stride_y == width &&
932 src_stride_u * 2 == width &&
933 src_stride_v * 2 == width &&
934 dst_stride_rgba == width * 4) {
935 width *= height;
936 height = 1;
937 src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0;
938 }
939 #if defined(HAS_I422TORGBAROW_NEON)
940 if (TestCpuFlag(kCpuHasNEON)) {
941 I422ToRGBARow = I422ToRGBARow_Any_NEON;
942 if (IS_ALIGNED(width, 16)) {
943 I422ToRGBARow = I422ToRGBARow_NEON;
944 }
945 }
946 #elif defined(HAS_I422TORGBAROW_SSSE3)
947 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
948 I422ToRGBARow = I422ToRGBARow_Any_SSSE3;
949 if (IS_ALIGNED(width, 8)) {
950 I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3;
951 if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) {
952 I422ToRGBARow = I422ToRGBARow_SSSE3;
953 }
954 }
955 }
956 #endif
957
958 for (y = 0; y < height; ++y) {
959 I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
960 dst_rgba += dst_stride_rgba;
961 src_y += src_stride_y;
962 src_u += src_stride_u;
963 src_v += src_stride_v;
964 }
965 return 0;
966 }
967
968 // Convert NV12 to RGB565.
969 LIBYUV_API
NV12ToRGB565(const uint8 * src_y,int src_stride_y,const uint8 * src_uv,int src_stride_uv,uint8 * dst_rgb565,int dst_stride_rgb565,int width,int height)970 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
971 const uint8* src_uv, int src_stride_uv,
972 uint8* dst_rgb565, int dst_stride_rgb565,
973 int width, int height) {
974 int y;
975 void (*NV12ToRGB565Row)(const uint8* y_buf,
976 const uint8* uv_buf,
977 uint8* rgb_buf,
978 int width) = NV12ToRGB565Row_C;
979 if (!src_y || !src_uv || !dst_rgb565 ||
980 width <= 0 || height == 0) {
981 return -1;
982 }
983 // Negative height means invert the image.
984 if (height < 0) {
985 height = -height;
986 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
987 dst_stride_rgb565 = -dst_stride_rgb565;
988 }
989 #if defined(HAS_NV12TORGB565ROW_SSSE3)
990 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
991 NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3;
992 if (IS_ALIGNED(width, 8)) {
993 NV12ToRGB565Row = NV12ToRGB565Row_SSSE3;
994 }
995 }
996 #elif defined(HAS_NV12TORGB565ROW_NEON)
997 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
998 NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON;
999 if (IS_ALIGNED(width, 8)) {
1000 NV12ToRGB565Row = NV12ToRGB565Row_NEON;
1001 }
1002 }
1003 #endif
1004
1005 for (y = 0; y < height; ++y) {
1006 NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width);
1007 dst_rgb565 += dst_stride_rgb565;
1008 src_y += src_stride_y;
1009 if (y & 1) {
1010 src_uv += src_stride_uv;
1011 }
1012 }
1013 return 0;
1014 }
1015
1016 // Convert NV21 to RGB565.
1017 LIBYUV_API
NV21ToRGB565(const uint8 * src_y,int src_stride_y,const uint8 * src_vu,int src_stride_vu,uint8 * dst_rgb565,int dst_stride_rgb565,int width,int height)1018 int NV21ToRGB565(const uint8* src_y, int src_stride_y,
1019 const uint8* src_vu, int src_stride_vu,
1020 uint8* dst_rgb565, int dst_stride_rgb565,
1021 int width, int height) {
1022 int y;
1023 void (*NV21ToRGB565Row)(const uint8* y_buf,
1024 const uint8* src_vu,
1025 uint8* rgb_buf,
1026 int width) = NV21ToRGB565Row_C;
1027 if (!src_y || !src_vu || !dst_rgb565 ||
1028 width <= 0 || height == 0) {
1029 return -1;
1030 }
1031 // Negative height means invert the image.
1032 if (height < 0) {
1033 height = -height;
1034 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565;
1035 dst_stride_rgb565 = -dst_stride_rgb565;
1036 }
1037 #if defined(HAS_NV21TORGB565ROW_SSSE3)
1038 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1039 NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3;
1040 if (IS_ALIGNED(width, 8)) {
1041 NV21ToRGB565Row = NV21ToRGB565Row_SSSE3;
1042 }
1043 }
1044 #elif defined(HAS_NV21TORGB565ROW_NEON)
1045 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1046 NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON;
1047 if (IS_ALIGNED(width, 8)) {
1048 NV21ToRGB565Row = NV21ToRGB565Row_NEON;
1049 }
1050 }
1051 #endif
1052
1053 for (y = 0; y < height; ++y) {
1054 NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width);
1055 dst_rgb565 += dst_stride_rgb565;
1056 src_y += src_stride_y;
1057 if (y & 1) {
1058 src_vu += src_stride_vu;
1059 }
1060 }
1061 return 0;
1062 }
1063
1064 LIBYUV_API
SetPlane(uint8 * dst_y,int dst_stride_y,int width,int height,uint32 value)1065 void SetPlane(uint8* dst_y, int dst_stride_y,
1066 int width, int height,
1067 uint32 value) {
1068 int y;
1069 uint32 v32 = value | (value << 8) | (value << 16) | (value << 24);
1070 void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C;
1071 // Coalesce rows.
1072 if (dst_stride_y == width) {
1073 width *= height;
1074 height = 1;
1075 dst_stride_y = 0;
1076 }
1077 #if defined(HAS_SETROW_NEON)
1078 if (TestCpuFlag(kCpuHasNEON) &&
1079 IS_ALIGNED(width, 16) &&
1080 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
1081 SetRow = SetRow_NEON;
1082 }
1083 #endif
1084 #if defined(HAS_SETROW_X86)
1085 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
1086 SetRow = SetRow_X86;
1087 }
1088 #endif
1089
1090 // Set plane
1091 for (y = 0; y < height; ++y) {
1092 SetRow(dst_y, v32, width);
1093 dst_y += dst_stride_y;
1094 }
1095 }
1096
1097 // Draw a rectangle into I420
1098 LIBYUV_API
I420Rect(uint8 * dst_y,int dst_stride_y,uint8 * dst_u,int dst_stride_u,uint8 * dst_v,int dst_stride_v,int x,int y,int width,int height,int value_y,int value_u,int value_v)1099 int I420Rect(uint8* dst_y, int dst_stride_y,
1100 uint8* dst_u, int dst_stride_u,
1101 uint8* dst_v, int dst_stride_v,
1102 int x, int y,
1103 int width, int height,
1104 int value_y, int value_u, int value_v) {
1105 int halfwidth = (width + 1) >> 1;
1106 int halfheight = (height + 1) >> 1;
1107 uint8* start_y = dst_y + y * dst_stride_y + x;
1108 uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2);
1109 uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2);
1110 if (!dst_y || !dst_u || !dst_v ||
1111 width <= 0 || height <= 0 ||
1112 x < 0 || y < 0 ||
1113 value_y < 0 || value_y > 255 ||
1114 value_u < 0 || value_u > 255 ||
1115 value_v < 0 || value_v > 255) {
1116 return -1;
1117 }
1118
1119 SetPlane(start_y, dst_stride_y, width, height, value_y);
1120 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u);
1121 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v);
1122 return 0;
1123 }
1124
1125 // Draw a rectangle into ARGB
1126 LIBYUV_API
ARGBRect(uint8 * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height,uint32 value)1127 int ARGBRect(uint8* dst_argb, int dst_stride_argb,
1128 int dst_x, int dst_y,
1129 int width, int height,
1130 uint32 value) {
1131 if (!dst_argb ||
1132 width <= 0 || height <= 0 ||
1133 dst_x < 0 || dst_y < 0) {
1134 return -1;
1135 }
1136 dst_argb += dst_y * dst_stride_argb + dst_x * 4;
1137 // Coalesce rows.
1138 if (dst_stride_argb == width * 4) {
1139 width *= height;
1140 height = 1;
1141 dst_stride_argb = 0;
1142 }
1143 #if defined(HAS_SETROW_NEON)
1144 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) &&
1145 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1146 ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height);
1147 return 0;
1148 }
1149 #endif
1150 #if defined(HAS_SETROW_X86)
1151 if (TestCpuFlag(kCpuHasX86)) {
1152 ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height);
1153 return 0;
1154 }
1155 #endif
1156 ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height);
1157 return 0;
1158 }
1159
1160 // Convert unattentuated ARGB to preattenuated ARGB.
1161 // An unattenutated ARGB alpha blend uses the formula
1162 // p = a * f + (1 - a) * b
1163 // where
1164 // p is output pixel
1165 // f is foreground pixel
1166 // b is background pixel
1167 // a is alpha value from foreground pixel
1168 // An preattenutated ARGB alpha blend uses the formula
1169 // p = f + (1 - a) * b
1170 // where
1171 // f is foreground pixel premultiplied by alpha
1172
1173 LIBYUV_API
ARGBAttenuate(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)1174 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb,
1175 uint8* dst_argb, int dst_stride_argb,
1176 int width, int height) {
1177 int y;
1178 void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
1179 int width) = ARGBAttenuateRow_C;
1180 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1181 return -1;
1182 }
1183 if (height < 0) {
1184 height = -height;
1185 src_argb = src_argb + (height - 1) * src_stride_argb;
1186 src_stride_argb = -src_stride_argb;
1187 }
1188 // Coalesce rows.
1189 if (src_stride_argb == width * 4 &&
1190 dst_stride_argb == width * 4) {
1191 width *= height;
1192 height = 1;
1193 src_stride_argb = dst_stride_argb = 0;
1194 }
1195 #if defined(HAS_ARGBATTENUATEROW_SSE2)
1196 if (TestCpuFlag(kCpuHasSSE2) && width >= 4 &&
1197 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1198 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1199 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2;
1200 if (IS_ALIGNED(width, 4)) {
1201 ARGBAttenuateRow = ARGBAttenuateRow_SSE2;
1202 }
1203 }
1204 #endif
1205 #if defined(HAS_ARGBATTENUATEROW_SSSE3)
1206 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1207 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
1208 if (IS_ALIGNED(width, 4)) {
1209 ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
1210 }
1211 }
1212 #endif
1213 #if defined(HAS_ARGBATTENUATEROW_AVX2)
1214 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1215 ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
1216 if (IS_ALIGNED(width, 8)) {
1217 ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
1218 }
1219 }
1220 #endif
1221 #if defined(HAS_ARGBATTENUATEROW_NEON)
1222 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1223 ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
1224 if (IS_ALIGNED(width, 8)) {
1225 ARGBAttenuateRow = ARGBAttenuateRow_NEON;
1226 }
1227 }
1228 #endif
1229
1230 for (y = 0; y < height; ++y) {
1231 ARGBAttenuateRow(src_argb, dst_argb, width);
1232 src_argb += src_stride_argb;
1233 dst_argb += dst_stride_argb;
1234 }
1235 return 0;
1236 }
1237
1238 // Convert preattentuated ARGB to unattenuated ARGB.
1239 LIBYUV_API
ARGBUnattenuate(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)1240 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
1241 uint8* dst_argb, int dst_stride_argb,
1242 int width, int height) {
1243 int y;
1244 void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb,
1245 int width) = ARGBUnattenuateRow_C;
1246 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1247 return -1;
1248 }
1249 if (height < 0) {
1250 height = -height;
1251 src_argb = src_argb + (height - 1) * src_stride_argb;
1252 src_stride_argb = -src_stride_argb;
1253 }
1254 // Coalesce rows.
1255 if (src_stride_argb == width * 4 &&
1256 dst_stride_argb == width * 4) {
1257 width *= height;
1258 height = 1;
1259 src_stride_argb = dst_stride_argb = 0;
1260 }
1261 #if defined(HAS_ARGBUNATTENUATEROW_SSE2)
1262 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1263 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2;
1264 if (IS_ALIGNED(width, 4)) {
1265 ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2;
1266 }
1267 }
1268 #endif
1269 #if defined(HAS_ARGBUNATTENUATEROW_AVX2)
1270 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1271 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2;
1272 if (IS_ALIGNED(width, 8)) {
1273 ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2;
1274 }
1275 }
1276 #endif
1277 // TODO(fbarchard): Neon version.
1278
1279 for (y = 0; y < height; ++y) {
1280 ARGBUnattenuateRow(src_argb, dst_argb, width);
1281 src_argb += src_stride_argb;
1282 dst_argb += dst_stride_argb;
1283 }
1284 return 0;
1285 }
1286
1287 // Convert ARGB to Grayed ARGB.
1288 LIBYUV_API
ARGBGrayTo(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)1289 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb,
1290 uint8* dst_argb, int dst_stride_argb,
1291 int width, int height) {
1292 int y;
1293 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1294 int width) = ARGBGrayRow_C;
1295 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1296 return -1;
1297 }
1298 if (height < 0) {
1299 height = -height;
1300 src_argb = src_argb + (height - 1) * src_stride_argb;
1301 src_stride_argb = -src_stride_argb;
1302 }
1303 // Coalesce rows.
1304 if (src_stride_argb == width * 4 &&
1305 dst_stride_argb == width * 4) {
1306 width *= height;
1307 height = 1;
1308 src_stride_argb = dst_stride_argb = 0;
1309 }
1310 #if defined(HAS_ARGBGRAYROW_SSSE3)
1311 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1312 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1313 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1314 ARGBGrayRow = ARGBGrayRow_SSSE3;
1315 }
1316 #elif defined(HAS_ARGBGRAYROW_NEON)
1317 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1318 ARGBGrayRow = ARGBGrayRow_NEON;
1319 }
1320 #endif
1321
1322 for (y = 0; y < height; ++y) {
1323 ARGBGrayRow(src_argb, dst_argb, width);
1324 src_argb += src_stride_argb;
1325 dst_argb += dst_stride_argb;
1326 }
1327 return 0;
1328 }
1329
1330 // Make a rectangle of ARGB gray scale.
1331 LIBYUV_API
ARGBGray(uint8 * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)1332 int ARGBGray(uint8* dst_argb, int dst_stride_argb,
1333 int dst_x, int dst_y,
1334 int width, int height) {
1335 int y;
1336 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb,
1337 int width) = ARGBGrayRow_C;
1338 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1339 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1340 return -1;
1341 }
1342 // Coalesce rows.
1343 if (dst_stride_argb == width * 4) {
1344 width *= height;
1345 height = 1;
1346 dst_stride_argb = 0;
1347 }
1348 #if defined(HAS_ARGBGRAYROW_SSSE3)
1349 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1350 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1351 ARGBGrayRow = ARGBGrayRow_SSSE3;
1352 }
1353 #elif defined(HAS_ARGBGRAYROW_NEON)
1354 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1355 ARGBGrayRow = ARGBGrayRow_NEON;
1356 }
1357 #endif
1358 for (y = 0; y < height; ++y) {
1359 ARGBGrayRow(dst, dst, width);
1360 dst += dst_stride_argb;
1361 }
1362 return 0;
1363 }
1364
1365 // Make a rectangle of ARGB Sepia tone.
1366 LIBYUV_API
ARGBSepia(uint8 * dst_argb,int dst_stride_argb,int dst_x,int dst_y,int width,int height)1367 int ARGBSepia(uint8* dst_argb, int dst_stride_argb,
1368 int dst_x, int dst_y, int width, int height) {
1369 int y;
1370 void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C;
1371 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1372 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) {
1373 return -1;
1374 }
1375 // Coalesce rows.
1376 if (dst_stride_argb == width * 4) {
1377 width *= height;
1378 height = 1;
1379 dst_stride_argb = 0;
1380 }
1381 #if defined(HAS_ARGBSEPIAROW_SSSE3)
1382 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1383 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1384 ARGBSepiaRow = ARGBSepiaRow_SSSE3;
1385 }
1386 #elif defined(HAS_ARGBSEPIAROW_NEON)
1387 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1388 ARGBSepiaRow = ARGBSepiaRow_NEON;
1389 }
1390 #endif
1391 for (y = 0; y < height; ++y) {
1392 ARGBSepiaRow(dst, width);
1393 dst += dst_stride_argb;
1394 }
1395 return 0;
1396 }
1397
1398 // Apply a 4x4 matrix to each ARGB pixel.
1399 // Note: Normally for shading, but can be used to swizzle or invert.
1400 LIBYUV_API
ARGBColorMatrix(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,const int8 * matrix_argb,int width,int height)1401 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb,
1402 uint8* dst_argb, int dst_stride_argb,
1403 const int8* matrix_argb,
1404 int width, int height) {
1405 int y;
1406 void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb,
1407 const int8* matrix_argb, int width) = ARGBColorMatrixRow_C;
1408 if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) {
1409 return -1;
1410 }
1411 if (height < 0) {
1412 height = -height;
1413 src_argb = src_argb + (height - 1) * src_stride_argb;
1414 src_stride_argb = -src_stride_argb;
1415 }
1416 // Coalesce rows.
1417 if (src_stride_argb == width * 4 &&
1418 dst_stride_argb == width * 4) {
1419 width *= height;
1420 height = 1;
1421 src_stride_argb = dst_stride_argb = 0;
1422 }
1423 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3)
1424 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) &&
1425 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1426 ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3;
1427 }
1428 #elif defined(HAS_ARGBCOLORMATRIXROW_NEON)
1429 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1430 ARGBColorMatrixRow = ARGBColorMatrixRow_NEON;
1431 }
1432 #endif
1433 for (y = 0; y < height; ++y) {
1434 ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width);
1435 src_argb += src_stride_argb;
1436 dst_argb += dst_stride_argb;
1437 }
1438 return 0;
1439 }
1440
1441 // Apply a 4x3 matrix to each ARGB pixel.
1442 // Deprecated.
1443 LIBYUV_API
RGBColorMatrix(uint8 * dst_argb,int dst_stride_argb,const int8 * matrix_rgb,int dst_x,int dst_y,int width,int height)1444 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb,
1445 const int8* matrix_rgb,
1446 int dst_x, int dst_y, int width, int height) {
1447 SIMD_ALIGNED(int8 matrix_argb[16]);
1448 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1449 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 ||
1450 dst_x < 0 || dst_y < 0) {
1451 return -1;
1452 }
1453
1454 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix.
1455 matrix_argb[0] = matrix_rgb[0] / 2;
1456 matrix_argb[1] = matrix_rgb[1] / 2;
1457 matrix_argb[2] = matrix_rgb[2] / 2;
1458 matrix_argb[3] = matrix_rgb[3] / 2;
1459 matrix_argb[4] = matrix_rgb[4] / 2;
1460 matrix_argb[5] = matrix_rgb[5] / 2;
1461 matrix_argb[6] = matrix_rgb[6] / 2;
1462 matrix_argb[7] = matrix_rgb[7] / 2;
1463 matrix_argb[8] = matrix_rgb[8] / 2;
1464 matrix_argb[9] = matrix_rgb[9] / 2;
1465 matrix_argb[10] = matrix_rgb[10] / 2;
1466 matrix_argb[11] = matrix_rgb[11] / 2;
1467 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0;
1468 matrix_argb[15] = 64; // 1.0
1469
1470 return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb,
1471 dst, dst_stride_argb,
1472 &matrix_argb[0], width, height);
1473 }
1474
1475 // Apply a color table each ARGB pixel.
1476 // Table contains 256 ARGB values.
1477 LIBYUV_API
ARGBColorTable(uint8 * dst_argb,int dst_stride_argb,const uint8 * table_argb,int dst_x,int dst_y,int width,int height)1478 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb,
1479 const uint8* table_argb,
1480 int dst_x, int dst_y, int width, int height) {
1481 int y;
1482 void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1483 int width) = ARGBColorTableRow_C;
1484 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1485 if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1486 dst_x < 0 || dst_y < 0) {
1487 return -1;
1488 }
1489 // Coalesce rows.
1490 if (dst_stride_argb == width * 4) {
1491 width *= height;
1492 height = 1;
1493 dst_stride_argb = 0;
1494 }
1495 #if defined(HAS_ARGBCOLORTABLEROW_X86)
1496 if (TestCpuFlag(kCpuHasX86)) {
1497 ARGBColorTableRow = ARGBColorTableRow_X86;
1498 }
1499 #endif
1500 for (y = 0; y < height; ++y) {
1501 ARGBColorTableRow(dst, table_argb, width);
1502 dst += dst_stride_argb;
1503 }
1504 return 0;
1505 }
1506
1507 // Apply a color table each ARGB pixel but preserve destination alpha.
1508 // Table contains 256 ARGB values.
1509 LIBYUV_API
RGBColorTable(uint8 * dst_argb,int dst_stride_argb,const uint8 * table_argb,int dst_x,int dst_y,int width,int height)1510 int RGBColorTable(uint8* dst_argb, int dst_stride_argb,
1511 const uint8* table_argb,
1512 int dst_x, int dst_y, int width, int height) {
1513 int y;
1514 void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb,
1515 int width) = RGBColorTableRow_C;
1516 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1517 if (!dst_argb || !table_argb || width <= 0 || height <= 0 ||
1518 dst_x < 0 || dst_y < 0) {
1519 return -1;
1520 }
1521 // Coalesce rows.
1522 if (dst_stride_argb == width * 4) {
1523 width *= height;
1524 height = 1;
1525 dst_stride_argb = 0;
1526 }
1527 #if defined(HAS_RGBCOLORTABLEROW_X86)
1528 if (TestCpuFlag(kCpuHasX86)) {
1529 RGBColorTableRow = RGBColorTableRow_X86;
1530 }
1531 #endif
1532 for (y = 0; y < height; ++y) {
1533 RGBColorTableRow(dst, table_argb, width);
1534 dst += dst_stride_argb;
1535 }
1536 return 0;
1537 }
1538
1539 // ARGBQuantize is used to posterize art.
1540 // e.g. rgb / qvalue * qvalue + qvalue / 2
1541 // But the low levels implement efficiently with 3 parameters, and could be
1542 // used for other high level operations.
1543 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
1544 // where scale is 1 / interval_size as a fixed point value.
1545 // The divide is replaces with a multiply by reciprocal fixed point multiply.
1546 // Caveat - although SSE2 saturates, the C function does not and should be used
1547 // with care if doing anything but quantization.
1548 LIBYUV_API
ARGBQuantize(uint8 * dst_argb,int dst_stride_argb,int scale,int interval_size,int interval_offset,int dst_x,int dst_y,int width,int height)1549 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb,
1550 int scale, int interval_size, int interval_offset,
1551 int dst_x, int dst_y, int width, int height) {
1552 int y;
1553 void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size,
1554 int interval_offset, int width) = ARGBQuantizeRow_C;
1555 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4;
1556 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 ||
1557 interval_size < 1 || interval_size > 255) {
1558 return -1;
1559 }
1560 // Coalesce rows.
1561 if (dst_stride_argb == width * 4) {
1562 width *= height;
1563 height = 1;
1564 dst_stride_argb = 0;
1565 }
1566 #if defined(HAS_ARGBQUANTIZEROW_SSE2)
1567 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
1568 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1569 ARGBQuantizeRow = ARGBQuantizeRow_SSE2;
1570 }
1571 #elif defined(HAS_ARGBQUANTIZEROW_NEON)
1572 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1573 ARGBQuantizeRow = ARGBQuantizeRow_NEON;
1574 }
1575 #endif
1576 for (y = 0; y < height; ++y) {
1577 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width);
1578 dst += dst_stride_argb;
1579 }
1580 return 0;
1581 }
1582
1583 // Computes table of cumulative sum for image where the value is the sum
1584 // of all values above and to the left of the entry. Used by ARGBBlur.
1585 LIBYUV_API
ARGBComputeCumulativeSum(const uint8 * src_argb,int src_stride_argb,int32 * dst_cumsum,int dst_stride32_cumsum,int width,int height)1586 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb,
1587 int32* dst_cumsum, int dst_stride32_cumsum,
1588 int width, int height) {
1589 int y;
1590 void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum,
1591 const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
1592 int32* previous_cumsum = dst_cumsum;
1593 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) {
1594 return -1;
1595 }
1596 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
1597 if (TestCpuFlag(kCpuHasSSE2)) {
1598 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
1599 }
1600 #endif
1601 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel.
1602 for (y = 0; y < height; ++y) {
1603 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width);
1604 previous_cumsum = dst_cumsum;
1605 dst_cumsum += dst_stride32_cumsum;
1606 src_argb += src_stride_argb;
1607 }
1608 return 0;
1609 }
1610
1611 // Blur ARGB image.
1612 // Caller should allocate CumulativeSum table of width * height * 16 bytes
1613 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory
1614 // as the buffer is treated as circular.
1615 LIBYUV_API
ARGBBlur(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int32 * dst_cumsum,int dst_stride32_cumsum,int width,int height,int radius)1616 int ARGBBlur(const uint8* src_argb, int src_stride_argb,
1617 uint8* dst_argb, int dst_stride_argb,
1618 int32* dst_cumsum, int dst_stride32_cumsum,
1619 int width, int height, int radius) {
1620 int y;
1621 void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum,
1622 const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C;
1623 void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft,
1624 int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C;
1625 int32* cumsum_bot_row;
1626 int32* max_cumsum_bot_row;
1627 int32* cumsum_top_row;
1628
1629 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1630 return -1;
1631 }
1632 if (height < 0) {
1633 height = -height;
1634 src_argb = src_argb + (height - 1) * src_stride_argb;
1635 src_stride_argb = -src_stride_argb;
1636 }
1637 if (radius > height) {
1638 radius = height;
1639 }
1640 if (radius > (width / 2 - 1)) {
1641 radius = width / 2 - 1;
1642 }
1643 if (radius <= 0) {
1644 return -1;
1645 }
1646 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2)
1647 if (TestCpuFlag(kCpuHasSSE2)) {
1648 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2;
1649 CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2;
1650 }
1651 #endif
1652 // Compute enough CumulativeSum for first row to be blurred. After this
1653 // one row of CumulativeSum is updated at a time.
1654 ARGBComputeCumulativeSum(src_argb, src_stride_argb,
1655 dst_cumsum, dst_stride32_cumsum,
1656 width, radius);
1657
1658 src_argb = src_argb + radius * src_stride_argb;
1659 cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum];
1660
1661 max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum];
1662 cumsum_top_row = &dst_cumsum[0];
1663
1664 for (y = 0; y < height; ++y) {
1665 int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0;
1666 int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1);
1667 int area = radius * (bot_y - top_y);
1668 int boxwidth = radius * 4;
1669 int x;
1670 int n;
1671
1672 // Increment cumsum_top_row pointer with circular buffer wrap around.
1673 if (top_y) {
1674 cumsum_top_row += dst_stride32_cumsum;
1675 if (cumsum_top_row >= max_cumsum_bot_row) {
1676 cumsum_top_row = dst_cumsum;
1677 }
1678 }
1679 // Increment cumsum_bot_row pointer with circular buffer wrap around and
1680 // then fill in a row of CumulativeSum.
1681 if ((y + radius) < height) {
1682 const int32* prev_cumsum_bot_row = cumsum_bot_row;
1683 cumsum_bot_row += dst_stride32_cumsum;
1684 if (cumsum_bot_row >= max_cumsum_bot_row) {
1685 cumsum_bot_row = dst_cumsum;
1686 }
1687 ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row,
1688 width);
1689 src_argb += src_stride_argb;
1690 }
1691
1692 // Left clipped.
1693 for (x = 0; x < radius + 1; ++x) {
1694 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
1695 boxwidth, area, &dst_argb[x * 4], 1);
1696 area += (bot_y - top_y);
1697 boxwidth += 4;
1698 }
1699
1700 // Middle unclipped.
1701 n = (width - 1) - radius - x + 1;
1702 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row,
1703 boxwidth, area, &dst_argb[x * 4], n);
1704
1705 // Right clipped.
1706 for (x += n; x <= width - 1; ++x) {
1707 area -= (bot_y - top_y);
1708 boxwidth -= 4;
1709 CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4,
1710 cumsum_bot_row + (x - radius - 1) * 4,
1711 boxwidth, area, &dst_argb[x * 4], 1);
1712 }
1713 dst_argb += dst_stride_argb;
1714 }
1715 return 0;
1716 }
1717
1718 // Multiply ARGB image by a specified ARGB value.
1719 LIBYUV_API
ARGBShade(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height,uint32 value)1720 int ARGBShade(const uint8* src_argb, int src_stride_argb,
1721 uint8* dst_argb, int dst_stride_argb,
1722 int width, int height, uint32 value) {
1723 int y;
1724 void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb,
1725 int width, uint32 value) = ARGBShadeRow_C;
1726 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) {
1727 return -1;
1728 }
1729 if (height < 0) {
1730 height = -height;
1731 src_argb = src_argb + (height - 1) * src_stride_argb;
1732 src_stride_argb = -src_stride_argb;
1733 }
1734 // Coalesce rows.
1735 if (src_stride_argb == width * 4 &&
1736 dst_stride_argb == width * 4) {
1737 width *= height;
1738 height = 1;
1739 src_stride_argb = dst_stride_argb = 0;
1740 }
1741 #if defined(HAS_ARGBSHADEROW_SSE2)
1742 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) &&
1743 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
1744 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1745 ARGBShadeRow = ARGBShadeRow_SSE2;
1746 }
1747 #elif defined(HAS_ARGBSHADEROW_NEON)
1748 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
1749 ARGBShadeRow = ARGBShadeRow_NEON;
1750 }
1751 #endif
1752
1753 for (y = 0; y < height; ++y) {
1754 ARGBShadeRow(src_argb, dst_argb, width, value);
1755 src_argb += src_stride_argb;
1756 dst_argb += dst_stride_argb;
1757 }
1758 return 0;
1759 }
1760
1761 // Interpolate 2 ARGB images by specified amount (0 to 255).
1762 LIBYUV_API
ARGBInterpolate(const uint8 * src_argb0,int src_stride_argb0,const uint8 * src_argb1,int src_stride_argb1,uint8 * dst_argb,int dst_stride_argb,int width,int height,int interpolation)1763 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
1764 const uint8* src_argb1, int src_stride_argb1,
1765 uint8* dst_argb, int dst_stride_argb,
1766 int width, int height, int interpolation) {
1767 int y;
1768 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
1769 ptrdiff_t src_stride, int dst_width,
1770 int source_y_fraction) = InterpolateRow_C;
1771 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
1772 return -1;
1773 }
1774 // Negative height means invert the image.
1775 if (height < 0) {
1776 height = -height;
1777 dst_argb = dst_argb + (height - 1) * dst_stride_argb;
1778 dst_stride_argb = -dst_stride_argb;
1779 }
1780 // Coalesce rows.
1781 if (src_stride_argb0 == width * 4 &&
1782 src_stride_argb1 == width * 4 &&
1783 dst_stride_argb == width * 4) {
1784 width *= height;
1785 height = 1;
1786 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
1787 }
1788 #if defined(HAS_INTERPOLATEROW_SSE2)
1789 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1790 InterpolateRow = InterpolateRow_Any_SSE2;
1791 if (IS_ALIGNED(width, 4)) {
1792 InterpolateRow = InterpolateRow_Unaligned_SSE2;
1793 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
1794 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
1795 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1796 InterpolateRow = InterpolateRow_SSE2;
1797 }
1798 }
1799 }
1800 #endif
1801 #if defined(HAS_INTERPOLATEROW_SSSE3)
1802 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) {
1803 InterpolateRow = InterpolateRow_Any_SSSE3;
1804 if (IS_ALIGNED(width, 4)) {
1805 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
1806 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) &&
1807 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) &&
1808 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1809 InterpolateRow = InterpolateRow_SSSE3;
1810 }
1811 }
1812 }
1813 #endif
1814 #if defined(HAS_INTERPOLATEROW_AVX2)
1815 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) {
1816 InterpolateRow = InterpolateRow_Any_AVX2;
1817 if (IS_ALIGNED(width, 8)) {
1818 InterpolateRow = InterpolateRow_AVX2;
1819 }
1820 }
1821 #endif
1822 #if defined(HAS_INTERPOLATEROW_NEON)
1823 if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1824 InterpolateRow = InterpolateRow_Any_NEON;
1825 if (IS_ALIGNED(width, 4)) {
1826 InterpolateRow = InterpolateRow_NEON;
1827 }
1828 }
1829 #endif
1830 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
1831 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 &&
1832 IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) &&
1833 IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) &&
1834 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
1835 ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2;
1836 }
1837 #endif
1838
1839 for (y = 0; y < height; ++y) {
1840 InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0,
1841 width * 4, interpolation);
1842 src_argb0 += src_stride_argb0;
1843 src_argb1 += src_stride_argb1;
1844 dst_argb += dst_stride_argb;
1845 }
1846 return 0;
1847 }
1848
1849 // Shuffle ARGB channel order. e.g. BGRA to ARGB.
1850 LIBYUV_API
ARGBShuffle(const uint8 * src_bgra,int src_stride_bgra,uint8 * dst_argb,int dst_stride_argb,const uint8 * shuffler,int width,int height)1851 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra,
1852 uint8* dst_argb, int dst_stride_argb,
1853 const uint8* shuffler, int width, int height) {
1854 int y;
1855 void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb,
1856 const uint8* shuffler, int pix) = ARGBShuffleRow_C;
1857 if (!src_bgra || !dst_argb ||
1858 width <= 0 || height == 0) {
1859 return -1;
1860 }
1861 // Negative height means invert the image.
1862 if (height < 0) {
1863 height = -height;
1864 src_bgra = src_bgra + (height - 1) * src_stride_bgra;
1865 src_stride_bgra = -src_stride_bgra;
1866 }
1867 // Coalesce rows.
1868 if (src_stride_bgra == width * 4 &&
1869 dst_stride_argb == width * 4) {
1870 width *= height;
1871 height = 1;
1872 src_stride_bgra = dst_stride_argb = 0;
1873 }
1874 #if defined(HAS_ARGBSHUFFLEROW_SSE2)
1875 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) {
1876 ARGBShuffleRow = ARGBShuffleRow_Any_SSE2;
1877 if (IS_ALIGNED(width, 4)) {
1878 ARGBShuffleRow = ARGBShuffleRow_SSE2;
1879 }
1880 }
1881 #endif
1882 #if defined(HAS_ARGBSHUFFLEROW_SSSE3)
1883 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
1884 ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3;
1885 if (IS_ALIGNED(width, 8)) {
1886 ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3;
1887 if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) &&
1888 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
1889 ARGBShuffleRow = ARGBShuffleRow_SSSE3;
1890 }
1891 }
1892 }
1893 #endif
1894 #if defined(HAS_ARGBSHUFFLEROW_AVX2)
1895 if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
1896 ARGBShuffleRow = ARGBShuffleRow_Any_AVX2;
1897 if (IS_ALIGNED(width, 16)) {
1898 ARGBShuffleRow = ARGBShuffleRow_AVX2;
1899 }
1900 }
1901 #endif
1902 #if defined(HAS_ARGBSHUFFLEROW_NEON)
1903 if (TestCpuFlag(kCpuHasNEON) && width >= 4) {
1904 ARGBShuffleRow = ARGBShuffleRow_Any_NEON;
1905 if (IS_ALIGNED(width, 4)) {
1906 ARGBShuffleRow = ARGBShuffleRow_NEON;
1907 }
1908 }
1909 #endif
1910
1911 for (y = 0; y < height; ++y) {
1912 ARGBShuffleRow(src_bgra, dst_argb, shuffler, width);
1913 src_bgra += src_stride_bgra;
1914 dst_argb += dst_stride_argb;
1915 }
1916 return 0;
1917 }
1918
1919 // Sobel ARGB effect.
ARGBSobelize(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height,void (* SobelRow)(const uint8 * src_sobelx,const uint8 * src_sobely,uint8 * dst,int width))1920 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb,
1921 uint8* dst_argb, int dst_stride_argb,
1922 int width, int height,
1923 void (*SobelRow)(const uint8* src_sobelx,
1924 const uint8* src_sobely,
1925 uint8* dst, int width)) {
1926 int y;
1927 void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
1928 uint32 selector, int pix) = ARGBToBayerGGRow_C;
1929 void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1,
1930 uint8* dst_sobely, int width) = SobelYRow_C;
1931 void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1,
1932 const uint8* src_y2, uint8* dst_sobely, int width) =
1933 SobelXRow_C;
1934 const int kEdge = 16; // Extra pixels at start of row for extrude/align.
1935 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
1936 return -1;
1937 }
1938 // Negative height means invert the image.
1939 if (height < 0) {
1940 height = -height;
1941 src_argb = src_argb + (height - 1) * src_stride_argb;
1942 src_stride_argb = -src_stride_argb;
1943 }
1944 // ARGBToBayer used to select G channel from ARGB.
1945 #if defined(HAS_ARGBTOBAYERGGROW_SSE2)
1946 if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
1947 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
1948 ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2;
1949 if (IS_ALIGNED(width, 8)) {
1950 ARGBToBayerRow = ARGBToBayerGGRow_SSE2;
1951 }
1952 }
1953 #endif
1954 #if defined(HAS_ARGBTOBAYERROW_SSSE3)
1955 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
1956 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
1957 ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
1958 if (IS_ALIGNED(width, 8)) {
1959 ARGBToBayerRow = ARGBToBayerRow_SSSE3;
1960 }
1961 }
1962 #endif
1963 #if defined(HAS_ARGBTOBAYERGGROW_NEON)
1964 if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
1965 ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON;
1966 if (IS_ALIGNED(width, 8)) {
1967 ARGBToBayerRow = ARGBToBayerGGRow_NEON;
1968 }
1969 }
1970 #endif
1971 #if defined(HAS_SOBELYROW_SSE2)
1972 if (TestCpuFlag(kCpuHasSSE2)) {
1973 SobelYRow = SobelYRow_SSE2;
1974 }
1975 #endif
1976 #if defined(HAS_SOBELYROW_NEON)
1977 if (TestCpuFlag(kCpuHasNEON)) {
1978 SobelYRow = SobelYRow_NEON;
1979 }
1980 #endif
1981 #if defined(HAS_SOBELXROW_SSE2)
1982 if (TestCpuFlag(kCpuHasSSE2)) {
1983 SobelXRow = SobelXRow_SSE2;
1984 }
1985 #endif
1986 #if defined(HAS_SOBELXROW_NEON)
1987 if (TestCpuFlag(kCpuHasNEON)) {
1988 SobelXRow = SobelXRow_NEON;
1989 }
1990 #endif
1991 {
1992 // 3 rows with edges before/after.
1993 const int kRowSize = (width + kEdge + 15) & ~15;
1994 align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge));
1995 uint8* row_sobelx = rows;
1996 uint8* row_sobely = rows + kRowSize;
1997 uint8* row_y = rows + kRowSize * 2;
1998
1999 // Convert first row.
2000 uint8* row_y0 = row_y + kEdge;
2001 uint8* row_y1 = row_y0 + kRowSize;
2002 uint8* row_y2 = row_y1 + kRowSize;
2003 ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width);
2004 row_y0[-1] = row_y0[0];
2005 memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind.
2006 ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width);
2007 row_y1[-1] = row_y1[0];
2008 memset(row_y1 + width, row_y1[width - 1], 16);
2009 memset(row_y2 + width, 0, 16);
2010
2011 for (y = 0; y < height; ++y) {
2012 // Convert next row of ARGB to Y.
2013 if (y < (height - 1)) {
2014 src_argb += src_stride_argb;
2015 }
2016 ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width);
2017 row_y2[-1] = row_y2[0];
2018 row_y2[width] = row_y2[width - 1];
2019
2020 SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width);
2021 SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width);
2022 SobelRow(row_sobelx, row_sobely, dst_argb, width);
2023
2024 // Cycle thru circular queue of 3 row_y buffers.
2025 {
2026 uint8* row_yt = row_y0;
2027 row_y0 = row_y1;
2028 row_y1 = row_y2;
2029 row_y2 = row_yt;
2030 }
2031
2032 dst_argb += dst_stride_argb;
2033 }
2034 free_aligned_buffer_64(rows);
2035 }
2036 return 0;
2037 }
2038
2039 // Sobel ARGB effect.
2040 LIBYUV_API
ARGBSobel(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2041 int ARGBSobel(const uint8* src_argb, int src_stride_argb,
2042 uint8* dst_argb, int dst_stride_argb,
2043 int width, int height) {
2044 void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely,
2045 uint8* dst_argb, int width) = SobelRow_C;
2046 #if defined(HAS_SOBELROW_SSE2)
2047 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2048 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
2049 SobelRow = SobelRow_SSE2;
2050 }
2051 #endif
2052 #if defined(HAS_SOBELROW_NEON)
2053 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2054 SobelRow = SobelRow_NEON;
2055 }
2056 #endif
2057 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2058 width, height, SobelRow);
2059 }
2060
2061 // Sobel ARGB effect with planar output.
2062 LIBYUV_API
ARGBSobelToPlane(const uint8 * src_argb,int src_stride_argb,uint8 * dst_y,int dst_stride_y,int width,int height)2063 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb,
2064 uint8* dst_y, int dst_stride_y,
2065 int width, int height) {
2066 void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely,
2067 uint8* dst_, int width) = SobelToPlaneRow_C;
2068 #if defined(HAS_SOBELTOPLANEROW_SSE2)
2069 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2070 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
2071 SobelToPlaneRow = SobelToPlaneRow_SSE2;
2072 }
2073 #endif
2074 #if defined(HAS_SOBELTOPLANEROW_NEON)
2075 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
2076 SobelToPlaneRow = SobelToPlaneRow_NEON;
2077 }
2078 #endif
2079 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y,
2080 width, height, SobelToPlaneRow);
2081 }
2082
2083 // SobelXY ARGB effect.
2084 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel.
2085 LIBYUV_API
ARGBSobelXY(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2086 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb,
2087 uint8* dst_argb, int dst_stride_argb,
2088 int width, int height) {
2089 void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely,
2090 uint8* dst_argb, int width) = SobelXYRow_C;
2091 #if defined(HAS_SOBELXYROW_SSE2)
2092 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
2093 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
2094 SobelXYRow = SobelXYRow_SSE2;
2095 }
2096 #endif
2097 #if defined(HAS_SOBELXYROW_NEON)
2098 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
2099 SobelXYRow = SobelXYRow_NEON;
2100 }
2101 #endif
2102 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
2103 width, height, SobelXYRow);
2104 }
2105
2106 // Apply a 4x4 polynomial to each ARGB pixel.
2107 LIBYUV_API
ARGBPolynomial(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,const float * poly,int width,int height)2108 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb,
2109 uint8* dst_argb, int dst_stride_argb,
2110 const float* poly,
2111 int width, int height) {
2112 int y;
2113 void (*ARGBPolynomialRow)(const uint8* src_argb,
2114 uint8* dst_argb, const float* poly,
2115 int width) = ARGBPolynomialRow_C;
2116 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) {
2117 return -1;
2118 }
2119 // Negative height means invert the image.
2120 if (height < 0) {
2121 height = -height;
2122 src_argb = src_argb + (height - 1) * src_stride_argb;
2123 src_stride_argb = -src_stride_argb;
2124 }
2125 // Coalesce rows.
2126 if (src_stride_argb == width * 4 &&
2127 dst_stride_argb == width * 4) {
2128 width *= height;
2129 height = 1;
2130 src_stride_argb = dst_stride_argb = 0;
2131 }
2132 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2)
2133 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) {
2134 ARGBPolynomialRow = ARGBPolynomialRow_SSE2;
2135 }
2136 #endif
2137 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2)
2138 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) &&
2139 IS_ALIGNED(width, 2)) {
2140 ARGBPolynomialRow = ARGBPolynomialRow_AVX2;
2141 }
2142 #endif
2143
2144 for (y = 0; y < height; ++y) {
2145 ARGBPolynomialRow(src_argb, dst_argb, poly, width);
2146 src_argb += src_stride_argb;
2147 dst_argb += dst_stride_argb;
2148 }
2149 return 0;
2150 }
2151
2152 // Apply a lumacolortable to each ARGB pixel.
2153 LIBYUV_API
ARGBLumaColorTable(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,const uint8 * luma,int width,int height)2154 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb,
2155 uint8* dst_argb, int dst_stride_argb,
2156 const uint8* luma,
2157 int width, int height) {
2158 int y;
2159 void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb,
2160 int width, const uint8* luma, const uint32 lumacoeff) =
2161 ARGBLumaColorTableRow_C;
2162 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) {
2163 return -1;
2164 }
2165 // Negative height means invert the image.
2166 if (height < 0) {
2167 height = -height;
2168 src_argb = src_argb + (height - 1) * src_stride_argb;
2169 src_stride_argb = -src_stride_argb;
2170 }
2171 // Coalesce rows.
2172 if (src_stride_argb == width * 4 &&
2173 dst_stride_argb == width * 4) {
2174 width *= height;
2175 height = 1;
2176 src_stride_argb = dst_stride_argb = 0;
2177 }
2178 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3)
2179 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) {
2180 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3;
2181 }
2182 #endif
2183
2184 for (y = 0; y < height; ++y) {
2185 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f);
2186 src_argb += src_stride_argb;
2187 dst_argb += dst_stride_argb;
2188 }
2189 return 0;
2190 }
2191
2192 // Copy Alpha from one ARGB image to another.
2193 LIBYUV_API
ARGBCopyAlpha(const uint8 * src_argb,int src_stride_argb,uint8 * dst_argb,int dst_stride_argb,int width,int height)2194 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
2195 uint8* dst_argb, int dst_stride_argb,
2196 int width, int height) {
2197 int y;
2198 void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) =
2199 ARGBCopyAlphaRow_C;
2200 if (!src_argb || !dst_argb || width <= 0 || height == 0) {
2201 return -1;
2202 }
2203 // Negative height means invert the image.
2204 if (height < 0) {
2205 height = -height;
2206 src_argb = src_argb + (height - 1) * src_stride_argb;
2207 src_stride_argb = -src_stride_argb;
2208 }
2209 // Coalesce rows.
2210 if (src_stride_argb == width * 4 &&
2211 dst_stride_argb == width * 4) {
2212 width *= height;
2213 height = 1;
2214 src_stride_argb = dst_stride_argb = 0;
2215 }
2216 #if defined(HAS_ARGBCOPYALPHAROW_SSE2)
2217 if (TestCpuFlag(kCpuHasSSE2) &&
2218 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) &&
2219 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
2220 IS_ALIGNED(width, 8)) {
2221 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2;
2222 }
2223 #endif
2224 #if defined(HAS_ARGBCOPYALPHAROW_AVX2)
2225 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2226 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2;
2227 }
2228 #endif
2229
2230 for (y = 0; y < height; ++y) {
2231 ARGBCopyAlphaRow(src_argb, dst_argb, width);
2232 src_argb += src_stride_argb;
2233 dst_argb += dst_stride_argb;
2234 }
2235 return 0;
2236 }
2237
2238 // Copy a planar Y channel to the alpha channel of a destination ARGB image.
2239 LIBYUV_API
ARGBCopyYToAlpha(const uint8 * src_y,int src_stride_y,uint8 * dst_argb,int dst_stride_argb,int width,int height)2240 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
2241 uint8* dst_argb, int dst_stride_argb,
2242 int width, int height) {
2243 int y;
2244 void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) =
2245 ARGBCopyYToAlphaRow_C;
2246 if (!src_y || !dst_argb || width <= 0 || height == 0) {
2247 return -1;
2248 }
2249 // Negative height means invert the image.
2250 if (height < 0) {
2251 height = -height;
2252 src_y = src_y + (height - 1) * src_stride_y;
2253 src_stride_y = -src_stride_y;
2254 }
2255 // Coalesce rows.
2256 if (src_stride_y == width &&
2257 dst_stride_argb == width * 4) {
2258 width *= height;
2259 height = 1;
2260 src_stride_y = dst_stride_argb = 0;
2261 }
2262 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2)
2263 if (TestCpuFlag(kCpuHasSSE2) &&
2264 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
2265 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) &&
2266 IS_ALIGNED(width, 8)) {
2267 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2;
2268 }
2269 #endif
2270 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2)
2271 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) {
2272 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2;
2273 }
2274 #endif
2275
2276 for (y = 0; y < height; ++y) {
2277 ARGBCopyYToAlphaRow(src_y, dst_argb, width);
2278 src_y += src_stride_y;
2279 dst_argb += dst_stride_argb;
2280 }
2281 return 0;
2282 }
2283
2284 #ifdef __cplusplus
2285 } // extern "C"
2286 } // namespace libyuv
2287 #endif
2288