1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of |v|; non-negative values are returned unchanged.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // ScaleARGB ARGB, 1/2
31 // This is an optimized version for scaling down a ARGB to 1/2 of
32 // its original size.
ScaleARGBDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)33 static void ScaleARGBDown2(int src_width,
34 int src_height,
35 int dst_width,
36 int dst_height,
37 int src_stride,
38 int dst_stride,
39 const uint8* src_argb,
40 uint8* dst_argb,
41 int x,
42 int dx,
43 int y,
44 int dy,
45 enum FilterMode filtering) {
46 int j;
47 int row_stride = src_stride * (dy >> 16);
48 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
49 uint8* dst_argb, int dst_width) =
50 filtering == kFilterNone
51 ? ScaleARGBRowDown2_C
52 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
53 : ScaleARGBRowDown2Box_C);
54 (void)src_width;
55 (void)src_height;
56 (void)dx;
57 assert(dx == 65536 * 2); // Test scale factor of 2.
58 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
59 // Advance to odd row, even column.
60 if (filtering == kFilterBilinear) {
61 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
62 } else {
63 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
64 }
65
66 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
67 if (TestCpuFlag(kCpuHasSSE2)) {
68 ScaleARGBRowDown2 =
69 filtering == kFilterNone
70 ? ScaleARGBRowDown2_Any_SSE2
71 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
72 : ScaleARGBRowDown2Box_Any_SSE2);
73 if (IS_ALIGNED(dst_width, 4)) {
74 ScaleARGBRowDown2 =
75 filtering == kFilterNone
76 ? ScaleARGBRowDown2_SSE2
77 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
78 : ScaleARGBRowDown2Box_SSE2);
79 }
80 }
81 #endif
82 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
83 if (TestCpuFlag(kCpuHasNEON)) {
84 ScaleARGBRowDown2 =
85 filtering == kFilterNone
86 ? ScaleARGBRowDown2_Any_NEON
87 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
88 : ScaleARGBRowDown2Box_Any_NEON);
89 if (IS_ALIGNED(dst_width, 8)) {
90 ScaleARGBRowDown2 =
91 filtering == kFilterNone
92 ? ScaleARGBRowDown2_NEON
93 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
94 : ScaleARGBRowDown2Box_NEON);
95 }
96 }
97 #endif
98 #if defined(HAS_SCALEARGBROWDOWN2_MSA)
99 if (TestCpuFlag(kCpuHasMSA)) {
100 ScaleARGBRowDown2 =
101 filtering == kFilterNone
102 ? ScaleARGBRowDown2_Any_MSA
103 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
104 : ScaleARGBRowDown2Box_Any_MSA);
105 if (IS_ALIGNED(dst_width, 4)) {
106 ScaleARGBRowDown2 =
107 filtering == kFilterNone
108 ? ScaleARGBRowDown2_MSA
109 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
110 : ScaleARGBRowDown2Box_MSA);
111 }
112 }
113 #endif
114
115 if (filtering == kFilterLinear) {
116 src_stride = 0;
117 }
118 for (j = 0; j < dst_height; ++j) {
119 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
120 src_argb += row_stride;
121 dst_argb += dst_stride;
122 }
123 }
124
125 // ScaleARGB ARGB, 1/4
126 // This is an optimized version for scaling down a ARGB to 1/4 of
127 // its original size.
ScaleARGBDown4Box(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy)128 static void ScaleARGBDown4Box(int src_width,
129 int src_height,
130 int dst_width,
131 int dst_height,
132 int src_stride,
133 int dst_stride,
134 const uint8* src_argb,
135 uint8* dst_argb,
136 int x,
137 int dx,
138 int y,
139 int dy) {
140 int j;
141 // Allocate 2 rows of ARGB.
142 const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
143 align_buffer_64(row, kRowSize * 2);
144 int row_stride = src_stride * (dy >> 16);
145 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
146 uint8* dst_argb, int dst_width) =
147 ScaleARGBRowDown2Box_C;
148 // Advance to odd row, even column.
149 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
150 (void)src_width;
151 (void)src_height;
152 (void)dx;
153 assert(dx == 65536 * 4); // Test scale factor of 4.
154 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
155 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
156 if (TestCpuFlag(kCpuHasSSE2)) {
157 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
158 if (IS_ALIGNED(dst_width, 4)) {
159 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
160 }
161 }
162 #endif
163 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
164 if (TestCpuFlag(kCpuHasNEON)) {
165 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
166 if (IS_ALIGNED(dst_width, 8)) {
167 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
168 }
169 }
170 #endif
171
172 for (j = 0; j < dst_height; ++j) {
173 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
174 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
175 dst_width * 2);
176 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
177 src_argb += row_stride;
178 dst_argb += dst_stride;
179 }
180 free_aligned_buffer_64(row);
181 }
182
183 // ScaleARGB ARGB Even
184 // This is an optimized version for scaling down a ARGB to even
185 // multiple of its original size.
ScaleARGBDownEven(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)186 static void ScaleARGBDownEven(int src_width,
187 int src_height,
188 int dst_width,
189 int dst_height,
190 int src_stride,
191 int dst_stride,
192 const uint8* src_argb,
193 uint8* dst_argb,
194 int x,
195 int dx,
196 int y,
197 int dy,
198 enum FilterMode filtering) {
199 int j;
200 int col_step = dx >> 16;
201 int row_stride = (dy >> 16) * src_stride;
202 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
203 int src_step, uint8* dst_argb, int dst_width) =
204 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
205 (void)src_width;
206 (void)src_height;
207 assert(IS_ALIGNED(src_width, 2));
208 assert(IS_ALIGNED(src_height, 2));
209 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
210 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
211 if (TestCpuFlag(kCpuHasSSE2)) {
212 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
213 : ScaleARGBRowDownEven_Any_SSE2;
214 if (IS_ALIGNED(dst_width, 4)) {
215 ScaleARGBRowDownEven =
216 filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
217 }
218 }
219 #endif
220 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
221 if (TestCpuFlag(kCpuHasNEON)) {
222 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
223 : ScaleARGBRowDownEven_Any_NEON;
224 if (IS_ALIGNED(dst_width, 4)) {
225 ScaleARGBRowDownEven =
226 filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
227 }
228 }
229 #endif
230 #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
231 if (TestCpuFlag(kCpuHasMSA)) {
232 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
233 : ScaleARGBRowDownEven_Any_MSA;
234 if (IS_ALIGNED(dst_width, 4)) {
235 ScaleARGBRowDownEven =
236 filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
237 }
238 }
239 #endif
240
241 if (filtering == kFilterLinear) {
242 src_stride = 0;
243 }
244 for (j = 0; j < dst_height; ++j) {
245 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
246 src_argb += row_stride;
247 dst_argb += dst_stride;
248 }
249 }
250
251 // Scale ARGB down with bilinear interpolation.
ScaleARGBBilinearDown(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)252 static void ScaleARGBBilinearDown(int src_width,
253 int src_height,
254 int dst_width,
255 int dst_height,
256 int src_stride,
257 int dst_stride,
258 const uint8* src_argb,
259 uint8* dst_argb,
260 int x,
261 int dx,
262 int y,
263 int dy,
264 enum FilterMode filtering) {
265 int j;
266 void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
267 ptrdiff_t src_stride, int dst_width,
268 int source_y_fraction) = InterpolateRow_C;
269 void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
270 int dst_width, int x, int dx) =
271 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
272 int64 xlast = x + (int64)(dst_width - 1) * dx;
273 int64 xl = (dx >= 0) ? x : xlast;
274 int64 xr = (dx >= 0) ? xlast : x;
275 int clip_src_width;
276 xl = (xl >> 16) & ~3; // Left edge aligned.
277 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
278 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
279 if (xr > src_width) {
280 xr = src_width;
281 }
282 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
283 src_argb += xl * 4;
284 x -= (int)(xl << 16);
285 #if defined(HAS_INTERPOLATEROW_SSSE3)
286 if (TestCpuFlag(kCpuHasSSSE3)) {
287 InterpolateRow = InterpolateRow_Any_SSSE3;
288 if (IS_ALIGNED(clip_src_width, 16)) {
289 InterpolateRow = InterpolateRow_SSSE3;
290 }
291 }
292 #endif
293 #if defined(HAS_INTERPOLATEROW_AVX2)
294 if (TestCpuFlag(kCpuHasAVX2)) {
295 InterpolateRow = InterpolateRow_Any_AVX2;
296 if (IS_ALIGNED(clip_src_width, 32)) {
297 InterpolateRow = InterpolateRow_AVX2;
298 }
299 }
300 #endif
301 #if defined(HAS_INTERPOLATEROW_NEON)
302 if (TestCpuFlag(kCpuHasNEON)) {
303 InterpolateRow = InterpolateRow_Any_NEON;
304 if (IS_ALIGNED(clip_src_width, 16)) {
305 InterpolateRow = InterpolateRow_NEON;
306 }
307 }
308 #endif
309 #if defined(HAS_INTERPOLATEROW_DSPR2)
310 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
311 IS_ALIGNED(src_stride, 4)) {
312 InterpolateRow = InterpolateRow_Any_DSPR2;
313 if (IS_ALIGNED(clip_src_width, 4)) {
314 InterpolateRow = InterpolateRow_DSPR2;
315 }
316 }
317 #endif
318 #if defined(HAS_INTERPOLATEROW_MSA)
319 if (TestCpuFlag(kCpuHasMSA)) {
320 InterpolateRow = InterpolateRow_Any_MSA;
321 if (IS_ALIGNED(clip_src_width, 32)) {
322 InterpolateRow = InterpolateRow_MSA;
323 }
324 }
325 #endif
326 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
327 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
328 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
329 }
330 #endif
331 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
332 if (TestCpuFlag(kCpuHasNEON)) {
333 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
334 if (IS_ALIGNED(dst_width, 4)) {
335 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
336 }
337 }
338 #endif
339 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
340 // Allocate a row of ARGB.
341 {
342 align_buffer_64(row, clip_src_width * 4);
343
344 const int max_y = (src_height - 1) << 16;
345 if (y > max_y) {
346 y = max_y;
347 }
348 for (j = 0; j < dst_height; ++j) {
349 int yi = y >> 16;
350 const uint8* src = src_argb + yi * src_stride;
351 if (filtering == kFilterLinear) {
352 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
353 } else {
354 int yf = (y >> 8) & 255;
355 InterpolateRow(row, src, src_stride, clip_src_width, yf);
356 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
357 }
358 dst_argb += dst_stride;
359 y += dy;
360 if (y > max_y) {
361 y = max_y;
362 }
363 }
364 free_aligned_buffer_64(row);
365 }
366 }
367
368 // Scale ARGB up with bilinear interpolation.
ScaleARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)369 static void ScaleARGBBilinearUp(int src_width,
370 int src_height,
371 int dst_width,
372 int dst_height,
373 int src_stride,
374 int dst_stride,
375 const uint8* src_argb,
376 uint8* dst_argb,
377 int x,
378 int dx,
379 int y,
380 int dy,
381 enum FilterMode filtering) {
382 int j;
383 void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
384 ptrdiff_t src_stride, int dst_width,
385 int source_y_fraction) = InterpolateRow_C;
386 void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
387 int dst_width, int x, int dx) =
388 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
389 const int max_y = (src_height - 1) << 16;
390 #if defined(HAS_INTERPOLATEROW_SSSE3)
391 if (TestCpuFlag(kCpuHasSSSE3)) {
392 InterpolateRow = InterpolateRow_Any_SSSE3;
393 if (IS_ALIGNED(dst_width, 4)) {
394 InterpolateRow = InterpolateRow_SSSE3;
395 }
396 }
397 #endif
398 #if defined(HAS_INTERPOLATEROW_AVX2)
399 if (TestCpuFlag(kCpuHasAVX2)) {
400 InterpolateRow = InterpolateRow_Any_AVX2;
401 if (IS_ALIGNED(dst_width, 8)) {
402 InterpolateRow = InterpolateRow_AVX2;
403 }
404 }
405 #endif
406 #if defined(HAS_INTERPOLATEROW_NEON)
407 if (TestCpuFlag(kCpuHasNEON)) {
408 InterpolateRow = InterpolateRow_Any_NEON;
409 if (IS_ALIGNED(dst_width, 4)) {
410 InterpolateRow = InterpolateRow_NEON;
411 }
412 }
413 #endif
414 #if defined(HAS_INTERPOLATEROW_DSPR2)
415 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
416 IS_ALIGNED(dst_stride, 4)) {
417 InterpolateRow = InterpolateRow_DSPR2;
418 }
419 #endif
420 #if defined(HAS_INTERPOLATEROW_MSA)
421 if (TestCpuFlag(kCpuHasMSA)) {
422 InterpolateRow = InterpolateRow_Any_MSA;
423 if (IS_ALIGNED(dst_width, 8)) {
424 InterpolateRow = InterpolateRow_MSA;
425 }
426 }
427 #endif
428 if (src_width >= 32768) {
429 ScaleARGBFilterCols =
430 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
431 }
432 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
433 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
434 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
435 }
436 #endif
437 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
438 if (filtering && TestCpuFlag(kCpuHasNEON)) {
439 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
440 if (IS_ALIGNED(dst_width, 4)) {
441 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
442 }
443 }
444 #endif
445 #if defined(HAS_SCALEARGBCOLS_SSE2)
446 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
447 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
448 }
449 #endif
450 #if defined(HAS_SCALEARGBCOLS_NEON)
451 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
452 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
453 if (IS_ALIGNED(dst_width, 8)) {
454 ScaleARGBFilterCols = ScaleARGBCols_NEON;
455 }
456 }
457 #endif
458 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
459 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
460 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
461 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
462 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
463 }
464 #endif
465 }
466
467 if (y > max_y) {
468 y = max_y;
469 }
470
471 {
472 int yi = y >> 16;
473 const uint8* src = src_argb + yi * src_stride;
474
475 // Allocate 2 rows of ARGB.
476 const int kRowSize = (dst_width * 4 + 31) & ~31;
477 align_buffer_64(row, kRowSize * 2);
478
479 uint8* rowptr = row;
480 int rowstride = kRowSize;
481 int lasty = yi;
482
483 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
484 if (src_height > 1) {
485 src += src_stride;
486 }
487 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
488 src += src_stride;
489
490 for (j = 0; j < dst_height; ++j) {
491 yi = y >> 16;
492 if (yi != lasty) {
493 if (y > max_y) {
494 y = max_y;
495 yi = y >> 16;
496 src = src_argb + yi * src_stride;
497 }
498 if (yi != lasty) {
499 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
500 rowptr += rowstride;
501 rowstride = -rowstride;
502 lasty = yi;
503 src += src_stride;
504 }
505 }
506 if (filtering == kFilterLinear) {
507 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
508 } else {
509 int yf = (y >> 8) & 255;
510 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
511 }
512 dst_argb += dst_stride;
513 y += dy;
514 }
515 free_aligned_buffer_64(row);
516 }
517 }
518
519 #ifdef YUVSCALEUP
520 // Scale YUV to ARGB up with bilinear interpolation.
ScaleYUVToARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride_y,int src_stride_u,int src_stride_v,int dst_stride_argb,const uint8 * src_y,const uint8 * src_u,const uint8 * src_v,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)521 static void ScaleYUVToARGBBilinearUp(int src_width,
522 int src_height,
523 int dst_width,
524 int dst_height,
525 int src_stride_y,
526 int src_stride_u,
527 int src_stride_v,
528 int dst_stride_argb,
529 const uint8* src_y,
530 const uint8* src_u,
531 const uint8* src_v,
532 uint8* dst_argb,
533 int x,
534 int dx,
535 int y,
536 int dy,
537 enum FilterMode filtering) {
538 int j;
539 void (*I422ToARGBRow)(const uint8* y_buf, const uint8* u_buf,
540 const uint8* v_buf, uint8* rgb_buf, int width) =
541 I422ToARGBRow_C;
542 #if defined(HAS_I422TOARGBROW_SSSE3)
543 if (TestCpuFlag(kCpuHasSSSE3)) {
544 I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
545 if (IS_ALIGNED(src_width, 8)) {
546 I422ToARGBRow = I422ToARGBRow_SSSE3;
547 }
548 }
549 #endif
550 #if defined(HAS_I422TOARGBROW_AVX2)
551 if (TestCpuFlag(kCpuHasAVX2)) {
552 I422ToARGBRow = I422ToARGBRow_Any_AVX2;
553 if (IS_ALIGNED(src_width, 16)) {
554 I422ToARGBRow = I422ToARGBRow_AVX2;
555 }
556 }
557 #endif
558 #if defined(HAS_I422TOARGBROW_NEON)
559 if (TestCpuFlag(kCpuHasNEON)) {
560 I422ToARGBRow = I422ToARGBRow_Any_NEON;
561 if (IS_ALIGNED(src_width, 8)) {
562 I422ToARGBRow = I422ToARGBRow_NEON;
563 }
564 }
565 #endif
566 #if defined(HAS_I422TOARGBROW_DSPR2)
567 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
568 IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
569 IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
570 IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
571 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
572 I422ToARGBRow = I422ToARGBRow_DSPR2;
573 }
574 #endif
575 #if defined(HAS_I422TOARGBROW_MSA)
576 if (TestCpuFlag(kCpuHasMSA)) {
577 I422ToARGBRow = I422ToARGBRow_Any_MSA;
578 if (IS_ALIGNED(src_width, 8)) {
579 I422ToARGBRow = I422ToARGBRow_MSA;
580 }
581 }
582 #endif
583
584 void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
585 ptrdiff_t src_stride, int dst_width,
586 int source_y_fraction) = InterpolateRow_C;
587 #if defined(HAS_INTERPOLATEROW_SSSE3)
588 if (TestCpuFlag(kCpuHasSSSE3)) {
589 InterpolateRow = InterpolateRow_Any_SSSE3;
590 if (IS_ALIGNED(dst_width, 4)) {
591 InterpolateRow = InterpolateRow_SSSE3;
592 }
593 }
594 #endif
595 #if defined(HAS_INTERPOLATEROW_AVX2)
596 if (TestCpuFlag(kCpuHasAVX2)) {
597 InterpolateRow = InterpolateRow_Any_AVX2;
598 if (IS_ALIGNED(dst_width, 8)) {
599 InterpolateRow = InterpolateRow_AVX2;
600 }
601 }
602 #endif
603 #if defined(HAS_INTERPOLATEROW_NEON)
604 if (TestCpuFlag(kCpuHasNEON)) {
605 InterpolateRow = InterpolateRow_Any_NEON;
606 if (IS_ALIGNED(dst_width, 4)) {
607 InterpolateRow = InterpolateRow_NEON;
608 }
609 }
610 #endif
611 #if defined(HAS_INTERPOLATEROW_DSPR2)
612 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(dst_argb, 4) &&
613 IS_ALIGNED(dst_stride_argb, 4)) {
614 InterpolateRow = InterpolateRow_DSPR2;
615 }
616 #endif
617 #if defined(HAS_INTERPOLATEROW_MSA)
618 if (TestCpuFlag(kCpuHasMSA)) {
619 InterpolateRow = InterpolateRow_Any_MSA;
620 if (IS_ALIGNED(dst_width, 8)) {
621 InterpolateRow = InterpolateRow_MSA;
622 }
623 }
624 #endif
625
626 void (*ScaleARGBFilterCols)(uint8 * dst_argb, const uint8* src_argb,
627 int dst_width, int x, int dx) =
628 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
629 if (src_width >= 32768) {
630 ScaleARGBFilterCols =
631 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
632 }
633 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
634 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
635 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
636 }
637 #endif
638 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
639 if (filtering && TestCpuFlag(kCpuHasNEON)) {
640 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
641 if (IS_ALIGNED(dst_width, 4)) {
642 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
643 }
644 }
645 #endif
646 #if defined(HAS_SCALEARGBCOLS_SSE2)
647 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
648 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
649 }
650 #endif
651 #if defined(HAS_SCALEARGBCOLS_NEON)
652 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
653 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
654 if (IS_ALIGNED(dst_width, 8)) {
655 ScaleARGBFilterCols = ScaleARGBCols_NEON;
656 }
657 }
658 #endif
659 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
660 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
661 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
662 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
663 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
664 }
665 #endif
666 }
667
668 const int max_y = (src_height - 1) << 16;
669 if (y > max_y) {
670 y = max_y;
671 }
672 const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
673 int yi = y >> 16;
674 int uv_yi = yi >> kYShift;
675 const uint8* src_row_y = src_y + yi * src_stride_y;
676 const uint8* src_row_u = src_u + uv_yi * src_stride_u;
677 const uint8* src_row_v = src_v + uv_yi * src_stride_v;
678
679 // Allocate 2 rows of ARGB.
680 const int kRowSize = (dst_width * 4 + 31) & ~31;
681 align_buffer_64(row, kRowSize * 2);
682
683 // Allocate 1 row of ARGB for source conversion.
684 align_buffer_64(argb_row, src_width * 4);
685
686 uint8* rowptr = row;
687 int rowstride = kRowSize;
688 int lasty = yi;
689
690 // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
691 ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
692 if (src_height > 1) {
693 src_row_y += src_stride_y;
694 if (yi & 1) {
695 src_row_u += src_stride_u;
696 src_row_v += src_stride_v;
697 }
698 }
699 ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
700 if (src_height > 2) {
701 src_row_y += src_stride_y;
702 if (!(yi & 1)) {
703 src_row_u += src_stride_u;
704 src_row_v += src_stride_v;
705 }
706 }
707
708 for (j = 0; j < dst_height; ++j) {
709 yi = y >> 16;
710 if (yi != lasty) {
711 if (y > max_y) {
712 y = max_y;
713 yi = y >> 16;
714 uv_yi = yi >> kYShift;
715 src_row_y = src_y + yi * src_stride_y;
716 src_row_u = src_u + uv_yi * src_stride_u;
717 src_row_v = src_v + uv_yi * src_stride_v;
718 }
719 if (yi != lasty) {
720 // TODO(fbarchard): Convert the clipped region of row.
721 I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
722 ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
723 rowptr += rowstride;
724 rowstride = -rowstride;
725 lasty = yi;
726 src_row_y += src_stride_y;
727 if (yi & 1) {
728 src_row_u += src_stride_u;
729 src_row_v += src_stride_v;
730 }
731 }
732 }
733 if (filtering == kFilterLinear) {
734 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
735 } else {
736 int yf = (y >> 8) & 255;
737 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
738 }
739 dst_argb += dst_stride_argb;
740 y += dy;
741 }
742 free_aligned_buffer_64(row);
743 free_aligned_buffer_64(row_argb);
744 }
745 #endif
746
747 // Scale ARGB to/from any dimensions, without interpolation.
748 // Fixed point math is used for performance: The upper 16 bits
749 // of x and dx is the integer part of the source position and
750 // the lower 16 bits are the fixed decimal part.
751
ScaleARGBSimple(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy)752 static void ScaleARGBSimple(int src_width,
753 int src_height,
754 int dst_width,
755 int dst_height,
756 int src_stride,
757 int dst_stride,
758 const uint8* src_argb,
759 uint8* dst_argb,
760 int x,
761 int dx,
762 int y,
763 int dy) {
764 int j;
765 void (*ScaleARGBCols)(uint8 * dst_argb, const uint8* src_argb, int dst_width,
766 int x, int dx) =
767 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
768 (void)src_height;
769 #if defined(HAS_SCALEARGBCOLS_SSE2)
770 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
771 ScaleARGBCols = ScaleARGBCols_SSE2;
772 }
773 #endif
774 #if defined(HAS_SCALEARGBCOLS_NEON)
775 if (TestCpuFlag(kCpuHasNEON)) {
776 ScaleARGBCols = ScaleARGBCols_Any_NEON;
777 if (IS_ALIGNED(dst_width, 8)) {
778 ScaleARGBCols = ScaleARGBCols_NEON;
779 }
780 }
781 #endif
782 if (src_width * 2 == dst_width && x < 0x8000) {
783 ScaleARGBCols = ScaleARGBColsUp2_C;
784 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
785 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
786 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
787 }
788 #endif
789 }
790
791 for (j = 0; j < dst_height; ++j) {
792 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
793 dx);
794 dst_argb += dst_stride;
795 y += dy;
796 }
797 }
798
799 // ScaleARGB a ARGB.
800 // This function in turn calls a scaling function
801 // suitable for handling the desired resolutions.
ScaleARGB(const uint8 * src,int src_stride,int src_width,int src_height,uint8 * dst,int dst_stride,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)802 static void ScaleARGB(const uint8* src,
803 int src_stride,
804 int src_width,
805 int src_height,
806 uint8* dst,
807 int dst_stride,
808 int dst_width,
809 int dst_height,
810 int clip_x,
811 int clip_y,
812 int clip_width,
813 int clip_height,
814 enum FilterMode filtering) {
815 // Initial source x/y coordinate and step values as 16.16 fixed point.
816 int x = 0;
817 int y = 0;
818 int dx = 0;
819 int dy = 0;
820 // ARGB does not support box filter yet, but allow the user to pass it.
821 // Simplify filtering when possible.
822 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
823 filtering);
824
825 // Negative src_height means invert the image.
826 if (src_height < 0) {
827 src_height = -src_height;
828 src = src + (src_height - 1) * src_stride;
829 src_stride = -src_stride;
830 }
831 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
832 &dx, &dy);
833 src_width = Abs(src_width);
834 if (clip_x) {
835 int64 clipf = (int64)(clip_x)*dx;
836 x += (clipf & 0xffff);
837 src += (clipf >> 16) * 4;
838 dst += clip_x * 4;
839 }
840 if (clip_y) {
841 int64 clipf = (int64)(clip_y)*dy;
842 y += (clipf & 0xffff);
843 src += (clipf >> 16) * src_stride;
844 dst += clip_y * dst_stride;
845 }
846
847 // Special case for integer step values.
848 if (((dx | dy) & 0xffff) == 0) {
849 if (!dx || !dy) { // 1 pixel wide and/or tall.
850 filtering = kFilterNone;
851 } else {
852 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
853 if (!(dx & 0x10000) && !(dy & 0x10000)) {
854 if (dx == 0x20000) {
855 // Optimized 1/2 downsample.
856 ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
857 src_stride, dst_stride, src, dst, x, dx, y, dy,
858 filtering);
859 return;
860 }
861 if (dx == 0x40000 && filtering == kFilterBox) {
862 // Optimized 1/4 box downsample.
863 ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
864 src_stride, dst_stride, src, dst, x, dx, y, dy);
865 return;
866 }
867 ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
868 src_stride, dst_stride, src, dst, x, dx, y, dy,
869 filtering);
870 return;
871 }
872 // Optimized odd scale down. ie 3, 5, 7, 9x.
873 if ((dx & 0x10000) && (dy & 0x10000)) {
874 filtering = kFilterNone;
875 if (dx == 0x10000 && dy == 0x10000) {
876 // Straight copy.
877 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
878 dst, dst_stride, clip_width, clip_height);
879 return;
880 }
881 }
882 }
883 }
884 if (dx == 0x10000 && (x & 0xffff) == 0) {
885 // Arbitrary scale vertically, but unscaled vertically.
886 ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
887 dst_stride, src, dst, x, y, dy, 4, filtering);
888 return;
889 }
890 if (filtering && dy < 65536) {
891 ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
892 src_stride, dst_stride, src, dst, x, dx, y, dy,
893 filtering);
894 return;
895 }
896 if (filtering) {
897 ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
898 src_stride, dst_stride, src, dst, x, dx, y, dy,
899 filtering);
900 return;
901 }
902 ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
903 dst_stride, src, dst, x, dx, y, dy);
904 }
905
906 LIBYUV_API
ARGBScaleClip(const uint8 * src_argb,int src_stride_argb,int src_width,int src_height,uint8 * dst_argb,int dst_stride_argb,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)907 int ARGBScaleClip(const uint8* src_argb,
908 int src_stride_argb,
909 int src_width,
910 int src_height,
911 uint8* dst_argb,
912 int dst_stride_argb,
913 int dst_width,
914 int dst_height,
915 int clip_x,
916 int clip_y,
917 int clip_width,
918 int clip_height,
919 enum FilterMode filtering) {
920 if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
921 dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
922 clip_width > 32768 || clip_height > 32768 ||
923 (clip_x + clip_width) > dst_width ||
924 (clip_y + clip_height) > dst_height) {
925 return -1;
926 }
927 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
928 dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
929 clip_height, filtering);
930 return 0;
931 }
932
933 // Scale an ARGB image.
934 LIBYUV_API
ARGBScale(const uint8 * src_argb,int src_stride_argb,int src_width,int src_height,uint8 * dst_argb,int dst_stride_argb,int dst_width,int dst_height,enum FilterMode filtering)935 int ARGBScale(const uint8* src_argb,
936 int src_stride_argb,
937 int src_width,
938 int src_height,
939 uint8* dst_argb,
940 int dst_stride_argb,
941 int dst_width,
942 int dst_height,
943 enum FilterMode filtering) {
944 if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
945 src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
946 return -1;
947 }
948 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
949 dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
950 filtering);
951 return 0;
952 }
953
954 // Scale with YUV conversion to ARGB and clipping.
955 LIBYUV_API
YUVToARGBScaleClip(const uint8 * src_y,int src_stride_y,const uint8 * src_u,int src_stride_u,const uint8 * src_v,int src_stride_v,uint32 src_fourcc,int src_width,int src_height,uint8 * dst_argb,int dst_stride_argb,uint32 dst_fourcc,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)956 int YUVToARGBScaleClip(const uint8* src_y,
957 int src_stride_y,
958 const uint8* src_u,
959 int src_stride_u,
960 const uint8* src_v,
961 int src_stride_v,
962 uint32 src_fourcc,
963 int src_width,
964 int src_height,
965 uint8* dst_argb,
966 int dst_stride_argb,
967 uint32 dst_fourcc,
968 int dst_width,
969 int dst_height,
970 int clip_x,
971 int clip_y,
972 int clip_width,
973 int clip_height,
974 enum FilterMode filtering) {
975 uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
976 int r;
977 (void)src_fourcc; // TODO(fbarchard): implement and/or assert.
978 (void)dst_fourcc;
979 I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
980 argb_buffer, src_width * 4, src_width, src_height);
981
982 r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
983 dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
984 clip_width, clip_height, filtering);
985 free(argb_buffer);
986 return r;
987 }
988
989 #ifdef __cplusplus
990 } // extern "C"
991 } // namespace libyuv
992 #endif
993