1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v. Negating the most negative int overflows, so
// callers must not pass it.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // ScaleARGB ARGB, 1/2
31 // This is an optimized version for scaling down a ARGB to 1/2 of
32 // its original size.
ScaleARGBDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)33 static void ScaleARGBDown2(int src_width,
34 int src_height,
35 int dst_width,
36 int dst_height,
37 int src_stride,
38 int dst_stride,
39 const uint8_t* src_argb,
40 uint8_t* dst_argb,
41 int x,
42 int dx,
43 int y,
44 int dy,
45 enum FilterMode filtering) {
46 int j;
47 int row_stride = src_stride * (dy >> 16);
48 void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
49 uint8_t* dst_argb, int dst_width) =
50 filtering == kFilterNone
51 ? ScaleARGBRowDown2_C
52 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
53 : ScaleARGBRowDown2Box_C);
54 (void)src_width;
55 (void)src_height;
56 (void)dx;
57 assert(dx == 65536 * 2); // Test scale factor of 2.
58 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
59 // Advance to odd row, even column.
60 if (filtering == kFilterBilinear) {
61 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
62 } else {
63 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
64 }
65
66 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
67 if (TestCpuFlag(kCpuHasSSE2)) {
68 ScaleARGBRowDown2 =
69 filtering == kFilterNone
70 ? ScaleARGBRowDown2_Any_SSE2
71 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
72 : ScaleARGBRowDown2Box_Any_SSE2);
73 if (IS_ALIGNED(dst_width, 4)) {
74 ScaleARGBRowDown2 =
75 filtering == kFilterNone
76 ? ScaleARGBRowDown2_SSE2
77 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
78 : ScaleARGBRowDown2Box_SSE2);
79 }
80 }
81 #endif
82 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
83 if (TestCpuFlag(kCpuHasNEON)) {
84 ScaleARGBRowDown2 =
85 filtering == kFilterNone
86 ? ScaleARGBRowDown2_Any_NEON
87 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
88 : ScaleARGBRowDown2Box_Any_NEON);
89 if (IS_ALIGNED(dst_width, 8)) {
90 ScaleARGBRowDown2 =
91 filtering == kFilterNone
92 ? ScaleARGBRowDown2_NEON
93 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
94 : ScaleARGBRowDown2Box_NEON);
95 }
96 }
97 #endif
98 #if defined(HAS_SCALEARGBROWDOWN2_MSA)
99 if (TestCpuFlag(kCpuHasMSA)) {
100 ScaleARGBRowDown2 =
101 filtering == kFilterNone
102 ? ScaleARGBRowDown2_Any_MSA
103 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
104 : ScaleARGBRowDown2Box_Any_MSA);
105 if (IS_ALIGNED(dst_width, 4)) {
106 ScaleARGBRowDown2 =
107 filtering == kFilterNone
108 ? ScaleARGBRowDown2_MSA
109 : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
110 : ScaleARGBRowDown2Box_MSA);
111 }
112 }
113 #endif
114
115 if (filtering == kFilterLinear) {
116 src_stride = 0;
117 }
118 for (j = 0; j < dst_height; ++j) {
119 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
120 src_argb += row_stride;
121 dst_argb += dst_stride;
122 }
123 }
124
125 // ScaleARGB ARGB, 1/4
126 // This is an optimized version for scaling down a ARGB to 1/4 of
127 // its original size.
ScaleARGBDown4Box(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy)128 static void ScaleARGBDown4Box(int src_width,
129 int src_height,
130 int dst_width,
131 int dst_height,
132 int src_stride,
133 int dst_stride,
134 const uint8_t* src_argb,
135 uint8_t* dst_argb,
136 int x,
137 int dx,
138 int y,
139 int dy) {
140 int j;
141 // Allocate 2 rows of ARGB.
142 const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
143 align_buffer_64(row, kRowSize * 2);
144 int row_stride = src_stride * (dy >> 16);
145 void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
146 uint8_t* dst_argb, int dst_width) =
147 ScaleARGBRowDown2Box_C;
148 // Advance to odd row, even column.
149 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
150 (void)src_width;
151 (void)src_height;
152 (void)dx;
153 assert(dx == 65536 * 4); // Test scale factor of 4.
154 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
155 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
156 if (TestCpuFlag(kCpuHasSSE2)) {
157 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
158 if (IS_ALIGNED(dst_width, 4)) {
159 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
160 }
161 }
162 #endif
163 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
164 if (TestCpuFlag(kCpuHasNEON)) {
165 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
166 if (IS_ALIGNED(dst_width, 8)) {
167 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
168 }
169 }
170 #endif
171
172 for (j = 0; j < dst_height; ++j) {
173 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
174 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
175 dst_width * 2);
176 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
177 src_argb += row_stride;
178 dst_argb += dst_stride;
179 }
180 free_aligned_buffer_64(row);
181 }
182
183 // ScaleARGB ARGB Even
184 // This is an optimized version for scaling down a ARGB to even
185 // multiple of its original size.
ScaleARGBDownEven(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)186 static void ScaleARGBDownEven(int src_width,
187 int src_height,
188 int dst_width,
189 int dst_height,
190 int src_stride,
191 int dst_stride,
192 const uint8_t* src_argb,
193 uint8_t* dst_argb,
194 int x,
195 int dx,
196 int y,
197 int dy,
198 enum FilterMode filtering) {
199 int j;
200 int col_step = dx >> 16;
201 int row_stride = (dy >> 16) * src_stride;
202 void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
203 int src_step, uint8_t* dst_argb, int dst_width) =
204 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
205 (void)src_width;
206 (void)src_height;
207 assert(IS_ALIGNED(src_width, 2));
208 assert(IS_ALIGNED(src_height, 2));
209 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
210 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
211 if (TestCpuFlag(kCpuHasSSE2)) {
212 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
213 : ScaleARGBRowDownEven_Any_SSE2;
214 if (IS_ALIGNED(dst_width, 4)) {
215 ScaleARGBRowDownEven =
216 filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
217 }
218 }
219 #endif
220 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
221 if (TestCpuFlag(kCpuHasNEON)) {
222 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
223 : ScaleARGBRowDownEven_Any_NEON;
224 if (IS_ALIGNED(dst_width, 4)) {
225 ScaleARGBRowDownEven =
226 filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
227 }
228 }
229 #endif
230 #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
231 if (TestCpuFlag(kCpuHasMSA)) {
232 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
233 : ScaleARGBRowDownEven_Any_MSA;
234 if (IS_ALIGNED(dst_width, 4)) {
235 ScaleARGBRowDownEven =
236 filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
237 }
238 }
239 #endif
240
241 if (filtering == kFilterLinear) {
242 src_stride = 0;
243 }
244 for (j = 0; j < dst_height; ++j) {
245 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
246 src_argb += row_stride;
247 dst_argb += dst_stride;
248 }
249 }
250
251 // Scale ARGB down with bilinear interpolation.
ScaleARGBBilinearDown(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)252 static void ScaleARGBBilinearDown(int src_width,
253 int src_height,
254 int dst_width,
255 int dst_height,
256 int src_stride,
257 int dst_stride,
258 const uint8_t* src_argb,
259 uint8_t* dst_argb,
260 int x,
261 int dx,
262 int y,
263 int dy,
264 enum FilterMode filtering) {
265 int j;
266 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
267 ptrdiff_t src_stride, int dst_width,
268 int source_y_fraction) = InterpolateRow_C;
269 void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
270 int dst_width, int x, int dx) =
271 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
272 int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
273 int64_t xl = (dx >= 0) ? x : xlast;
274 int64_t xr = (dx >= 0) ? xlast : x;
275 int clip_src_width;
276 xl = (xl >> 16) & ~3; // Left edge aligned.
277 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
278 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
279 if (xr > src_width) {
280 xr = src_width;
281 }
282 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
283 src_argb += xl * 4;
284 x -= (int)(xl << 16);
285 #if defined(HAS_INTERPOLATEROW_SSSE3)
286 if (TestCpuFlag(kCpuHasSSSE3)) {
287 InterpolateRow = InterpolateRow_Any_SSSE3;
288 if (IS_ALIGNED(clip_src_width, 16)) {
289 InterpolateRow = InterpolateRow_SSSE3;
290 }
291 }
292 #endif
293 #if defined(HAS_INTERPOLATEROW_AVX2)
294 if (TestCpuFlag(kCpuHasAVX2)) {
295 InterpolateRow = InterpolateRow_Any_AVX2;
296 if (IS_ALIGNED(clip_src_width, 32)) {
297 InterpolateRow = InterpolateRow_AVX2;
298 }
299 }
300 #endif
301 #if defined(HAS_INTERPOLATEROW_NEON)
302 if (TestCpuFlag(kCpuHasNEON)) {
303 InterpolateRow = InterpolateRow_Any_NEON;
304 if (IS_ALIGNED(clip_src_width, 16)) {
305 InterpolateRow = InterpolateRow_NEON;
306 }
307 }
308 #endif
309 #if defined(HAS_INTERPOLATEROW_MSA)
310 if (TestCpuFlag(kCpuHasMSA)) {
311 InterpolateRow = InterpolateRow_Any_MSA;
312 if (IS_ALIGNED(clip_src_width, 32)) {
313 InterpolateRow = InterpolateRow_MSA;
314 }
315 }
316 #endif
317 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
318 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
319 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
320 }
321 #endif
322 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
323 if (TestCpuFlag(kCpuHasNEON)) {
324 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
325 if (IS_ALIGNED(dst_width, 4)) {
326 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
327 }
328 }
329 #endif
330 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
331 if (TestCpuFlag(kCpuHasMSA)) {
332 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
333 if (IS_ALIGNED(dst_width, 8)) {
334 ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
335 }
336 }
337 #endif
338 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
339 // Allocate a row of ARGB.
340 {
341 align_buffer_64(row, clip_src_width * 4);
342
343 const int max_y = (src_height - 1) << 16;
344 if (y > max_y) {
345 y = max_y;
346 }
347 for (j = 0; j < dst_height; ++j) {
348 int yi = y >> 16;
349 const uint8_t* src = src_argb + yi * src_stride;
350 if (filtering == kFilterLinear) {
351 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
352 } else {
353 int yf = (y >> 8) & 255;
354 InterpolateRow(row, src, src_stride, clip_src_width, yf);
355 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
356 }
357 dst_argb += dst_stride;
358 y += dy;
359 if (y > max_y) {
360 y = max_y;
361 }
362 }
363 free_aligned_buffer_64(row);
364 }
365 }
366
367 // Scale ARGB up with bilinear interpolation.
ScaleARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)368 static void ScaleARGBBilinearUp(int src_width,
369 int src_height,
370 int dst_width,
371 int dst_height,
372 int src_stride,
373 int dst_stride,
374 const uint8_t* src_argb,
375 uint8_t* dst_argb,
376 int x,
377 int dx,
378 int y,
379 int dy,
380 enum FilterMode filtering) {
381 int j;
382 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
383 ptrdiff_t src_stride, int dst_width,
384 int source_y_fraction) = InterpolateRow_C;
385 void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
386 int dst_width, int x, int dx) =
387 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
388 const int max_y = (src_height - 1) << 16;
389 #if defined(HAS_INTERPOLATEROW_SSSE3)
390 if (TestCpuFlag(kCpuHasSSSE3)) {
391 InterpolateRow = InterpolateRow_Any_SSSE3;
392 if (IS_ALIGNED(dst_width, 4)) {
393 InterpolateRow = InterpolateRow_SSSE3;
394 }
395 }
396 #endif
397 #if defined(HAS_INTERPOLATEROW_AVX2)
398 if (TestCpuFlag(kCpuHasAVX2)) {
399 InterpolateRow = InterpolateRow_Any_AVX2;
400 if (IS_ALIGNED(dst_width, 8)) {
401 InterpolateRow = InterpolateRow_AVX2;
402 }
403 }
404 #endif
405 #if defined(HAS_INTERPOLATEROW_NEON)
406 if (TestCpuFlag(kCpuHasNEON)) {
407 InterpolateRow = InterpolateRow_Any_NEON;
408 if (IS_ALIGNED(dst_width, 4)) {
409 InterpolateRow = InterpolateRow_NEON;
410 }
411 }
412 #endif
413 #if defined(HAS_INTERPOLATEROW_MSA)
414 if (TestCpuFlag(kCpuHasMSA)) {
415 InterpolateRow = InterpolateRow_Any_MSA;
416 if (IS_ALIGNED(dst_width, 8)) {
417 InterpolateRow = InterpolateRow_MSA;
418 }
419 }
420 #endif
421 if (src_width >= 32768) {
422 ScaleARGBFilterCols =
423 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
424 }
425 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
426 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
427 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
428 }
429 #endif
430 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
431 if (filtering && TestCpuFlag(kCpuHasNEON)) {
432 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
433 if (IS_ALIGNED(dst_width, 4)) {
434 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
435 }
436 }
437 #endif
438 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
439 if (filtering && TestCpuFlag(kCpuHasMSA)) {
440 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
441 if (IS_ALIGNED(dst_width, 8)) {
442 ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
443 }
444 }
445 #endif
446 #if defined(HAS_SCALEARGBCOLS_SSE2)
447 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
448 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
449 }
450 #endif
451 #if defined(HAS_SCALEARGBCOLS_NEON)
452 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
453 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
454 if (IS_ALIGNED(dst_width, 8)) {
455 ScaleARGBFilterCols = ScaleARGBCols_NEON;
456 }
457 }
458 #endif
459 #if defined(HAS_SCALEARGBCOLS_MSA)
460 if (!filtering && TestCpuFlag(kCpuHasMSA)) {
461 ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
462 if (IS_ALIGNED(dst_width, 4)) {
463 ScaleARGBFilterCols = ScaleARGBCols_MSA;
464 }
465 }
466 #endif
467 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
468 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
469 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
470 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
471 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
472 }
473 #endif
474 }
475
476 if (y > max_y) {
477 y = max_y;
478 }
479
480 {
481 int yi = y >> 16;
482 const uint8_t* src = src_argb + yi * src_stride;
483
484 // Allocate 2 rows of ARGB.
485 const int kRowSize = (dst_width * 4 + 31) & ~31;
486 align_buffer_64(row, kRowSize * 2);
487
488 uint8_t* rowptr = row;
489 int rowstride = kRowSize;
490 int lasty = yi;
491
492 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
493 if (src_height > 1) {
494 src += src_stride;
495 }
496 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
497 src += src_stride;
498
499 for (j = 0; j < dst_height; ++j) {
500 yi = y >> 16;
501 if (yi != lasty) {
502 if (y > max_y) {
503 y = max_y;
504 yi = y >> 16;
505 src = src_argb + yi * src_stride;
506 }
507 if (yi != lasty) {
508 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
509 rowptr += rowstride;
510 rowstride = -rowstride;
511 lasty = yi;
512 src += src_stride;
513 }
514 }
515 if (filtering == kFilterLinear) {
516 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
517 } else {
518 int yf = (y >> 8) & 255;
519 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
520 }
521 dst_argb += dst_stride;
522 y += dy;
523 }
524 free_aligned_buffer_64(row);
525 }
526 }
527
528 #ifdef YUVSCALEUP
529 // Scale YUV to ARGB up with bilinear interpolation.
ScaleYUVToARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride_y,int src_stride_u,int src_stride_v,int dst_stride_argb,const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)530 static void ScaleYUVToARGBBilinearUp(int src_width,
531 int src_height,
532 int dst_width,
533 int dst_height,
534 int src_stride_y,
535 int src_stride_u,
536 int src_stride_v,
537 int dst_stride_argb,
538 const uint8_t* src_y,
539 const uint8_t* src_u,
540 const uint8_t* src_v,
541 uint8_t* dst_argb,
542 int x,
543 int dx,
544 int y,
545 int dy,
546 enum FilterMode filtering) {
547 int j;
548 void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
549 const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
550 I422ToARGBRow_C;
551 #if defined(HAS_I422TOARGBROW_SSSE3)
552 if (TestCpuFlag(kCpuHasSSSE3)) {
553 I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
554 if (IS_ALIGNED(src_width, 8)) {
555 I422ToARGBRow = I422ToARGBRow_SSSE3;
556 }
557 }
558 #endif
559 #if defined(HAS_I422TOARGBROW_AVX2)
560 if (TestCpuFlag(kCpuHasAVX2)) {
561 I422ToARGBRow = I422ToARGBRow_Any_AVX2;
562 if (IS_ALIGNED(src_width, 16)) {
563 I422ToARGBRow = I422ToARGBRow_AVX2;
564 }
565 }
566 #endif
567 #if defined(HAS_I422TOARGBROW_NEON)
568 if (TestCpuFlag(kCpuHasNEON)) {
569 I422ToARGBRow = I422ToARGBRow_Any_NEON;
570 if (IS_ALIGNED(src_width, 8)) {
571 I422ToARGBRow = I422ToARGBRow_NEON;
572 }
573 }
574 #endif
575 #if defined(HAS_I422TOARGBROW_MSA)
576 if (TestCpuFlag(kCpuHasMSA)) {
577 I422ToARGBRow = I422ToARGBRow_Any_MSA;
578 if (IS_ALIGNED(src_width, 8)) {
579 I422ToARGBRow = I422ToARGBRow_MSA;
580 }
581 }
582 #endif
583
584 void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
585 ptrdiff_t src_stride, int dst_width,
586 int source_y_fraction) = InterpolateRow_C;
587 #if defined(HAS_INTERPOLATEROW_SSSE3)
588 if (TestCpuFlag(kCpuHasSSSE3)) {
589 InterpolateRow = InterpolateRow_Any_SSSE3;
590 if (IS_ALIGNED(dst_width, 4)) {
591 InterpolateRow = InterpolateRow_SSSE3;
592 }
593 }
594 #endif
595 #if defined(HAS_INTERPOLATEROW_AVX2)
596 if (TestCpuFlag(kCpuHasAVX2)) {
597 InterpolateRow = InterpolateRow_Any_AVX2;
598 if (IS_ALIGNED(dst_width, 8)) {
599 InterpolateRow = InterpolateRow_AVX2;
600 }
601 }
602 #endif
603 #if defined(HAS_INTERPOLATEROW_NEON)
604 if (TestCpuFlag(kCpuHasNEON)) {
605 InterpolateRow = InterpolateRow_Any_NEON;
606 if (IS_ALIGNED(dst_width, 4)) {
607 InterpolateRow = InterpolateRow_NEON;
608 }
609 }
610 #endif
611 #if defined(HAS_INTERPOLATEROW_MSA)
612 if (TestCpuFlag(kCpuHasMSA)) {
613 InterpolateRow = InterpolateRow_Any_MSA;
614 if (IS_ALIGNED(dst_width, 8)) {
615 InterpolateRow = InterpolateRow_MSA;
616 }
617 }
618 #endif
619
620 void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
621 int dst_width, int x, int dx) =
622 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
623 if (src_width >= 32768) {
624 ScaleARGBFilterCols =
625 filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
626 }
627 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
628 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
629 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
630 }
631 #endif
632 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
633 if (filtering && TestCpuFlag(kCpuHasNEON)) {
634 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
635 if (IS_ALIGNED(dst_width, 4)) {
636 ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
637 }
638 }
639 #endif
640 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
641 if (filtering && TestCpuFlag(kCpuHasMSA)) {
642 ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
643 if (IS_ALIGNED(dst_width, 8)) {
644 ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
645 }
646 }
647 #endif
648 #if defined(HAS_SCALEARGBCOLS_SSE2)
649 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
650 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
651 }
652 #endif
653 #if defined(HAS_SCALEARGBCOLS_NEON)
654 if (!filtering && TestCpuFlag(kCpuHasNEON)) {
655 ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
656 if (IS_ALIGNED(dst_width, 8)) {
657 ScaleARGBFilterCols = ScaleARGBCols_NEON;
658 }
659 }
660 #endif
661 #if defined(HAS_SCALEARGBCOLS_MSA)
662 if (!filtering && TestCpuFlag(kCpuHasMSA)) {
663 ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
664 if (IS_ALIGNED(dst_width, 4)) {
665 ScaleARGBFilterCols = ScaleARGBCols_MSA;
666 }
667 }
668 #endif
669 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
670 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
671 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
672 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
673 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
674 }
675 #endif
676 }
677
678 const int max_y = (src_height - 1) << 16;
679 if (y > max_y) {
680 y = max_y;
681 }
682 const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
683 int yi = y >> 16;
684 int uv_yi = yi >> kYShift;
685 const uint8_t* src_row_y = src_y + yi * src_stride_y;
686 const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
687 const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;
688
689 // Allocate 2 rows of ARGB.
690 const int kRowSize = (dst_width * 4 + 31) & ~31;
691 align_buffer_64(row, kRowSize * 2);
692
693 // Allocate 1 row of ARGB for source conversion.
694 align_buffer_64(argb_row, src_width * 4);
695
696 uint8_t* rowptr = row;
697 int rowstride = kRowSize;
698 int lasty = yi;
699
700 // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
701 ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
702 if (src_height > 1) {
703 src_row_y += src_stride_y;
704 if (yi & 1) {
705 src_row_u += src_stride_u;
706 src_row_v += src_stride_v;
707 }
708 }
709 ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
710 if (src_height > 2) {
711 src_row_y += src_stride_y;
712 if (!(yi & 1)) {
713 src_row_u += src_stride_u;
714 src_row_v += src_stride_v;
715 }
716 }
717
718 for (j = 0; j < dst_height; ++j) {
719 yi = y >> 16;
720 if (yi != lasty) {
721 if (y > max_y) {
722 y = max_y;
723 yi = y >> 16;
724 uv_yi = yi >> kYShift;
725 src_row_y = src_y + yi * src_stride_y;
726 src_row_u = src_u + uv_yi * src_stride_u;
727 src_row_v = src_v + uv_yi * src_stride_v;
728 }
729 if (yi != lasty) {
730 // TODO(fbarchard): Convert the clipped region of row.
731 I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
732 ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
733 rowptr += rowstride;
734 rowstride = -rowstride;
735 lasty = yi;
736 src_row_y += src_stride_y;
737 if (yi & 1) {
738 src_row_u += src_stride_u;
739 src_row_v += src_stride_v;
740 }
741 }
742 }
743 if (filtering == kFilterLinear) {
744 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
745 } else {
746 int yf = (y >> 8) & 255;
747 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
748 }
749 dst_argb += dst_stride_argb;
750 y += dy;
751 }
752 free_aligned_buffer_64(row);
753 free_aligned_buffer_64(row_argb);
754 }
755 #endif
756
757 // Scale ARGB to/from any dimensions, without interpolation.
758 // Fixed point math is used for performance: The upper 16 bits
759 // of x and dx is the integer part of the source position and
760 // the lower 16 bits are the fixed decimal part.
761
ScaleARGBSimple(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy)762 static void ScaleARGBSimple(int src_width,
763 int src_height,
764 int dst_width,
765 int dst_height,
766 int src_stride,
767 int dst_stride,
768 const uint8_t* src_argb,
769 uint8_t* dst_argb,
770 int x,
771 int dx,
772 int y,
773 int dy) {
774 int j;
775 void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
776 int dst_width, int x, int dx) =
777 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
778 (void)src_height;
779 #if defined(HAS_SCALEARGBCOLS_SSE2)
780 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
781 ScaleARGBCols = ScaleARGBCols_SSE2;
782 }
783 #endif
784 #if defined(HAS_SCALEARGBCOLS_NEON)
785 if (TestCpuFlag(kCpuHasNEON)) {
786 ScaleARGBCols = ScaleARGBCols_Any_NEON;
787 if (IS_ALIGNED(dst_width, 8)) {
788 ScaleARGBCols = ScaleARGBCols_NEON;
789 }
790 }
791 #endif
792 #if defined(HAS_SCALEARGBCOLS_MSA)
793 if (TestCpuFlag(kCpuHasMSA)) {
794 ScaleARGBCols = ScaleARGBCols_Any_MSA;
795 if (IS_ALIGNED(dst_width, 4)) {
796 ScaleARGBCols = ScaleARGBCols_MSA;
797 }
798 }
799 #endif
800 if (src_width * 2 == dst_width && x < 0x8000) {
801 ScaleARGBCols = ScaleARGBColsUp2_C;
802 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
803 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
804 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
805 }
806 #endif
807 }
808
809 for (j = 0; j < dst_height; ++j) {
810 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
811 dx);
812 dst_argb += dst_stride;
813 y += dy;
814 }
815 }
816
817 // ScaleARGB a ARGB.
818 // This function in turn calls a scaling function
819 // suitable for handling the desired resolutions.
ScaleARGB(const uint8_t * src,int src_stride,int src_width,int src_height,uint8_t * dst,int dst_stride,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)820 static void ScaleARGB(const uint8_t* src,
821 int src_stride,
822 int src_width,
823 int src_height,
824 uint8_t* dst,
825 int dst_stride,
826 int dst_width,
827 int dst_height,
828 int clip_x,
829 int clip_y,
830 int clip_width,
831 int clip_height,
832 enum FilterMode filtering) {
833 // Initial source x/y coordinate and step values as 16.16 fixed point.
834 int x = 0;
835 int y = 0;
836 int dx = 0;
837 int dy = 0;
838 // ARGB does not support box filter yet, but allow the user to pass it.
839 // Simplify filtering when possible.
840 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
841 filtering);
842
843 // Negative src_height means invert the image.
844 if (src_height < 0) {
845 src_height = -src_height;
846 src = src + (src_height - 1) * src_stride;
847 src_stride = -src_stride;
848 }
849 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
850 &dx, &dy);
851 src_width = Abs(src_width);
852 if (clip_x) {
853 int64_t clipf = (int64_t)(clip_x)*dx;
854 x += (clipf & 0xffff);
855 src += (clipf >> 16) * 4;
856 dst += clip_x * 4;
857 }
858 if (clip_y) {
859 int64_t clipf = (int64_t)(clip_y)*dy;
860 y += (clipf & 0xffff);
861 src += (clipf >> 16) * src_stride;
862 dst += clip_y * dst_stride;
863 }
864
865 // Special case for integer step values.
866 if (((dx | dy) & 0xffff) == 0) {
867 if (!dx || !dy) { // 1 pixel wide and/or tall.
868 filtering = kFilterNone;
869 } else {
870 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
871 if (!(dx & 0x10000) && !(dy & 0x10000)) {
872 if (dx == 0x20000) {
873 // Optimized 1/2 downsample.
874 ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
875 src_stride, dst_stride, src, dst, x, dx, y, dy,
876 filtering);
877 return;
878 }
879 if (dx == 0x40000 && filtering == kFilterBox) {
880 // Optimized 1/4 box downsample.
881 ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
882 src_stride, dst_stride, src, dst, x, dx, y, dy);
883 return;
884 }
885 ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
886 src_stride, dst_stride, src, dst, x, dx, y, dy,
887 filtering);
888 return;
889 }
890 // Optimized odd scale down. ie 3, 5, 7, 9x.
891 if ((dx & 0x10000) && (dy & 0x10000)) {
892 filtering = kFilterNone;
893 if (dx == 0x10000 && dy == 0x10000) {
894 // Straight copy.
895 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
896 dst, dst_stride, clip_width, clip_height);
897 return;
898 }
899 }
900 }
901 }
902 if (dx == 0x10000 && (x & 0xffff) == 0) {
903 // Arbitrary scale vertically, but unscaled vertically.
904 ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
905 dst_stride, src, dst, x, y, dy, 4, filtering);
906 return;
907 }
908 if (filtering && dy < 65536) {
909 ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
910 src_stride, dst_stride, src, dst, x, dx, y, dy,
911 filtering);
912 return;
913 }
914 if (filtering) {
915 ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
916 src_stride, dst_stride, src, dst, x, dx, y, dy,
917 filtering);
918 return;
919 }
920 ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
921 dst_stride, src, dst, x, dx, y, dy);
922 }
923
924 LIBYUV_API
ARGBScaleClip(const uint8_t * src_argb,int src_stride_argb,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)925 int ARGBScaleClip(const uint8_t* src_argb,
926 int src_stride_argb,
927 int src_width,
928 int src_height,
929 uint8_t* dst_argb,
930 int dst_stride_argb,
931 int dst_width,
932 int dst_height,
933 int clip_x,
934 int clip_y,
935 int clip_width,
936 int clip_height,
937 enum FilterMode filtering) {
938 if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
939 dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
940 clip_width > 32768 || clip_height > 32768 ||
941 (clip_x + clip_width) > dst_width ||
942 (clip_y + clip_height) > dst_height) {
943 return -1;
944 }
945 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
946 dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
947 clip_height, filtering);
948 return 0;
949 }
950
951 // Scale an ARGB image.
952 LIBYUV_API
ARGBScale(const uint8_t * src_argb,int src_stride_argb,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,int dst_width,int dst_height,enum FilterMode filtering)953 int ARGBScale(const uint8_t* src_argb,
954 int src_stride_argb,
955 int src_width,
956 int src_height,
957 uint8_t* dst_argb,
958 int dst_stride_argb,
959 int dst_width,
960 int dst_height,
961 enum FilterMode filtering) {
962 if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
963 src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
964 return -1;
965 }
966 ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
967 dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
968 filtering);
969 return 0;
970 }
971
972 // Scale with YUV conversion to ARGB and clipping.
973 LIBYUV_API
YUVToARGBScaleClip(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint32_t src_fourcc,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,uint32_t dst_fourcc,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)974 int YUVToARGBScaleClip(const uint8_t* src_y,
975 int src_stride_y,
976 const uint8_t* src_u,
977 int src_stride_u,
978 const uint8_t* src_v,
979 int src_stride_v,
980 uint32_t src_fourcc,
981 int src_width,
982 int src_height,
983 uint8_t* dst_argb,
984 int dst_stride_argb,
985 uint32_t dst_fourcc,
986 int dst_width,
987 int dst_height,
988 int clip_x,
989 int clip_y,
990 int clip_width,
991 int clip_height,
992 enum FilterMode filtering) {
993 uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
994 int r;
995 (void)src_fourcc; // TODO(fbarchard): implement and/or assert.
996 (void)dst_fourcc;
997 I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
998 argb_buffer, src_width * 4, src_width, src_height);
999
1000 r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
1001 dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
1002 clip_width, clip_height, filtering);
1003 free(argb_buffer);
1004 return r;
1005 }
1006
1007 #ifdef __cplusplus
1008 } // extern "C"
1009 } // namespace libyuv
1010 #endif
1011