/*
 *  Copyright 2020 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyUV
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Macros to enable specialized scalers.

#ifndef HAS_SCALEUVDOWN2
#define HAS_SCALEUVDOWN2 1
#endif
#ifndef HAS_SCALEUVDOWN4BOX
#define HAS_SCALEUVDOWN4BOX 1
#endif
#ifndef HAS_SCALEUVDOWNEVEN
#define HAS_SCALEUVDOWNEVEN 1
#endif
#ifndef HAS_SCALEUVBILINEARDOWN
#define HAS_SCALEUVBILINEARDOWN 1
#endif
#ifndef HAS_SCALEUVBILINEARUP
#define HAS_SCALEUVBILINEARUP 1
#endif
#ifndef HAS_UVCOPY
#define HAS_UVCOPY 1
#endif
#ifndef HAS_SCALEPLANEVERTICAL
#define HAS_SCALEPLANEVERTICAL 1
#endif
static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

// ScaleUV, 1/2
// This is an optimized version for scaling down a UV to 1/2 of
// its original size.
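// For example, scaling a 640x360 UV plane to 320x180 takes this path with
// dx == dy == 0x20000 (2.0 in 16.16 fixed point; see the asserts below).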
#if HAS_SCALEUVDOWN2
static void ScaleUVDown2(int src_width,
                         int src_height,
                         int dst_width,
                         int dst_height,
                         int src_stride,
                         int dst_stride,
                         const uint8_t* src_uv,
                         uint8_t* dst_uv,
                         int x,
                         int dx,
                         int y,
                         int dy,
                         enum FilterMode filtering) {
  int j;
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                          uint8_t* dst_uv, int dst_width) =
      filtering == kFilterNone
          ? ScaleUVRowDown2_C
          : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_C
                                        : ScaleUVRowDown2Box_C);
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 2);      // Test scale factor of 2.
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
  // Advance to odd row, even column.
  if (filtering == kFilterBilinear) {
    src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
  } else {
    src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2;
  }

#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
  if (TestCpuFlag(kCpuHasNEON) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_NEON;
    }
  }
#endif

  // This code is not enabled. Only box filter is available at this time.
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_SSSE3
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_SSSE3
                                          : ScaleUVRowDown2Box_Any_SSSE3);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_SSSE3
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_SSSE3
                                            : ScaleUVRowDown2Box_SSSE3);
    }
  }
#endif
  // This code is not enabled. Only box filter is available at this time.
#if defined(HAS_SCALEUVROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_NEON
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_NEON
                                          : ScaleUVRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_NEON
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON
                                            : ScaleUVRowDown2Box_NEON);
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_MMI
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_MMI
                                          : ScaleUVRowDown2Box_Any_MMI);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_MMI
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_MMI
                                            : ScaleUVRowDown2Box_MMI);
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_MSA
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_MSA
                                          : ScaleUVRowDown2Box_Any_MSA);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_MSA
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_MSA
                                            : ScaleUVRowDown2Box_MSA);
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleUVRowDown2(src_uv, src_stride, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
}
#endif  // HAS_SCALEUVDOWN2

// ScaleUV, 1/4
// This is an optimized version for scaling down a UV to 1/4 of
// its original size.
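// The 1/4 box filter is implemented as two passes of the 1/2 box row scaler:
// pairs of source rows are first halved into a temporary 2-row buffer, and
// that buffer is then halved again into the destination row.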
#if HAS_SCALEUVDOWN4BOX
static void ScaleUVDown4Box(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_uv,
                            uint8_t* dst_uv,
                            int x,
                            int dx,
                            int y,
                            int dy) {
  int j;
  // Allocate 2 rows of UV.
  const int kRowSize = (dst_width * 2 * 2 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                          uint8_t* dst_uv, int dst_width) =
      ScaleUVRowDown2Box_C;
  // Advance to odd row, even column.
  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 4);      // Test scale factor of 4.
  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.

#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_NEON;
    }
  }
#endif

  for (j = 0; j < dst_height; ++j) {
    ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
    ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize,
                    dst_width * 2);
    ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
  free_aligned_buffer_64(row);
}
#endif  // HAS_SCALEUVDOWN4BOX

// ScaleUV Even
// This is an optimized version for scaling down a UV plane by an even
// integer factor of its original size.
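// The column step is the integer part of dx; for example a 1/6 scale
// (dx == 6 << 16) reads every 6th source UV pixel.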
#if HAS_SCALEUVDOWNEVEN
static void ScaleUVDownEven(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_uv,
                            uint8_t* dst_uv,
                            int x,
                            int dx,
                            int y,
                            int dy,
                            enum FilterMode filtering) {
  int j;
  int col_step = dx >> 16;
  int row_stride = (dy >> 16) * src_stride;
  void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
                             int src_step, uint8_t* dst_uv, int dst_width) =
      filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
  (void)src_width;
  (void)src_height;
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
                                   : ScaleUVRowDownEven_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_SSSE3 : ScaleUVRowDownEven_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON) && !filtering) {
    ScaleUVRowDownEven = ScaleUVRowDownEven_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven = ScaleUVRowDownEven_NEON;
    }
  }
#endif  // TODO(fbarchard): Enable Box filter
#if defined(HAS_SCALEUVROWDOWNEVENBOX_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_NEON
                                   : ScaleUVRowDownEven_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_NEON : ScaleUVRowDownEven_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVRowDownEven =
        filtering ? ScaleUVRowDownEvenBox_Any_MMI : ScaleUVRowDownEven_Any_MMI;
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_MMI : ScaleUVRowDownEven_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVRowDownEven =
        filtering ? ScaleUVRowDownEvenBox_Any_MSA : ScaleUVRowDownEven_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_MSA : ScaleUVRowDownEven_MSA;
    }
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleUVRowDownEven(src_uv, src_stride, col_step, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
}
#endif  // HAS_SCALEUVDOWNEVEN

// Scale UV down with bilinear interpolation.
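// Only the horizontal span of source pixels actually referenced (xl..xr,
// padded to 4-pixel alignment below) is interpolated, keeping the row buffer
// and per-row interpolation cost proportional to the used width.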
#if HAS_SCALEUVBILINEARDOWN
static void ScaleUVBilinearDown(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint8_t* src_uv,
                                uint8_t* dst_uv,
                                int x,
                                int dx,
                                int y,
                                int dy,
                                enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
                            int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleUVFilterCols64_C : ScaleUVFilterCols_C;
  int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
  int64_t xl = (dx >= 0) ? x : xlast;
  int64_t xr = (dx >= 0) ? xlast : x;
  int clip_src_width;
  xl = (xl >> 16) & ~3;    // Left edge aligned.
  xr = (xr >> 16) + 1;     // Right most pixel used. Bilinear uses 2 pixels.
  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
  if (xr > src_width) {
    xr = src_width;
  }
  clip_src_width = (int)(xr - xl) * 2;  // Width aligned to 2.
  src_uv += xl * 2;
  x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVFilterCols_MSA;
    }
  }
#endif
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row of UV.
  {
    align_buffer_64(row, clip_src_width * 2);

    const int max_y = (src_height - 1) << 16;
    if (y > max_y) {
      y = max_y;
    }
    for (j = 0; j < dst_height; ++j) {
      int yi = y >> 16;
      const uint8_t* src = src_uv + yi * src_stride;
      if (filtering == kFilterLinear) {
        ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(row, src, src_stride, clip_src_width, yf);
        ScaleUVFilterCols(dst_uv, row, dst_width, x, dx);
      }
      dst_uv += dst_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
    }
    free_aligned_buffer_64(row);
  }
}
#endif  // HAS_SCALEUVBILINEARDOWN

// Scale UV up with bilinear interpolation.
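// Two column-scaled source rows are kept in a small ping-pong buffer so each
// source row is column-scaled at most once as y advances.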
#if HAS_SCALEUVBILINEARUP
static void ScaleUVBilinearUp(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
                              int src_stride,
                              int dst_stride,
                              const uint8_t* src_uv,
                              uint8_t* dst_uv,
                              int x,
                              int dx,
                              int y,
                              int dy,
                              enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8_t* dst_uv, const uint8_t* src_uv,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleUVFilterCols)(uint8_t* dst_uv, const uint8_t* src_uv,
                            int dst_width, int x, int dx) =
      filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
  const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(dst_width, 2)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
  if (src_width >= 32768) {
    ScaleUVFilterCols = filtering ? ScaleUVFilterCols64_C : ScaleUVCols64_C;
  }
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVFilterCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_SSSE3)
  if (!filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MMI)
  if (!filtering && TestCpuFlag(kCpuHasMMI)) {
    ScaleUVFilterCols = ScaleUVCols_Any_MMI;
    if (IS_ALIGNED(dst_width, 1)) {
      ScaleUVFilterCols = ScaleUVCols_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MSA)
  if (!filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVCols_MSA;
    }
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleUVFilterCols = ScaleUVColsUp2_C;
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVColsUp2_SSSE3;
    }
#endif
#if defined(HAS_SCALEUVCOLSUP2_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVColsUp2_MMI;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }

  {
    int yi = y >> 16;
    const uint8_t* src = src_uv + yi * src_stride;

    // Allocate 2 rows of UV.
    const int kRowSize = (dst_width * 2 + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);

    uint8_t* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;

    // Prime the two-row buffer with the first two column-scaled source rows.
    ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleUVFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_uv + yi * src_stride;
        }
        if (yi != lasty) {
          // Column-scale the next source row into the older of the two
          // buffered rows, then swap them by negating the stride.
          ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        InterpolateRow(dst_uv, rowptr, 0, dst_width * 2, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_uv, rowptr, rowstride, dst_width * 2, yf);
      }
      dst_uv += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
#endif  // HAS_SCALEUVBILINEARUP

// Scale UV to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx are the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
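// For example, scaling 4 source pixels to 3 destination pixels gives
// dx = 65536 * 4 / 3 (about 1.33 in 16.16 fixed point), so successive
// destination pixels sample source positions ~0.0, ~1.33 and ~2.67.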

static void ScaleUVSimple(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8_t* src_uv,
                          uint8_t* dst_uv,
                          int x,
                          int dx,
                          int y,
                          int dy) {
  int j;
  void (*ScaleUVCols)(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width,
                      int x, int dx) =
      (src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
  (void)src_height;
#if defined(HAS_SCALEUVCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVCols = ScaleUVCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVCols = ScaleUVCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVCols = ScaleUVCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVCols = ScaleUVCols_Any_MMI;
    if (IS_ALIGNED(dst_width, 1)) {
      ScaleUVCols = ScaleUVCols_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVCols = ScaleUVCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVCols = ScaleUVCols_MSA;
    }
  }
#endif
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleUVCols = ScaleUVColsUp2_C;
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
      ScaleUVCols = ScaleUVColsUp2_SSSE3;
    }
#endif
#if defined(HAS_SCALEUVCOLSUP2_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
      ScaleUVCols = ScaleUVColsUp2_MMI;
    }
#endif
  }

  for (j = 0; j < dst_height; ++j) {
    ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx);
    dst_uv += dst_stride;
    y += dy;
  }
}

// Copy UV with optional flipping.
#if HAS_UVCOPY
static int UVCopy(const uint8_t* src_UV,
                  int src_stride_UV,
                  uint8_t* dst_UV,
                  int dst_stride_UV,
                  int width,
                  int height) {
  if (!src_UV || !dst_UV || width <= 0 || height == 0) {
    return -1;
  }
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_UV = src_UV + (height - 1) * src_stride_UV;
    src_stride_UV = -src_stride_UV;
  }

  CopyPlane(src_UV, src_stride_UV, dst_UV, dst_stride_UV, width * 2, height);
  return 0;
}
#endif  // HAS_UVCOPY

// Scale a UV plane (from NV12).
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
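// Dispatch order: even integer downscales use the specialized 1/2, 1/4 box
// or down-even paths; an unscaled horizontal with arbitrary vertical scale
// uses ScalePlaneVertical; otherwise bilinear up/down is used, with the
// unfiltered ScaleUVSimple as the fallback.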
static void ScaleUV(const uint8_t* src,
                    int src_stride,
                    int src_width,
                    int src_height,
                    uint8_t* dst,
                    int dst_stride,
                    int dst_width,
                    int dst_height,
                    int clip_x,
                    int clip_y,
                    int clip_width,
                    int clip_height,
                    enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // UV does not support box filter yet, but allow the user to pass it.
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  src_width = Abs(src_width);
  if (clip_x) {
    int64_t clipf = (int64_t)(clip_x)*dx;
    x += (clipf & 0xffff);
    src += (clipf >> 16) * 2;
    dst += clip_x * 2;
  }
  if (clip_y) {
    int64_t clipf = (int64_t)(clip_y)*dy;
    y += (clipf & 0xffff);
    src += (clipf >> 16) * src_stride;
    dst += clip_y * dst_stride;
  }

  // Special case for integer step values.
  if (((dx | dy) & 0xffff) == 0) {
    if (!dx || !dy) {  // 1 pixel wide and/or tall.
      filtering = kFilterNone;
    } else {
      // Optimized even scale down, i.e. 2x, 4x, 6x, 8x, 10x.
      if (!(dx & 0x10000) && !(dy & 0x10000)) {
#if HAS_SCALEUVDOWN2
        if (dx == 0x20000) {
          // Optimized 1/2 downsample.
          ScaleUVDown2(src_width, src_height, clip_width, clip_height,
                       src_stride, dst_stride, src, dst, x, dx, y, dy,
                       filtering);
          return;
        }
#endif
#if HAS_SCALEUVDOWN4BOX
        if (dx == 0x40000 && filtering == kFilterBox) {
          // Optimized 1/4 box downsample.
          ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
                          src_stride, dst_stride, src, dst, x, dx, y, dy);
          return;
        }
#endif
#if HAS_SCALEUVDOWNEVEN
        ScaleUVDownEven(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
        return;
#endif
      }
      // Optimized odd scale down, i.e. 3x, 5x, 7x, 9x.
      if ((dx & 0x10000) && (dy & 0x10000)) {
        filtering = kFilterNone;
#ifdef HAS_UVCOPY
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
          UVCopy(src + (y >> 16) * src_stride + (x >> 16) * 2, src_stride, dst,
                 dst_stride, clip_width, clip_height);
          return;
        }
#endif
      }
    }
  }
  // HAS_SCALEPLANEVERTICAL
  if (dx == 0x10000 && (x & 0xffff) == 0) {
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                       dst_stride, src, dst, x, y, dy, 4, filtering);
    return;
  }

#if HAS_SCALEUVBILINEARUP
  if (filtering && dy < 65536) {
    ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
                      src_stride, dst_stride, src, dst, x, dx, y, dy,
                      filtering);
    return;
  }
#endif
#if HAS_SCALEUVBILINEARDOWN
  if (filtering) {
    ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
    return;
  }
#endif
  ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride,
                dst_stride, src, dst, x, dx, y, dy);
}

// Scale a UV image.
LIBYUV_API
int UVScale(const uint8_t* src_uv,
            int src_stride_uv,
            int src_width,
            int src_height,
            uint8_t* dst_uv,
            int dst_stride_uv,
            int dst_width,
            int dst_height,
            enum FilterMode filtering) {
  if (!src_uv || src_width == 0 || src_height == 0 || src_width > 32768 ||
      src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv,
          dst_width, dst_height, 0, 0, dst_width, dst_height, filtering);
  return 0;
}
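
// Example usage (a minimal sketch with hypothetical static buffers; a real
// caller would pass the UV plane of an NV12/NV21 frame):
//   // Halve the 320x180 UV plane of a 640x360 NV12 frame to 160x90.
//   static uint8_t src_uv[320 * 180 * 2];
//   static uint8_t dst_uv[160 * 90 * 2];
//   UVScale(src_uv, 320 * 2, 320, 180, dst_uv, 160 * 2, 160, 90,
//           kFilterBilinear);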

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif