• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2020 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/scale.h"
12 
13 #include <assert.h>
14 #include <string.h>
15 
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h"  // For CopyUV
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20 
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25 
26 // Macros to enable specialized scalers
27 
28 #ifndef HAS_SCALEUVDOWN2
29 #define HAS_SCALEUVDOWN2 1
30 #endif
31 #ifndef HAS_SCALEUVDOWN4BOX
32 #define HAS_SCALEUVDOWN4BOX 1
33 #endif
34 #ifndef HAS_SCALEUVDOWNEVEN
35 #define HAS_SCALEUVDOWNEVEN 1
36 #endif
37 #ifndef HAS_SCALEUVBILINEARDOWN
38 #define HAS_SCALEUVBILINEARDOWN 1
39 #endif
40 #ifndef HAS_SCALEUVBILINEARUP
41 #define HAS_SCALEUVBILINEARUP 1
42 #endif
43 #ifndef HAS_UVCOPY
44 #define HAS_UVCOPY 1
45 #endif
46 #ifndef HAS_SCALEPLANEVERTICAL
47 #define HAS_SCALEPLANEVERTICAL 1
48 #endif
49 
// Returns the absolute value of v.
// NOTE: Abs(INT_MIN) would overflow (undefined behavior); callers pass
// image dimensions that UVScale bounds to <= 32768, so this is not hit
// in practice.
static __inline int Abs(int v) {
  return (v < 0) ? -v : v;
}
53 
54 // ScaleUV, 1/2
55 // This is an optimized version for scaling down a UV to 1/2 of
56 // its original size.
#if HAS_SCALEUVDOWN2
// Scales a UV (interleaved 2-bytes-per-pixel) plane down by exactly 1/2
// horizontally; vertical step dy must be a whole multiple of 2 (asserted).
// x/dx/y/dy are 16.16 fixed point. Selects a SIMD row function when the CPU
// supports it, falling back to the C implementation.
static void ScaleUVDown2(int src_width,
                         int src_height,
                         int dst_width,
                         int dst_height,
                         int src_stride,
                         int dst_stride,
                         const uint8_t* src_uv,
                         uint8_t* dst_uv,
                         int x,
                         int dx,
                         int y,
                         int dy,
                         enum FilterMode filtering) {
  int j;
  // Bytes to advance the source per output row (integer part of dy rows).
  int row_stride = src_stride * (dy >> 16);
  // Default to the C row function matching the requested filter mode.
  void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                          uint8_t* dst_uv, int dst_width) =
      filtering == kFilterNone
          ? ScaleUVRowDown2_C
          : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_C
                                        : ScaleUVRowDown2Box_C);
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 2);      // Test scale factor of 2.
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
  // Advance to odd row, even column.
  // Non-bilinear sampling steps back one pixel so the row function reads the
  // intended sample pair.
  if (filtering == kFilterBilinear) {
    src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
  } else {
    src_uv += (y >> 16) * src_stride + ((x >> 16) - 1) * 2;
  }

  // Box-filter SIMD paths; the _Any_ variants handle non-aligned widths.
#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
  if (TestCpuFlag(kCpuHasNEON) && filtering) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_NEON;
    }
  }
#endif

// This code is not enabled.  Only box filter is available at this time.
// (The HAS_SCALEUVROWDOWN2_* macros below are not defined by the row
// headers yet, so these blocks compile out.)
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_SSSE3
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_SSSE3
                                          : ScaleUVRowDown2Box_Any_SSSE3);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_SSSE3
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_SSSE3
                                            : ScaleUVRowDown2Box_SSSE3);
    }
  }
#endif
// This code is not enabled.  Only box filter is available at this time.
#if defined(HAS_SCALEUVROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_NEON
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_NEON
                                          : ScaleUVRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_NEON
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_NEON
                                            : ScaleUVRowDown2Box_NEON);
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_MMI
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_MMI
                                          : ScaleUVRowDown2Box_Any_MMI);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_MMI
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_MMI
                                            : ScaleUVRowDown2Box_MMI);
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVRowDown2 =
        filtering == kFilterNone
            ? ScaleUVRowDown2_Any_MSA
            : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_Any_MSA
                                          : ScaleUVRowDown2Box_Any_MSA);
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDown2 =
          filtering == kFilterNone
              ? ScaleUVRowDown2_MSA
              : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_MSA
                                            : ScaleUVRowDown2Box_MSA);
    }
  }
#endif

  // Linear filtering only reads one source row, so the row function's
  // second-row stride is unused.
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  // Process each destination row.
  for (j = 0; j < dst_height; ++j) {
    ScaleUVRowDown2(src_uv, src_stride, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
}
#endif  // HAS_SCALEUVDOWN2
193 
194 // ScaleUV, 1/4
195 // This is an optimized version for scaling down a UV to 1/4 of
196 // its original size.
#if HAS_SCALEUVDOWN4BOX
// Scales a UV plane down by exactly 1/4 using a box filter, implemented as
// two passes of the 1/2 box row function: two source row-pairs are averaged
// into two half-width temporary rows, then those are averaged into the
// destination. dx must equal 4.0 and dy a whole multiple of 4 (asserted).
static void ScaleUVDown4Box(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_uv,
                            uint8_t* dst_uv,
                            int x,
                            int dx,
                            int y,
                            int dy) {
  int j;
  // Allocate 2 rows of UV.
  // Each intermediate row holds dst_width * 2 half-scaled UV pixels of
  // 2 bytes each, rounded up to a 16-byte multiple for SIMD alignment.
  const int kRowSize = (dst_width * 2 * 2 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleUVRowDown2)(const uint8_t* src_uv, ptrdiff_t src_stride,
                          uint8_t* dst_uv, int dst_width) =
      ScaleUVRowDown2Box_C;
  // Advance to odd row, even column.
  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 4);      // Test scale factor of 4.
  assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.

#if defined(HAS_SCALEUVROWDOWN2BOX_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_AVX2;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWN2BOX_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDown2 = ScaleUVRowDown2Box_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVRowDown2 = ScaleUVRowDown2Box_NEON;
    }
  }
#endif

  for (j = 0; j < dst_height; ++j) {
    // First pass: average source rows 0+1 and 2+3 into two temp rows at
    // half width, then second pass averages the temp rows into one output
    // row at quarter width.
    ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
    ScaleUVRowDown2(src_uv + src_stride * 2, src_stride, row + kRowSize,
                    dst_width * 2);
    ScaleUVRowDown2(row, kRowSize, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
  free_aligned_buffer_64(row);
}
#endif  // HAS_SCALEUVDOWN4BOX
262 
263 // ScaleUV Even
264 // This is an optimized version for scaling down a UV to even
265 // multiple of its original size.
#if HAS_SCALEUVDOWNEVEN
// Scales a UV plane down by an integer factor (2, 4, 6, 8, ...), sampling
// every col_step-th pixel per row. With filtering, a 2x2 box average is
// taken at each sample point. x/dx/y/dy are 16.16 fixed point.
static void ScaleUVDownEven(int src_width,
                            int src_height,
                            int dst_width,
                            int dst_height,
                            int src_stride,
                            int dst_stride,
                            const uint8_t* src_uv,
                            uint8_t* dst_uv,
                            int x,
                            int dx,
                            int y,
                            int dy,
                            enum FilterMode filtering) {
  int j;
  // Integer pixel step between horizontal samples.
  int col_step = dx >> 16;
  int row_stride = (dy >> 16) * src_stride;
  void (*ScaleUVRowDownEven)(const uint8_t* src_uv, ptrdiff_t src_stride,
                             int src_step, uint8_t* dst_uv, int dst_width) =
      filtering ? ScaleUVRowDownEvenBox_C : ScaleUVRowDownEven_C;
  (void)src_width;
  (void)src_height;
  assert(IS_ALIGNED(src_width, 2));
  assert(IS_ALIGNED(src_height, 2));
  // Advance to the first sample position.
  src_uv += (y >> 16) * src_stride + (x >> 16) * 2;
#if defined(HAS_SCALEUVROWDOWNEVEN_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_SSSE3
                                   : ScaleUVRowDownEven_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      // NOTE(review): the aligned Box variant is the _SSE2 symbol while the
      // Any variant above is _SSSE3 — looks deliberate (row function was
      // implemented with SSE2 instructions) but verify against scale_row.h.
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_SSE2 : ScaleUVRowDownEven_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_NEON)
  if (TestCpuFlag(kCpuHasNEON) && !filtering) {
    ScaleUVRowDownEven = ScaleUVRowDownEven_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven = ScaleUVRowDownEven_NEON;
    }
  }
#endif  // TODO(fbarchard): Enable Box filter
#if defined(HAS_SCALEUVROWDOWNEVENBOX_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVRowDownEven = filtering ? ScaleUVRowDownEvenBox_Any_NEON
                                   : ScaleUVRowDownEven_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_NEON : ScaleUVRowDownEven_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVRowDownEven =
        filtering ? ScaleUVRowDownEvenBox_Any_MMI : ScaleUVRowDownEven_Any_MMI;
    if (IS_ALIGNED(dst_width, 2)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_MMI : ScaleUVRowDownEven_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVROWDOWNEVEN_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVRowDownEven =
        filtering ? ScaleUVRowDownEvenBox_Any_MSA : ScaleUVRowDownEven_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVRowDownEven =
          filtering ? ScaleUVRowDownEvenBox_MSA : ScaleUVRowDownEven_MSA;
    }
  }
#endif

  // Linear filtering reads a single row; the second-row stride is unused.
  if (filtering == kFilterLinear) {
    src_stride = 0;
  }
  for (j = 0; j < dst_height; ++j) {
    ScaleUVRowDownEven(src_uv, src_stride, col_step, dst_uv, dst_width);
    src_uv += row_stride;
    dst_uv += dst_stride;
  }
}
#endif
350 
351 // Scale UV down with bilinear interpolation.
#if HAS_SCALEUVBILINEARDOWN
// Scale UV down with bilinear interpolation: for each output row, two
// source rows are blended vertically into a temp row (InterpolateRow),
// then sampled horizontally with sub-pixel filtering (ScaleUVFilterCols).
// The source is clipped horizontally to just the span of pixels actually
// read, to keep the temp row (and SIMD work) small.
static void ScaleUVBilinearDown(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint8_t* src_uv,
                                uint8_t* dst_uv,
                                int x,
                                int dx,
                                int y,
                                int dy,
                                enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  // 64-bit column stepper needed once x can exceed 16.16 int range.
  void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
                            int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleUVFilterCols64_C : ScaleUVFilterCols_C;
  // Compute the leftmost/rightmost source pixels touched (dx may be
  // negative for mirroring), in 16.16 fixed point.
  int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
  int64_t xl = (dx >= 0) ? x : xlast;
  int64_t xr = (dx >= 0) ? xlast : x;
  int clip_src_width;
  xl = (xl >> 16) & ~3;    // Left edge aligned.
  xr = (xr >> 16) + 1;     // Right most pixel used.  Bilinear uses 2 pixels.
  xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
  if (xr > src_width) {
    xr = src_width;
  }
  clip_src_width = (int)(xr - xl) * 2;  // Width aligned to 2.
  // Rebase the source pointer and x offset onto the clipped span.
  src_uv += xl * 2;
  x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(clip_src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(clip_src_width, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVFilterCols_MSA;
    }
  }
#endif
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row of UV.
  {
    align_buffer_64(row, clip_src_width * 2);

    // Clamp y so the bottom row is never stepped past.
    const int max_y = (src_height - 1) << 16;
    if (y > max_y) {
      y = max_y;
    }
    for (j = 0; j < dst_height; ++j) {
      int yi = y >> 16;
      const uint8_t* src = src_uv + yi * src_stride;
      if (filtering == kFilterLinear) {
        // Horizontal-only filtering: sample straight from the source row.
        ScaleUVFilterCols(dst_uv, src, dst_width, x, dx);
      } else {
        // Blend two source rows by the fractional part of y, then sample.
        int yf = (y >> 8) & 255;
        InterpolateRow(row, src, src_stride, clip_src_width, yf);
        ScaleUVFilterCols(dst_uv, row, dst_width, x, dx);
      }
      dst_uv += dst_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;
      }
    }
    free_aligned_buffer_64(row);
  }
}
#endif
468 
469 // Scale UV up with bilinear interpolation.
#if HAS_SCALEUVBILINEARUP
// Scale UV up with bilinear interpolation. Two horizontally pre-scaled
// source rows are kept in a ping-pong buffer (rowptr/rowstride flips sign
// to swap them); each output row blends the pair by the fractional part of
// y. Only one new row is column-scaled per source-row advance.
static void ScaleUVBilinearUp(int src_width,
                              int src_height,
                              int dst_width,
                              int dst_height,
                              int src_stride,
                              int dst_stride,
                              const uint8_t* src_uv,
                              uint8_t* dst_uv,
                              int x,
                              int dx,
                              int y,
                              int dy,
                              enum FilterMode filtering) {
  int j;
  void (*InterpolateRow)(uint8_t * dst_uv, const uint8_t* src_uv,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  // Point-sample columns when not filtering, sub-pixel filter otherwise.
  void (*ScaleUVFilterCols)(uint8_t * dst_uv, const uint8_t* src_uv,
                            int dst_width, int x, int dx) =
      filtering ? ScaleUVFilterCols_C : ScaleUVCols_C;
  const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    InterpolateRow = InterpolateRow_Any_MMI;
    if (IS_ALIGNED(dst_width, 2)) {
      InterpolateRow = InterpolateRow_MMI;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
  // Wide sources need the 64-bit fixed-point column steppers.
  if (src_width >= 32768) {
    ScaleUVFilterCols = filtering ? ScaleUVFilterCols64_C : ScaleUVCols64_C;
  }
#if defined(HAS_SCALEUVFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVFilterCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_SSSE3)
  if (!filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVFilterCols = ScaleUVCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleUVFilterCols = ScaleUVCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MMI)
  if (!filtering && TestCpuFlag(kCpuHasMMI)) {
    ScaleUVFilterCols = ScaleUVCols_Any_MMI;
    if (IS_ALIGNED(dst_width, 1)) {
      ScaleUVFilterCols = ScaleUVCols_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MSA)
  if (!filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleUVFilterCols = ScaleUVCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVCols_MSA;
    }
  }
#endif
  // Exact 2x unfiltered upscale has a dedicated pixel-doubling path.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleUVFilterCols = ScaleUVColsUp2_C;
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
      ScaleUVFilterCols = ScaleUVColsUp2_SSSE3;
    }
#endif
#if defined(HAS_SCALEUVCOLSUP2_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
      ScaleUVFilterCols = ScaleUVColsUp2_MMI;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }

  {
    int yi = y >> 16;
    const uint8_t* src = src_uv + yi * src_stride;

    // Allocate 2 rows of UV.
    const int kRowSize = (dst_width * 2 + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);

    // rowptr/rowstride implement the two-row ping-pong: negating rowstride
    // swaps which buffered row is "current" vs "next".
    uint8_t* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;

    // Prime the buffer with the first two horizontally-scaled source rows.
    ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleUVFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Crossed into a new source row: clamp, then scale one fresh row
        // into the buffer and flip the ping-pong direction.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_uv + yi * src_stride;
        }
        if (yi != lasty) {
          ScaleUVFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: emit the current buffered row unblended.
        InterpolateRow(dst_uv, rowptr, 0, dst_width * 2, 0);
      } else {
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_uv, rowptr, rowstride, dst_width * 2, yf);
      }
      dst_uv += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
#endif  // HAS_SCALEUVBILINEARUP
651 
652 // Scale UV to/from any dimensions, without interpolation.
653 // Fixed point math is used for performance: The upper 16 bits
654 // of x and dx is the integer part of the source position and
655 // the lower 16 bits are the fixed decimal part.
656 
// Scale UV to/from any dimensions, without interpolation (nearest sample).
// Fixed point math is used for performance: the upper 16 bits of x and dx
// are the integer part of the source position and the lower 16 bits the
// fractional part. Each output row is produced by one column-sampling pass
// over the nearest source row.
static void ScaleUVSimple(int src_width,
                          int src_height,
                          int dst_width,
                          int dst_height,
                          int src_stride,
                          int dst_stride,
                          const uint8_t* src_uv,
                          uint8_t* dst_uv,
                          int x,
                          int dx,
                          int y,
                          int dy) {
  int j;
  // 64-bit stepper when source positions can overflow 16.16 in an int.
  void (*ScaleUVCols)(uint8_t * dst_uv, const uint8_t* src_uv, int dst_width,
                      int x, int dx) =
      (src_width >= 32768) ? ScaleUVCols64_C : ScaleUVCols_C;
  (void)src_height;
#if defined(HAS_SCALEUVCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleUVCols = ScaleUVCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEUVCOLS_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleUVCols = ScaleUVCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleUVCols = ScaleUVCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    ScaleUVCols = ScaleUVCols_Any_MMI;
    if (IS_ALIGNED(dst_width, 1)) {
      ScaleUVCols = ScaleUVCols_MMI;
    }
  }
#endif
#if defined(HAS_SCALEUVCOLS_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    ScaleUVCols = ScaleUVCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleUVCols = ScaleUVCols_MSA;
    }
  }
#endif
  // Exact 2x upscale has a dedicated pixel-doubling path.
  if (src_width * 2 == dst_width && x < 0x8000) {
    ScaleUVCols = ScaleUVColsUp2_C;
#if defined(HAS_SCALEUVCOLSUP2_SSSE3)
    if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(dst_width, 8)) {
      ScaleUVCols = ScaleUVColsUp2_SSSE3;
    }
#endif
#if defined(HAS_SCALEUVCOLSUP2_MMI)
    if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
      ScaleUVCols = ScaleUVColsUp2_MMI;
    }
#endif
  }

  for (j = 0; j < dst_height; ++j) {
    ScaleUVCols(dst_uv, src_uv + (y >> 16) * src_stride, dst_width, x, dx);
    dst_uv += dst_stride;
    y += dy;
  }
}
723 
// Copy UV with optional vertical flipping.
// A negative height flips the image: copying starts at the last source row
// and walks backwards via a negated stride. Each UV pixel is 2 bytes, so
// the plane is copied as width * 2 bytes per row.
// Returns 0 on success, -1 on invalid arguments.
#if HAS_UVCOPY
static int UVCopy(const uint8_t* src_UV,
                  int src_stride_UV,
                  uint8_t* dst_UV,
                  int dst_stride_UV,
                  int width,
                  int height) {
  if (src_UV == NULL || dst_UV == NULL) {
    return -1;
  }
  if (width <= 0 || height == 0) {
    return -1;
  }
  if (height < 0) {
    // Invert: start at the bottom row and step upward.
    height = -height;
    src_UV += (height - 1) * src_stride_UV;
    src_stride_UV = -src_stride_UV;
  }

  CopyPlane(src_UV, src_stride_UV, dst_UV, dst_stride_UV, width * 2, height);
  return 0;
}
#endif  // HAS_UVCOPY
746 
747 // Scale a UV plane (from NV12)
748 // This function in turn calls a scaling function
749 // suitable for handling the desired resolutions.
// Scale a UV plane (from NV12).
// Router: computes the 16.16 fixed-point slope for the requested scale,
// applies the clip rectangle, then dispatches to the most specialized
// scaler that matches (1/2, 1/4 box, even integer, copy, vertical-only,
// bilinear up/down, or simple nearest sampling).
static void ScaleUV(const uint8_t* src,
                    int src_stride,
                    int src_width,
                    int src_height,
                    uint8_t* dst,
                    int dst_stride,
                    int dst_width,
                    int dst_height,
                    int clip_x,
                    int clip_y,
                    int clip_width,
                    int clip_height,
                    enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // UV does not support box filter yet, but allow the user to pass it.
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
                                filtering);

  // Negative src_height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  // Negative src_width means mirror; ScaleSlope encoded that in dx.
  src_width = Abs(src_width);
  if (clip_x) {
    // Shift start position and pointers by the clipped-off left columns.
    int64_t clipf = (int64_t)(clip_x)*dx;
    x += (clipf & 0xffff);
    src += (clipf >> 16) * 2;
    dst += clip_x * 2;
  }
  if (clip_y) {
    // Shift start position and pointers by the clipped-off top rows.
    int64_t clipf = (int64_t)(clip_y)*dy;
    y += (clipf & 0xffff);
    src += (clipf >> 16) * src_stride;
    dst += clip_y * dst_stride;
  }

  // Special case for integer step values.
  if (((dx | dy) & 0xffff) == 0) {
    if (!dx || !dy) {  // 1 pixel wide and/or tall.
      filtering = kFilterNone;
    } else {
      // Optimized even scale down. ie 2, 4, 6, 8, 10x.
      if (!(dx & 0x10000) && !(dy & 0x10000)) {
#if HAS_SCALEUVDOWN2
        if (dx == 0x20000) {
          // Optimized 1/2 downsample.
          ScaleUVDown2(src_width, src_height, clip_width, clip_height,
                       src_stride, dst_stride, src, dst, x, dx, y, dy,
                       filtering);
          return;
        }
#endif
#if HAS_SCALEUVDOWN4BOX
        if (dx == 0x40000 && filtering == kFilterBox) {
          // Optimized 1/4 box downsample.
          ScaleUVDown4Box(src_width, src_height, clip_width, clip_height,
                          src_stride, dst_stride, src, dst, x, dx, y, dy);
          return;
        }
#endif
#if HAS_SCALEUVDOWNEVEN
        ScaleUVDownEven(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
        return;
#endif
      }
      // Optimized odd scale down. ie 3, 5, 7, 9x.
      if ((dx & 0x10000) && (dy & 0x10000)) {
        filtering = kFilterNone;
#ifdef HAS_UVCOPY
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
          UVCopy(src + (y >> 16) * src_stride + (x >> 16) * 2, src_stride, dst,
                 dst_stride, clip_width, clip_height);
          return;
        }
#endif
      }
    }
  }
  // HAS_SCALEPLANEVERTICAL
  // NOTE(review): unlike the cases above this path is not guarded by the
  // macro it names — ScalePlaneVertical is called unconditionally.
  if (dx == 0x10000 && (x & 0xffff) == 0) {
    // Arbitrary scale vertically, but unscaled horizontally.
    // bpp of 4 treats a UV pixel pair (2 bytes each) as one 4-byte unit?
    // Verify against ScalePlaneVertical's contract in scale_common.
    ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                       dst_stride, src, dst, x, y, dy, 4, filtering);
    return;
  }

#if HAS_SCALEUVBILINEARUP
  if (filtering && dy < 65536) {
    ScaleUVBilinearUp(src_width, src_height, clip_width, clip_height,
                      src_stride, dst_stride, src, dst, x, dx, y, dy,
                      filtering);
    return;
  }
#endif
#if HAS_SCALEUVBILINEARDOWN
  if (filtering) {
    ScaleUVBilinearDown(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
    return;
  }
#endif
  ScaleUVSimple(src_width, src_height, clip_width, clip_height, src_stride,
                dst_stride, src, dst, x, dx, y, dy);
}
867 
868 // Scale an UV image.
869 LIBYUV_API
UVScale(const uint8_t * src_uv,int src_stride_uv,int src_width,int src_height,uint8_t * dst_uv,int dst_stride_uv,int dst_width,int dst_height,enum FilterMode filtering)870 int UVScale(const uint8_t* src_uv,
871             int src_stride_uv,
872             int src_width,
873             int src_height,
874             uint8_t* dst_uv,
875             int dst_stride_uv,
876             int dst_width,
877             int dst_height,
878             enum FilterMode filtering) {
879   if (!src_uv || src_width == 0 || src_height == 0 || src_width > 32768 ||
880       src_height > 32768 || !dst_uv || dst_width <= 0 || dst_height <= 0) {
881     return -1;
882   }
883   ScaleUV(src_uv, src_stride_uv, src_width, src_height, dst_uv, dst_stride_uv,
884           dst_width, dst_height, 0, 0, dst_width, dst_height, filtering);
885   return 0;
886 }
887 
888 #ifdef __cplusplus
889 }  // extern "C"
890 }  // namespace libyuv
891 #endif
892