• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS. All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "libyuv/scale.h"
12 
13 #include <assert.h>
14 #include <string.h>
15 
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h"  // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20 
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25 
// Returns the magnitude of v. The most negative int is not special-cased.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29 
30 // ScaleARGB ARGB, 1/2
31 // This is an optimized version for scaling down a ARGB to 1/2 of
32 // its original size.
ScaleARGBDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)33 static void ScaleARGBDown2(int src_width,
34                            int src_height,
35                            int dst_width,
36                            int dst_height,
37                            int src_stride,
38                            int dst_stride,
39                            const uint8_t* src_argb,
40                            uint8_t* dst_argb,
41                            int x,
42                            int dx,
43                            int y,
44                            int dy,
45                            enum FilterMode filtering) {
46   int j;
47   int row_stride = src_stride * (dy >> 16);
48   void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
49                             uint8_t* dst_argb, int dst_width) =
50       filtering == kFilterNone
51           ? ScaleARGBRowDown2_C
52           : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
53                                         : ScaleARGBRowDown2Box_C);
54   (void)src_width;
55   (void)src_height;
56   (void)dx;
57   assert(dx == 65536 * 2);      // Test scale factor of 2.
58   assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
59   // Advance to odd row, even column.
60   if (filtering == kFilterBilinear) {
61     src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
62   } else {
63     src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
64   }
65 
66 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
67   if (TestCpuFlag(kCpuHasSSE2)) {
68     ScaleARGBRowDown2 =
69         filtering == kFilterNone
70             ? ScaleARGBRowDown2_Any_SSE2
71             : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
72                                           : ScaleARGBRowDown2Box_Any_SSE2);
73     if (IS_ALIGNED(dst_width, 4)) {
74       ScaleARGBRowDown2 =
75           filtering == kFilterNone
76               ? ScaleARGBRowDown2_SSE2
77               : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
78                                             : ScaleARGBRowDown2Box_SSE2);
79     }
80   }
81 #endif
82 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
83   if (TestCpuFlag(kCpuHasNEON)) {
84     ScaleARGBRowDown2 =
85         filtering == kFilterNone
86             ? ScaleARGBRowDown2_Any_NEON
87             : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
88                                           : ScaleARGBRowDown2Box_Any_NEON);
89     if (IS_ALIGNED(dst_width, 8)) {
90       ScaleARGBRowDown2 =
91           filtering == kFilterNone
92               ? ScaleARGBRowDown2_NEON
93               : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
94                                             : ScaleARGBRowDown2Box_NEON);
95     }
96   }
97 #endif
98 #if defined(HAS_SCALEARGBROWDOWN2_MSA)
99   if (TestCpuFlag(kCpuHasMSA)) {
100     ScaleARGBRowDown2 =
101         filtering == kFilterNone
102             ? ScaleARGBRowDown2_Any_MSA
103             : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
104                                           : ScaleARGBRowDown2Box_Any_MSA);
105     if (IS_ALIGNED(dst_width, 4)) {
106       ScaleARGBRowDown2 =
107           filtering == kFilterNone
108               ? ScaleARGBRowDown2_MSA
109               : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
110                                             : ScaleARGBRowDown2Box_MSA);
111     }
112   }
113 #endif
114 
115   if (filtering == kFilterLinear) {
116     src_stride = 0;
117   }
118   for (j = 0; j < dst_height; ++j) {
119     ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
120     src_argb += row_stride;
121     dst_argb += dst_stride;
122   }
123 }
124 
125 // ScaleARGB ARGB, 1/4
126 // This is an optimized version for scaling down a ARGB to 1/4 of
127 // its original size.
ScaleARGBDown4Box(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy)128 static void ScaleARGBDown4Box(int src_width,
129                               int src_height,
130                               int dst_width,
131                               int dst_height,
132                               int src_stride,
133                               int dst_stride,
134                               const uint8_t* src_argb,
135                               uint8_t* dst_argb,
136                               int x,
137                               int dx,
138                               int y,
139                               int dy) {
140   int j;
141   // Allocate 2 rows of ARGB.
142   const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
143   align_buffer_64(row, kRowSize * 2);
144   int row_stride = src_stride * (dy >> 16);
145   void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
146                             uint8_t* dst_argb, int dst_width) =
147       ScaleARGBRowDown2Box_C;
148   // Advance to odd row, even column.
149   src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
150   (void)src_width;
151   (void)src_height;
152   (void)dx;
153   assert(dx == 65536 * 4);      // Test scale factor of 4.
154   assert((dy & 0x3ffff) == 0);  // Test vertical scale is multiple of 4.
155 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
156   if (TestCpuFlag(kCpuHasSSE2)) {
157     ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2;
158     if (IS_ALIGNED(dst_width, 4)) {
159       ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
160     }
161   }
162 #endif
163 #if defined(HAS_SCALEARGBROWDOWN2_NEON)
164   if (TestCpuFlag(kCpuHasNEON)) {
165     ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON;
166     if (IS_ALIGNED(dst_width, 8)) {
167       ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
168     }
169   }
170 #endif
171 
172   for (j = 0; j < dst_height; ++j) {
173     ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
174     ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
175                       dst_width * 2);
176     ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
177     src_argb += row_stride;
178     dst_argb += dst_stride;
179   }
180   free_aligned_buffer_64(row);
181 }
182 
183 // ScaleARGB ARGB Even
184 // This is an optimized version for scaling down a ARGB to even
185 // multiple of its original size.
ScaleARGBDownEven(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)186 static void ScaleARGBDownEven(int src_width,
187                               int src_height,
188                               int dst_width,
189                               int dst_height,
190                               int src_stride,
191                               int dst_stride,
192                               const uint8_t* src_argb,
193                               uint8_t* dst_argb,
194                               int x,
195                               int dx,
196                               int y,
197                               int dy,
198                               enum FilterMode filtering) {
199   int j;
200   int col_step = dx >> 16;
201   int row_stride = (dy >> 16) * src_stride;
202   void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
203                                int src_step, uint8_t* dst_argb, int dst_width) =
204       filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
205   (void)src_width;
206   (void)src_height;
207   assert(IS_ALIGNED(src_width, 2));
208   assert(IS_ALIGNED(src_height, 2));
209   src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
210 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
211   if (TestCpuFlag(kCpuHasSSE2)) {
212     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
213                                      : ScaleARGBRowDownEven_Any_SSE2;
214     if (IS_ALIGNED(dst_width, 4)) {
215       ScaleARGBRowDownEven =
216           filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
217     }
218   }
219 #endif
220 #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
221   if (TestCpuFlag(kCpuHasNEON)) {
222     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
223                                      : ScaleARGBRowDownEven_Any_NEON;
224     if (IS_ALIGNED(dst_width, 4)) {
225       ScaleARGBRowDownEven =
226           filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
227     }
228   }
229 #endif
230 #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
231   if (TestCpuFlag(kCpuHasMSA)) {
232     ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
233                                      : ScaleARGBRowDownEven_Any_MSA;
234     if (IS_ALIGNED(dst_width, 4)) {
235       ScaleARGBRowDownEven =
236           filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
237     }
238   }
239 #endif
240 
241   if (filtering == kFilterLinear) {
242     src_stride = 0;
243   }
244   for (j = 0; j < dst_height; ++j) {
245     ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
246     src_argb += row_stride;
247     dst_argb += dst_stride;
248   }
249 }
250 
251 // Scale ARGB down with bilinear interpolation.
ScaleARGBBilinearDown(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)252 static void ScaleARGBBilinearDown(int src_width,
253                                   int src_height,
254                                   int dst_width,
255                                   int dst_height,
256                                   int src_stride,
257                                   int dst_stride,
258                                   const uint8_t* src_argb,
259                                   uint8_t* dst_argb,
260                                   int x,
261                                   int dx,
262                                   int y,
263                                   int dy,
264                                   enum FilterMode filtering) {
265   int j;
266   void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
267                          ptrdiff_t src_stride, int dst_width,
268                          int source_y_fraction) = InterpolateRow_C;
269   void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
270                               int dst_width, int x, int dx) =
271       (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
272   int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
273   int64_t xl = (dx >= 0) ? x : xlast;
274   int64_t xr = (dx >= 0) ? xlast : x;
275   int clip_src_width;
276   xl = (xl >> 16) & ~3;    // Left edge aligned.
277   xr = (xr >> 16) + 1;     // Right most pixel used.  Bilinear uses 2 pixels.
278   xr = (xr + 1 + 3) & ~3;  // 1 beyond 4 pixel aligned right most pixel.
279   if (xr > src_width) {
280     xr = src_width;
281   }
282   clip_src_width = (int)(xr - xl) * 4;  // Width aligned to 4.
283   src_argb += xl * 4;
284   x -= (int)(xl << 16);
285 #if defined(HAS_INTERPOLATEROW_SSSE3)
286   if (TestCpuFlag(kCpuHasSSSE3)) {
287     InterpolateRow = InterpolateRow_Any_SSSE3;
288     if (IS_ALIGNED(clip_src_width, 16)) {
289       InterpolateRow = InterpolateRow_SSSE3;
290     }
291   }
292 #endif
293 #if defined(HAS_INTERPOLATEROW_AVX2)
294   if (TestCpuFlag(kCpuHasAVX2)) {
295     InterpolateRow = InterpolateRow_Any_AVX2;
296     if (IS_ALIGNED(clip_src_width, 32)) {
297       InterpolateRow = InterpolateRow_AVX2;
298     }
299   }
300 #endif
301 #if defined(HAS_INTERPOLATEROW_NEON)
302   if (TestCpuFlag(kCpuHasNEON)) {
303     InterpolateRow = InterpolateRow_Any_NEON;
304     if (IS_ALIGNED(clip_src_width, 16)) {
305       InterpolateRow = InterpolateRow_NEON;
306     }
307   }
308 #endif
309 #if defined(HAS_INTERPOLATEROW_MSA)
310   if (TestCpuFlag(kCpuHasMSA)) {
311     InterpolateRow = InterpolateRow_Any_MSA;
312     if (IS_ALIGNED(clip_src_width, 32)) {
313       InterpolateRow = InterpolateRow_MSA;
314     }
315   }
316 #endif
317 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
318   if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
319     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
320   }
321 #endif
322 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
323   if (TestCpuFlag(kCpuHasNEON)) {
324     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
325     if (IS_ALIGNED(dst_width, 4)) {
326       ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
327     }
328   }
329 #endif
330 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
331   if (TestCpuFlag(kCpuHasMSA)) {
332     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
333     if (IS_ALIGNED(dst_width, 8)) {
334       ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
335     }
336   }
337 #endif
338   // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
339   // Allocate a row of ARGB.
340   {
341     align_buffer_64(row, clip_src_width * 4);
342 
343     const int max_y = (src_height - 1) << 16;
344     if (y > max_y) {
345       y = max_y;
346     }
347     for (j = 0; j < dst_height; ++j) {
348       int yi = y >> 16;
349       const uint8_t* src = src_argb + yi * src_stride;
350       if (filtering == kFilterLinear) {
351         ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
352       } else {
353         int yf = (y >> 8) & 255;
354         InterpolateRow(row, src, src_stride, clip_src_width, yf);
355         ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
356       }
357       dst_argb += dst_stride;
358       y += dy;
359       if (y > max_y) {
360         y = max_y;
361       }
362     }
363     free_aligned_buffer_64(row);
364   }
365 }
366 
367 // Scale ARGB up with bilinear interpolation.
ScaleARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)368 static void ScaleARGBBilinearUp(int src_width,
369                                 int src_height,
370                                 int dst_width,
371                                 int dst_height,
372                                 int src_stride,
373                                 int dst_stride,
374                                 const uint8_t* src_argb,
375                                 uint8_t* dst_argb,
376                                 int x,
377                                 int dx,
378                                 int y,
379                                 int dy,
380                                 enum FilterMode filtering) {
381   int j;
382   void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
383                          ptrdiff_t src_stride, int dst_width,
384                          int source_y_fraction) = InterpolateRow_C;
385   void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
386                               int dst_width, int x, int dx) =
387       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
388   const int max_y = (src_height - 1) << 16;
389 #if defined(HAS_INTERPOLATEROW_SSSE3)
390   if (TestCpuFlag(kCpuHasSSSE3)) {
391     InterpolateRow = InterpolateRow_Any_SSSE3;
392     if (IS_ALIGNED(dst_width, 4)) {
393       InterpolateRow = InterpolateRow_SSSE3;
394     }
395   }
396 #endif
397 #if defined(HAS_INTERPOLATEROW_AVX2)
398   if (TestCpuFlag(kCpuHasAVX2)) {
399     InterpolateRow = InterpolateRow_Any_AVX2;
400     if (IS_ALIGNED(dst_width, 8)) {
401       InterpolateRow = InterpolateRow_AVX2;
402     }
403   }
404 #endif
405 #if defined(HAS_INTERPOLATEROW_NEON)
406   if (TestCpuFlag(kCpuHasNEON)) {
407     InterpolateRow = InterpolateRow_Any_NEON;
408     if (IS_ALIGNED(dst_width, 4)) {
409       InterpolateRow = InterpolateRow_NEON;
410     }
411   }
412 #endif
413 #if defined(HAS_INTERPOLATEROW_MSA)
414   if (TestCpuFlag(kCpuHasMSA)) {
415     InterpolateRow = InterpolateRow_Any_MSA;
416     if (IS_ALIGNED(dst_width, 8)) {
417       InterpolateRow = InterpolateRow_MSA;
418     }
419   }
420 #endif
421   if (src_width >= 32768) {
422     ScaleARGBFilterCols =
423         filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
424   }
425 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
426   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
427     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
428   }
429 #endif
430 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
431   if (filtering && TestCpuFlag(kCpuHasNEON)) {
432     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
433     if (IS_ALIGNED(dst_width, 4)) {
434       ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
435     }
436   }
437 #endif
438 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
439   if (filtering && TestCpuFlag(kCpuHasMSA)) {
440     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
441     if (IS_ALIGNED(dst_width, 8)) {
442       ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
443     }
444   }
445 #endif
446 #if defined(HAS_SCALEARGBCOLS_SSE2)
447   if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
448     ScaleARGBFilterCols = ScaleARGBCols_SSE2;
449   }
450 #endif
451 #if defined(HAS_SCALEARGBCOLS_NEON)
452   if (!filtering && TestCpuFlag(kCpuHasNEON)) {
453     ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
454     if (IS_ALIGNED(dst_width, 8)) {
455       ScaleARGBFilterCols = ScaleARGBCols_NEON;
456     }
457   }
458 #endif
459 #if defined(HAS_SCALEARGBCOLS_MSA)
460   if (!filtering && TestCpuFlag(kCpuHasMSA)) {
461     ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
462     if (IS_ALIGNED(dst_width, 4)) {
463       ScaleARGBFilterCols = ScaleARGBCols_MSA;
464     }
465   }
466 #endif
467   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
468     ScaleARGBFilterCols = ScaleARGBColsUp2_C;
469 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
470     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
471       ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
472     }
473 #endif
474   }
475 
476   if (y > max_y) {
477     y = max_y;
478   }
479 
480   {
481     int yi = y >> 16;
482     const uint8_t* src = src_argb + yi * src_stride;
483 
484     // Allocate 2 rows of ARGB.
485     const int kRowSize = (dst_width * 4 + 31) & ~31;
486     align_buffer_64(row, kRowSize * 2);
487 
488     uint8_t* rowptr = row;
489     int rowstride = kRowSize;
490     int lasty = yi;
491 
492     ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
493     if (src_height > 1) {
494       src += src_stride;
495     }
496     ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
497     src += src_stride;
498 
499     for (j = 0; j < dst_height; ++j) {
500       yi = y >> 16;
501       if (yi != lasty) {
502         if (y > max_y) {
503           y = max_y;
504           yi = y >> 16;
505           src = src_argb + yi * src_stride;
506         }
507         if (yi != lasty) {
508           ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
509           rowptr += rowstride;
510           rowstride = -rowstride;
511           lasty = yi;
512           src += src_stride;
513         }
514       }
515       if (filtering == kFilterLinear) {
516         InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
517       } else {
518         int yf = (y >> 8) & 255;
519         InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
520       }
521       dst_argb += dst_stride;
522       y += dy;
523     }
524     free_aligned_buffer_64(row);
525   }
526 }
527 
528 #ifdef YUVSCALEUP
529 // Scale YUV to ARGB up with bilinear interpolation.
ScaleYUVToARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride_y,int src_stride_u,int src_stride_v,int dst_stride_argb,const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)530 static void ScaleYUVToARGBBilinearUp(int src_width,
531                                      int src_height,
532                                      int dst_width,
533                                      int dst_height,
534                                      int src_stride_y,
535                                      int src_stride_u,
536                                      int src_stride_v,
537                                      int dst_stride_argb,
538                                      const uint8_t* src_y,
539                                      const uint8_t* src_u,
540                                      const uint8_t* src_v,
541                                      uint8_t* dst_argb,
542                                      int x,
543                                      int dx,
544                                      int y,
545                                      int dy,
546                                      enum FilterMode filtering) {
547   int j;
548   void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
549                         const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
550       I422ToARGBRow_C;
551 #if defined(HAS_I422TOARGBROW_SSSE3)
552   if (TestCpuFlag(kCpuHasSSSE3)) {
553     I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
554     if (IS_ALIGNED(src_width, 8)) {
555       I422ToARGBRow = I422ToARGBRow_SSSE3;
556     }
557   }
558 #endif
559 #if defined(HAS_I422TOARGBROW_AVX2)
560   if (TestCpuFlag(kCpuHasAVX2)) {
561     I422ToARGBRow = I422ToARGBRow_Any_AVX2;
562     if (IS_ALIGNED(src_width, 16)) {
563       I422ToARGBRow = I422ToARGBRow_AVX2;
564     }
565   }
566 #endif
567 #if defined(HAS_I422TOARGBROW_NEON)
568   if (TestCpuFlag(kCpuHasNEON)) {
569     I422ToARGBRow = I422ToARGBRow_Any_NEON;
570     if (IS_ALIGNED(src_width, 8)) {
571       I422ToARGBRow = I422ToARGBRow_NEON;
572     }
573   }
574 #endif
575 #if defined(HAS_I422TOARGBROW_MSA)
576   if (TestCpuFlag(kCpuHasMSA)) {
577     I422ToARGBRow = I422ToARGBRow_Any_MSA;
578     if (IS_ALIGNED(src_width, 8)) {
579       I422ToARGBRow = I422ToARGBRow_MSA;
580     }
581   }
582 #endif
583 
584   void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
585                          ptrdiff_t src_stride, int dst_width,
586                          int source_y_fraction) = InterpolateRow_C;
587 #if defined(HAS_INTERPOLATEROW_SSSE3)
588   if (TestCpuFlag(kCpuHasSSSE3)) {
589     InterpolateRow = InterpolateRow_Any_SSSE3;
590     if (IS_ALIGNED(dst_width, 4)) {
591       InterpolateRow = InterpolateRow_SSSE3;
592     }
593   }
594 #endif
595 #if defined(HAS_INTERPOLATEROW_AVX2)
596   if (TestCpuFlag(kCpuHasAVX2)) {
597     InterpolateRow = InterpolateRow_Any_AVX2;
598     if (IS_ALIGNED(dst_width, 8)) {
599       InterpolateRow = InterpolateRow_AVX2;
600     }
601   }
602 #endif
603 #if defined(HAS_INTERPOLATEROW_NEON)
604   if (TestCpuFlag(kCpuHasNEON)) {
605     InterpolateRow = InterpolateRow_Any_NEON;
606     if (IS_ALIGNED(dst_width, 4)) {
607       InterpolateRow = InterpolateRow_NEON;
608     }
609   }
610 #endif
611 #if defined(HAS_INTERPOLATEROW_MSA)
612   if (TestCpuFlag(kCpuHasMSA)) {
613     InterpolateRow = InterpolateRow_Any_MSA;
614     if (IS_ALIGNED(dst_width, 8)) {
615       InterpolateRow = InterpolateRow_MSA;
616     }
617   }
618 #endif
619 
620   void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
621                               int dst_width, int x, int dx) =
622       filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
623   if (src_width >= 32768) {
624     ScaleARGBFilterCols =
625         filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
626   }
627 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
628   if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
629     ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
630   }
631 #endif
632 #if defined(HAS_SCALEARGBFILTERCOLS_NEON)
633   if (filtering && TestCpuFlag(kCpuHasNEON)) {
634     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
635     if (IS_ALIGNED(dst_width, 4)) {
636       ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
637     }
638   }
639 #endif
640 #if defined(HAS_SCALEARGBFILTERCOLS_MSA)
641   if (filtering && TestCpuFlag(kCpuHasMSA)) {
642     ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
643     if (IS_ALIGNED(dst_width, 8)) {
644       ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
645     }
646   }
647 #endif
648 #if defined(HAS_SCALEARGBCOLS_SSE2)
649   if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
650     ScaleARGBFilterCols = ScaleARGBCols_SSE2;
651   }
652 #endif
653 #if defined(HAS_SCALEARGBCOLS_NEON)
654   if (!filtering && TestCpuFlag(kCpuHasNEON)) {
655     ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
656     if (IS_ALIGNED(dst_width, 8)) {
657       ScaleARGBFilterCols = ScaleARGBCols_NEON;
658     }
659   }
660 #endif
661 #if defined(HAS_SCALEARGBCOLS_MSA)
662   if (!filtering && TestCpuFlag(kCpuHasMSA)) {
663     ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
664     if (IS_ALIGNED(dst_width, 4)) {
665       ScaleARGBFilterCols = ScaleARGBCols_MSA;
666     }
667   }
668 #endif
669   if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
670     ScaleARGBFilterCols = ScaleARGBColsUp2_C;
671 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
672     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
673       ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
674     }
675 #endif
676   }
677 
678   const int max_y = (src_height - 1) << 16;
679   if (y > max_y) {
680     y = max_y;
681   }
682   const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
683   int yi = y >> 16;
684   int uv_yi = yi >> kYShift;
685   const uint8_t* src_row_y = src_y + yi * src_stride_y;
686   const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
687   const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;
688 
689   // Allocate 2 rows of ARGB.
690   const int kRowSize = (dst_width * 4 + 31) & ~31;
691   align_buffer_64(row, kRowSize * 2);
692 
693   // Allocate 1 row of ARGB for source conversion.
694   align_buffer_64(argb_row, src_width * 4);
695 
696   uint8_t* rowptr = row;
697   int rowstride = kRowSize;
698   int lasty = yi;
699 
700   // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
701   ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
702   if (src_height > 1) {
703     src_row_y += src_stride_y;
704     if (yi & 1) {
705       src_row_u += src_stride_u;
706       src_row_v += src_stride_v;
707     }
708   }
709   ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
710   if (src_height > 2) {
711     src_row_y += src_stride_y;
712     if (!(yi & 1)) {
713       src_row_u += src_stride_u;
714       src_row_v += src_stride_v;
715     }
716   }
717 
718   for (j = 0; j < dst_height; ++j) {
719     yi = y >> 16;
720     if (yi != lasty) {
721       if (y > max_y) {
722         y = max_y;
723         yi = y >> 16;
724         uv_yi = yi >> kYShift;
725         src_row_y = src_y + yi * src_stride_y;
726         src_row_u = src_u + uv_yi * src_stride_u;
727         src_row_v = src_v + uv_yi * src_stride_v;
728       }
729       if (yi != lasty) {
730         // TODO(fbarchard): Convert the clipped region of row.
731         I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
732         ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
733         rowptr += rowstride;
734         rowstride = -rowstride;
735         lasty = yi;
736         src_row_y += src_stride_y;
737         if (yi & 1) {
738           src_row_u += src_stride_u;
739           src_row_v += src_stride_v;
740         }
741       }
742     }
743     if (filtering == kFilterLinear) {
744       InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
745     } else {
746       int yf = (y >> 8) & 255;
747       InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
748     }
749     dst_argb += dst_stride_argb;
750     y += dy;
751   }
752   free_aligned_buffer_64(row);
753   free_aligned_buffer_64(row_argb);
754 }
755 #endif
756 
757 // Scale ARGB to/from any dimensions, without interpolation.
758 // Fixed point math is used for performance: The upper 16 bits
759 // of x and dx is the integer part of the source position and
760 // the lower 16 bits are the fixed decimal part.
761 
ScaleARGBSimple(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_argb,uint8_t * dst_argb,int x,int dx,int y,int dy)762 static void ScaleARGBSimple(int src_width,
763                             int src_height,
764                             int dst_width,
765                             int dst_height,
766                             int src_stride,
767                             int dst_stride,
768                             const uint8_t* src_argb,
769                             uint8_t* dst_argb,
770                             int x,
771                             int dx,
772                             int y,
773                             int dy) {
774   int j;
775   void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
776                         int dst_width, int x, int dx) =
777       (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
778   (void)src_height;
779 #if defined(HAS_SCALEARGBCOLS_SSE2)
780   if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
781     ScaleARGBCols = ScaleARGBCols_SSE2;
782   }
783 #endif
784 #if defined(HAS_SCALEARGBCOLS_NEON)
785   if (TestCpuFlag(kCpuHasNEON)) {
786     ScaleARGBCols = ScaleARGBCols_Any_NEON;
787     if (IS_ALIGNED(dst_width, 8)) {
788       ScaleARGBCols = ScaleARGBCols_NEON;
789     }
790   }
791 #endif
792 #if defined(HAS_SCALEARGBCOLS_MSA)
793   if (TestCpuFlag(kCpuHasMSA)) {
794     ScaleARGBCols = ScaleARGBCols_Any_MSA;
795     if (IS_ALIGNED(dst_width, 4)) {
796       ScaleARGBCols = ScaleARGBCols_MSA;
797     }
798   }
799 #endif
800   if (src_width * 2 == dst_width && x < 0x8000) {
801     ScaleARGBCols = ScaleARGBColsUp2_C;
802 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
803     if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
804       ScaleARGBCols = ScaleARGBColsUp2_SSE2;
805     }
806 #endif
807   }
808 
809   for (j = 0; j < dst_height; ++j) {
810     ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
811                   dx);
812     dst_argb += dst_stride;
813     y += dy;
814   }
815 }
816 
817 // ScaleARGB a ARGB.
818 // This function in turn calls a scaling function
819 // suitable for handling the desired resolutions.
ScaleARGB(const uint8_t * src,int src_stride,int src_width,int src_height,uint8_t * dst,int dst_stride,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)820 static void ScaleARGB(const uint8_t* src,
821                       int src_stride,
822                       int src_width,
823                       int src_height,
824                       uint8_t* dst,
825                       int dst_stride,
826                       int dst_width,
827                       int dst_height,
828                       int clip_x,
829                       int clip_y,
830                       int clip_width,
831                       int clip_height,
832                       enum FilterMode filtering) {
833   // Initial source x/y coordinate and step values as 16.16 fixed point.
834   int x = 0;
835   int y = 0;
836   int dx = 0;
837   int dy = 0;
838   // ARGB does not support box filter yet, but allow the user to pass it.
839   // Simplify filtering when possible.
840   filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
841                                 filtering);
842 
843   // Negative src_height means invert the image.
844   if (src_height < 0) {
845     src_height = -src_height;
846     src = src + (src_height - 1) * src_stride;
847     src_stride = -src_stride;
848   }
849   ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
850              &dx, &dy);
851   src_width = Abs(src_width);
852   if (clip_x) {
853     int64_t clipf = (int64_t)(clip_x)*dx;
854     x += (clipf & 0xffff);
855     src += (clipf >> 16) * 4;
856     dst += clip_x * 4;
857   }
858   if (clip_y) {
859     int64_t clipf = (int64_t)(clip_y)*dy;
860     y += (clipf & 0xffff);
861     src += (clipf >> 16) * src_stride;
862     dst += clip_y * dst_stride;
863   }
864 
865   // Special case for integer step values.
866   if (((dx | dy) & 0xffff) == 0) {
867     if (!dx || !dy) {  // 1 pixel wide and/or tall.
868       filtering = kFilterNone;
869     } else {
870       // Optimized even scale down. ie 2, 4, 6, 8, 10x.
871       if (!(dx & 0x10000) && !(dy & 0x10000)) {
872         if (dx == 0x20000) {
873           // Optimized 1/2 downsample.
874           ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
875                          src_stride, dst_stride, src, dst, x, dx, y, dy,
876                          filtering);
877           return;
878         }
879         if (dx == 0x40000 && filtering == kFilterBox) {
880           // Optimized 1/4 box downsample.
881           ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
882                             src_stride, dst_stride, src, dst, x, dx, y, dy);
883           return;
884         }
885         ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
886                           src_stride, dst_stride, src, dst, x, dx, y, dy,
887                           filtering);
888         return;
889       }
890       // Optimized odd scale down. ie 3, 5, 7, 9x.
891       if ((dx & 0x10000) && (dy & 0x10000)) {
892         filtering = kFilterNone;
893         if (dx == 0x10000 && dy == 0x10000) {
894           // Straight copy.
895           ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
896                    dst, dst_stride, clip_width, clip_height);
897           return;
898         }
899       }
900     }
901   }
902   if (dx == 0x10000 && (x & 0xffff) == 0) {
903     // Arbitrary scale vertically, but unscaled vertically.
904     ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
905                        dst_stride, src, dst, x, y, dy, 4, filtering);
906     return;
907   }
908   if (filtering && dy < 65536) {
909     ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
910                         src_stride, dst_stride, src, dst, x, dx, y, dy,
911                         filtering);
912     return;
913   }
914   if (filtering) {
915     ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
916                           src_stride, dst_stride, src, dst, x, dx, y, dy,
917                           filtering);
918     return;
919   }
920   ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
921                   dst_stride, src, dst, x, dx, y, dy);
922 }
923 
924 LIBYUV_API
ARGBScaleClip(const uint8_t * src_argb,int src_stride_argb,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)925 int ARGBScaleClip(const uint8_t* src_argb,
926                   int src_stride_argb,
927                   int src_width,
928                   int src_height,
929                   uint8_t* dst_argb,
930                   int dst_stride_argb,
931                   int dst_width,
932                   int dst_height,
933                   int clip_x,
934                   int clip_y,
935                   int clip_width,
936                   int clip_height,
937                   enum FilterMode filtering) {
938   if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
939       dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
940       clip_width > 32768 || clip_height > 32768 ||
941       (clip_x + clip_width) > dst_width ||
942       (clip_y + clip_height) > dst_height) {
943     return -1;
944   }
945   ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
946             dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
947             clip_height, filtering);
948   return 0;
949 }
950 
951 // Scale an ARGB image.
952 LIBYUV_API
ARGBScale(const uint8_t * src_argb,int src_stride_argb,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,int dst_width,int dst_height,enum FilterMode filtering)953 int ARGBScale(const uint8_t* src_argb,
954               int src_stride_argb,
955               int src_width,
956               int src_height,
957               uint8_t* dst_argb,
958               int dst_stride_argb,
959               int dst_width,
960               int dst_height,
961               enum FilterMode filtering) {
962   if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
963       src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
964     return -1;
965   }
966   ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
967             dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
968             filtering);
969   return 0;
970 }
971 
972 // Scale with YUV conversion to ARGB and clipping.
973 LIBYUV_API
YUVToARGBScaleClip(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,uint32_t src_fourcc,int src_width,int src_height,uint8_t * dst_argb,int dst_stride_argb,uint32_t dst_fourcc,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)974 int YUVToARGBScaleClip(const uint8_t* src_y,
975                        int src_stride_y,
976                        const uint8_t* src_u,
977                        int src_stride_u,
978                        const uint8_t* src_v,
979                        int src_stride_v,
980                        uint32_t src_fourcc,
981                        int src_width,
982                        int src_height,
983                        uint8_t* dst_argb,
984                        int dst_stride_argb,
985                        uint32_t dst_fourcc,
986                        int dst_width,
987                        int dst_height,
988                        int clip_x,
989                        int clip_y,
990                        int clip_width,
991                        int clip_height,
992                        enum FilterMode filtering) {
993   uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
994   int r;
995   (void)src_fourcc;  // TODO(fbarchard): implement and/or assert.
996   (void)dst_fourcc;
997   I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
998              argb_buffer, src_width * 4, src_width, src_height);
999 
1000   r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
1001                     dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
1002                     clip_width, clip_height, filtering);
1003   free(argb_buffer);
1004   return r;
1005 }
1006 
1007 #ifdef __cplusplus
1008 }  // extern "C"
1009 }  // namespace libyuv
1010 #endif
1011