/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v.  Negative inputs are negated; zero and
// positive inputs are returned unchanged.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // ScaleARGB ARGB, 1/2
31 // This is an optimized version for scaling down a ARGB to 1/2 of
32 // its original size.
ScaleARGBDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)33 static void ScaleARGBDown2(int src_width, int src_height,
34 int dst_width, int dst_height,
35 int src_stride, int dst_stride,
36 const uint8* src_argb, uint8* dst_argb,
37 int x, int dx, int y, int dy,
38 enum FilterMode filtering) {
39 int j;
40 int row_stride = src_stride * (dy >> 16);
41 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
42 uint8* dst_argb, int dst_width) =
43 filtering == kFilterNone ? ScaleARGBRowDown2_C :
44 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
45 ScaleARGBRowDown2Box_C);
46 assert(dx == 65536 * 2); // Test scale factor of 2.
47 assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
48 // Advance to odd row, even column.
49 if (filtering == kFilterBilinear) {
50 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
51 } else {
52 src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
53 }
54
55 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
56 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
57 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
58 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
59 ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
60 (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
61 ScaleARGBRowDown2Box_SSE2);
62 }
63 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
64 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
65 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
66 ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
67 ScaleARGBRowDown2_NEON;
68 }
69 #endif
70
71 if (filtering == kFilterLinear) {
72 src_stride = 0;
73 }
74 for (j = 0; j < dst_height; ++j) {
75 ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
76 src_argb += row_stride;
77 dst_argb += dst_stride;
78 }
79 }
80
81 // ScaleARGB ARGB, 1/4
82 // This is an optimized version for scaling down a ARGB to 1/4 of
83 // its original size.
ScaleARGBDown4Box(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy)84 static void ScaleARGBDown4Box(int src_width, int src_height,
85 int dst_width, int dst_height,
86 int src_stride, int dst_stride,
87 const uint8* src_argb, uint8* dst_argb,
88 int x, int dx, int y, int dy) {
89 int j;
90 // Allocate 2 rows of ARGB.
91 const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
92 align_buffer_64(row, kRowSize * 2);
93 int row_stride = src_stride * (dy >> 16);
94 void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
95 uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
96 // Advance to odd row, even column.
97 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
98 assert(dx == 65536 * 4); // Test scale factor of 4.
99 assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
100 #if defined(HAS_SCALEARGBROWDOWN2_SSE2)
101 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
102 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
103 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
104 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
105 }
106 #elif defined(HAS_SCALEARGBROWDOWN2_NEON)
107 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
108 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
109 ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
110 }
111 #endif
112 for (j = 0; j < dst_height; ++j) {
113 ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
114 ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
115 row + kRowSize, dst_width * 2);
116 ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
117 src_argb += row_stride;
118 dst_argb += dst_stride;
119 }
120 free_aligned_buffer_64(row);
121 }
122
123 // ScaleARGB ARGB Even
124 // This is an optimized version for scaling down a ARGB to even
125 // multiple of its original size.
ScaleARGBDownEven(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)126 static void ScaleARGBDownEven(int src_width, int src_height,
127 int dst_width, int dst_height,
128 int src_stride, int dst_stride,
129 const uint8* src_argb, uint8* dst_argb,
130 int x, int dx, int y, int dy,
131 enum FilterMode filtering) {
132 int j;
133 int col_step = dx >> 16;
134 int row_stride = (dy >> 16) * src_stride;
135 void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
136 int src_step, uint8* dst_argb, int dst_width) =
137 filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
138 assert(IS_ALIGNED(src_width, 2));
139 assert(IS_ALIGNED(src_height, 2));
140 src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
141 #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
142 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
143 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
144 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
145 ScaleARGBRowDownEven_SSE2;
146 }
147 #elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
148 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
149 IS_ALIGNED(src_argb, 4)) {
150 ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
151 ScaleARGBRowDownEven_NEON;
152 }
153 #endif
154
155 if (filtering == kFilterLinear) {
156 src_stride = 0;
157 }
158 for (j = 0; j < dst_height; ++j) {
159 ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
160 src_argb += row_stride;
161 dst_argb += dst_stride;
162 }
163 }
164
165 // Scale ARGB down with bilinear interpolation.
ScaleARGBBilinearDown(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)166 static void ScaleARGBBilinearDown(int src_width, int src_height,
167 int dst_width, int dst_height,
168 int src_stride, int dst_stride,
169 const uint8* src_argb, uint8* dst_argb,
170 int x, int dx, int y, int dy,
171 enum FilterMode filtering) {
172 int j;
173 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
174 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
175 InterpolateRow_C;
176 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
177 int dst_width, int x, int dx) =
178 (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
179 int64 xlast = x + (int64)(dst_width - 1) * dx;
180 int64 xl = (dx >= 0) ? x : xlast;
181 int64 xr = (dx >= 0) ? xlast : x;
182 int clip_src_width;
183 xl = (xl >> 16) & ~3; // Left edge aligned.
184 xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
185 xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
186 if (xr > src_width) {
187 xr = src_width;
188 }
189 clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
190 src_argb += xl * 4;
191 x -= (int)(xl << 16);
192 #if defined(HAS_INTERPOLATEROW_SSE2)
193 if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
194 InterpolateRow = InterpolateRow_Any_SSE2;
195 if (IS_ALIGNED(clip_src_width, 16)) {
196 InterpolateRow = InterpolateRow_Unaligned_SSE2;
197 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
198 InterpolateRow = InterpolateRow_SSE2;
199 }
200 }
201 }
202 #endif
203 #if defined(HAS_INTERPOLATEROW_SSSE3)
204 if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
205 InterpolateRow = InterpolateRow_Any_SSSE3;
206 if (IS_ALIGNED(clip_src_width, 16)) {
207 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
208 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
209 InterpolateRow = InterpolateRow_SSSE3;
210 }
211 }
212 }
213 #endif
214 #if defined(HAS_INTERPOLATEROW_AVX2)
215 if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
216 InterpolateRow = InterpolateRow_Any_AVX2;
217 if (IS_ALIGNED(clip_src_width, 32)) {
218 InterpolateRow = InterpolateRow_AVX2;
219 }
220 }
221 #endif
222 #if defined(HAS_INTERPOLATEROW_NEON)
223 if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
224 InterpolateRow = InterpolateRow_Any_NEON;
225 if (IS_ALIGNED(clip_src_width, 16)) {
226 InterpolateRow = InterpolateRow_NEON;
227 }
228 }
229 #endif
230 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
231 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
232 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
233 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
234 if (IS_ALIGNED(clip_src_width, 4)) {
235 InterpolateRow = InterpolateRow_MIPS_DSPR2;
236 }
237 }
238 #endif
239 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
240 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
241 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
242 }
243 #endif
244 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
245 // Allocate a row of ARGB.
246 {
247 align_buffer_64(row, clip_src_width * 4);
248
249 const int max_y = (src_height - 1) << 16;
250 if (y > max_y) {
251 y = max_y;
252 }
253 for (j = 0; j < dst_height; ++j) {
254 int yi = y >> 16;
255 const uint8* src = src_argb + yi * src_stride;
256 if (filtering == kFilterLinear) {
257 ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
258 } else {
259 int yf = (y >> 8) & 255;
260 InterpolateRow(row, src, src_stride, clip_src_width, yf);
261 ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
262 }
263 dst_argb += dst_stride;
264 y += dy;
265 if (y > max_y) {
266 y = max_y;
267 }
268 }
269 free_aligned_buffer_64(row);
270 }
271 }
272
273 // Scale ARGB up with bilinear interpolation.
ScaleARGBBilinearUp(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy,enum FilterMode filtering)274 static void ScaleARGBBilinearUp(int src_width, int src_height,
275 int dst_width, int dst_height,
276 int src_stride, int dst_stride,
277 const uint8* src_argb, uint8* dst_argb,
278 int x, int dx, int y, int dy,
279 enum FilterMode filtering) {
280 int j;
281 void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
282 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
283 InterpolateRow_C;
284 void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
285 int dst_width, int x, int dx) =
286 filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
287 const int max_y = (src_height - 1) << 16;
288 #if defined(HAS_INTERPOLATEROW_SSE2)
289 if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
290 InterpolateRow = InterpolateRow_Any_SSE2;
291 if (IS_ALIGNED(dst_width, 4)) {
292 InterpolateRow = InterpolateRow_Unaligned_SSE2;
293 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
294 InterpolateRow = InterpolateRow_SSE2;
295 }
296 }
297 }
298 #endif
299 #if defined(HAS_INTERPOLATEROW_SSSE3)
300 if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
301 InterpolateRow = InterpolateRow_Any_SSSE3;
302 if (IS_ALIGNED(dst_width, 4)) {
303 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
304 if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
305 InterpolateRow = InterpolateRow_SSSE3;
306 }
307 }
308 }
309 #endif
310 #if defined(HAS_INTERPOLATEROW_AVX2)
311 if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
312 InterpolateRow = InterpolateRow_Any_AVX2;
313 if (IS_ALIGNED(dst_width, 8)) {
314 InterpolateRow = InterpolateRow_AVX2;
315 }
316 }
317 #endif
318 #if defined(HAS_INTERPOLATEROW_NEON)
319 if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
320 InterpolateRow = InterpolateRow_Any_NEON;
321 if (IS_ALIGNED(dst_width, 4)) {
322 InterpolateRow = InterpolateRow_NEON;
323 }
324 }
325 #endif
326 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
327 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
328 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
329 InterpolateRow = InterpolateRow_MIPS_DSPR2;
330 }
331 #endif
332 if (src_width >= 32768) {
333 ScaleARGBFilterCols = filtering ?
334 ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
335 }
336 #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
337 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
338 ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
339 }
340 #endif
341 #if defined(HAS_SCALEARGBCOLS_SSE2)
342 if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
343 ScaleARGBFilterCols = ScaleARGBCols_SSE2;
344 }
345 #endif
346 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
347 ScaleARGBFilterCols = ScaleARGBColsUp2_C;
348 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
349 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
350 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
351 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
352 ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
353 }
354 #endif
355 }
356
357 if (y > max_y) {
358 y = max_y;
359 }
360
361 {
362 int yi = y >> 16;
363 const uint8* src = src_argb + yi * src_stride;
364
365 // Allocate 2 rows of ARGB.
366 const int kRowSize = (dst_width * 4 + 15) & ~15;
367 align_buffer_64(row, kRowSize * 2);
368
369 uint8* rowptr = row;
370 int rowstride = kRowSize;
371 int lasty = yi;
372
373 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
374 if (src_height > 1) {
375 src += src_stride;
376 }
377 ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
378 src += src_stride;
379
380 for (j = 0; j < dst_height; ++j) {
381 yi = y >> 16;
382 if (yi != lasty) {
383 if (y > max_y) {
384 y = max_y;
385 yi = y >> 16;
386 src = src_argb + yi * src_stride;
387 }
388 if (yi != lasty) {
389 ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
390 rowptr += rowstride;
391 rowstride = -rowstride;
392 lasty = yi;
393 src += src_stride;
394 }
395 }
396 if (filtering == kFilterLinear) {
397 InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
398 } else {
399 int yf = (y >> 8) & 255;
400 InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
401 }
402 dst_argb += dst_stride;
403 y += dy;
404 }
405 free_aligned_buffer_64(row);
406 }
407 }
408
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
// Work in progress, only compiled when YUVSCALEUP is defined.  Inside the
// main loop each needed source row is converted to ARGB with I422ToARGBRow
// into a temporary row, scaled horizontally into a two-row ping-pong
// buffer, and the two buffered rows are handed to InterpolateRow together
// with the top 8 fraction bits of y.
//   src_y/src_u/src_v and their strides: the three source planes.  The
//       chroma row index is the luma row index shifted right by kYShift.
//   x, dx, y, dy: 16.16 fixed point source start position and step.
// NOTE(review): the two priming calls still feed raw Y rows to the ARGB
// column scaler (see the TODO below); that needs to be resolved before this
// path is enabled.
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
                                     int dst_width, int dst_height,
                                     int src_stride_y,
                                     int src_stride_u,
                                     int src_stride_v,
                                     int dst_stride_argb,
                                     const uint8* src_y,
                                     const uint8* src_u,
                                     const uint8* src_v,
                                     uint8* dst_argb,
                                     int x, int dx, int y, int dy,
                                     enum FilterMode filtering) {
  int j;
  void (*I422ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        I422ToARGBRow = I422ToARGBRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(src_width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
    }
  }
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
  }
#endif

  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
    InterpolateRow = InterpolateRow_MIPS_DSPR2;
  }
#endif

  void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
      int dst_width, int x, int dx) =
      filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
  // Very wide sources use the 64 bit column scalers.
  if (src_width >= 32768) {
    ScaleARGBFilterCols = filtering ?
        ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    // The alignment test uses this function's own parameters.  The Y plane
    // is included because the priming calls below still pass Y rows to the
    // column scaler.
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
        IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  // Largest y that still addresses a row inside the source.
  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8* src_row_y = src_y + yi * src_stride_y;
  const uint8* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8* src_row_v = src_v + uv_yi * src_stride_v;

  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 4 + 15) & ~15;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  // rowptr is the row for yi; rowptr + rowstride is the row below it.
  uint8* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        // NOTE(review): this advance is not bounded by src_height, and the
        // chroma parity test looks inverted relative to the priming code
        // above - confirm before enabling YUVSCALEUP.
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  free_aligned_buffer_64(argb_row);
}
#endif
620
621 // Scale ARGB to/from any dimensions, without interpolation.
622 // Fixed point math is used for performance: The upper 16 bits
623 // of x and dx is the integer part of the source position and
624 // the lower 16 bits are the fixed decimal part.
625
ScaleARGBSimple(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int dx,int y,int dy)626 static void ScaleARGBSimple(int src_width, int src_height,
627 int dst_width, int dst_height,
628 int src_stride, int dst_stride,
629 const uint8* src_argb, uint8* dst_argb,
630 int x, int dx, int y, int dy) {
631 int j;
632 void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
633 int dst_width, int x, int dx) =
634 (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
635 #if defined(HAS_SCALEARGBCOLS_SSE2)
636 if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
637 ScaleARGBCols = ScaleARGBCols_SSE2;
638 }
639 #endif
640 if (src_width * 2 == dst_width && x < 0x8000) {
641 ScaleARGBCols = ScaleARGBColsUp2_C;
642 #if defined(HAS_SCALEARGBCOLSUP2_SSE2)
643 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
644 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
645 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
646 ScaleARGBCols = ScaleARGBColsUp2_SSE2;
647 }
648 #endif
649 }
650
651 for (j = 0; j < dst_height; ++j) {
652 ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
653 dst_width, x, dx);
654 dst_argb += dst_stride;
655 y += dy;
656 }
657 }
658
659 // ScaleARGB a ARGB.
660 // This function in turn calls a scaling function
661 // suitable for handling the desired resolutions.
ScaleARGB(const uint8 * src,int src_stride,int src_width,int src_height,uint8 * dst,int dst_stride,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)662 static void ScaleARGB(const uint8* src, int src_stride,
663 int src_width, int src_height,
664 uint8* dst, int dst_stride,
665 int dst_width, int dst_height,
666 int clip_x, int clip_y, int clip_width, int clip_height,
667 enum FilterMode filtering) {
668 // Initial source x/y coordinate and step values as 16.16 fixed point.
669 int x = 0;
670 int y = 0;
671 int dx = 0;
672 int dy = 0;
673 // ARGB does not support box filter yet, but allow the user to pass it.
674 // Simplify filtering when possible.
675 filtering = ScaleFilterReduce(src_width, src_height,
676 dst_width, dst_height,
677 filtering);
678
679 // Negative src_height means invert the image.
680 if (src_height < 0) {
681 src_height = -src_height;
682 src = src + (src_height - 1) * src_stride;
683 src_stride = -src_stride;
684 }
685 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
686 &x, &y, &dx, &dy);
687 src_width = Abs(src_width);
688 if (clip_x) {
689 int64 clipf = (int64)(clip_x) * dx;
690 x += (clipf & 0xffff);
691 src += (clipf >> 16) * 4;
692 dst += clip_x * 4;
693 }
694 if (clip_y) {
695 int64 clipf = (int64)(clip_y) * dy;
696 y += (clipf & 0xffff);
697 src += (clipf >> 16) * src_stride;
698 dst += clip_y * dst_stride;
699 }
700
701 // Special case for integer step values.
702 if (((dx | dy) & 0xffff) == 0) {
703 if (!dx || !dy) { // 1 pixel wide and/or tall.
704 filtering = kFilterNone;
705 } else {
706 // Optimized even scale down. ie 2, 4, 6, 8, 10x.
707 if (!(dx & 0x10000) && !(dy & 0x10000)) {
708 if (dx == 0x20000) {
709 // Optimized 1/2 downsample.
710 ScaleARGBDown2(src_width, src_height,
711 clip_width, clip_height,
712 src_stride, dst_stride, src, dst,
713 x, dx, y, dy, filtering);
714 return;
715 }
716 if (dx == 0x40000 && filtering == kFilterBox) {
717 // Optimized 1/4 box downsample.
718 ScaleARGBDown4Box(src_width, src_height,
719 clip_width, clip_height,
720 src_stride, dst_stride, src, dst,
721 x, dx, y, dy);
722 return;
723 }
724 ScaleARGBDownEven(src_width, src_height,
725 clip_width, clip_height,
726 src_stride, dst_stride, src, dst,
727 x, dx, y, dy, filtering);
728 return;
729 }
730 // Optimized odd scale down. ie 3, 5, 7, 9x.
731 if ((dx & 0x10000) && (dy & 0x10000)) {
732 filtering = kFilterNone;
733 if (dx == 0x10000 && dy == 0x10000) {
734 // Straight copy.
735 ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
736 dst, dst_stride, clip_width, clip_height);
737 return;
738 }
739 }
740 }
741 }
742 if (dx == 0x10000 && (x & 0xffff) == 0) {
743 // Arbitrary scale vertically, but unscaled vertically.
744 ScalePlaneVertical(src_height,
745 clip_width, clip_height,
746 src_stride, dst_stride, src, dst,
747 x, y, dy, 4, filtering);
748 return;
749 }
750 if (filtering && dy < 65536) {
751 ScaleARGBBilinearUp(src_width, src_height,
752 clip_width, clip_height,
753 src_stride, dst_stride, src, dst,
754 x, dx, y, dy, filtering);
755 return;
756 }
757 if (filtering) {
758 ScaleARGBBilinearDown(src_width, src_height,
759 clip_width, clip_height,
760 src_stride, dst_stride, src, dst,
761 x, dx, y, dy, filtering);
762 return;
763 }
764 ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
765 src_stride, dst_stride, src, dst,
766 x, dx, y, dy);
767 }
768
769 LIBYUV_API
ARGBScaleClip(const uint8 * src_argb,int src_stride_argb,int src_width,int src_height,uint8 * dst_argb,int dst_stride_argb,int dst_width,int dst_height,int clip_x,int clip_y,int clip_width,int clip_height,enum FilterMode filtering)770 int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
771 int src_width, int src_height,
772 uint8* dst_argb, int dst_stride_argb,
773 int dst_width, int dst_height,
774 int clip_x, int clip_y, int clip_width, int clip_height,
775 enum FilterMode filtering) {
776 if (!src_argb || src_width == 0 || src_height == 0 ||
777 !dst_argb || dst_width <= 0 || dst_height <= 0 ||
778 clip_x < 0 || clip_y < 0 ||
779 (clip_x + clip_width) > dst_width ||
780 (clip_y + clip_height) > dst_height) {
781 return -1;
782 }
783 ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
784 dst_argb, dst_stride_argb, dst_width, dst_height,
785 clip_x, clip_y, clip_width, clip_height, filtering);
786 return 0;
787 }
788
789 // Scale an ARGB image.
790 LIBYUV_API
ARGBScale(const uint8 * src_argb,int src_stride_argb,int src_width,int src_height,uint8 * dst_argb,int dst_stride_argb,int dst_width,int dst_height,enum FilterMode filtering)791 int ARGBScale(const uint8* src_argb, int src_stride_argb,
792 int src_width, int src_height,
793 uint8* dst_argb, int dst_stride_argb,
794 int dst_width, int dst_height,
795 enum FilterMode filtering) {
796 if (!src_argb || src_width == 0 || src_height == 0 ||
797 !dst_argb || dst_width <= 0 || dst_height <= 0) {
798 return -1;
799 }
800 ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
801 dst_argb, dst_stride_argb, dst_width, dst_height,
802 0, 0, dst_width, dst_height, filtering);
803 return 0;
804 }
805
806 #ifdef __cplusplus
807 } // extern "C"
808 } // namespace libyuv
809 #endif
810