1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyPlane
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
26 static __inline int Abs(int v) {
27 return v >= 0 ? v : -v;
28 }
29
30 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
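// For example, SUBSAMPLE(15, 1, 1) = 8 and SUBSAMPLE(-15, 1, 1) = -8: the
// half dimension rounded away from zero, as used for the chroma plane sizes
// in I420Scale below.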
31
32 // Scale plane, 1/2
33 // This is an optimized version for scaling down a plane to 1/2 of
34 // its original size.
35
36 static void ScalePlaneDown2(int src_width,
37 int src_height,
38 int dst_width,
39 int dst_height,
40 int src_stride,
41 int dst_stride,
42 const uint8_t* src_ptr,
43 uint8_t* dst_ptr,
44 enum FilterMode filtering) {
45 int y;
46 void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
47 uint8_t* dst_ptr, int dst_width) =
48 filtering == kFilterNone
49 ? ScaleRowDown2_C
50 : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
51 : ScaleRowDown2Box_C);
52 int row_stride = src_stride << 1;
53 (void)src_width;
54 (void)src_height;
55 if (!filtering) {
56 src_ptr += src_stride; // Point to odd rows.
57 src_stride = 0;
58 }
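  // For kFilterNone the plane is decimated: the odd-row offset above,
  // together with the column picked by the unfiltered row kernel, keeps a
  // single representative pixel from each 2x2 block.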
59
60 #if defined(HAS_SCALEROWDOWN2_NEON)
61 if (TestCpuFlag(kCpuHasNEON)) {
62 ScaleRowDown2 =
63 filtering == kFilterNone
64 ? ScaleRowDown2_Any_NEON
65 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
66 : ScaleRowDown2Box_Any_NEON);
67 if (IS_ALIGNED(dst_width, 16)) {
68 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
69 : (filtering == kFilterLinear
70 ? ScaleRowDown2Linear_NEON
71 : ScaleRowDown2Box_NEON);
72 }
73 }
74 #endif
75 #if defined(HAS_SCALEROWDOWN2_SSSE3)
76 if (TestCpuFlag(kCpuHasSSSE3)) {
77 ScaleRowDown2 =
78 filtering == kFilterNone
79 ? ScaleRowDown2_Any_SSSE3
80 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
81 : ScaleRowDown2Box_Any_SSSE3);
82 if (IS_ALIGNED(dst_width, 16)) {
83 ScaleRowDown2 =
84 filtering == kFilterNone
85 ? ScaleRowDown2_SSSE3
86 : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
87 : ScaleRowDown2Box_SSSE3);
88 }
89 }
90 #endif
91 #if defined(HAS_SCALEROWDOWN2_AVX2)
92 if (TestCpuFlag(kCpuHasAVX2)) {
93 ScaleRowDown2 =
94 filtering == kFilterNone
95 ? ScaleRowDown2_Any_AVX2
96 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
97 : ScaleRowDown2Box_Any_AVX2);
98 if (IS_ALIGNED(dst_width, 32)) {
99 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
100 : (filtering == kFilterLinear
101 ? ScaleRowDown2Linear_AVX2
102 : ScaleRowDown2Box_AVX2);
103 }
104 }
105 #endif
106 #if defined(HAS_SCALEROWDOWN2_MSA)
107 if (TestCpuFlag(kCpuHasMSA)) {
108 ScaleRowDown2 =
109 filtering == kFilterNone
110 ? ScaleRowDown2_Any_MSA
111 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
112 : ScaleRowDown2Box_Any_MSA);
113 if (IS_ALIGNED(dst_width, 32)) {
114 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
115 : (filtering == kFilterLinear
116 ? ScaleRowDown2Linear_MSA
117 : ScaleRowDown2Box_MSA);
118 }
119 }
120 #endif
121 #if defined(HAS_SCALEROWDOWN2_MMI)
122 if (TestCpuFlag(kCpuHasMMI)) {
123 ScaleRowDown2 =
124 filtering == kFilterNone
125 ? ScaleRowDown2_Any_MMI
126 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MMI
127 : ScaleRowDown2Box_Any_MMI);
128 if (IS_ALIGNED(dst_width, 8)) {
129 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MMI
130 : (filtering == kFilterLinear
131 ? ScaleRowDown2Linear_MMI
132 : ScaleRowDown2Box_MMI);
133 }
134 }
135 #endif
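  // The _Any_ variants above handle dst_width values that are not a multiple
  // of the vector width (typically finishing the leftover columns in C); the
  // exact variants require the IS_ALIGNED width checked for each ISA.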
136
137 if (filtering == kFilterLinear) {
138 src_stride = 0;
139 }
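  // With src_stride forced to 0, the Linear/Box row functions read the same
  // source row for both taps, so only horizontal filtering is applied.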
140 // TODO(fbarchard): Loop through source height to allow odd height.
141 for (y = 0; y < dst_height; ++y) {
142 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
143 src_ptr += row_stride;
144 dst_ptr += dst_stride;
145 }
146 }
147
148 static void ScalePlaneDown2_16(int src_width,
149 int src_height,
150 int dst_width,
151 int dst_height,
152 int src_stride,
153 int dst_stride,
154 const uint16_t* src_ptr,
155 uint16_t* dst_ptr,
156 enum FilterMode filtering) {
157 int y;
158 void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
159 uint16_t* dst_ptr, int dst_width) =
160 filtering == kFilterNone
161 ? ScaleRowDown2_16_C
162 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
163 : ScaleRowDown2Box_16_C);
164 int row_stride = src_stride << 1;
165 (void)src_width;
166 (void)src_height;
167 if (!filtering) {
168 src_ptr += src_stride; // Point to odd rows.
169 src_stride = 0;
170 }
171
172 #if defined(HAS_SCALEROWDOWN2_16_NEON)
173 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
174 ScaleRowDown2 =
175 filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
176 }
177 #endif
178 #if defined(HAS_SCALEROWDOWN2_16_SSE2)
179 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
180 ScaleRowDown2 =
181 filtering == kFilterNone
182 ? ScaleRowDown2_16_SSE2
183 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
184 : ScaleRowDown2Box_16_SSE2);
185 }
186 #endif
187 #if defined(HAS_SCALEROWDOWN2_16_MMI)
188 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
189 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_MMI
190 : (filtering == kFilterLinear
191 ? ScaleRowDown2Linear_16_MMI
192 : ScaleRowDown2Box_16_MMI);
193 }
194 #endif
195
196 if (filtering == kFilterLinear) {
197 src_stride = 0;
198 }
199 // TODO(fbarchard): Loop through source height to allow odd height.
200 for (y = 0; y < dst_height; ++y) {
201 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
202 src_ptr += row_stride;
203 dst_ptr += dst_stride;
204 }
205 }
206
207 // Scale plane, 1/4
208 // This is an optimized version for scaling down a plane to 1/4 of
209 // its original size.
210
211 static void ScalePlaneDown4(int src_width,
212 int src_height,
213 int dst_width,
214 int dst_height,
215 int src_stride,
216 int dst_stride,
217 const uint8_t* src_ptr,
218 uint8_t* dst_ptr,
219 enum FilterMode filtering) {
220 int y;
221 void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
222 uint8_t* dst_ptr, int dst_width) =
223 filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
224 int row_stride = src_stride << 2;
225 (void)src_width;
226 (void)src_height;
227 if (!filtering) {
228 src_ptr += src_stride * 2; // Point to row 2.
229 src_stride = 0;
230 }
231 #if defined(HAS_SCALEROWDOWN4_NEON)
232 if (TestCpuFlag(kCpuHasNEON)) {
233 ScaleRowDown4 =
234 filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
235 if (IS_ALIGNED(dst_width, 8)) {
236 ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
237 }
238 }
239 #endif
240 #if defined(HAS_SCALEROWDOWN4_SSSE3)
241 if (TestCpuFlag(kCpuHasSSSE3)) {
242 ScaleRowDown4 =
243 filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
244 if (IS_ALIGNED(dst_width, 8)) {
245 ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
246 }
247 }
248 #endif
249 #if defined(HAS_SCALEROWDOWN4_AVX2)
250 if (TestCpuFlag(kCpuHasAVX2)) {
251 ScaleRowDown4 =
252 filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
253 if (IS_ALIGNED(dst_width, 16)) {
254 ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
255 }
256 }
257 #endif
258 #if defined(HAS_SCALEROWDOWN4_MSA)
259 if (TestCpuFlag(kCpuHasMSA)) {
260 ScaleRowDown4 =
261 filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
262 if (IS_ALIGNED(dst_width, 16)) {
263 ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
264 }
265 }
266 #endif
267 #if defined(HAS_SCALEROWDOWN4_MMI)
268 if (TestCpuFlag(kCpuHasMMI)) {
269 ScaleRowDown4 =
270 filtering ? ScaleRowDown4Box_Any_MMI : ScaleRowDown4_Any_MMI;
271 if (IS_ALIGNED(dst_width, 8)) {
272 ScaleRowDown4 = filtering ? ScaleRowDown4Box_MMI : ScaleRowDown4_MMI;
273 }
274 }
275 #endif
276
277 if (filtering == kFilterLinear) {
278 src_stride = 0;
279 }
280 for (y = 0; y < dst_height; ++y) {
281 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
282 src_ptr += row_stride;
283 dst_ptr += dst_stride;
284 }
285 }
286
287 static void ScalePlaneDown4_16(int src_width,
288 int src_height,
289 int dst_width,
290 int dst_height,
291 int src_stride,
292 int dst_stride,
293 const uint16_t* src_ptr,
294 uint16_t* dst_ptr,
295 enum FilterMode filtering) {
296 int y;
297 void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
298 uint16_t* dst_ptr, int dst_width) =
299 filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
300 int row_stride = src_stride << 2;
301 (void)src_width;
302 (void)src_height;
303 if (!filtering) {
304 src_ptr += src_stride * 2; // Point to row 2.
305 src_stride = 0;
306 }
307 #if defined(HAS_SCALEROWDOWN4_16_NEON)
308 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
309 ScaleRowDown4 =
310 filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
311 }
312 #endif
313 #if defined(HAS_SCALEROWDOWN4_16_SSE2)
314 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
315 ScaleRowDown4 =
316 filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
317 }
318 #endif
319 #if defined(HAS_SCALEROWDOWN4_16_MMI)
320 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
321 ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_MMI : ScaleRowDown4_16_MMI;
322 }
323 #endif
324
325 if (filtering == kFilterLinear) {
326 src_stride = 0;
327 }
328 for (y = 0; y < dst_height; ++y) {
329 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
330 src_ptr += row_stride;
331 dst_ptr += dst_stride;
332 }
333 }
334
335 // Scale plane down, 3/4
336 static void ScalePlaneDown34(int src_width,
337 int src_height,
338 int dst_width,
339 int dst_height,
340 int src_stride,
341 int dst_stride,
342 const uint8_t* src_ptr,
343 uint8_t* dst_ptr,
344 enum FilterMode filtering) {
345 int y;
346 void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
347 uint8_t* dst_ptr, int dst_width);
348 void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
349 uint8_t* dst_ptr, int dst_width);
350 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
351 (void)src_width;
352 (void)src_height;
353 assert(dst_width % 3 == 0);
354 if (!filtering) {
355 ScaleRowDown34_0 = ScaleRowDown34_C;
356 ScaleRowDown34_1 = ScaleRowDown34_C;
357 } else {
358 ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
359 ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
360 }
361 #if defined(HAS_SCALEROWDOWN34_NEON)
362 if (TestCpuFlag(kCpuHasNEON)) {
363 if (!filtering) {
364 ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
365 ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
366 } else {
367 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
368 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
369 }
370 if (dst_width % 24 == 0) {
371 if (!filtering) {
372 ScaleRowDown34_0 = ScaleRowDown34_NEON;
373 ScaleRowDown34_1 = ScaleRowDown34_NEON;
374 } else {
375 ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
376 ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
377 }
378 }
379 }
380 #endif
381 #if defined(HAS_SCALEROWDOWN34_MSA)
382 if (TestCpuFlag(kCpuHasMSA)) {
383 if (!filtering) {
384 ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
385 ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
386 } else {
387 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
388 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
389 }
390 if (dst_width % 48 == 0) {
391 if (!filtering) {
392 ScaleRowDown34_0 = ScaleRowDown34_MSA;
393 ScaleRowDown34_1 = ScaleRowDown34_MSA;
394 } else {
395 ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
396 ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
397 }
398 }
399 }
400 #endif
401 #if defined(HAS_SCALEROWDOWN34_SSSE3)
402 if (TestCpuFlag(kCpuHasSSSE3)) {
403 if (!filtering) {
404 ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
405 ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
406 } else {
407 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
408 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
409 }
410 if (dst_width % 24 == 0) {
411 if (!filtering) {
412 ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
413 ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
414 } else {
415 ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
416 ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
417 }
418 }
419 }
420 #endif
421
422 for (y = 0; y < dst_height - 2; y += 3) {
423 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
424 src_ptr += src_stride;
425 dst_ptr += dst_stride;
426 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
427 src_ptr += src_stride;
428 dst_ptr += dst_stride;
429 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
430 src_ptr += src_stride * 2;
431 dst_ptr += dst_stride;
432 }
433
434 // Remainder 1 or 2 rows with last row vertically unfiltered
435 if ((dst_height % 3) == 2) {
436 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
437 src_ptr += src_stride;
438 dst_ptr += dst_stride;
439 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
440 } else if ((dst_height % 3) == 1) {
441 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
442 }
443 }
444
445 static void ScalePlaneDown34_16(int src_width,
446 int src_height,
447 int dst_width,
448 int dst_height,
449 int src_stride,
450 int dst_stride,
451 const uint16_t* src_ptr,
452 uint16_t* dst_ptr,
453 enum FilterMode filtering) {
454 int y;
455 void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
456 uint16_t* dst_ptr, int dst_width);
457 void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
458 uint16_t* dst_ptr, int dst_width);
459 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
460 (void)src_width;
461 (void)src_height;
462 assert(dst_width % 3 == 0);
463 if (!filtering) {
464 ScaleRowDown34_0 = ScaleRowDown34_16_C;
465 ScaleRowDown34_1 = ScaleRowDown34_16_C;
466 } else {
467 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
468 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
469 }
470 #if defined(HAS_SCALEROWDOWN34_16_NEON)
471 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
472 if (!filtering) {
473 ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
474 ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
475 } else {
476 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
477 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
478 }
479 }
480 #endif
481 #if defined(HAS_SCALEROWDOWN34_16_SSSE3)
482 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
483 if (!filtering) {
484 ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
485 ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
486 } else {
487 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
488 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
489 }
490 }
491 #endif
492
493 for (y = 0; y < dst_height - 2; y += 3) {
494 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
495 src_ptr += src_stride;
496 dst_ptr += dst_stride;
497 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
498 src_ptr += src_stride;
499 dst_ptr += dst_stride;
500 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
501 src_ptr += src_stride * 2;
502 dst_ptr += dst_stride;
503 }
504
505 // Remainder 1 or 2 rows with last row vertically unfiltered
506 if ((dst_height % 3) == 2) {
507 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
508 src_ptr += src_stride;
509 dst_ptr += dst_stride;
510 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
511 } else if ((dst_height % 3) == 1) {
512 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
513 }
514 }
515
516 // Scale plane, 3/8
517 // This is an optimized version for scaling down a plane to 3/8
518 // of its original size.
519 //
520 // Uses box filters arranged like this:
521 // aaabbbcc -> abc
522 // aaabbbcc def
523 // aaabbbcc ghi
524 // dddeeeff
525 // dddeeeff
526 // dddeeeff
527 // ggghhhii
528 // ggghhhii
529 // Boxes are 3x3, 2x3, 3x2 and 2x2
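// The row loop below follows the same arrangement: each iteration consumes
// 8 source rows (3 + 3 + 2) and produces 3 destination rows.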
530
531 static void ScalePlaneDown38(int src_width,
532 int src_height,
533 int dst_width,
534 int dst_height,
535 int src_stride,
536 int dst_stride,
537 const uint8_t* src_ptr,
538 uint8_t* dst_ptr,
539 enum FilterMode filtering) {
540 int y;
541 void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
542 uint8_t* dst_ptr, int dst_width);
543 void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
544 uint8_t* dst_ptr, int dst_width);
545 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
546 assert(dst_width % 3 == 0);
547 (void)src_width;
548 (void)src_height;
549 if (!filtering) {
550 ScaleRowDown38_3 = ScaleRowDown38_C;
551 ScaleRowDown38_2 = ScaleRowDown38_C;
552 } else {
553 ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
554 ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
555 }
556
557 #if defined(HAS_SCALEROWDOWN38_NEON)
558 if (TestCpuFlag(kCpuHasNEON)) {
559 if (!filtering) {
560 ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
561 ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
562 } else {
563 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
564 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
565 }
566 if (dst_width % 12 == 0) {
567 if (!filtering) {
568 ScaleRowDown38_3 = ScaleRowDown38_NEON;
569 ScaleRowDown38_2 = ScaleRowDown38_NEON;
570 } else {
571 ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
572 ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
573 }
574 }
575 }
576 #endif
577 #if defined(HAS_SCALEROWDOWN38_SSSE3)
578 if (TestCpuFlag(kCpuHasSSSE3)) {
579 if (!filtering) {
580 ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
581 ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
582 } else {
583 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
584 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
585 }
586 if (dst_width % 12 == 0 && !filtering) {
587 ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
588 ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
589 }
590 if (dst_width % 6 == 0 && filtering) {
591 ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
592 ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
593 }
594 }
595 #endif
596 #if defined(HAS_SCALEROWDOWN38_MSA)
597 if (TestCpuFlag(kCpuHasMSA)) {
598 if (!filtering) {
599 ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
600 ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
601 } else {
602 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
603 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
604 }
605 if (dst_width % 12 == 0) {
606 if (!filtering) {
607 ScaleRowDown38_3 = ScaleRowDown38_MSA;
608 ScaleRowDown38_2 = ScaleRowDown38_MSA;
609 } else {
610 ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
611 ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
612 }
613 }
614 }
615 #endif
616
617 for (y = 0; y < dst_height - 2; y += 3) {
618 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
619 src_ptr += src_stride * 3;
620 dst_ptr += dst_stride;
621 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
622 src_ptr += src_stride * 3;
623 dst_ptr += dst_stride;
624 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
625 src_ptr += src_stride * 2;
626 dst_ptr += dst_stride;
627 }
628
629 // Remainder 1 or 2 rows with last row vertically unfiltered
630 if ((dst_height % 3) == 2) {
631 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
632 src_ptr += src_stride * 3;
633 dst_ptr += dst_stride;
634 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
635 } else if ((dst_height % 3) == 1) {
636 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
637 }
638 }
639
640 static void ScalePlaneDown38_16(int src_width,
641 int src_height,
642 int dst_width,
643 int dst_height,
644 int src_stride,
645 int dst_stride,
646 const uint16_t* src_ptr,
647 uint16_t* dst_ptr,
648 enum FilterMode filtering) {
649 int y;
650 void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
651 uint16_t* dst_ptr, int dst_width);
652 void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
653 uint16_t* dst_ptr, int dst_width);
654 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
655 (void)src_width;
656 (void)src_height;
657 assert(dst_width % 3 == 0);
658 if (!filtering) {
659 ScaleRowDown38_3 = ScaleRowDown38_16_C;
660 ScaleRowDown38_2 = ScaleRowDown38_16_C;
661 } else {
662 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
663 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
664 }
665 #if defined(HAS_SCALEROWDOWN38_16_NEON)
666 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
667 if (!filtering) {
668 ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
669 ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
670 } else {
671 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
672 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
673 }
674 }
675 #endif
676 #if defined(HAS_SCALEROWDOWN38_16_SSSE3)
677 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
678 if (!filtering) {
679 ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
680 ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
681 } else {
682 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
683 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
684 }
685 }
686 #endif
687
688 for (y = 0; y < dst_height - 2; y += 3) {
689 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
690 src_ptr += src_stride * 3;
691 dst_ptr += dst_stride;
692 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
693 src_ptr += src_stride * 3;
694 dst_ptr += dst_stride;
695 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
696 src_ptr += src_stride * 2;
697 dst_ptr += dst_stride;
698 }
699
700 // Remainder 1 or 2 rows with last row vertically unfiltered
701 if ((dst_height % 3) == 2) {
702 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
703 src_ptr += src_stride * 3;
704 dst_ptr += dst_stride;
705 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
706 } else if ((dst_height % 3) == 1) {
707 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
708 }
709 }
710
711 #define MIN1(x) ((x) < 1 ? 1 : (x))
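// MIN1 clamps a box dimension to at least one pixel so the 65536 / area
// divisions below never divide by zero when a step rounds down to 0.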
712
713 static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
714 uint32_t sum = 0u;
715 int x;
716 assert(iboxwidth > 0);
717 for (x = 0; x < iboxwidth; ++x) {
718 sum += src_ptr[x];
719 }
720 return sum;
721 }
722
723 static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
724 uint32_t sum = 0u;
725 int x;
726 assert(iboxwidth > 0);
727 for (x = 0; x < iboxwidth; ++x) {
728 sum += src_ptr[x];
729 }
730 return sum;
731 }
732
733 static void ScaleAddCols2_C(int dst_width,
734 int boxheight,
735 int x,
736 int dx,
737 const uint16_t* src_ptr,
738 uint8_t* dst_ptr) {
739 int i;
740 int scaletbl[2];
741 int minboxwidth = dx >> 16;
742 int boxwidth;
743 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
744 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
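  // scaletbl[] holds 65536 / box_area for the two possible box widths, so
  // multiplying a box sum by it and shifting right by 16 yields the average.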
745 for (i = 0; i < dst_width; ++i) {
746 int ix = x >> 16;
747 x += dx;
748 boxwidth = MIN1((x >> 16) - ix);
749 *dst_ptr++ =
750 SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
751 16;
752 }
753 }
754
755 static void ScaleAddCols2_16_C(int dst_width,
756 int boxheight,
757 int x,
758 int dx,
759 const uint32_t* src_ptr,
760 uint16_t* dst_ptr) {
761 int i;
762 int scaletbl[2];
763 int minboxwidth = dx >> 16;
764 int boxwidth;
765 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
766 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
767 for (i = 0; i < dst_width; ++i) {
768 int ix = x >> 16;
769 x += dx;
770 boxwidth = MIN1((x >> 16) - ix);
771 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
772 scaletbl[boxwidth - minboxwidth] >>
773 16;
774 }
775 }
776
777 static void ScaleAddCols0_C(int dst_width,
778 int boxheight,
779 int x,
780 int dx,
781 const uint16_t* src_ptr,
782 uint8_t* dst_ptr) {
783 int scaleval = 65536 / boxheight;
784 int i;
785 (void)dx;
786 src_ptr += (x >> 16);
787 for (i = 0; i < dst_width; ++i) {
788 *dst_ptr++ = src_ptr[i] * scaleval >> 16;
789 }
790 }
791
792 static void ScaleAddCols1_C(int dst_width,
793 int boxheight,
794 int x,
795 int dx,
796 const uint16_t* src_ptr,
797 uint8_t* dst_ptr) {
798 int boxwidth = MIN1(dx >> 16);
799 int scaleval = 65536 / (boxwidth * boxheight);
800 int i;
801 x >>= 16;
802 for (i = 0; i < dst_width; ++i) {
803 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
804 x += boxwidth;
805 }
806 }
807
808 static void ScaleAddCols1_16_C(int dst_width,
809 int boxheight,
810 int x,
811 int dx,
812 const uint32_t* src_ptr,
813 uint16_t* dst_ptr) {
814 int boxwidth = MIN1(dx >> 16);
815 int scaleval = 65536 / (boxwidth * boxheight);
816 int i;
817 for (i = 0; i < dst_width; ++i) {
818 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
819 x += boxwidth;
820 }
821 }
822
823 // Scale plane down to any dimensions, with interpolation.
824 // (boxfilter).
825 //
826 // Same method as SimpleScale, which is fixed point, outputting
827 // one pixel of destination using fixed point (16.16) to step
828 // through source, sampling a box of pixels with simple
829 // averaging.
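// For example, scaling 100 source pixels to 30 gives dx of roughly 3.33 in
// 16.16 fixed point, so boxwidth alternates between 3 and 4 and scaletbl[]
// supplies the matching reciprocal for each case.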
830 static void ScalePlaneBox(int src_width,
831 int src_height,
832 int dst_width,
833 int dst_height,
834 int src_stride,
835 int dst_stride,
836 const uint8_t* src_ptr,
837 uint8_t* dst_ptr) {
838 int j, k;
839 // Initial source x/y coordinate and step values as 16.16 fixed point.
840 int x = 0;
841 int y = 0;
842 int dx = 0;
843 int dy = 0;
844 const int max_y = (src_height << 16);
845 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
846 &dx, &dy);
847 src_width = Abs(src_width);
848 {
849 // Allocate a row buffer of uint16_t.
850 align_buffer_64(row16, src_width * 2);
851 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
852 const uint16_t* src_ptr, uint8_t* dst_ptr) =
853 (dx & 0xffff) ? ScaleAddCols2_C
854 : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
855 void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
856 int src_width) = ScaleAddRow_C;
857 #if defined(HAS_SCALEADDROW_SSE2)
858 if (TestCpuFlag(kCpuHasSSE2)) {
859 ScaleAddRow = ScaleAddRow_Any_SSE2;
860 if (IS_ALIGNED(src_width, 16)) {
861 ScaleAddRow = ScaleAddRow_SSE2;
862 }
863 }
864 #endif
865 #if defined(HAS_SCALEADDROW_AVX2)
866 if (TestCpuFlag(kCpuHasAVX2)) {
867 ScaleAddRow = ScaleAddRow_Any_AVX2;
868 if (IS_ALIGNED(src_width, 32)) {
869 ScaleAddRow = ScaleAddRow_AVX2;
870 }
871 }
872 #endif
873 #if defined(HAS_SCALEADDROW_NEON)
874 if (TestCpuFlag(kCpuHasNEON)) {
875 ScaleAddRow = ScaleAddRow_Any_NEON;
876 if (IS_ALIGNED(src_width, 16)) {
877 ScaleAddRow = ScaleAddRow_NEON;
878 }
879 }
880 #endif
881 #if defined(HAS_SCALEADDROW_MSA)
882 if (TestCpuFlag(kCpuHasMSA)) {
883 ScaleAddRow = ScaleAddRow_Any_MSA;
884 if (IS_ALIGNED(src_width, 16)) {
885 ScaleAddRow = ScaleAddRow_MSA;
886 }
887 }
888 #endif
889 #if defined(HAS_SCALEADDROW_MMI)
890 if (TestCpuFlag(kCpuHasMMI)) {
891 ScaleAddRow = ScaleAddRow_Any_MMI;
892 if (IS_ALIGNED(src_width, 8)) {
893 ScaleAddRow = ScaleAddRow_MMI;
894 }
895 }
896 #endif
897
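    // For each destination row: accumulate boxheight source rows into row16,
    // then box-average the accumulated columns into dst_ptr.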
898 for (j = 0; j < dst_height; ++j) {
899 int boxheight;
900 int iy = y >> 16;
901 const uint8_t* src = src_ptr + iy * src_stride;
902 y += dy;
903 if (y > max_y) {
904 y = max_y;
905 }
906 boxheight = MIN1((y >> 16) - iy);
907 memset(row16, 0, src_width * 2);
908 for (k = 0; k < boxheight; ++k) {
909 ScaleAddRow(src, (uint16_t*)(row16), src_width);
910 src += src_stride;
911 }
912 ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
913 dst_ptr += dst_stride;
914 }
915 free_aligned_buffer_64(row16);
916 }
917 }
918
919 static void ScalePlaneBox_16(int src_width,
920 int src_height,
921 int dst_width,
922 int dst_height,
923 int src_stride,
924 int dst_stride,
925 const uint16_t* src_ptr,
926 uint16_t* dst_ptr) {
927 int j, k;
928 // Initial source x/y coordinate and step values as 16.16 fixed point.
929 int x = 0;
930 int y = 0;
931 int dx = 0;
932 int dy = 0;
933 const int max_y = (src_height << 16);
934 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
935 &dx, &dy);
936 src_width = Abs(src_width);
937 {
938 // Allocate a row buffer of uint32_t.
939 align_buffer_64(row32, src_width * 4);
940 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
941 const uint32_t* src_ptr, uint16_t* dst_ptr) =
942 (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
943 void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
944 int src_width) = ScaleAddRow_16_C;
945
946 #if defined(HAS_SCALEADDROW_16_SSE2)
947 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
948 ScaleAddRow = ScaleAddRow_16_SSE2;
949 }
950 #endif
951
952 #if defined(HAS_SCALEADDROW_16_MMI)
953 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(src_width, 4)) {
954 ScaleAddRow = ScaleAddRow_16_MMI;
955 }
956 #endif
957 for (j = 0; j < dst_height; ++j) {
958 int boxheight;
959 int iy = y >> 16;
960 const uint16_t* src = src_ptr + iy * src_stride;
961 y += dy;
962 if (y > max_y) {
963 y = max_y;
964 }
965 boxheight = MIN1((y >> 16) - iy);
966 memset(row32, 0, src_width * 4);
967 for (k = 0; k < boxheight; ++k) {
968 ScaleAddRow(src, (uint32_t*)(row32), src_width);
969 src += src_stride;
970 }
971 ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
972 dst_ptr += dst_stride;
973 }
974 free_aligned_buffer_64(row32);
975 }
976 }
977
978 // Scale plane down with bilinear interpolation.
979 void ScalePlaneBilinearDown(int src_width,
980 int src_height,
981 int dst_width,
982 int dst_height,
983 int src_stride,
984 int dst_stride,
985 const uint8_t* src_ptr,
986 uint8_t* dst_ptr,
987 enum FilterMode filtering) {
988 // Initial source x/y coordinate and step values as 16.16 fixed point.
989 int x = 0;
990 int y = 0;
991 int dx = 0;
992 int dy = 0;
993 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
994 // Allocate a row buffer.
995 align_buffer_64(row, src_width);
996
997 const int max_y = (src_height - 1) << 16;
998 int j;
999 void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
1000 int dst_width, int x, int dx) =
1001 (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
1002 void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
1003 ptrdiff_t src_stride, int dst_width,
1004 int source_y_fraction) = InterpolateRow_C;
1005 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1006 &dx, &dy);
1007 src_width = Abs(src_width);
1008
1009 #if defined(HAS_INTERPOLATEROW_SSSE3)
1010 if (TestCpuFlag(kCpuHasSSSE3)) {
1011 InterpolateRow = InterpolateRow_Any_SSSE3;
1012 if (IS_ALIGNED(src_width, 16)) {
1013 InterpolateRow = InterpolateRow_SSSE3;
1014 }
1015 }
1016 #endif
1017 #if defined(HAS_INTERPOLATEROW_AVX2)
1018 if (TestCpuFlag(kCpuHasAVX2)) {
1019 InterpolateRow = InterpolateRow_Any_AVX2;
1020 if (IS_ALIGNED(src_width, 32)) {
1021 InterpolateRow = InterpolateRow_AVX2;
1022 }
1023 }
1024 #endif
1025 #if defined(HAS_INTERPOLATEROW_NEON)
1026 if (TestCpuFlag(kCpuHasNEON)) {
1027 InterpolateRow = InterpolateRow_Any_NEON;
1028 if (IS_ALIGNED(src_width, 16)) {
1029 InterpolateRow = InterpolateRow_NEON;
1030 }
1031 }
1032 #endif
1033 #if defined(HAS_INTERPOLATEROW_MSA)
1034 if (TestCpuFlag(kCpuHasMSA)) {
1035 InterpolateRow = InterpolateRow_Any_MSA;
1036 if (IS_ALIGNED(src_width, 32)) {
1037 InterpolateRow = InterpolateRow_MSA;
1038 }
1039 }
1040 #endif
1041 #if defined(HAS_INTERPOLATEROW_MMI)
1042 if (TestCpuFlag(kCpuHasMMI)) {
1043 InterpolateRow = InterpolateRow_Any_MMI;
1044 if (IS_ALIGNED(src_width, 16)) {
1045 InterpolateRow = InterpolateRow_MMI;
1046 }
1047 }
1048 #endif
1049
1050 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
1051 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1052 ScaleFilterCols = ScaleFilterCols_SSSE3;
1053 }
1054 #endif
1055 #if defined(HAS_SCALEFILTERCOLS_NEON)
1056 if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1057 ScaleFilterCols = ScaleFilterCols_Any_NEON;
1058 if (IS_ALIGNED(dst_width, 8)) {
1059 ScaleFilterCols = ScaleFilterCols_NEON;
1060 }
1061 }
1062 #endif
1063 #if defined(HAS_SCALEFILTERCOLS_MSA)
1064 if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1065 ScaleFilterCols = ScaleFilterCols_Any_MSA;
1066 if (IS_ALIGNED(dst_width, 16)) {
1067 ScaleFilterCols = ScaleFilterCols_MSA;
1068 }
1069 }
1070 #endif
1071 if (y > max_y) {
1072 y = max_y;
1073 }
1074
1075 for (j = 0; j < dst_height; ++j) {
1076 int yi = y >> 16;
1077 const uint8_t* src = src_ptr + yi * src_stride;
1078 if (filtering == kFilterLinear) {
1079 ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1080 } else {
1081 int yf = (y >> 8) & 255;
1082 InterpolateRow(row, src, src_stride, src_width, yf);
1083 ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
1084 }
1085 dst_ptr += dst_stride;
1086 y += dy;
1087 if (y > max_y) {
1088 y = max_y;
1089 }
1090 }
1091 free_aligned_buffer_64(row);
1092 }
1093
1094 void ScalePlaneBilinearDown_16(int src_width,
1095 int src_height,
1096 int dst_width,
1097 int dst_height,
1098 int src_stride,
1099 int dst_stride,
1100 const uint16_t* src_ptr,
1101 uint16_t* dst_ptr,
1102 enum FilterMode filtering) {
1103 // Initial source x/y coordinate and step values as 16.16 fixed point.
1104 int x = 0;
1105 int y = 0;
1106 int dx = 0;
1107 int dy = 0;
1108 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1109 // Allocate a row buffer.
1110 align_buffer_64(row, src_width * 2);
1111
1112 const int max_y = (src_height - 1) << 16;
1113 int j;
1114 void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
1115 int dst_width, int x, int dx) =
1116 (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
1117 void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
1118 ptrdiff_t src_stride, int dst_width,
1119 int source_y_fraction) = InterpolateRow_16_C;
1120 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1121 &dx, &dy);
1122 src_width = Abs(src_width);
1123
1124 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1125 if (TestCpuFlag(kCpuHasSSE2)) {
1126 InterpolateRow = InterpolateRow_Any_16_SSE2;
1127 if (IS_ALIGNED(src_width, 16)) {
1128 InterpolateRow = InterpolateRow_16_SSE2;
1129 }
1130 }
1131 #endif
1132 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1133 if (TestCpuFlag(kCpuHasSSSE3)) {
1134 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1135 if (IS_ALIGNED(src_width, 16)) {
1136 InterpolateRow = InterpolateRow_16_SSSE3;
1137 }
1138 }
1139 #endif
1140 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1141 if (TestCpuFlag(kCpuHasAVX2)) {
1142 InterpolateRow = InterpolateRow_Any_16_AVX2;
1143 if (IS_ALIGNED(src_width, 32)) {
1144 InterpolateRow = InterpolateRow_16_AVX2;
1145 }
1146 }
1147 #endif
1148 #if defined(HAS_INTERPOLATEROW_16_NEON)
1149 if (TestCpuFlag(kCpuHasNEON)) {
1150 InterpolateRow = InterpolateRow_Any_16_NEON;
1151 if (IS_ALIGNED(src_width, 16)) {
1152 InterpolateRow = InterpolateRow_16_NEON;
1153 }
1154 }
1155 #endif
1156
1157 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1158 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1159 ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1160 }
1161 #endif
1162 if (y > max_y) {
1163 y = max_y;
1164 }
1165
1166 for (j = 0; j < dst_height; ++j) {
1167 int yi = y >> 16;
1168 const uint16_t* src = src_ptr + yi * src_stride;
1169 if (filtering == kFilterLinear) {
1170 ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1171 } else {
1172 int yf = (y >> 8) & 255;
1173 InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
1174 ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
1175 }
1176 dst_ptr += dst_stride;
1177 y += dy;
1178 if (y > max_y) {
1179 y = max_y;
1180 }
1181 }
1182 free_aligned_buffer_64(row);
1183 }
1184
1185 // Scale plane up with bilinear interpolation.
1186 void ScalePlaneBilinearUp(int src_width,
1187 int src_height,
1188 int dst_width,
1189 int dst_height,
1190 int src_stride,
1191 int dst_stride,
1192 const uint8_t* src_ptr,
1193 uint8_t* dst_ptr,
1194 enum FilterMode filtering) {
1195 int j;
1196 // Initial source x/y coordinate and step values as 16.16 fixed point.
1197 int x = 0;
1198 int y = 0;
1199 int dx = 0;
1200 int dy = 0;
1201 const int max_y = (src_height - 1) << 16;
1202 void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
1203 ptrdiff_t src_stride, int dst_width,
1204 int source_y_fraction) = InterpolateRow_C;
1205 void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
1206 int dst_width, int x, int dx) =
1207 filtering ? ScaleFilterCols_C : ScaleCols_C;
1208 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1209 &dx, &dy);
1210 src_width = Abs(src_width);
1211
1212 #if defined(HAS_INTERPOLATEROW_SSSE3)
1213 if (TestCpuFlag(kCpuHasSSSE3)) {
1214 InterpolateRow = InterpolateRow_Any_SSSE3;
1215 if (IS_ALIGNED(dst_width, 16)) {
1216 InterpolateRow = InterpolateRow_SSSE3;
1217 }
1218 }
1219 #endif
1220 #if defined(HAS_INTERPOLATEROW_AVX2)
1221 if (TestCpuFlag(kCpuHasAVX2)) {
1222 InterpolateRow = InterpolateRow_Any_AVX2;
1223 if (IS_ALIGNED(dst_width, 32)) {
1224 InterpolateRow = InterpolateRow_AVX2;
1225 }
1226 }
1227 #endif
1228 #if defined(HAS_INTERPOLATEROW_NEON)
1229 if (TestCpuFlag(kCpuHasNEON)) {
1230 InterpolateRow = InterpolateRow_Any_NEON;
1231 if (IS_ALIGNED(dst_width, 16)) {
1232 InterpolateRow = InterpolateRow_NEON;
1233 }
1234 }
1235 #endif
1236
1237 if (filtering && src_width >= 32768) {
1238 ScaleFilterCols = ScaleFilterCols64_C;
1239 }
1240 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
1241 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1242 ScaleFilterCols = ScaleFilterCols_SSSE3;
1243 }
1244 #endif
1245 #if defined(HAS_SCALEFILTERCOLS_NEON)
1246 if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1247 ScaleFilterCols = ScaleFilterCols_Any_NEON;
1248 if (IS_ALIGNED(dst_width, 8)) {
1249 ScaleFilterCols = ScaleFilterCols_NEON;
1250 }
1251 }
1252 #endif
1253 #if defined(HAS_SCALEFILTERCOLS_MSA)
1254 if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1255 ScaleFilterCols = ScaleFilterCols_Any_MSA;
1256 if (IS_ALIGNED(dst_width, 16)) {
1257 ScaleFilterCols = ScaleFilterCols_MSA;
1258 }
1259 }
1260 #endif
1261 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1262 ScaleFilterCols = ScaleColsUp2_C;
1263 #if defined(HAS_SCALECOLS_SSE2)
1264 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1265 ScaleFilterCols = ScaleColsUp2_SSE2;
1266 }
1267 #endif
1268 #if defined(HAS_SCALECOLS_MMI)
1269 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
1270 ScaleFilterCols = ScaleColsUp2_MMI;
1271 }
1272 #endif
1273 }
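  // The block above selects a pixel-doubling fast path when the horizontal
  // scale is exactly 2x up and no filtering was requested.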
1274
1275 if (y > max_y) {
1276 y = max_y;
1277 }
1278 {
1279 int yi = y >> 16;
1280 const uint8_t* src = src_ptr + yi * src_stride;
1281
1282 // Allocate 2 row buffers.
1283 const int kRowSize = (dst_width + 31) & ~31;
1284 align_buffer_64(row, kRowSize * 2);
1285
1286 uint8_t* rowptr = row;
1287 int rowstride = kRowSize;
1288 int lasty = yi;
1289
1290 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1291 if (src_height > 1) {
1292 src += src_stride;
1293 }
1294 ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1295 src += src_stride;
1296
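    // The two row buffers act as a ping-pong pair: when y crosses into a new
    // source row, the older buffer is rewritten and rowstride flips sign so
    // rowptr and rowptr + rowstride always name the two rows interpolated
    // below.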
1297 for (j = 0; j < dst_height; ++j) {
1298 yi = y >> 16;
1299 if (yi != lasty) {
1300 if (y > max_y) {
1301 y = max_y;
1302 yi = y >> 16;
1303 src = src_ptr + yi * src_stride;
1304 }
1305 if (yi != lasty) {
1306 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1307 rowptr += rowstride;
1308 rowstride = -rowstride;
1309 lasty = yi;
1310 src += src_stride;
1311 }
1312 }
1313 if (filtering == kFilterLinear) {
1314 InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1315 } else {
1316 int yf = (y >> 8) & 255;
1317 InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1318 }
1319 dst_ptr += dst_stride;
1320 y += dy;
1321 }
1322 free_aligned_buffer_64(row);
1323 }
1324 }
1325
1326 void ScalePlaneBilinearUp_16(int src_width,
1327 int src_height,
1328 int dst_width,
1329 int dst_height,
1330 int src_stride,
1331 int dst_stride,
1332 const uint16_t* src_ptr,
1333 uint16_t* dst_ptr,
1334 enum FilterMode filtering) {
1335 int j;
1336 // Initial source x/y coordinate and step values as 16.16 fixed point.
1337 int x = 0;
1338 int y = 0;
1339 int dx = 0;
1340 int dy = 0;
1341 const int max_y = (src_height - 1) << 16;
1342 void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
1343 ptrdiff_t src_stride, int dst_width,
1344 int source_y_fraction) = InterpolateRow_16_C;
1345 void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
1346 int dst_width, int x, int dx) =
1347 filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
1348 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1349 &dx, &dy);
1350 src_width = Abs(src_width);
1351
1352 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1353 if (TestCpuFlag(kCpuHasSSE2)) {
1354 InterpolateRow = InterpolateRow_Any_16_SSE2;
1355 if (IS_ALIGNED(dst_width, 16)) {
1356 InterpolateRow = InterpolateRow_16_SSE2;
1357 }
1358 }
1359 #endif
1360 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1361 if (TestCpuFlag(kCpuHasSSSE3)) {
1362 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1363 if (IS_ALIGNED(dst_width, 16)) {
1364 InterpolateRow = InterpolateRow_16_SSSE3;
1365 }
1366 }
1367 #endif
1368 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1369 if (TestCpuFlag(kCpuHasAVX2)) {
1370 InterpolateRow = InterpolateRow_Any_16_AVX2;
1371 if (IS_ALIGNED(dst_width, 32)) {
1372 InterpolateRow = InterpolateRow_16_AVX2;
1373 }
1374 }
1375 #endif
1376 #if defined(HAS_INTERPOLATEROW_16_NEON)
1377 if (TestCpuFlag(kCpuHasNEON)) {
1378 InterpolateRow = InterpolateRow_Any_16_NEON;
1379 if (IS_ALIGNED(dst_width, 16)) {
1380 InterpolateRow = InterpolateRow_16_NEON;
1381 }
1382 }
1383 #endif
1384
1385 if (filtering && src_width >= 32768) {
1386 ScaleFilterCols = ScaleFilterCols64_16_C;
1387 }
1388 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1389 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1390 ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1391 }
1392 #endif
1393 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1394 ScaleFilterCols = ScaleColsUp2_16_C;
1395 #if defined(HAS_SCALECOLS_16_SSE2)
1396 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1397 ScaleFilterCols = ScaleColsUp2_16_SSE2;
1398 }
1399 #endif
1400 #if defined(HAS_SCALECOLS_16_MMI)
1401 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
1402 ScaleFilterCols = ScaleColsUp2_16_MMI;
1403 }
1404 #endif
1405 }
1406
1407 if (y > max_y) {
1408 y = max_y;
1409 }
1410 {
1411 int yi = y >> 16;
1412 const uint16_t* src = src_ptr + yi * src_stride;
1413
1414 // Allocate 2 row buffers.
1415 const int kRowSize = (dst_width + 31) & ~31;
1416 align_buffer_64(row, kRowSize * 4);
1417
1418 uint16_t* rowptr = (uint16_t*)row;
1419 int rowstride = kRowSize;
1420 int lasty = yi;
1421
1422 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1423 if (src_height > 1) {
1424 src += src_stride;
1425 }
1426 ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1427 src += src_stride;
1428
1429 for (j = 0; j < dst_height; ++j) {
1430 yi = y >> 16;
1431 if (yi != lasty) {
1432 if (y > max_y) {
1433 y = max_y;
1434 yi = y >> 16;
1435 src = src_ptr + yi * src_stride;
1436 }
1437 if (yi != lasty) {
1438 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1439 rowptr += rowstride;
1440 rowstride = -rowstride;
1441 lasty = yi;
1442 src += src_stride;
1443 }
1444 }
1445 if (filtering == kFilterLinear) {
1446 InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1447 } else {
1448 int yf = (y >> 8) & 255;
1449 InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1450 }
1451 dst_ptr += dst_stride;
1452 y += dy;
1453 }
1454 free_aligned_buffer_64(row);
1455 }
1456 }
1457
1458 // Scale Plane to/from any dimensions, without interpolation.
1459 // Fixed point math is used for performance: The upper 16 bits
1460 // of x and dx are the integer part of the source position and
1461 // the lower 16 bits are the fractional part.
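// For example, ScaleSlope computes dx ~ (src_width << 16) / dst_width here,
// so a 4:1 horizontal shrink gives dx = 4 << 16 and each output pixel steps
// four whole source pixels with no fractional carry.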
1462
1463 static void ScalePlaneSimple(int src_width,
1464 int src_height,
1465 int dst_width,
1466 int dst_height,
1467 int src_stride,
1468 int dst_stride,
1469 const uint8_t* src_ptr,
1470 uint8_t* dst_ptr) {
1471 int i;
1472 void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
1473 int x, int dx) = ScaleCols_C;
1474 // Initial source x/y coordinate and step values as 16.16 fixed point.
1475 int x = 0;
1476 int y = 0;
1477 int dx = 0;
1478 int dy = 0;
1479 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1480 &dx, &dy);
1481 src_width = Abs(src_width);
1482
1483 if (src_width * 2 == dst_width && x < 0x8000) {
1484 ScaleCols = ScaleColsUp2_C;
1485 #if defined(HAS_SCALECOLS_SSE2)
1486 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1487 ScaleCols = ScaleColsUp2_SSE2;
1488 }
1489 #endif
1490 #if defined(HAS_SCALECOLS_MMI)
1491 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
1492 ScaleCols = ScaleColsUp2_MMI;
1493 }
1494 #endif
1495 }
1496
1497 for (i = 0; i < dst_height; ++i) {
1498 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
1499 dst_ptr += dst_stride;
1500 y += dy;
1501 }
1502 }
1503
1504 static void ScalePlaneSimple_16(int src_width,
1505 int src_height,
1506 int dst_width,
1507 int dst_height,
1508 int src_stride,
1509 int dst_stride,
1510 const uint16_t* src_ptr,
1511 uint16_t* dst_ptr) {
1512 int i;
1513 void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
1514 int x, int dx) = ScaleCols_16_C;
1515 // Initial source x/y coordinate and step values as 16.16 fixed point.
1516 int x = 0;
1517 int y = 0;
1518 int dx = 0;
1519 int dy = 0;
1520 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1521 &dx, &dy);
1522 src_width = Abs(src_width);
1523
1524 if (src_width * 2 == dst_width && x < 0x8000) {
1525 ScaleCols = ScaleColsUp2_16_C;
1526 #if defined(HAS_SCALECOLS_16_SSE2)
1527 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1528 ScaleCols = ScaleColsUp2_16_SSE2;
1529 }
1530 #endif
1531 #if defined(HAS_SCALECOLS_16_MMI)
1532 if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 8)) {
1533 ScaleCols = ScaleColsUp2_16_MMI;
1534 }
1535 #endif
1536 }
1537
1538 for (i = 0; i < dst_height; ++i) {
1539 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
1540 dst_ptr += dst_stride;
1541 y += dy;
1542 }
1543 }
1544
1545 // Scale a plane.
1546 // This function dispatches to a specialized scaler based on scale factor.
1547
1548 LIBYUV_API
1549 void ScalePlane(const uint8_t* src,
1550 int src_stride,
1551 int src_width,
1552 int src_height,
1553 uint8_t* dst,
1554 int dst_stride,
1555 int dst_width,
1556 int dst_height,
1557 enum FilterMode filtering) {
1558 // Simplify filtering when possible.
1559 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1560 filtering);
1561
1562 // Negative height means invert the image.
1563 if (src_height < 0) {
1564 src_height = -src_height;
1565 src = src + (src_height - 1) * src_stride;
1566 src_stride = -src_stride;
1567 }
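  // After this flip, src points at the last row and the negative stride walks
  // the source bottom-up, producing a vertically mirrored output.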
1568
1569   // Use specialized scalers to improve performance for common scale factors.
1570 // For example, all the 1/2 scalings will use ScalePlaneDown2()
1571 if (dst_width == src_width && dst_height == src_height) {
1572 // Straight copy.
1573 CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
1574 return;
1575 }
1576 if (dst_width == src_width && filtering != kFilterBox) {
1577 int dy = FixedDiv(src_height, dst_height);
1578 // Arbitrary scale vertically, but unscaled horizontally.
1579 ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
1580 dst_stride, src, dst, 0, 0, dy, 1, filtering);
1581 return;
1582 }
1583 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1584 // Scale down.
1585 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
1586 // optimized, 3/4
1587 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
1588 dst_stride, src, dst, filtering);
1589 return;
1590 }
1591 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1592 // optimized, 1/2
1593 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
1594 dst_stride, src, dst, filtering);
1595 return;
1596 }
1597 // 3/8 rounded up for odd sized chroma height.
1598 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
1599 // optimized, 3/8
1600 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
1601 dst_stride, src, dst, filtering);
1602 return;
1603 }
1604 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1605 (filtering == kFilterBox || filtering == kFilterNone)) {
1606 // optimized, 1/4
1607 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
1608 dst_stride, src, dst, filtering);
1609 return;
1610 }
1611 }
1612 if (filtering == kFilterBox && dst_height * 2 < src_height) {
1613 ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
1614 dst_stride, src, dst);
1615 return;
1616 }
1617 if (filtering && dst_height > src_height) {
1618 ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
1619 src_stride, dst_stride, src, dst, filtering);
1620 return;
1621 }
1622 if (filtering) {
1623 ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
1624 src_stride, dst_stride, src, dst, filtering);
1625 return;
1626 }
1627 ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
1628 dst_stride, src, dst);
1629 }
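// A minimal usage sketch (hypothetical caller-owned buffers; strides here are
// simply the plane widths):
//   uint8_t src[640 * 360];
//   uint8_t dst[320 * 180];
//   ScalePlane(src, 640, 640, 360, dst, 320, 320, 180, kFilterBilinear);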
1630
1631 LIBYUV_API
1632 void ScalePlane_16(const uint16_t* src,
1633 int src_stride,
1634 int src_width,
1635 int src_height,
1636 uint16_t* dst,
1637 int dst_stride,
1638 int dst_width,
1639 int dst_height,
1640 enum FilterMode filtering) {
1641 // Simplify filtering when possible.
1642 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1643 filtering);
1644
1645 // Negative height means invert the image.
1646 if (src_height < 0) {
1647 src_height = -src_height;
1648 src = src + (src_height - 1) * src_stride;
1649 src_stride = -src_stride;
1650 }
1651
1652   // Use specialized scalers to improve performance for common scale factors.
1653 // For example, all the 1/2 scalings will use ScalePlaneDown2()
1654 if (dst_width == src_width && dst_height == src_height) {
1655 // Straight copy.
1656 CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
1657 return;
1658 }
1659 if (dst_width == src_width && filtering != kFilterBox) {
1660 int dy = FixedDiv(src_height, dst_height);
1661     // Arbitrary scale vertically, but unscaled horizontally.
1662 ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
1663 dst_stride, src, dst, 0, 0, dy, 1, filtering);
1664 return;
1665 }
1666 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1667 // Scale down.
1668 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
1669 // optimized, 3/4
1670 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
1671 src_stride, dst_stride, src, dst, filtering);
1672 return;
1673 }
1674 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1675 // optimized, 1/2
1676 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
1677 src_stride, dst_stride, src, dst, filtering);
1678 return;
1679 }
1680 // 3/8 rounded up for odd sized chroma height.
1681 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
1682 // optimized, 3/8
1683 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
1684 src_stride, dst_stride, src, dst, filtering);
1685 return;
1686 }
1687 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1688 (filtering == kFilterBox || filtering == kFilterNone)) {
1689 // optimized, 1/4
1690 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
1691 src_stride, dst_stride, src, dst, filtering);
1692 return;
1693 }
1694 }
1695 if (filtering == kFilterBox && dst_height * 2 < src_height) {
1696 ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
1697 dst_stride, src, dst);
1698 return;
1699 }
1700 if (filtering && dst_height > src_height) {
1701 ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
1702 src_stride, dst_stride, src, dst, filtering);
1703 return;
1704 }
1705 if (filtering) {
1706 ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
1707 src_stride, dst_stride, src, dst, filtering);
1708 return;
1709 }
1710 ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
1711 dst_stride, src, dst);
1712 }
1713
1714 // Scale an I420 image.
1715 // This function in turn calls a scaling function for each plane.
1716
1717 LIBYUV_API
1718 int I420Scale(const uint8_t* src_y,
1719 int src_stride_y,
1720 const uint8_t* src_u,
1721 int src_stride_u,
1722 const uint8_t* src_v,
1723 int src_stride_v,
1724 int src_width,
1725 int src_height,
1726 uint8_t* dst_y,
1727 int dst_stride_y,
1728 uint8_t* dst_u,
1729 int dst_stride_u,
1730 uint8_t* dst_v,
1731 int dst_stride_v,
1732 int dst_width,
1733 int dst_height,
1734 enum FilterMode filtering) {
1735 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1736 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1737 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1738 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1739 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1740 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1741 dst_width <= 0 || dst_height <= 0) {
1742 return -1;
1743 }
1744
1745 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1746 dst_width, dst_height, filtering);
1747 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1748 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1749 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1750 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1751 return 0;
1752 }
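// For example, scaling I420 1920x1080 to 1280x720 scales the Y plane
// 1920x1080 -> 1280x720 and each chroma plane 960x540 -> 640x360, using the
// SUBSAMPLE'd half dimensions computed above.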
1753
1754 LIBYUV_API
1755 int I420Scale_16(const uint16_t* src_y,
1756 int src_stride_y,
1757 const uint16_t* src_u,
1758 int src_stride_u,
1759 const uint16_t* src_v,
1760 int src_stride_v,
1761 int src_width,
1762 int src_height,
1763 uint16_t* dst_y,
1764 int dst_stride_y,
1765 uint16_t* dst_u,
1766 int dst_stride_u,
1767 uint16_t* dst_v,
1768 int dst_stride_v,
1769 int dst_width,
1770 int dst_height,
1771 enum FilterMode filtering) {
1772 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1773 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1774 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1775 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1776 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1777 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1778 dst_width <= 0 || dst_height <= 0) {
1779 return -1;
1780 }
1781
1782 ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1783 dst_width, dst_height, filtering);
1784 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1785 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1786 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1787 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1788 return 0;
1789 }
1790
1791 // Scale an I444 image.
1792 // This function in turn calls a scaling function for each plane.
1793
1794 LIBYUV_API
1795 int I444Scale(const uint8_t* src_y,
1796 int src_stride_y,
1797 const uint8_t* src_u,
1798 int src_stride_u,
1799 const uint8_t* src_v,
1800 int src_stride_v,
1801 int src_width,
1802 int src_height,
1803 uint8_t* dst_y,
1804 int dst_stride_y,
1805 uint8_t* dst_u,
1806 int dst_stride_u,
1807 uint8_t* dst_v,
1808 int dst_stride_v,
1809 int dst_width,
1810 int dst_height,
1811 enum FilterMode filtering) {
1812 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1813 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1814 dst_width <= 0 || dst_height <= 0) {
1815 return -1;
1816 }
1817
1818 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1819 dst_width, dst_height, filtering);
1820 ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
1821 dst_width, dst_height, filtering);
1822 ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
1823 dst_width, dst_height, filtering);
1824 return 0;
1825 }
1826
1827 LIBYUV_API
1828 int I444Scale_16(const uint16_t* src_y,
1829 int src_stride_y,
1830 const uint16_t* src_u,
1831 int src_stride_u,
1832 const uint16_t* src_v,
1833 int src_stride_v,
1834 int src_width,
1835 int src_height,
1836 uint16_t* dst_y,
1837 int dst_stride_y,
1838 uint16_t* dst_u,
1839 int dst_stride_u,
1840 uint16_t* dst_v,
1841 int dst_stride_v,
1842 int dst_width,
1843 int dst_height,
1844 enum FilterMode filtering) {
1845 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1846 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1847 dst_width <= 0 || dst_height <= 0) {
1848 return -1;
1849 }
1850
1851 ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1852 dst_width, dst_height, filtering);
1853 ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u, dst_stride_u,
1854 dst_width, dst_height, filtering);
1855 ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v, dst_stride_v,
1856 dst_width, dst_height, filtering);
1857 return 0;
1858 }
1859
1860 // Deprecated API.
1861 LIBYUV_API
1862 int Scale(const uint8_t* src_y,
1863 const uint8_t* src_u,
1864 const uint8_t* src_v,
1865 int src_stride_y,
1866 int src_stride_u,
1867 int src_stride_v,
1868 int src_width,
1869 int src_height,
1870 uint8_t* dst_y,
1871 uint8_t* dst_u,
1872 uint8_t* dst_v,
1873 int dst_stride_y,
1874 int dst_stride_u,
1875 int dst_stride_v,
1876 int dst_width,
1877 int dst_height,
1878 LIBYUV_BOOL interpolate) {
1879 return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
1880 src_stride_v, src_width, src_height, dst_y, dst_stride_y,
1881 dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
1882 dst_height, interpolate ? kFilterBox : kFilterNone);
1883 }
1884
1885 #ifdef __cplusplus
1886 } // extern "C"
1887 } // namespace libyuv
1888 #endif
1889