1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyPlane
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
26 static __inline int Abs(int v) {
27 return v >= 0 ? v : -v;
28 }
29
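// SUBSAMPLE(v, a, s) shifts v right by s after adding the bias a, rounding
// the magnitude up; e.g. SUBSAMPLE(width, 1, 1) gives the half dimension
// rounded up for odd sizes (5 -> 3), which is used below to size the I420
// chroma planes.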
30 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
31
32 // Scale plane, 1/2
33 // This is an optimized version for scaling down a plane to 1/2 of
34 // its original size.
35
36 static void ScalePlaneDown2(int src_width,
37 int src_height,
38 int dst_width,
39 int dst_height,
40 int src_stride,
41 int dst_stride,
42 const uint8_t* src_ptr,
43 uint8_t* dst_ptr,
44 enum FilterMode filtering) {
45 int y;
46 void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
47 uint8_t* dst_ptr, int dst_width) =
48 filtering == kFilterNone
49 ? ScaleRowDown2_C
50 : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
51 : ScaleRowDown2Box_C);
52 int row_stride = src_stride << 1;
53 (void)src_width;
54 (void)src_height;
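// For point sampling (kFilterNone) the row function reads only one source
// row, so src_stride is zeroed below and the pointer starts on the odd rows;
// the unfiltered path therefore takes its sample from the second row of each
// 2x2 block. row_stride was captured above and still advances two source
// rows per output row.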
55 if (!filtering) {
56 src_ptr += src_stride; // Point to odd rows.
57 src_stride = 0;
58 }
59
60 #if defined(HAS_SCALEROWDOWN2_NEON)
61 if (TestCpuFlag(kCpuHasNEON)) {
62 ScaleRowDown2 =
63 filtering == kFilterNone
64 ? ScaleRowDown2_Any_NEON
65 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
66 : ScaleRowDown2Box_Any_NEON);
67 if (IS_ALIGNED(dst_width, 16)) {
68 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
69 : (filtering == kFilterLinear
70 ? ScaleRowDown2Linear_NEON
71 : ScaleRowDown2Box_NEON);
72 }
73 }
74 #endif
75 #if defined(HAS_SCALEROWDOWN2_SSSE3)
76 if (TestCpuFlag(kCpuHasSSSE3)) {
77 ScaleRowDown2 =
78 filtering == kFilterNone
79 ? ScaleRowDown2_Any_SSSE3
80 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
81 : ScaleRowDown2Box_Any_SSSE3);
82 if (IS_ALIGNED(dst_width, 16)) {
83 ScaleRowDown2 =
84 filtering == kFilterNone
85 ? ScaleRowDown2_SSSE3
86 : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
87 : ScaleRowDown2Box_SSSE3);
88 }
89 }
90 #endif
91 #if defined(HAS_SCALEROWDOWN2_AVX2)
92 if (TestCpuFlag(kCpuHasAVX2)) {
93 ScaleRowDown2 =
94 filtering == kFilterNone
95 ? ScaleRowDown2_Any_AVX2
96 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
97 : ScaleRowDown2Box_Any_AVX2);
98 if (IS_ALIGNED(dst_width, 32)) {
99 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
100 : (filtering == kFilterLinear
101 ? ScaleRowDown2Linear_AVX2
102 : ScaleRowDown2Box_AVX2);
103 }
104 }
105 #endif
106 #if defined(HAS_SCALEROWDOWN2_MSA)
107 if (TestCpuFlag(kCpuHasMSA)) {
108 ScaleRowDown2 =
109 filtering == kFilterNone
110 ? ScaleRowDown2_Any_MSA
111 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
112 : ScaleRowDown2Box_Any_MSA);
113 if (IS_ALIGNED(dst_width, 32)) {
114 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
115 : (filtering == kFilterLinear
116 ? ScaleRowDown2Linear_MSA
117 : ScaleRowDown2Box_MSA);
118 }
119 }
120 #endif
121
122 if (filtering == kFilterLinear) {
123 src_stride = 0;
124 }
125 // TODO(fbarchard): Loop through source height to allow odd height.
126 for (y = 0; y < dst_height; ++y) {
127 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
128 src_ptr += row_stride;
129 dst_ptr += dst_stride;
130 }
131 }
132
133 static void ScalePlaneDown2_16(int src_width,
134 int src_height,
135 int dst_width,
136 int dst_height,
137 int src_stride,
138 int dst_stride,
139 const uint16_t* src_ptr,
140 uint16_t* dst_ptr,
141 enum FilterMode filtering) {
142 int y;
143 void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
144 uint16_t* dst_ptr, int dst_width) =
145 filtering == kFilterNone
146 ? ScaleRowDown2_16_C
147 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
148 : ScaleRowDown2Box_16_C);
149 int row_stride = src_stride << 1;
150 (void)src_width;
151 (void)src_height;
152 if (!filtering) {
153 src_ptr += src_stride; // Point to odd rows.
154 src_stride = 0;
155 }
156
157 #if defined(HAS_SCALEROWDOWN2_16_NEON)
158 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
159 ScaleRowDown2 =
160 filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
161 }
162 #endif
163 #if defined(HAS_SCALEROWDOWN2_16_SSE2)
164 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
165 ScaleRowDown2 =
166 filtering == kFilterNone
167 ? ScaleRowDown2_16_SSE2
168 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
169 : ScaleRowDown2Box_16_SSE2);
170 }
171 #endif
172
173 if (filtering == kFilterLinear) {
174 src_stride = 0;
175 }
176 // TODO(fbarchard): Loop through source height to allow odd height.
177 for (y = 0; y < dst_height; ++y) {
178 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
179 src_ptr += row_stride;
180 dst_ptr += dst_stride;
181 }
182 }
183
184 // Scale plane, 1/4
185 // This is an optimized version for scaling down a plane to 1/4 of
186 // its original size.
187
188 static void ScalePlaneDown4(int src_width,
189 int src_height,
190 int dst_width,
191 int dst_height,
192 int src_stride,
193 int dst_stride,
194 const uint8_t* src_ptr,
195 uint8_t* dst_ptr,
196 enum FilterMode filtering) {
197 int y;
198 void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
199 uint8_t* dst_ptr, int dst_width) =
200 filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
201 int row_stride = src_stride << 2;
202 (void)src_width;
203 (void)src_height;
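// As in ScalePlaneDown2, the unfiltered path reads a single source row:
// src_stride is zeroed and the pointer starts on row 2, a row near the
// middle of each 4-row group, while row_stride still advances four source
// rows per output row.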
204 if (!filtering) {
205 src_ptr += src_stride * 2; // Point to row 2.
206 src_stride = 0;
207 }
208 #if defined(HAS_SCALEROWDOWN4_NEON)
209 if (TestCpuFlag(kCpuHasNEON)) {
210 ScaleRowDown4 =
211 filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
212 if (IS_ALIGNED(dst_width, 8)) {
213 ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
214 }
215 }
216 #endif
217 #if defined(HAS_SCALEROWDOWN4_SSSE3)
218 if (TestCpuFlag(kCpuHasSSSE3)) {
219 ScaleRowDown4 =
220 filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
221 if (IS_ALIGNED(dst_width, 8)) {
222 ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
223 }
224 }
225 #endif
226 #if defined(HAS_SCALEROWDOWN4_AVX2)
227 if (TestCpuFlag(kCpuHasAVX2)) {
228 ScaleRowDown4 =
229 filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
230 if (IS_ALIGNED(dst_width, 16)) {
231 ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
232 }
233 }
234 #endif
235 #if defined(HAS_SCALEROWDOWN4_MSA)
236 if (TestCpuFlag(kCpuHasMSA)) {
237 ScaleRowDown4 =
238 filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
239 if (IS_ALIGNED(dst_width, 16)) {
240 ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
241 }
242 }
243 #endif
244
245 if (filtering == kFilterLinear) {
246 src_stride = 0;
247 }
248 for (y = 0; y < dst_height; ++y) {
249 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
250 src_ptr += row_stride;
251 dst_ptr += dst_stride;
252 }
253 }
254
255 static void ScalePlaneDown4_16(int src_width,
256 int src_height,
257 int dst_width,
258 int dst_height,
259 int src_stride,
260 int dst_stride,
261 const uint16_t* src_ptr,
262 uint16_t* dst_ptr,
263 enum FilterMode filtering) {
264 int y;
265 void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
266 uint16_t* dst_ptr, int dst_width) =
267 filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
268 int row_stride = src_stride << 2;
269 (void)src_width;
270 (void)src_height;
271 if (!filtering) {
272 src_ptr += src_stride * 2; // Point to row 2.
273 src_stride = 0;
274 }
275 #if defined(HAS_SCALEROWDOWN4_16_NEON)
276 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
277 ScaleRowDown4 =
278 filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
279 }
280 #endif
281 #if defined(HAS_SCALEROWDOWN4_16_SSE2)
282 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
283 ScaleRowDown4 =
284 filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
285 }
286 #endif
287
288 if (filtering == kFilterLinear) {
289 src_stride = 0;
290 }
291 for (y = 0; y < dst_height; ++y) {
292 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
293 src_ptr += row_stride;
294 dst_ptr += dst_stride;
295 }
296 }
297
298 // Scale plane down, 3/4
299 static void ScalePlaneDown34(int src_width,
300 int src_height,
301 int dst_width,
302 int dst_height,
303 int src_stride,
304 int dst_stride,
305 const uint8_t* src_ptr,
306 uint8_t* dst_ptr,
307 enum FilterMode filtering) {
308 int y;
309 void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
310 uint8_t* dst_ptr, int dst_width);
311 void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
312 uint8_t* dst_ptr, int dst_width);
313 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
314 (void)src_width;
315 (void)src_height;
316 assert(dst_width % 3 == 0);
317 if (!filtering) {
318 ScaleRowDown34_0 = ScaleRowDown34_C;
319 ScaleRowDown34_1 = ScaleRowDown34_C;
320 } else {
321 ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
322 ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
323 }
324 #if defined(HAS_SCALEROWDOWN34_NEON)
325 if (TestCpuFlag(kCpuHasNEON)) {
326 if (!filtering) {
327 ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
328 ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
329 } else {
330 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
331 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
332 }
333 if (dst_width % 24 == 0) {
334 if (!filtering) {
335 ScaleRowDown34_0 = ScaleRowDown34_NEON;
336 ScaleRowDown34_1 = ScaleRowDown34_NEON;
337 } else {
338 ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
339 ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
340 }
341 }
342 }
343 #endif
344 #if defined(HAS_SCALEROWDOWN34_MSA)
345 if (TestCpuFlag(kCpuHasMSA)) {
346 if (!filtering) {
347 ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
348 ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
349 } else {
350 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
351 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
352 }
353 if (dst_width % 48 == 0) {
354 if (!filtering) {
355 ScaleRowDown34_0 = ScaleRowDown34_MSA;
356 ScaleRowDown34_1 = ScaleRowDown34_MSA;
357 } else {
358 ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
359 ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
360 }
361 }
362 }
363 #endif
364 #if defined(HAS_SCALEROWDOWN34_SSSE3)
365 if (TestCpuFlag(kCpuHasSSSE3)) {
366 if (!filtering) {
367 ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
368 ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
369 } else {
370 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
371 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
372 }
373 if (dst_width % 24 == 0) {
374 if (!filtering) {
375 ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
376 ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
377 } else {
378 ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
379 ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
380 }
381 }
382 }
383 #endif
384
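// Each iteration consumes 4 source rows and produces 3 output rows. With
// box filtering, the first pass combines rows 0 and 1, the second combines
// rows 1 and 2, and the third starts at row 3 with a negated stride so it
// combines rows 3 and 2, mirroring the first pass.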
385 for (y = 0; y < dst_height - 2; y += 3) {
386 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
387 src_ptr += src_stride;
388 dst_ptr += dst_stride;
389 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
390 src_ptr += src_stride;
391 dst_ptr += dst_stride;
392 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
393 src_ptr += src_stride * 2;
394 dst_ptr += dst_stride;
395 }
396
397 // Remainder 1 or 2 rows with last row vertically unfiltered
398 if ((dst_height % 3) == 2) {
399 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
400 src_ptr += src_stride;
401 dst_ptr += dst_stride;
402 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
403 } else if ((dst_height % 3) == 1) {
404 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
405 }
406 }
407
408 static void ScalePlaneDown34_16(int src_width,
409 int src_height,
410 int dst_width,
411 int dst_height,
412 int src_stride,
413 int dst_stride,
414 const uint16_t* src_ptr,
415 uint16_t* dst_ptr,
416 enum FilterMode filtering) {
417 int y;
418 void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
419 uint16_t* dst_ptr, int dst_width);
420 void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
421 uint16_t* dst_ptr, int dst_width);
422 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
423 (void)src_width;
424 (void)src_height;
425 assert(dst_width % 3 == 0);
426 if (!filtering) {
427 ScaleRowDown34_0 = ScaleRowDown34_16_C;
428 ScaleRowDown34_1 = ScaleRowDown34_16_C;
429 } else {
430 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
431 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
432 }
433 #if defined(HAS_SCALEROWDOWN34_16_NEON)
434 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
435 if (!filtering) {
436 ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
437 ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
438 } else {
439 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
440 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
441 }
442 }
443 #endif
444 #if defined(HAS_SCALEROWDOWN34_16_SSSE3)
445 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
446 if (!filtering) {
447 ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
448 ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
449 } else {
450 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
451 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
452 }
453 }
454 #endif
455
456 for (y = 0; y < dst_height - 2; y += 3) {
457 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
458 src_ptr += src_stride;
459 dst_ptr += dst_stride;
460 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
461 src_ptr += src_stride;
462 dst_ptr += dst_stride;
463 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
464 src_ptr += src_stride * 2;
465 dst_ptr += dst_stride;
466 }
467
468 // Remainder 1 or 2 rows with last row vertically unfiltered
469 if ((dst_height % 3) == 2) {
470 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
471 src_ptr += src_stride;
472 dst_ptr += dst_stride;
473 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
474 } else if ((dst_height % 3) == 1) {
475 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
476 }
477 }
478
479 // Scale plane, 3/8
480 // This is an optimized version for scaling down a plane to 3/8
481 // of its original size.
482 //
483 // Uses a box filter arranged like this:
484 // aaabbbcc -> abc
485 // aaabbbcc def
486 // aaabbbcc ghi
487 // dddeeeff
488 // dddeeeff
489 // dddeeeff
490 // ggghhhii
491 // ggghhhii
492 // Boxes are 3x3, 2x3, 3x2 and 2x2
493
494 static void ScalePlaneDown38(int src_width,
495 int src_height,
496 int dst_width,
497 int dst_height,
498 int src_stride,
499 int dst_stride,
500 const uint8_t* src_ptr,
501 uint8_t* dst_ptr,
502 enum FilterMode filtering) {
503 int y;
504 void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
505 uint8_t* dst_ptr, int dst_width);
506 void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
507 uint8_t* dst_ptr, int dst_width);
508 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
509 assert(dst_width % 3 == 0);
510 (void)src_width;
511 (void)src_height;
512 if (!filtering) {
513 ScaleRowDown38_3 = ScaleRowDown38_C;
514 ScaleRowDown38_2 = ScaleRowDown38_C;
515 } else {
516 ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
517 ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
518 }
519
520 #if defined(HAS_SCALEROWDOWN38_NEON)
521 if (TestCpuFlag(kCpuHasNEON)) {
522 if (!filtering) {
523 ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
524 ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
525 } else {
526 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
527 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
528 }
529 if (dst_width % 12 == 0) {
530 if (!filtering) {
531 ScaleRowDown38_3 = ScaleRowDown38_NEON;
532 ScaleRowDown38_2 = ScaleRowDown38_NEON;
533 } else {
534 ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
535 ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
536 }
537 }
538 }
539 #endif
540 #if defined(HAS_SCALEROWDOWN38_SSSE3)
541 if (TestCpuFlag(kCpuHasSSSE3)) {
542 if (!filtering) {
543 ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
544 ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
545 } else {
546 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
547 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
548 }
549 if (dst_width % 12 == 0 && !filtering) {
550 ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
551 ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
552 }
553 if (dst_width % 6 == 0 && filtering) {
554 ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
555 ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
556 }
557 }
558 #endif
559 #if defined(HAS_SCALEROWDOWN38_MSA)
560 if (TestCpuFlag(kCpuHasMSA)) {
561 if (!filtering) {
562 ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
563 ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
564 } else {
565 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
566 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
567 }
568 if (dst_width % 12 == 0) {
569 if (!filtering) {
570 ScaleRowDown38_3 = ScaleRowDown38_MSA;
571 ScaleRowDown38_2 = ScaleRowDown38_MSA;
572 } else {
573 ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
574 ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
575 }
576 }
577 }
578 #endif
579
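// Each iteration consumes 8 source rows and produces 3 output rows, matching
// the box layout above: two 3-row boxes followed by one 2-row box.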
580 for (y = 0; y < dst_height - 2; y += 3) {
581 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
582 src_ptr += src_stride * 3;
583 dst_ptr += dst_stride;
584 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
585 src_ptr += src_stride * 3;
586 dst_ptr += dst_stride;
587 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
588 src_ptr += src_stride * 2;
589 dst_ptr += dst_stride;
590 }
591
592 // Remainder 1 or 2 rows with last row vertically unfiltered
593 if ((dst_height % 3) == 2) {
594 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
595 src_ptr += src_stride * 3;
596 dst_ptr += dst_stride;
597 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
598 } else if ((dst_height % 3) == 1) {
599 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
600 }
601 }
602
603 static void ScalePlaneDown38_16(int src_width,
604 int src_height,
605 int dst_width,
606 int dst_height,
607 int src_stride,
608 int dst_stride,
609 const uint16_t* src_ptr,
610 uint16_t* dst_ptr,
611 enum FilterMode filtering) {
612 int y;
613 void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
614 uint16_t* dst_ptr, int dst_width);
615 void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
616 uint16_t* dst_ptr, int dst_width);
617 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
618 (void)src_width;
619 (void)src_height;
620 assert(dst_width % 3 == 0);
621 if (!filtering) {
622 ScaleRowDown38_3 = ScaleRowDown38_16_C;
623 ScaleRowDown38_2 = ScaleRowDown38_16_C;
624 } else {
625 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
626 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
627 }
628 #if defined(HAS_SCALEROWDOWN38_16_NEON)
629 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
630 if (!filtering) {
631 ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
632 ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
633 } else {
634 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
635 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
636 }
637 }
638 #endif
639 #if defined(HAS_SCALEROWDOWN38_16_SSSE3)
640 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
641 if (!filtering) {
642 ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
643 ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
644 } else {
645 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
646 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
647 }
648 }
649 #endif
650
651 for (y = 0; y < dst_height - 2; y += 3) {
652 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
653 src_ptr += src_stride * 3;
654 dst_ptr += dst_stride;
655 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
656 src_ptr += src_stride * 3;
657 dst_ptr += dst_stride;
658 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
659 src_ptr += src_stride * 2;
660 dst_ptr += dst_stride;
661 }
662
663 // Remainder 1 or 2 rows with last row vertically unfiltered
664 if ((dst_height % 3) == 2) {
665 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
666 src_ptr += src_stride * 3;
667 dst_ptr += dst_stride;
668 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
669 } else if ((dst_height % 3) == 1) {
670 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
671 }
672 }
673
674 #define MIN1(x) ((x) < 1 ? 1 : (x))
675
676 static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
677 uint32_t sum = 0u;
678 int x;
679 assert(iboxwidth > 0);
680 for (x = 0; x < iboxwidth; ++x) {
681 sum += src_ptr[x];
682 }
683 return sum;
684 }
685
686 static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
687 uint32_t sum = 0u;
688 int x;
689 assert(iboxwidth > 0);
690 for (x = 0; x < iboxwidth; ++x) {
691 sum += src_ptr[x];
692 }
693 return sum;
694 }
695
696 static void ScaleAddCols2_C(int dst_width,
697 int boxheight,
698 int x,
699 int dx,
700 const uint16_t* src_ptr,
701 uint8_t* dst_ptr) {
702 int i;
703 int scaletbl[2];
704 int minboxwidth = dx >> 16;
705 int boxwidth;
706 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
707 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
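// scaletbl holds 65536 / (boxwidth * boxheight) for the two possible box
// widths (dx is not an exact multiple of 65536, so per-pixel boxes are
// either minboxwidth or minboxwidth + 1 source pixels wide). The average is
// then a multiply and shift instead of a per-pixel divide.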
708 for (i = 0; i < dst_width; ++i) {
709 int ix = x >> 16;
710 x += dx;
711 boxwidth = MIN1((x >> 16) - ix);
712 *dst_ptr++ =
713 SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >>
714 16;
715 }
716 }
717
718 static void ScaleAddCols2_16_C(int dst_width,
719 int boxheight,
720 int x,
721 int dx,
722 const uint32_t* src_ptr,
723 uint16_t* dst_ptr) {
724 int i;
725 int scaletbl[2];
726 int minboxwidth = dx >> 16;
727 int boxwidth;
728 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
729 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
730 for (i = 0; i < dst_width; ++i) {
731 int ix = x >> 16;
732 x += dx;
733 boxwidth = MIN1((x >> 16) - ix);
734 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
735 scaletbl[boxwidth - minboxwidth] >>
736 16;
737 }
738 }
739
740 static void ScaleAddCols0_C(int dst_width,
741 int boxheight,
742 int x,
743 int dx,
744 const uint16_t* src_ptr,
745 uint8_t* dst_ptr) {
746 int scaleval = 65536 / boxheight;
747 int i;
748 (void)dx;
749 src_ptr += (x >> 16);
750 for (i = 0; i < dst_width; ++i) {
751 *dst_ptr++ = src_ptr[i] * scaleval >> 16;
752 }
753 }
754
755 static void ScaleAddCols1_C(int dst_width,
756 int boxheight,
757 int x,
758 int dx,
759 const uint16_t* src_ptr,
760 uint8_t* dst_ptr) {
761 int boxwidth = MIN1(dx >> 16);
762 int scaleval = 65536 / (boxwidth * boxheight);
763 int i;
764 x >>= 16;
765 for (i = 0; i < dst_width; ++i) {
766 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
767 x += boxwidth;
768 }
769 }
770
771 static void ScaleAddCols1_16_C(int dst_width,
772 int boxheight,
773 int x,
774 int dx,
775 const uint32_t* src_ptr,
776 uint16_t* dst_ptr) {
777 int boxwidth = MIN1(dx >> 16);
778 int scaleval = 65536 / (boxwidth * boxheight);
779 int i;
780 for (i = 0; i < dst_width; ++i) {
781 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
782 x += boxwidth;
783 }
784 }
785
786 // Scale plane down to any dimensions, with interpolation.
787 // (boxfilter).
788 //
789 // Same method as SimpleScale, which is fixed point, outputting
790 // one pixel of destination using fixed point (16.16) to step
791 // through source, sampling a box of pixels with simple
792 // averaging.
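// For example, scaling 100 columns down to 30 makes dx roughly 3.33 in 16.16
// fixed point (this assumes ScaleSlope computes dx as src_width / dst_width
// in fixed point for kFilterBox), so horizontal boxes alternate between 3
// and 4 source pixels and ScaleAddCols2_C is chosen below.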
793 static void ScalePlaneBox(int src_width,
794 int src_height,
795 int dst_width,
796 int dst_height,
797 int src_stride,
798 int dst_stride,
799 const uint8_t* src_ptr,
800 uint8_t* dst_ptr) {
801 int j, k;
802 // Initial source x/y coordinate and step values as 16.16 fixed point.
803 int x = 0;
804 int y = 0;
805 int dx = 0;
806 int dy = 0;
807 const int max_y = (src_height << 16);
808 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
809 &dx, &dy);
810 src_width = Abs(src_width);
811 {
812 // Allocate a row buffer of uint16_t.
813 align_buffer_64(row16, src_width * 2);
814 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
815 const uint16_t* src_ptr, uint8_t* dst_ptr) =
816 (dx & 0xffff) ? ScaleAddCols2_C
817 : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
818 void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
819 int src_width) = ScaleAddRow_C;
820 #if defined(HAS_SCALEADDROW_SSE2)
821 if (TestCpuFlag(kCpuHasSSE2)) {
822 ScaleAddRow = ScaleAddRow_Any_SSE2;
823 if (IS_ALIGNED(src_width, 16)) {
824 ScaleAddRow = ScaleAddRow_SSE2;
825 }
826 }
827 #endif
828 #if defined(HAS_SCALEADDROW_AVX2)
829 if (TestCpuFlag(kCpuHasAVX2)) {
830 ScaleAddRow = ScaleAddRow_Any_AVX2;
831 if (IS_ALIGNED(src_width, 32)) {
832 ScaleAddRow = ScaleAddRow_AVX2;
833 }
834 }
835 #endif
836 #if defined(HAS_SCALEADDROW_NEON)
837 if (TestCpuFlag(kCpuHasNEON)) {
838 ScaleAddRow = ScaleAddRow_Any_NEON;
839 if (IS_ALIGNED(src_width, 16)) {
840 ScaleAddRow = ScaleAddRow_NEON;
841 }
842 }
843 #endif
844 #if defined(HAS_SCALEADDROW_MSA)
845 if (TestCpuFlag(kCpuHasMSA)) {
846 ScaleAddRow = ScaleAddRow_Any_MSA;
847 if (IS_ALIGNED(src_width, 16)) {
848 ScaleAddRow = ScaleAddRow_MSA;
849 }
850 }
851 #endif
852
853 for (j = 0; j < dst_height; ++j) {
854 int boxheight;
855 int iy = y >> 16;
856 const uint8_t* src = src_ptr + iy * src_stride;
857 y += dy;
858 if (y > max_y) {
859 y = max_y;
860 }
861 boxheight = MIN1((y >> 16) - iy);
862 memset(row16, 0, src_width * 2);
863 for (k = 0; k < boxheight; ++k) {
864 ScaleAddRow(src, (uint16_t*)(row16), src_width);
865 src += src_stride;
866 }
867 ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
868 dst_ptr += dst_stride;
869 }
870 free_aligned_buffer_64(row16);
871 }
872 }
873
874 static void ScalePlaneBox_16(int src_width,
875 int src_height,
876 int dst_width,
877 int dst_height,
878 int src_stride,
879 int dst_stride,
880 const uint16_t* src_ptr,
881 uint16_t* dst_ptr) {
882 int j, k;
883 // Initial source x/y coordinate and step values as 16.16 fixed point.
884 int x = 0;
885 int y = 0;
886 int dx = 0;
887 int dy = 0;
888 const int max_y = (src_height << 16);
889 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
890 &dx, &dy);
891 src_width = Abs(src_width);
892 {
893 // Allocate a row buffer of uint32_t.
894 align_buffer_64(row32, src_width * 4);
895 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
896 const uint32_t* src_ptr, uint16_t* dst_ptr) =
897 (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
898 void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
899 int src_width) = ScaleAddRow_16_C;
900
901 #if defined(HAS_SCALEADDROW_16_SSE2)
902 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
903 ScaleAddRow = ScaleAddRow_16_SSE2;
904 }
905 #endif
906
907 for (j = 0; j < dst_height; ++j) {
908 int boxheight;
909 int iy = y >> 16;
910 const uint16_t* src = src_ptr + iy * src_stride;
911 y += dy;
912 if (y > max_y) {
913 y = max_y;
914 }
915 boxheight = MIN1((y >> 16) - iy);
916 memset(row32, 0, src_width * 4);
917 for (k = 0; k < boxheight; ++k) {
918 ScaleAddRow(src, (uint32_t*)(row32), src_width);
919 src += src_stride;
920 }
921 ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
922 dst_ptr += dst_stride;
923 }
924 free_aligned_buffer_64(row32);
925 }
926 }
927
928 // Scale plane down with bilinear interpolation.
929 void ScalePlaneBilinearDown(int src_width,
930 int src_height,
931 int dst_width,
932 int dst_height,
933 int src_stride,
934 int dst_stride,
935 const uint8_t* src_ptr,
936 uint8_t* dst_ptr,
937 enum FilterMode filtering) {
938 // Initial source x/y coordinate and step values as 16.16 fixed point.
939 int x = 0;
940 int y = 0;
941 int dx = 0;
942 int dy = 0;
943 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
944 // Allocate a row buffer.
945 align_buffer_64(row, src_width);
946
947 const int max_y = (src_height - 1) << 16;
948 int j;
949 void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
950 int dst_width, int x, int dx) =
951 (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
952 void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
953 ptrdiff_t src_stride, int dst_width,
954 int source_y_fraction) = InterpolateRow_C;
955 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
956 &dx, &dy);
957 src_width = Abs(src_width);
958
959 #if defined(HAS_INTERPOLATEROW_SSSE3)
960 if (TestCpuFlag(kCpuHasSSSE3)) {
961 InterpolateRow = InterpolateRow_Any_SSSE3;
962 if (IS_ALIGNED(src_width, 16)) {
963 InterpolateRow = InterpolateRow_SSSE3;
964 }
965 }
966 #endif
967 #if defined(HAS_INTERPOLATEROW_AVX2)
968 if (TestCpuFlag(kCpuHasAVX2)) {
969 InterpolateRow = InterpolateRow_Any_AVX2;
970 if (IS_ALIGNED(src_width, 32)) {
971 InterpolateRow = InterpolateRow_AVX2;
972 }
973 }
974 #endif
975 #if defined(HAS_INTERPOLATEROW_NEON)
976 if (TestCpuFlag(kCpuHasNEON)) {
977 InterpolateRow = InterpolateRow_Any_NEON;
978 if (IS_ALIGNED(src_width, 16)) {
979 InterpolateRow = InterpolateRow_NEON;
980 }
981 }
982 #endif
983 #if defined(HAS_INTERPOLATEROW_MSA)
984 if (TestCpuFlag(kCpuHasMSA)) {
985 InterpolateRow = InterpolateRow_Any_MSA;
986 if (IS_ALIGNED(src_width, 32)) {
987 InterpolateRow = InterpolateRow_MSA;
988 }
989 }
990 #endif
991
992 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
993 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
994 ScaleFilterCols = ScaleFilterCols_SSSE3;
995 }
996 #endif
997 #if defined(HAS_SCALEFILTERCOLS_NEON)
998 if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
999 ScaleFilterCols = ScaleFilterCols_Any_NEON;
1000 if (IS_ALIGNED(dst_width, 8)) {
1001 ScaleFilterCols = ScaleFilterCols_NEON;
1002 }
1003 }
1004 #endif
1005 #if defined(HAS_SCALEFILTERCOLS_MSA)
1006 if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1007 ScaleFilterCols = ScaleFilterCols_Any_MSA;
1008 if (IS_ALIGNED(dst_width, 16)) {
1009 ScaleFilterCols = ScaleFilterCols_MSA;
1010 }
1011 }
1012 #endif
1013 if (y > max_y) {
1014 y = max_y;
1015 }
1016
1017 for (j = 0; j < dst_height; ++j) {
1018 int yi = y >> 16;
1019 const uint8_t* src = src_ptr + yi * src_stride;
1020 if (filtering == kFilterLinear) {
1021 ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1022 } else {
1023 int yf = (y >> 8) & 255;
1024 InterpolateRow(row, src, src_stride, src_width, yf);
1025 ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
1026 }
1027 dst_ptr += dst_stride;
1028 y += dy;
1029 if (y > max_y) {
1030 y = max_y;
1031 }
1032 }
1033 free_aligned_buffer_64(row);
1034 }
1035
1036 void ScalePlaneBilinearDown_16(int src_width,
1037 int src_height,
1038 int dst_width,
1039 int dst_height,
1040 int src_stride,
1041 int dst_stride,
1042 const uint16_t* src_ptr,
1043 uint16_t* dst_ptr,
1044 enum FilterMode filtering) {
1045 // Initial source x/y coordinate and step values as 16.16 fixed point.
1046 int x = 0;
1047 int y = 0;
1048 int dx = 0;
1049 int dy = 0;
1050 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1051 // Allocate a row buffer.
1052 align_buffer_64(row, src_width * 2);
1053
1054 const int max_y = (src_height - 1) << 16;
1055 int j;
1056 void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
1057 int dst_width, int x, int dx) =
1058 (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
1059 void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
1060 ptrdiff_t src_stride, int dst_width,
1061 int source_y_fraction) = InterpolateRow_16_C;
1062 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1063 &dx, &dy);
1064 src_width = Abs(src_width);
1065
1066 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1067 if (TestCpuFlag(kCpuHasSSE2)) {
1068 InterpolateRow = InterpolateRow_Any_16_SSE2;
1069 if (IS_ALIGNED(src_width, 16)) {
1070 InterpolateRow = InterpolateRow_16_SSE2;
1071 }
1072 }
1073 #endif
1074 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1075 if (TestCpuFlag(kCpuHasSSSE3)) {
1076 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1077 if (IS_ALIGNED(src_width, 16)) {
1078 InterpolateRow = InterpolateRow_16_SSSE3;
1079 }
1080 }
1081 #endif
1082 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1083 if (TestCpuFlag(kCpuHasAVX2)) {
1084 InterpolateRow = InterpolateRow_Any_16_AVX2;
1085 if (IS_ALIGNED(src_width, 32)) {
1086 InterpolateRow = InterpolateRow_16_AVX2;
1087 }
1088 }
1089 #endif
1090 #if defined(HAS_INTERPOLATEROW_16_NEON)
1091 if (TestCpuFlag(kCpuHasNEON)) {
1092 InterpolateRow = InterpolateRow_Any_16_NEON;
1093 if (IS_ALIGNED(src_width, 16)) {
1094 InterpolateRow = InterpolateRow_16_NEON;
1095 }
1096 }
1097 #endif
1098
1099 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1100 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1101 ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1102 }
1103 #endif
1104 if (y > max_y) {
1105 y = max_y;
1106 }
1107
1108 for (j = 0; j < dst_height; ++j) {
1109 int yi = y >> 16;
1110 const uint16_t* src = src_ptr + yi * src_stride;
1111 if (filtering == kFilterLinear) {
1112 ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1113 } else {
1114 int yf = (y >> 8) & 255;
1115 InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
1116 ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
1117 }
1118 dst_ptr += dst_stride;
1119 y += dy;
1120 if (y > max_y) {
1121 y = max_y;
1122 }
1123 }
1124 free_aligned_buffer_64(row);
1125 }
1126
1127 // Scale plane up with bilinear interpolation.
1128 void ScalePlaneBilinearUp(int src_width,
1129 int src_height,
1130 int dst_width,
1131 int dst_height,
1132 int src_stride,
1133 int dst_stride,
1134 const uint8_t* src_ptr,
1135 uint8_t* dst_ptr,
1136 enum FilterMode filtering) {
1137 int j;
1138 // Initial source x/y coordinate and step values as 16.16 fixed point.
1139 int x = 0;
1140 int y = 0;
1141 int dx = 0;
1142 int dy = 0;
1143 const int max_y = (src_height - 1) << 16;
1144 void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr,
1145 ptrdiff_t src_stride, int dst_width,
1146 int source_y_fraction) = InterpolateRow_C;
1147 void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr,
1148 int dst_width, int x, int dx) =
1149 filtering ? ScaleFilterCols_C : ScaleCols_C;
1150 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1151 &dx, &dy);
1152 src_width = Abs(src_width);
1153
1154 #if defined(HAS_INTERPOLATEROW_SSSE3)
1155 if (TestCpuFlag(kCpuHasSSSE3)) {
1156 InterpolateRow = InterpolateRow_Any_SSSE3;
1157 if (IS_ALIGNED(dst_width, 16)) {
1158 InterpolateRow = InterpolateRow_SSSE3;
1159 }
1160 }
1161 #endif
1162 #if defined(HAS_INTERPOLATEROW_AVX2)
1163 if (TestCpuFlag(kCpuHasAVX2)) {
1164 InterpolateRow = InterpolateRow_Any_AVX2;
1165 if (IS_ALIGNED(dst_width, 32)) {
1166 InterpolateRow = InterpolateRow_AVX2;
1167 }
1168 }
1169 #endif
1170 #if defined(HAS_INTERPOLATEROW_NEON)
1171 if (TestCpuFlag(kCpuHasNEON)) {
1172 InterpolateRow = InterpolateRow_Any_NEON;
1173 if (IS_ALIGNED(dst_width, 16)) {
1174 InterpolateRow = InterpolateRow_NEON;
1175 }
1176 }
1177 #endif
1178
1179 if (filtering && src_width >= 32768) {
1180 ScaleFilterCols = ScaleFilterCols64_C;
1181 }
1182 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
1183 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1184 ScaleFilterCols = ScaleFilterCols_SSSE3;
1185 }
1186 #endif
1187 #if defined(HAS_SCALEFILTERCOLS_NEON)
1188 if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1189 ScaleFilterCols = ScaleFilterCols_Any_NEON;
1190 if (IS_ALIGNED(dst_width, 8)) {
1191 ScaleFilterCols = ScaleFilterCols_NEON;
1192 }
1193 }
1194 #endif
1195 #if defined(HAS_SCALEFILTERCOLS_MSA)
1196 if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1197 ScaleFilterCols = ScaleFilterCols_Any_MSA;
1198 if (IS_ALIGNED(dst_width, 16)) {
1199 ScaleFilterCols = ScaleFilterCols_MSA;
1200 }
1201 }
1202 #endif
1203 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1204 ScaleFilterCols = ScaleColsUp2_C;
1205 #if defined(HAS_SCALECOLS_SSE2)
1206 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1207 ScaleFilterCols = ScaleColsUp2_SSE2;
1208 }
1209 #endif
1210 }
1211
1212 if (y > max_y) {
1213 y = max_y;
1214 }
1215 {
1216 int yi = y >> 16;
1217 const uint8_t* src = src_ptr + yi * src_stride;
1218
1219 // Allocate 2 row buffers.
1220 const int kRowSize = (dst_width + 31) & ~31;
1221 align_buffer_64(row, kRowSize * 2);
1222
1223 uint8_t* rowptr = row;
1224 int rowstride = kRowSize;
1225 int lasty = yi;
1226
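// rowptr and rowptr + rowstride hold the two horizontally scaled source
// rows bracketing the current y. When the integer source row advances,
// the stale buffer is overwritten with the new row, rowptr moves to the
// other buffer and rowstride is negated, so only one column pass is
// needed per new source row.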
1227 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1228 if (src_height > 1) {
1229 src += src_stride;
1230 }
1231 ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1232 src += src_stride;
1233
1234 for (j = 0; j < dst_height; ++j) {
1235 yi = y >> 16;
1236 if (yi != lasty) {
1237 if (y > max_y) {
1238 y = max_y;
1239 yi = y >> 16;
1240 src = src_ptr + yi * src_stride;
1241 }
1242 if (yi != lasty) {
1243 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1244 rowptr += rowstride;
1245 rowstride = -rowstride;
1246 lasty = yi;
1247 src += src_stride;
1248 }
1249 }
1250 if (filtering == kFilterLinear) {
1251 InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1252 } else {
1253 int yf = (y >> 8) & 255;
1254 InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1255 }
1256 dst_ptr += dst_stride;
1257 y += dy;
1258 }
1259 free_aligned_buffer_64(row);
1260 }
1261 }
1262
1263 void ScalePlaneBilinearUp_16(int src_width,
1264 int src_height,
1265 int dst_width,
1266 int dst_height,
1267 int src_stride,
1268 int dst_stride,
1269 const uint16_t* src_ptr,
1270 uint16_t* dst_ptr,
1271 enum FilterMode filtering) {
1272 int j;
1273 // Initial source x/y coordinate and step values as 16.16 fixed point.
1274 int x = 0;
1275 int y = 0;
1276 int dx = 0;
1277 int dy = 0;
1278 const int max_y = (src_height - 1) << 16;
1279 void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr,
1280 ptrdiff_t src_stride, int dst_width,
1281 int source_y_fraction) = InterpolateRow_16_C;
1282 void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr,
1283 int dst_width, int x, int dx) =
1284 filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
1285 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1286 &dx, &dy);
1287 src_width = Abs(src_width);
1288
1289 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1290 if (TestCpuFlag(kCpuHasSSE2)) {
1291 InterpolateRow = InterpolateRow_Any_16_SSE2;
1292 if (IS_ALIGNED(dst_width, 16)) {
1293 InterpolateRow = InterpolateRow_16_SSE2;
1294 }
1295 }
1296 #endif
1297 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1298 if (TestCpuFlag(kCpuHasSSSE3)) {
1299 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1300 if (IS_ALIGNED(dst_width, 16)) {
1301 InterpolateRow = InterpolateRow_16_SSSE3;
1302 }
1303 }
1304 #endif
1305 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1306 if (TestCpuFlag(kCpuHasAVX2)) {
1307 InterpolateRow = InterpolateRow_Any_16_AVX2;
1308 if (IS_ALIGNED(dst_width, 32)) {
1309 InterpolateRow = InterpolateRow_16_AVX2;
1310 }
1311 }
1312 #endif
1313 #if defined(HAS_INTERPOLATEROW_16_NEON)
1314 if (TestCpuFlag(kCpuHasNEON)) {
1315 InterpolateRow = InterpolateRow_Any_16_NEON;
1316 if (IS_ALIGNED(dst_width, 16)) {
1317 InterpolateRow = InterpolateRow_16_NEON;
1318 }
1319 }
1320 #endif
1321
1322 if (filtering && src_width >= 32768) {
1323 ScaleFilterCols = ScaleFilterCols64_16_C;
1324 }
1325 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1326 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1327 ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1328 }
1329 #endif
1330 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1331 ScaleFilterCols = ScaleColsUp2_16_C;
1332 #if defined(HAS_SCALECOLS_16_SSE2)
1333 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1334 ScaleFilterCols = ScaleColsUp2_16_SSE2;
1335 }
1336 #endif
1337 }
1338
1339 if (y > max_y) {
1340 y = max_y;
1341 }
1342 {
1343 int yi = y >> 16;
1344 const uint16_t* src = src_ptr + yi * src_stride;
1345
1346 // Allocate 2 row buffers.
1347 const int kRowSize = (dst_width + 31) & ~31;
1348 align_buffer_64(row, kRowSize * 4);
1349
1350 uint16_t* rowptr = (uint16_t*)row;
1351 int rowstride = kRowSize;
1352 int lasty = yi;
1353
1354 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1355 if (src_height > 1) {
1356 src += src_stride;
1357 }
1358 ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
1359 src += src_stride;
1360
1361 for (j = 0; j < dst_height; ++j) {
1362 yi = y >> 16;
1363 if (yi != lasty) {
1364 if (y > max_y) {
1365 y = max_y;
1366 yi = y >> 16;
1367 src = src_ptr + yi * src_stride;
1368 }
1369 if (yi != lasty) {
1370 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1371 rowptr += rowstride;
1372 rowstride = -rowstride;
1373 lasty = yi;
1374 src += src_stride;
1375 }
1376 }
1377 if (filtering == kFilterLinear) {
1378 InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
1379 } else {
1380 int yf = (y >> 8) & 255;
1381 InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
1382 }
1383 dst_ptr += dst_stride;
1384 y += dy;
1385 }
1386 free_aligned_buffer_64(row);
1387 }
1388 }
1389
1390 // Scale Plane to/from any dimensions, without interpolation.
1391 // Fixed point math is used for performance: The upper 16 bits
1392 // of x and dx are the integer part of the source position and
1393 // the lower 16 bits are the fixed decimal part.
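// For each output pixel, ScaleCols_C reads source column (x >> 16) and then
// advances x by dx; e.g. dx of 0x20000 steps two source columns per output
// pixel, giving an unfiltered 1/2 horizontal scale.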
1394
1395 static void ScalePlaneSimple(int src_width,
1396 int src_height,
1397 int dst_width,
1398 int dst_height,
1399 int src_stride,
1400 int dst_stride,
1401 const uint8_t* src_ptr,
1402 uint8_t* dst_ptr) {
1403 int i;
1404 void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width,
1405 int x, int dx) = ScaleCols_C;
1406 // Initial source x/y coordinate and step values as 16.16 fixed point.
1407 int x = 0;
1408 int y = 0;
1409 int dx = 0;
1410 int dy = 0;
1411 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1412 &dx, &dy);
1413 src_width = Abs(src_width);
1414
1415 if (src_width * 2 == dst_width && x < 0x8000) {
1416 ScaleCols = ScaleColsUp2_C;
1417 #if defined(HAS_SCALECOLS_SSE2)
1418 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1419 ScaleCols = ScaleColsUp2_SSE2;
1420 }
1421 #endif
1422 }
1423
1424 for (i = 0; i < dst_height; ++i) {
1425 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
1426 dst_ptr += dst_stride;
1427 y += dy;
1428 }
1429 }
1430
1431 static void ScalePlaneSimple_16(int src_width,
1432 int src_height,
1433 int dst_width,
1434 int dst_height,
1435 int src_stride,
1436 int dst_stride,
1437 const uint16_t* src_ptr,
1438 uint16_t* dst_ptr) {
1439 int i;
1440 void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width,
1441 int x, int dx) = ScaleCols_16_C;
1442 // Initial source x/y coordinate and step values as 16.16 fixed point.
1443 int x = 0;
1444 int y = 0;
1445 int dx = 0;
1446 int dy = 0;
1447 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1448 &dx, &dy);
1449 src_width = Abs(src_width);
1450
1451 if (src_width * 2 == dst_width && x < 0x8000) {
1452 ScaleCols = ScaleColsUp2_16_C;
1453 #if defined(HAS_SCALECOLS_16_SSE2)
1454 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1455 ScaleCols = ScaleColsUp2_16_SSE2;
1456 }
1457 #endif
1458 }
1459
1460 for (i = 0; i < dst_height; ++i) {
1461 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
1462 dst_ptr += dst_stride;
1463 y += dy;
1464 }
1465 }
1466
1467 // Scale a plane.
1468 // This function dispatches to a specialized scaler based on scale factor.
1469
1470 LIBYUV_API
1471 void ScalePlane(const uint8_t* src,
1472 int src_stride,
1473 int src_width,
1474 int src_height,
1475 uint8_t* dst,
1476 int dst_stride,
1477 int dst_width,
1478 int dst_height,
1479 enum FilterMode filtering) {
1480 // Simplify filtering when possible.
1481 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1482 filtering);
1483
1484 // Negative height means invert the image.
1485 if (src_height < 0) {
1486 src_height = -src_height;
1487 src = src + (src_height - 1) * src_stride;
1488 src_stride = -src_stride;
1489 }
1490
1491 // Use specialized scales to improve performance for common resolutions.
1492 // For example, all the 1/2 scalings will use ScalePlaneDown2()
1493 if (dst_width == src_width && dst_height == src_height) {
1494 // Straight copy.
1495 CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
1496 return;
1497 }
1498 if (dst_width == src_width && filtering != kFilterBox) {
1499 int dy = FixedDiv(src_height, dst_height);
1500 // Arbitrary scale vertically, but unscaled horizontally.
1501 ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
1502 dst_stride, src, dst, 0, 0, dy, 1, filtering);
1503 return;
1504 }
1505 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1506 // Scale down.
1507 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
1508 // optimized, 3/4
1509 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
1510 dst_stride, src, dst, filtering);
1511 return;
1512 }
1513 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1514 // optimized, 1/2
1515 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
1516 dst_stride, src, dst, filtering);
1517 return;
1518 }
1519 // 3/8 rounded up for odd sized chroma height.
1520 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
1521 // optimized, 3/8
1522 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
1523 dst_stride, src, dst, filtering);
1524 return;
1525 }
1526 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1527 (filtering == kFilterBox || filtering == kFilterNone)) {
1528 // optimized, 1/4
1529 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
1530 dst_stride, src, dst, filtering);
1531 return;
1532 }
1533 }
1534 if (filtering == kFilterBox && dst_height * 2 < src_height) {
1535 ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride,
1536 dst_stride, src, dst);
1537 return;
1538 }
1539 if (filtering && dst_height > src_height) {
1540 ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
1541 src_stride, dst_stride, src, dst, filtering);
1542 return;
1543 }
1544 if (filtering) {
1545 ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
1546 src_stride, dst_stride, src, dst, filtering);
1547 return;
1548 }
1549 ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
1550 dst_stride, src, dst);
1551 }
1552
1553 LIBYUV_API
1554 void ScalePlane_16(const uint16_t* src,
1555 int src_stride,
1556 int src_width,
1557 int src_height,
1558 uint16_t* dst,
1559 int dst_stride,
1560 int dst_width,
1561 int dst_height,
1562 enum FilterMode filtering) {
1563 // Simplify filtering when possible.
1564 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
1565 filtering);
1566
1567 // Negative height means invert the image.
1568 if (src_height < 0) {
1569 src_height = -src_height;
1570 src = src + (src_height - 1) * src_stride;
1571 src_stride = -src_stride;
1572 }
1573
1574 // Use specialized scales to improve performance for common resolutions.
1575 // For example, all the 1/2 scalings will use ScalePlaneDown2()
1576 if (dst_width == src_width && dst_height == src_height) {
1577 // Straight copy.
1578 CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
1579 return;
1580 }
1581 if (dst_width == src_width && filtering != kFilterBox) {
1582 int dy = FixedDiv(src_height, dst_height);
1583 // Arbitrary scale vertically, but unscaled horizontally.
1584 ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
1585 dst_stride, src, dst, 0, 0, dy, 1, filtering);
1586 return;
1587 }
1588 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1589 // Scale down.
1590 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
1591 // optimized, 3/4
1592 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
1593 src_stride, dst_stride, src, dst, filtering);
1594 return;
1595 }
1596 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
1597 // optimized, 1/2
1598 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
1599 src_stride, dst_stride, src, dst, filtering);
1600 return;
1601 }
1602 // 3/8 rounded up for odd sized chroma height.
1603 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
1604 // optimized, 3/8
1605 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
1606 src_stride, dst_stride, src, dst, filtering);
1607 return;
1608 }
1609 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1610 (filtering == kFilterBox || filtering == kFilterNone)) {
1611 // optimized, 1/4
1612 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
1613 src_stride, dst_stride, src, dst, filtering);
1614 return;
1615 }
1616 }
1617 if (filtering == kFilterBox && dst_height * 2 < src_height) {
1618 ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride,
1619 dst_stride, src, dst);
1620 return;
1621 }
1622 if (filtering && dst_height > src_height) {
1623 ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
1624 src_stride, dst_stride, src, dst, filtering);
1625 return;
1626 }
1627 if (filtering) {
1628 ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
1629 src_stride, dst_stride, src, dst, filtering);
1630 return;
1631 }
1632 ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
1633 dst_stride, src, dst);
1634 }
1635
1636 // Scale an I420 image.
1637 // This function in turn calls a scaling function for each plane.
1638
1639 LIBYUV_API
1640 int I420Scale(const uint8_t* src_y,
1641 int src_stride_y,
1642 const uint8_t* src_u,
1643 int src_stride_u,
1644 const uint8_t* src_v,
1645 int src_stride_v,
1646 int src_width,
1647 int src_height,
1648 uint8_t* dst_y,
1649 int dst_stride_y,
1650 uint8_t* dst_u,
1651 int dst_stride_u,
1652 uint8_t* dst_v,
1653 int dst_stride_v,
1654 int dst_width,
1655 int dst_height,
1656 enum FilterMode filtering) {
1657 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1658 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1659 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1660 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1661 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1662 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1663 dst_width <= 0 || dst_height <= 0) {
1664 return -1;
1665 }
1666
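// I420 chroma is subsampled 2x2, so the U and V planes are scaled between
// the half dimensions computed above (rounded up for odd luma sizes).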
1667 ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1668 dst_width, dst_height, filtering);
1669 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1670 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1671 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1672 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1673 return 0;
1674 }
1675
1676 LIBYUV_API
1677 int I420Scale_16(const uint16_t* src_y,
1678 int src_stride_y,
1679 const uint16_t* src_u,
1680 int src_stride_u,
1681 const uint16_t* src_v,
1682 int src_stride_v,
1683 int src_width,
1684 int src_height,
1685 uint16_t* dst_y,
1686 int dst_stride_y,
1687 uint16_t* dst_u,
1688 int dst_stride_u,
1689 uint16_t* dst_v,
1690 int dst_stride_v,
1691 int dst_width,
1692 int dst_height,
1693 enum FilterMode filtering) {
1694 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1695 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1696 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1697 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1698 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1699 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
1700 dst_width <= 0 || dst_height <= 0) {
1701 return -1;
1702 }
1703
1704 ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y,
1705 dst_width, dst_height, filtering);
1706 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
1707 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
1708 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
1709 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
1710 return 0;
1711 }
1712
1713 // Deprecated api
1714 LIBYUV_API
1715 int Scale(const uint8_t* src_y,
1716 const uint8_t* src_u,
1717 const uint8_t* src_v,
1718 int src_stride_y,
1719 int src_stride_u,
1720 int src_stride_v,
1721 int src_width,
1722 int src_height,
1723 uint8_t* dst_y,
1724 uint8_t* dst_u,
1725 uint8_t* dst_v,
1726 int dst_stride_y,
1727 int dst_stride_u,
1728 int dst_stride_v,
1729 int dst_width,
1730 int dst_height,
1731 LIBYUV_BOOL interpolate) {
1732 return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
1733 src_stride_v, src_width, src_height, dst_y, dst_stride_y,
1734 dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
1735 dst_height, interpolate ? kFilterBox : kFilterNone);
1736 }
1737
1738 #ifdef __cplusplus
1739 } // extern "C"
1740 } // namespace libyuv
1741 #endif
1742