1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyPlane
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20 #include "libyuv/scale_uv.h" // For UVScale
21
22 #ifdef __cplusplus
23 namespace libyuv {
24 extern "C" {
25 #endif
26
// Returns the magnitude of v: v itself when it is non-negative, otherwise -v.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
30
// SUBSAMPLE(v, a, s): adds the rounding term a to the magnitude of v, shifts
// right by s, and gives the result the sign of v.
// CENTERSTART(dx, s): halves the magnitude of dx, adds s, and gives the result
// the sign of dx.
//
// Every argument and the whole expansion are parenthesized so the macros
// behave as a single operand inside larger expressions and accept compound
// arguments such as `a - b`. Arguments are still evaluated more than once, so
// do not pass expressions with side effects.
#define SUBSAMPLE(v, a, s) \
  (((v) < 0) ? (-((-(v) + (a)) >> (s))) : (((v) + (a)) >> (s)))
#define CENTERSTART(dx, s) \
  (((dx) < 0) ? -((-(dx) >> 1) + (s)) : (((dx) >> 1) + (s)))
33
34 // Scale plane, 1/2
35 // This is an optimized version for scaling down a plane to 1/2 of
36 // its original size.
37
ScalePlaneDown2(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr,enum FilterMode filtering)38 static void ScalePlaneDown2(int src_width,
39 int src_height,
40 int dst_width,
41 int dst_height,
42 int src_stride,
43 int dst_stride,
44 const uint8_t* src_ptr,
45 uint8_t* dst_ptr,
46 enum FilterMode filtering) {
47 int y;
48 void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
49 uint8_t* dst_ptr, int dst_width) =
50 filtering == kFilterNone
51 ? ScaleRowDown2_C
52 : (filtering == kFilterLinear ? ScaleRowDown2Linear_C
53 : ScaleRowDown2Box_C);
54 int row_stride = src_stride * 2;
55 (void)src_width;
56 (void)src_height;
57 if (!filtering) {
58 src_ptr += src_stride; // Point to odd rows.
59 src_stride = 0;
60 }
61
62 #if defined(HAS_SCALEROWDOWN2_NEON)
63 if (TestCpuFlag(kCpuHasNEON)) {
64 ScaleRowDown2 =
65 filtering == kFilterNone
66 ? ScaleRowDown2_Any_NEON
67 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON
68 : ScaleRowDown2Box_Any_NEON);
69 if (IS_ALIGNED(dst_width, 16)) {
70 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON
71 : (filtering == kFilterLinear
72 ? ScaleRowDown2Linear_NEON
73 : ScaleRowDown2Box_NEON);
74 }
75 }
76 #endif
77 #if defined(HAS_SCALEROWDOWN2_SSSE3)
78 if (TestCpuFlag(kCpuHasSSSE3)) {
79 ScaleRowDown2 =
80 filtering == kFilterNone
81 ? ScaleRowDown2_Any_SSSE3
82 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3
83 : ScaleRowDown2Box_Any_SSSE3);
84 if (IS_ALIGNED(dst_width, 16)) {
85 ScaleRowDown2 =
86 filtering == kFilterNone
87 ? ScaleRowDown2_SSSE3
88 : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3
89 : ScaleRowDown2Box_SSSE3);
90 }
91 }
92 #endif
93 #if defined(HAS_SCALEROWDOWN2_AVX2)
94 if (TestCpuFlag(kCpuHasAVX2)) {
95 ScaleRowDown2 =
96 filtering == kFilterNone
97 ? ScaleRowDown2_Any_AVX2
98 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2
99 : ScaleRowDown2Box_Any_AVX2);
100 if (IS_ALIGNED(dst_width, 32)) {
101 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2
102 : (filtering == kFilterLinear
103 ? ScaleRowDown2Linear_AVX2
104 : ScaleRowDown2Box_AVX2);
105 }
106 }
107 #endif
108 #if defined(HAS_SCALEROWDOWN2_MSA)
109 if (TestCpuFlag(kCpuHasMSA)) {
110 ScaleRowDown2 =
111 filtering == kFilterNone
112 ? ScaleRowDown2_Any_MSA
113 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA
114 : ScaleRowDown2Box_Any_MSA);
115 if (IS_ALIGNED(dst_width, 32)) {
116 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA
117 : (filtering == kFilterLinear
118 ? ScaleRowDown2Linear_MSA
119 : ScaleRowDown2Box_MSA);
120 }
121 }
122 #endif
123 #if defined(HAS_SCALEROWDOWN2_LSX)
124 if (TestCpuFlag(kCpuHasLSX)) {
125 ScaleRowDown2 =
126 filtering == kFilterNone
127 ? ScaleRowDown2_Any_LSX
128 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_LSX
129 : ScaleRowDown2Box_Any_LSX);
130 if (IS_ALIGNED(dst_width, 32)) {
131 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_LSX
132 : (filtering == kFilterLinear
133 ? ScaleRowDown2Linear_LSX
134 : ScaleRowDown2Box_LSX);
135 }
136 }
137 #endif
138 #if defined(HAS_SCALEROWDOWN2_RVV)
139 if (TestCpuFlag(kCpuHasRVV)) {
140 ScaleRowDown2 = filtering == kFilterNone
141 ? ScaleRowDown2_RVV
142 : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV
143 : ScaleRowDown2Box_RVV);
144 }
145 #endif
146
147 if (filtering == kFilterLinear) {
148 src_stride = 0;
149 }
150 // TODO(fbarchard): Loop through source height to allow odd height.
151 for (y = 0; y < dst_height; ++y) {
152 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
153 src_ptr += row_stride;
154 dst_ptr += dst_stride;
155 }
156 }
157
ScalePlaneDown2_16(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr,enum FilterMode filtering)158 static void ScalePlaneDown2_16(int src_width,
159 int src_height,
160 int dst_width,
161 int dst_height,
162 int src_stride,
163 int dst_stride,
164 const uint16_t* src_ptr,
165 uint16_t* dst_ptr,
166 enum FilterMode filtering) {
167 int y;
168 void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
169 uint16_t* dst_ptr, int dst_width) =
170 filtering == kFilterNone
171 ? ScaleRowDown2_16_C
172 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C
173 : ScaleRowDown2Box_16_C);
174 int row_stride = src_stride * 2;
175 (void)src_width;
176 (void)src_height;
177 if (!filtering) {
178 src_ptr += src_stride; // Point to odd rows.
179 src_stride = 0;
180 }
181
182 #if defined(HAS_SCALEROWDOWN2_16_NEON)
183 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
184 ScaleRowDown2 =
185 filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON;
186 }
187 #endif
188 #if defined(HAS_SCALEROWDOWN2_16_SSE2)
189 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
190 ScaleRowDown2 =
191 filtering == kFilterNone
192 ? ScaleRowDown2_16_SSE2
193 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2
194 : ScaleRowDown2Box_16_SSE2);
195 }
196 #endif
197
198 if (filtering == kFilterLinear) {
199 src_stride = 0;
200 }
201 // TODO(fbarchard): Loop through source height to allow odd height.
202 for (y = 0; y < dst_height; ++y) {
203 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
204 src_ptr += row_stride;
205 dst_ptr += dst_stride;
206 }
207 }
208
ScalePlaneDown2_16To8(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint8_t * dst_ptr,int scale,enum FilterMode filtering)209 void ScalePlaneDown2_16To8(int src_width,
210 int src_height,
211 int dst_width,
212 int dst_height,
213 int src_stride,
214 int dst_stride,
215 const uint16_t* src_ptr,
216 uint8_t* dst_ptr,
217 int scale,
218 enum FilterMode filtering) {
219 int y;
220 void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
221 uint8_t* dst_ptr, int dst_width, int scale) =
222 (src_width & 1)
223 ? (filtering == kFilterNone
224 ? ScaleRowDown2_16To8_Odd_C
225 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_Odd_C
226 : ScaleRowDown2Box_16To8_Odd_C))
227 : (filtering == kFilterNone
228 ? ScaleRowDown2_16To8_C
229 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16To8_C
230 : ScaleRowDown2Box_16To8_C));
231 int row_stride = src_stride * 2;
232 (void)dst_height;
233 if (!filtering) {
234 src_ptr += src_stride; // Point to odd rows.
235 src_stride = 0;
236 }
237
238 if (filtering == kFilterLinear) {
239 src_stride = 0;
240 }
241 for (y = 0; y < src_height / 2; ++y) {
242 ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width, scale);
243 src_ptr += row_stride;
244 dst_ptr += dst_stride;
245 }
246 if (src_height & 1) {
247 if (!filtering) {
248 src_ptr -= src_stride; // Point to last row.
249 }
250 ScaleRowDown2(src_ptr, 0, dst_ptr, dst_width, scale);
251 }
252 }
253
254 // Scale plane, 1/4
255 // This is an optimized version for scaling down a plane to 1/4 of
256 // its original size.
257
ScalePlaneDown4(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr,enum FilterMode filtering)258 static void ScalePlaneDown4(int src_width,
259 int src_height,
260 int dst_width,
261 int dst_height,
262 int src_stride,
263 int dst_stride,
264 const uint8_t* src_ptr,
265 uint8_t* dst_ptr,
266 enum FilterMode filtering) {
267 int y;
268 void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride,
269 uint8_t* dst_ptr, int dst_width) =
270 filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
271 int row_stride = src_stride * 4;
272 (void)src_width;
273 (void)src_height;
274 if (!filtering) {
275 src_ptr += src_stride * 2; // Point to row 2.
276 src_stride = 0;
277 }
278 #if defined(HAS_SCALEROWDOWN4_NEON)
279 if (TestCpuFlag(kCpuHasNEON)) {
280 ScaleRowDown4 =
281 filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
282 if (IS_ALIGNED(dst_width, 8)) {
283 ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
284 }
285 }
286 #endif
287 #if defined(HAS_SCALEROWDOWN4_SSSE3)
288 if (TestCpuFlag(kCpuHasSSSE3)) {
289 ScaleRowDown4 =
290 filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3;
291 if (IS_ALIGNED(dst_width, 8)) {
292 ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3;
293 }
294 }
295 #endif
296 #if defined(HAS_SCALEROWDOWN4_AVX2)
297 if (TestCpuFlag(kCpuHasAVX2)) {
298 ScaleRowDown4 =
299 filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
300 if (IS_ALIGNED(dst_width, 16)) {
301 ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
302 }
303 }
304 #endif
305 #if defined(HAS_SCALEROWDOWN4_MSA)
306 if (TestCpuFlag(kCpuHasMSA)) {
307 ScaleRowDown4 =
308 filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA;
309 if (IS_ALIGNED(dst_width, 16)) {
310 ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA;
311 }
312 }
313 #endif
314 #if defined(HAS_SCALEROWDOWN4_LSX)
315 if (TestCpuFlag(kCpuHasLSX)) {
316 ScaleRowDown4 =
317 filtering ? ScaleRowDown4Box_Any_LSX : ScaleRowDown4_Any_LSX;
318 if (IS_ALIGNED(dst_width, 16)) {
319 ScaleRowDown4 = filtering ? ScaleRowDown4Box_LSX : ScaleRowDown4_LSX;
320 }
321 }
322 #endif
323 #if defined(HAS_SCALEROWDOWN4_RVV)
324 if (TestCpuFlag(kCpuHasRVV)) {
325 ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV;
326 }
327 #endif
328
329 if (filtering == kFilterLinear) {
330 src_stride = 0;
331 }
332 for (y = 0; y < dst_height; ++y) {
333 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
334 src_ptr += row_stride;
335 dst_ptr += dst_stride;
336 }
337 }
338
ScalePlaneDown4_16(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr,enum FilterMode filtering)339 static void ScalePlaneDown4_16(int src_width,
340 int src_height,
341 int dst_width,
342 int dst_height,
343 int src_stride,
344 int dst_stride,
345 const uint16_t* src_ptr,
346 uint16_t* dst_ptr,
347 enum FilterMode filtering) {
348 int y;
349 void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride,
350 uint16_t* dst_ptr, int dst_width) =
351 filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
352 int row_stride = src_stride * 4;
353 (void)src_width;
354 (void)src_height;
355 if (!filtering) {
356 src_ptr += src_stride * 2; // Point to row 2.
357 src_stride = 0;
358 }
359 #if defined(HAS_SCALEROWDOWN4_16_NEON)
360 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
361 ScaleRowDown4 =
362 filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON;
363 }
364 #endif
365 #if defined(HAS_SCALEROWDOWN4_16_SSE2)
366 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
367 ScaleRowDown4 =
368 filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2;
369 }
370 #endif
371
372 if (filtering == kFilterLinear) {
373 src_stride = 0;
374 }
375 for (y = 0; y < dst_height; ++y) {
376 ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
377 src_ptr += row_stride;
378 dst_ptr += dst_stride;
379 }
380 }
381
382 // Scale plane down, 3/4
ScalePlaneDown34(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr,enum FilterMode filtering)383 static void ScalePlaneDown34(int src_width,
384 int src_height,
385 int dst_width,
386 int dst_height,
387 int src_stride,
388 int dst_stride,
389 const uint8_t* src_ptr,
390 uint8_t* dst_ptr,
391 enum FilterMode filtering) {
392 int y;
393 void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride,
394 uint8_t* dst_ptr, int dst_width);
395 void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride,
396 uint8_t* dst_ptr, int dst_width);
397 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
398 (void)src_width;
399 (void)src_height;
400 assert(dst_width % 3 == 0);
401 if (!filtering) {
402 ScaleRowDown34_0 = ScaleRowDown34_C;
403 ScaleRowDown34_1 = ScaleRowDown34_C;
404 } else {
405 ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
406 ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
407 }
408 #if defined(HAS_SCALEROWDOWN34_NEON)
409 if (TestCpuFlag(kCpuHasNEON)) {
410 if (!filtering) {
411 ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
412 ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
413 } else {
414 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
415 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
416 }
417 if (dst_width % 24 == 0) {
418 if (!filtering) {
419 ScaleRowDown34_0 = ScaleRowDown34_NEON;
420 ScaleRowDown34_1 = ScaleRowDown34_NEON;
421 } else {
422 ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
423 ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
424 }
425 }
426 }
427 #endif
428 #if defined(HAS_SCALEROWDOWN34_MSA)
429 if (TestCpuFlag(kCpuHasMSA)) {
430 if (!filtering) {
431 ScaleRowDown34_0 = ScaleRowDown34_Any_MSA;
432 ScaleRowDown34_1 = ScaleRowDown34_Any_MSA;
433 } else {
434 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA;
435 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA;
436 }
437 if (dst_width % 48 == 0) {
438 if (!filtering) {
439 ScaleRowDown34_0 = ScaleRowDown34_MSA;
440 ScaleRowDown34_1 = ScaleRowDown34_MSA;
441 } else {
442 ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA;
443 ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA;
444 }
445 }
446 }
447 #endif
448 #if defined(HAS_SCALEROWDOWN34_LSX)
449 if (TestCpuFlag(kCpuHasLSX)) {
450 if (!filtering) {
451 ScaleRowDown34_0 = ScaleRowDown34_Any_LSX;
452 ScaleRowDown34_1 = ScaleRowDown34_Any_LSX;
453 } else {
454 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_LSX;
455 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_LSX;
456 }
457 if (dst_width % 48 == 0) {
458 if (!filtering) {
459 ScaleRowDown34_0 = ScaleRowDown34_LSX;
460 ScaleRowDown34_1 = ScaleRowDown34_LSX;
461 } else {
462 ScaleRowDown34_0 = ScaleRowDown34_0_Box_LSX;
463 ScaleRowDown34_1 = ScaleRowDown34_1_Box_LSX;
464 }
465 }
466 }
467 #endif
468 #if defined(HAS_SCALEROWDOWN34_SSSE3)
469 if (TestCpuFlag(kCpuHasSSSE3)) {
470 if (!filtering) {
471 ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
472 ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
473 } else {
474 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
475 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
476 }
477 if (dst_width % 24 == 0) {
478 if (!filtering) {
479 ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
480 ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
481 } else {
482 ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
483 ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
484 }
485 }
486 }
487 #endif
488 #if defined(HAS_SCALEROWDOWN34_RVV)
489 if (TestCpuFlag(kCpuHasRVV)) {
490 if (!filtering) {
491 ScaleRowDown34_0 = ScaleRowDown34_RVV;
492 ScaleRowDown34_1 = ScaleRowDown34_RVV;
493 } else {
494 ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV;
495 ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV;
496 }
497 }
498 #endif
499
500 for (y = 0; y < dst_height - 2; y += 3) {
501 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
502 src_ptr += src_stride;
503 dst_ptr += dst_stride;
504 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
505 src_ptr += src_stride;
506 dst_ptr += dst_stride;
507 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
508 src_ptr += src_stride * 2;
509 dst_ptr += dst_stride;
510 }
511
512 // Remainder 1 or 2 rows with last row vertically unfiltered
513 if ((dst_height % 3) == 2) {
514 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
515 src_ptr += src_stride;
516 dst_ptr += dst_stride;
517 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
518 } else if ((dst_height % 3) == 1) {
519 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
520 }
521 }
522
ScalePlaneDown34_16(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr,enum FilterMode filtering)523 static void ScalePlaneDown34_16(int src_width,
524 int src_height,
525 int dst_width,
526 int dst_height,
527 int src_stride,
528 int dst_stride,
529 const uint16_t* src_ptr,
530 uint16_t* dst_ptr,
531 enum FilterMode filtering) {
532 int y;
533 void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride,
534 uint16_t* dst_ptr, int dst_width);
535 void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride,
536 uint16_t* dst_ptr, int dst_width);
537 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
538 (void)src_width;
539 (void)src_height;
540 assert(dst_width % 3 == 0);
541 if (!filtering) {
542 ScaleRowDown34_0 = ScaleRowDown34_16_C;
543 ScaleRowDown34_1 = ScaleRowDown34_16_C;
544 } else {
545 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
546 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
547 }
548 #if defined(HAS_SCALEROWDOWN34_16_NEON)
549 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
550 if (!filtering) {
551 ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
552 ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
553 } else {
554 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
555 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
556 }
557 }
558 #endif
559 #if defined(HAS_SCALEROWDOWN34_16_SSSE3)
560 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
561 if (!filtering) {
562 ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
563 ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
564 } else {
565 ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
566 ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
567 }
568 }
569 #endif
570
571 for (y = 0; y < dst_height - 2; y += 3) {
572 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
573 src_ptr += src_stride;
574 dst_ptr += dst_stride;
575 ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
576 src_ptr += src_stride;
577 dst_ptr += dst_stride;
578 ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width);
579 src_ptr += src_stride * 2;
580 dst_ptr += dst_stride;
581 }
582
583 // Remainder 1 or 2 rows with last row vertically unfiltered
584 if ((dst_height % 3) == 2) {
585 ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
586 src_ptr += src_stride;
587 dst_ptr += dst_stride;
588 ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
589 } else if ((dst_height % 3) == 1) {
590 ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
591 }
592 }
593
594 // Scale plane, 3/8
595 // This is an optimized version for scaling down a plane to 3/8
596 // of its original size.
597 //
598 // Uses box filter arranges like this
599 // aaabbbcc -> abc
600 // aaabbbcc def
601 // aaabbbcc ghi
602 // dddeeeff
603 // dddeeeff
604 // dddeeeff
605 // ggghhhii
606 // ggghhhii
607 // Boxes are 3x3, 2x3, 3x2 and 2x2
608
ScalePlaneDown38(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr,enum FilterMode filtering)609 static void ScalePlaneDown38(int src_width,
610 int src_height,
611 int dst_width,
612 int dst_height,
613 int src_stride,
614 int dst_stride,
615 const uint8_t* src_ptr,
616 uint8_t* dst_ptr,
617 enum FilterMode filtering) {
618 int y;
619 void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride,
620 uint8_t* dst_ptr, int dst_width);
621 void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride,
622 uint8_t* dst_ptr, int dst_width);
623 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
624 assert(dst_width % 3 == 0);
625 (void)src_width;
626 (void)src_height;
627 if (!filtering) {
628 ScaleRowDown38_3 = ScaleRowDown38_C;
629 ScaleRowDown38_2 = ScaleRowDown38_C;
630 } else {
631 ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
632 ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
633 }
634
635 #if defined(HAS_SCALEROWDOWN38_NEON)
636 if (TestCpuFlag(kCpuHasNEON)) {
637 if (!filtering) {
638 ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
639 ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
640 } else {
641 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
642 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
643 }
644 if (dst_width % 12 == 0) {
645 if (!filtering) {
646 ScaleRowDown38_3 = ScaleRowDown38_NEON;
647 ScaleRowDown38_2 = ScaleRowDown38_NEON;
648 } else {
649 ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
650 ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
651 }
652 }
653 }
654 #endif
655 #if defined(HAS_SCALEROWDOWN38_SSSE3)
656 if (TestCpuFlag(kCpuHasSSSE3)) {
657 if (!filtering) {
658 ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
659 ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
660 } else {
661 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
662 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
663 }
664 if (dst_width % 12 == 0 && !filtering) {
665 ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
666 ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
667 }
668 if (dst_width % 6 == 0 && filtering) {
669 ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
670 ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
671 }
672 }
673 #endif
674 #if defined(HAS_SCALEROWDOWN38_MSA)
675 if (TestCpuFlag(kCpuHasMSA)) {
676 if (!filtering) {
677 ScaleRowDown38_3 = ScaleRowDown38_Any_MSA;
678 ScaleRowDown38_2 = ScaleRowDown38_Any_MSA;
679 } else {
680 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA;
681 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA;
682 }
683 if (dst_width % 12 == 0) {
684 if (!filtering) {
685 ScaleRowDown38_3 = ScaleRowDown38_MSA;
686 ScaleRowDown38_2 = ScaleRowDown38_MSA;
687 } else {
688 ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA;
689 ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA;
690 }
691 }
692 }
693 #endif
694 #if defined(HAS_SCALEROWDOWN38_LSX)
695 if (TestCpuFlag(kCpuHasLSX)) {
696 if (!filtering) {
697 ScaleRowDown38_3 = ScaleRowDown38_Any_LSX;
698 ScaleRowDown38_2 = ScaleRowDown38_Any_LSX;
699 } else {
700 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_LSX;
701 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_LSX;
702 }
703 if (dst_width % 12 == 0) {
704 if (!filtering) {
705 ScaleRowDown38_3 = ScaleRowDown38_LSX;
706 ScaleRowDown38_2 = ScaleRowDown38_LSX;
707 } else {
708 ScaleRowDown38_3 = ScaleRowDown38_3_Box_LSX;
709 ScaleRowDown38_2 = ScaleRowDown38_2_Box_LSX;
710 }
711 }
712 }
713 #endif
714 #if defined(HAS_SCALEROWDOWN38_RVV)
715 if (TestCpuFlag(kCpuHasRVV)) {
716 if (!filtering) {
717 ScaleRowDown38_3 = ScaleRowDown38_RVV;
718 ScaleRowDown38_2 = ScaleRowDown38_RVV;
719 } else {
720 ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV;
721 ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV;
722 }
723 }
724 #endif
725
726 for (y = 0; y < dst_height - 2; y += 3) {
727 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
728 src_ptr += src_stride * 3;
729 dst_ptr += dst_stride;
730 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
731 src_ptr += src_stride * 3;
732 dst_ptr += dst_stride;
733 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
734 src_ptr += src_stride * 2;
735 dst_ptr += dst_stride;
736 }
737
738 // Remainder 1 or 2 rows with last row vertically unfiltered
739 if ((dst_height % 3) == 2) {
740 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
741 src_ptr += src_stride * 3;
742 dst_ptr += dst_stride;
743 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
744 } else if ((dst_height % 3) == 1) {
745 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
746 }
747 }
748
ScalePlaneDown38_16(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr,enum FilterMode filtering)749 static void ScalePlaneDown38_16(int src_width,
750 int src_height,
751 int dst_width,
752 int dst_height,
753 int src_stride,
754 int dst_stride,
755 const uint16_t* src_ptr,
756 uint16_t* dst_ptr,
757 enum FilterMode filtering) {
758 int y;
759 void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride,
760 uint16_t* dst_ptr, int dst_width);
761 void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride,
762 uint16_t* dst_ptr, int dst_width);
763 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
764 (void)src_width;
765 (void)src_height;
766 assert(dst_width % 3 == 0);
767 if (!filtering) {
768 ScaleRowDown38_3 = ScaleRowDown38_16_C;
769 ScaleRowDown38_2 = ScaleRowDown38_16_C;
770 } else {
771 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
772 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
773 }
774 #if defined(HAS_SCALEROWDOWN38_16_NEON)
775 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
776 if (!filtering) {
777 ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
778 ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
779 } else {
780 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
781 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
782 }
783 }
784 #endif
785 #if defined(HAS_SCALEROWDOWN38_16_SSSE3)
786 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
787 if (!filtering) {
788 ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
789 ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
790 } else {
791 ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
792 ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
793 }
794 }
795 #endif
796
797 for (y = 0; y < dst_height - 2; y += 3) {
798 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
799 src_ptr += src_stride * 3;
800 dst_ptr += dst_stride;
801 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
802 src_ptr += src_stride * 3;
803 dst_ptr += dst_stride;
804 ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
805 src_ptr += src_stride * 2;
806 dst_ptr += dst_stride;
807 }
808
809 // Remainder 1 or 2 rows with last row vertically unfiltered
810 if ((dst_height % 3) == 2) {
811 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
812 src_ptr += src_stride * 3;
813 dst_ptr += dst_stride;
814 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
815 } else if ((dst_height % 3) == 1) {
816 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
817 }
818 }
819
// Clamps x to a lower bound of 1. The argument is evaluated twice.
#define MIN1(x) ((x) >= 1 ? (x) : 1)
821
// Returns the 32-bit sum of the first iboxwidth uint16_t values at src_ptr.
// iboxwidth must be positive (asserted).
static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
831
// Returns the 32-bit sum of the first iboxwidth uint32_t values at src_ptr.
// iboxwidth must be positive (asserted).
static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) {
  uint32_t total = 0u;
  int remaining;
  assert(iboxwidth > 0);
  for (remaining = iboxwidth; remaining > 0; --remaining) {
    total += *src_ptr++;
  }
  return total;
}
841
// Column pass of the box filter for a fractional horizontal step.
//
// x and dx are 16.16 fixed point. Each output pixel averages the row-sum
// values between the integer parts of x and x + dx; that box is either
// (dx >> 16) or (dx >> 16) + 1 columns wide, so two reciprocals (scaled by
// 65536 and including boxheight) are precomputed and selected per pixel.
static void ScaleAddCols2_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int minboxwidth = dx >> 16;
  int scaletbl[2];
  int col;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (col = 0; col < dst_width; ++col) {
    const int left = x >> 16;
    int boxwidth;
    int scaletbl_index;
    x += dx;
    boxwidth = MIN1((x >> 16) - left);
    scaletbl_index = boxwidth - minboxwidth;
    assert((scaletbl_index == 0) || (scaletbl_index == 1));
    dst_ptr[col] = (uint8_t)(SumPixels(boxwidth, src_ptr + left) *
                                 scaletbl[scaletbl_index] >>
                             16);
  }
}
865
// Column pass of the 16-bit box filter for a fractional horizontal step.
//
// x and dx are 16.16 fixed point. Each output pixel averages the 32-bit
// row-sum values between the integer parts of x and x + dx; that box is either
// (dx >> 16) or (dx >> 16) + 1 columns wide, so two reciprocals (scaled by
// 65536 and including boxheight) are precomputed and selected per pixel.
static void ScaleAddCols2_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  const int minboxwidth = dx >> 16;
  int scaletbl[2];
  int col;
  scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
  scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
  for (col = 0; col < dst_width; ++col) {
    const int left = x >> 16;
    int boxwidth;
    int scaletbl_index;
    x += dx;
    boxwidth = MIN1((x >> 16) - left);
    scaletbl_index = boxwidth - minboxwidth;
    assert((scaletbl_index == 0) || (scaletbl_index == 1));
    dst_ptr[col] = (uint16_t)(SumPixels_16(boxwidth, src_ptr + left) *
                                  scaletbl[scaletbl_index] >>
                              16);
  }
}
888
// Column pass of the box filter when no horizontal averaging is done: each
// output pixel is one row-sum value divided by boxheight.
//
// x is 16.16 fixed point; only its integer part is used, as the starting
// column. dx is ignored. The division is done as a multiply by
// 65536 / boxheight followed by a 16-bit right shift.
static void ScaleAddCols0_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int scaleval = 65536 / boxheight;
  const uint16_t* sums = src_ptr + (x >> 16);
  int remaining;
  (void)dx;
  for (remaining = dst_width; remaining > 0; --remaining) {
    *dst_ptr++ = (uint8_t)(*sums++ * scaleval >> 16);
  }
}
903
// Column pass of the box filter for a whole-number horizontal step.
//
// x and dx are 16.16 fixed point. Every output pixel averages a box of
// MIN1(dx >> 16) consecutive row-sum values, starting at column (x >> 16) and
// advancing by the box width per pixel. The average is computed with a single
// reciprocal, 65536 / (boxwidth * boxheight), and a 16-bit right shift.
static void ScaleAddCols1_C(int dst_width,
                            int boxheight,
                            int x,
                            int dx,
                            const uint16_t* src_ptr,
                            uint8_t* dst_ptr) {
  const int boxwidth = MIN1(dx >> 16);
  const int scaleval = 65536 / (boxwidth * boxheight);
  int left = x >> 16;
  int col;
  for (col = 0; col < dst_width; ++col) {
    dst_ptr[col] =
        (uint8_t)(SumPixels(boxwidth, src_ptr + left) * scaleval >> 16);
    left += boxwidth;
  }
}
919
// Column pass of the 16-bit box filter for a whole-number horizontal step.
//
// x and dx are 16.16 fixed point. Every output pixel averages a box of
// MIN1(dx >> 16) consecutive 32-bit row-sum values, starting at column
// (x >> 16) and advancing by the box width per pixel. The average is computed
// with a single reciprocal, 65536 / (boxwidth * boxheight), and a 16-bit
// right shift.
static void ScaleAddCols1_16_C(int dst_width,
                               int boxheight,
                               int x,
                               int dx,
                               const uint32_t* src_ptr,
                               uint16_t* dst_ptr) {
  int boxwidth = MIN1(dx >> 16);
  int scaleval = 65536 / (boxwidth * boxheight);
  int i;
  // Convert the fixed-point start position to a column index, matching
  // ScaleAddCols1_C and ScaleAddCols2_16_C. Previously the raw 16.16 value was
  // used as the index, which is only correct when x is 0.
  x >>= 16;
  for (i = 0; i < dst_width; ++i) {
    *dst_ptr++ =
        (uint16_t)(SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16);
    x += boxwidth;
  }
}
934
935 // Scale plane down to any dimensions, with interpolation.
936 // (boxfilter).
937 //
938 // Same method as SimpleScale, which is fixed point, outputting
939 // one pixel of destination using fixed point (16.16) to step
940 // through source, sampling a box of pixel with simple
941 // averaging.
ScalePlaneBox(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr)942 static int ScalePlaneBox(int src_width,
943 int src_height,
944 int dst_width,
945 int dst_height,
946 int src_stride,
947 int dst_stride,
948 const uint8_t* src_ptr,
949 uint8_t* dst_ptr) {
950 int j, k;
951 // Initial source x/y coordinate and step values as 16.16 fixed point.
952 int x = 0;
953 int y = 0;
954 int dx = 0;
955 int dy = 0;
956 const int max_y = (src_height << 16);
957 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
958 &dx, &dy);
959 src_width = Abs(src_width);
960 {
961 // Allocate a row buffer of uint16_t.
962 align_buffer_64(row16, src_width * 2);
963 if (!row16)
964 return 1;
965 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
966 const uint16_t* src_ptr, uint8_t* dst_ptr) =
967 (dx & 0xffff) ? ScaleAddCols2_C
968 : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
969 void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr,
970 int src_width) = ScaleAddRow_C;
971 #if defined(HAS_SCALEADDROW_SSE2)
972 if (TestCpuFlag(kCpuHasSSE2)) {
973 ScaleAddRow = ScaleAddRow_Any_SSE2;
974 if (IS_ALIGNED(src_width, 16)) {
975 ScaleAddRow = ScaleAddRow_SSE2;
976 }
977 }
978 #endif
979 #if defined(HAS_SCALEADDROW_AVX2)
980 if (TestCpuFlag(kCpuHasAVX2)) {
981 ScaleAddRow = ScaleAddRow_Any_AVX2;
982 if (IS_ALIGNED(src_width, 32)) {
983 ScaleAddRow = ScaleAddRow_AVX2;
984 }
985 }
986 #endif
987 #if defined(HAS_SCALEADDROW_NEON)
988 if (TestCpuFlag(kCpuHasNEON)) {
989 ScaleAddRow = ScaleAddRow_Any_NEON;
990 if (IS_ALIGNED(src_width, 16)) {
991 ScaleAddRow = ScaleAddRow_NEON;
992 }
993 }
994 #endif
995 #if defined(HAS_SCALEADDROW_MSA)
996 if (TestCpuFlag(kCpuHasMSA)) {
997 ScaleAddRow = ScaleAddRow_Any_MSA;
998 if (IS_ALIGNED(src_width, 16)) {
999 ScaleAddRow = ScaleAddRow_MSA;
1000 }
1001 }
1002 #endif
1003 #if defined(HAS_SCALEADDROW_LSX)
1004 if (TestCpuFlag(kCpuHasLSX)) {
1005 ScaleAddRow = ScaleAddRow_Any_LSX;
1006 if (IS_ALIGNED(src_width, 16)) {
1007 ScaleAddRow = ScaleAddRow_LSX;
1008 }
1009 }
1010 #endif
1011 #if defined(HAS_SCALEADDROW_RVV)
1012 if (TestCpuFlag(kCpuHasRVV)) {
1013 ScaleAddRow = ScaleAddRow_RVV;
1014 }
1015 #endif
1016
1017 for (j = 0; j < dst_height; ++j) {
1018 int boxheight;
1019 int iy = y >> 16;
1020 const uint8_t* src = src_ptr + iy * (int64_t)src_stride;
1021 y += dy;
1022 if (y > max_y) {
1023 y = max_y;
1024 }
1025 boxheight = MIN1((y >> 16) - iy);
1026 memset(row16, 0, src_width * 2);
1027 for (k = 0; k < boxheight; ++k) {
1028 ScaleAddRow(src, (uint16_t*)(row16), src_width);
1029 src += src_stride;
1030 }
1031 ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr);
1032 dst_ptr += dst_stride;
1033 }
1034 free_aligned_buffer_64(row16);
1035 }
1036 return 0;
1037 }
1038
ScalePlaneBox_16(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr)1039 static int ScalePlaneBox_16(int src_width,
1040 int src_height,
1041 int dst_width,
1042 int dst_height,
1043 int src_stride,
1044 int dst_stride,
1045 const uint16_t* src_ptr,
1046 uint16_t* dst_ptr) {
1047 int j, k;
1048 // Initial source x/y coordinate and step values as 16.16 fixed point.
1049 int x = 0;
1050 int y = 0;
1051 int dx = 0;
1052 int dy = 0;
1053 const int max_y = (src_height << 16);
1054 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y,
1055 &dx, &dy);
1056 src_width = Abs(src_width);
1057 {
1058 // Allocate a row buffer of uint32_t.
1059 align_buffer_64(row32, src_width * 4);
1060 if (!row32)
1061 return 1;
1062 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
1063 const uint32_t* src_ptr, uint16_t* dst_ptr) =
1064 (dx & 0xffff) ? ScaleAddCols2_16_C : ScaleAddCols1_16_C;
1065 void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr,
1066 int src_width) = ScaleAddRow_16_C;
1067
1068 #if defined(HAS_SCALEADDROW_16_SSE2)
1069 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
1070 ScaleAddRow = ScaleAddRow_16_SSE2;
1071 }
1072 #endif
1073
1074 for (j = 0; j < dst_height; ++j) {
1075 int boxheight;
1076 int iy = y >> 16;
1077 const uint16_t* src = src_ptr + iy * (int64_t)src_stride;
1078 y += dy;
1079 if (y > max_y) {
1080 y = max_y;
1081 }
1082 boxheight = MIN1((y >> 16) - iy);
1083 memset(row32, 0, src_width * 4);
1084 for (k = 0; k < boxheight; ++k) {
1085 ScaleAddRow(src, (uint32_t*)(row32), src_width);
1086 src += src_stride;
1087 }
1088 ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr);
1089 dst_ptr += dst_stride;
1090 }
1091 free_aligned_buffer_64(row32);
1092 }
1093 return 0;
1094 }
1095
1096 // Scale plane down with bilinear interpolation.
ScalePlaneBilinearDown(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr,enum FilterMode filtering)1097 static int ScalePlaneBilinearDown(int src_width,
1098 int src_height,
1099 int dst_width,
1100 int dst_height,
1101 int src_stride,
1102 int dst_stride,
1103 const uint8_t* src_ptr,
1104 uint8_t* dst_ptr,
1105 enum FilterMode filtering) {
1106 // Initial source x/y coordinate and step values as 16.16 fixed point.
1107 int x = 0;
1108 int y = 0;
1109 int dx = 0;
1110 int dy = 0;
1111 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1112 // Allocate a row buffer.
1113 align_buffer_64(row, src_width);
1114 if (!row)
1115 return 1;
1116
1117 const int max_y = (src_height - 1) << 16;
1118 int j;
1119 void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1120 int dst_width, int x, int dx) =
1121 (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
1122 void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
1123 ptrdiff_t src_stride, int dst_width,
1124 int source_y_fraction) = InterpolateRow_C;
1125 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1126 &dx, &dy);
1127 src_width = Abs(src_width);
1128
1129 #if defined(HAS_INTERPOLATEROW_SSSE3)
1130 if (TestCpuFlag(kCpuHasSSSE3)) {
1131 InterpolateRow = InterpolateRow_Any_SSSE3;
1132 if (IS_ALIGNED(src_width, 16)) {
1133 InterpolateRow = InterpolateRow_SSSE3;
1134 }
1135 }
1136 #endif
1137 #if defined(HAS_INTERPOLATEROW_AVX2)
1138 if (TestCpuFlag(kCpuHasAVX2)) {
1139 InterpolateRow = InterpolateRow_Any_AVX2;
1140 if (IS_ALIGNED(src_width, 32)) {
1141 InterpolateRow = InterpolateRow_AVX2;
1142 }
1143 }
1144 #endif
1145 #if defined(HAS_INTERPOLATEROW_NEON)
1146 if (TestCpuFlag(kCpuHasNEON)) {
1147 InterpolateRow = InterpolateRow_Any_NEON;
1148 if (IS_ALIGNED(src_width, 16)) {
1149 InterpolateRow = InterpolateRow_NEON;
1150 }
1151 }
1152 #endif
1153 #if defined(HAS_INTERPOLATEROW_MSA)
1154 if (TestCpuFlag(kCpuHasMSA)) {
1155 InterpolateRow = InterpolateRow_Any_MSA;
1156 if (IS_ALIGNED(src_width, 32)) {
1157 InterpolateRow = InterpolateRow_MSA;
1158 }
1159 }
1160 #endif
1161 #if defined(HAS_INTERPOLATEROW_LSX)
1162 if (TestCpuFlag(kCpuHasLSX)) {
1163 InterpolateRow = InterpolateRow_Any_LSX;
1164 if (IS_ALIGNED(src_width, 32)) {
1165 InterpolateRow = InterpolateRow_LSX;
1166 }
1167 }
1168 #endif
1169 #if defined(HAS_INTERPOLATEROW_RVV)
1170 if (TestCpuFlag(kCpuHasRVV)) {
1171 InterpolateRow = InterpolateRow_RVV;
1172 }
1173 #endif
1174
1175 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
1176 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1177 ScaleFilterCols = ScaleFilterCols_SSSE3;
1178 }
1179 #endif
1180 #if defined(HAS_SCALEFILTERCOLS_NEON)
1181 if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1182 ScaleFilterCols = ScaleFilterCols_Any_NEON;
1183 if (IS_ALIGNED(dst_width, 8)) {
1184 ScaleFilterCols = ScaleFilterCols_NEON;
1185 }
1186 }
1187 #endif
1188 #if defined(HAS_SCALEFILTERCOLS_MSA)
1189 if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
1190 ScaleFilterCols = ScaleFilterCols_Any_MSA;
1191 if (IS_ALIGNED(dst_width, 16)) {
1192 ScaleFilterCols = ScaleFilterCols_MSA;
1193 }
1194 }
1195 #endif
1196 #if defined(HAS_SCALEFILTERCOLS_LSX)
1197 if (TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
1198 ScaleFilterCols = ScaleFilterCols_Any_LSX;
1199 if (IS_ALIGNED(dst_width, 16)) {
1200 ScaleFilterCols = ScaleFilterCols_LSX;
1201 }
1202 }
1203 #endif
1204 if (y > max_y) {
1205 y = max_y;
1206 }
1207
1208 for (j = 0; j < dst_height; ++j) {
1209 int yi = y >> 16;
1210 const uint8_t* src = src_ptr + yi * (int64_t)src_stride;
1211 if (filtering == kFilterLinear) {
1212 ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1213 } else {
1214 int yf = (y >> 8) & 255;
1215 InterpolateRow(row, src, src_stride, src_width, yf);
1216 ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
1217 }
1218 dst_ptr += dst_stride;
1219 y += dy;
1220 if (y > max_y) {
1221 y = max_y;
1222 }
1223 }
1224 free_aligned_buffer_64(row);
1225 return 0;
1226 }
1227
ScalePlaneBilinearDown_16(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr,enum FilterMode filtering)1228 static int ScalePlaneBilinearDown_16(int src_width,
1229 int src_height,
1230 int dst_width,
1231 int dst_height,
1232 int src_stride,
1233 int dst_stride,
1234 const uint16_t* src_ptr,
1235 uint16_t* dst_ptr,
1236 enum FilterMode filtering) {
1237 // Initial source x/y coordinate and step values as 16.16 fixed point.
1238 int x = 0;
1239 int y = 0;
1240 int dx = 0;
1241 int dy = 0;
1242 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
1243 // Allocate a row buffer.
1244 align_buffer_64(row, src_width * 2);
1245 if (!row)
1246 return 1;
1247
1248 const int max_y = (src_height - 1) << 16;
1249 int j;
1250 void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1251 int dst_width, int x, int dx) =
1252 (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
1253 void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
1254 ptrdiff_t src_stride, int dst_width,
1255 int source_y_fraction) = InterpolateRow_16_C;
1256 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
1257 &dx, &dy);
1258 src_width = Abs(src_width);
1259
1260 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1261 if (TestCpuFlag(kCpuHasSSE2)) {
1262 InterpolateRow = InterpolateRow_16_Any_SSE2;
1263 if (IS_ALIGNED(src_width, 16)) {
1264 InterpolateRow = InterpolateRow_16_SSE2;
1265 }
1266 }
1267 #endif
1268 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1269 if (TestCpuFlag(kCpuHasSSSE3)) {
1270 InterpolateRow = InterpolateRow_16_Any_SSSE3;
1271 if (IS_ALIGNED(src_width, 16)) {
1272 InterpolateRow = InterpolateRow_16_SSSE3;
1273 }
1274 }
1275 #endif
1276 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1277 if (TestCpuFlag(kCpuHasAVX2)) {
1278 InterpolateRow = InterpolateRow_16_Any_AVX2;
1279 if (IS_ALIGNED(src_width, 32)) {
1280 InterpolateRow = InterpolateRow_16_AVX2;
1281 }
1282 }
1283 #endif
1284 #if defined(HAS_INTERPOLATEROW_16_NEON)
1285 if (TestCpuFlag(kCpuHasNEON)) {
1286 InterpolateRow = InterpolateRow_16_Any_NEON;
1287 if (IS_ALIGNED(src_width, 16)) {
1288 InterpolateRow = InterpolateRow_16_NEON;
1289 }
1290 }
1291 #endif
1292
1293 #if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
1294 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1295 ScaleFilterCols = ScaleFilterCols_16_SSSE3;
1296 }
1297 #endif
1298 if (y > max_y) {
1299 y = max_y;
1300 }
1301
1302 for (j = 0; j < dst_height; ++j) {
1303 int yi = y >> 16;
1304 const uint16_t* src = src_ptr + yi * (int64_t)src_stride;
1305 if (filtering == kFilterLinear) {
1306 ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
1307 } else {
1308 int yf = (y >> 8) & 255;
1309 InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf);
1310 ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx);
1311 }
1312 dst_ptr += dst_stride;
1313 y += dy;
1314 if (y > max_y) {
1315 y = max_y;
1316 }
1317 }
1318 free_aligned_buffer_64(row);
1319 return 0;
1320 }
1321
// Scale plane up with bilinear interpolation.
//
// Source rows are first scaled horizontally by ScaleFilterCols into a pair of
// temporary rows of dst_width pixels. Each destination row is then produced
// by InterpolateRow from that pair, using the 8 bit fraction of y (or from a
// single temporary row when filtering is kFilterLinear).
// Returns 0 on success, or 1 if the temporary rows cannot be allocated.
static int ScalePlaneBilinearUp(int src_width,
                                int src_height,
                                int dst_width,
                                int dst_height,
                                int src_stride,
                                int dst_stride,
                                const uint8_t* src_ptr,
                                uint8_t* dst_ptr,
                                enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // y is clamped so its integer part never exceeds the last source row.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8_t* dst_ptr, const uint8_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  void (*ScaleFilterCols)(uint8_t* dst_ptr, const uint8_t* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  // ScaleSlope received the signed width; only the magnitude is used below.
  src_width = Abs(src_width);

  // InterpolateRow works on the temporary rows, which are dst_width wide, so
  // the alignment checks use dst_width. Later matches override earlier ones.
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    InterpolateRow = InterpolateRow_RVV;
  }
#endif

  // Very wide sources use the 64 bit column filter; the SIMD column filters
  // below are only selected when src_width < 32768.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleFilterCols = ScaleFilterCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEFILTERCOLS_LSX)
  if (filtering && TestCpuFlag(kCpuHasLSX) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_Any_LSX;
    if (IS_ALIGNED(dst_width, 16)) {
      ScaleFilterCols = ScaleFilterCols_LSX;
    }
  }
#endif
  // Unfiltered exact 2x horizontal upscale gets a dedicated column routine.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8_t* src = src_ptr + yi * (int64_t)src_stride;

    // Allocate 2 row buffers.
    // Each row is dst_width rounded up to a multiple of 32 bytes.
    const int row_size = (dst_width + 31) & ~31;
    align_buffer_64(row, row_size * 2);
    if (!row)
      return 1;

    // rowptr is the temporary row matching source row lasty. rowstride is the
    // signed offset from rowptr to the other temporary row.
    uint8_t* rowptr = row;
    int rowstride = row_size;
    int lasty = yi;

    // Prime both temporary rows. src only advances while another source row
    // exists, so a short source reuses its last row.
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    if (src_height > 2) {
      src += src_stride;
    }

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Moved to a different source row. Clamp to the last row first and
        // recompute src from the clamped position.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * (int64_t)src_stride;
        }
        if (yi != lasty) {
          // Overwrite the older temporary row with the newly needed source
          // row, then swap the roles of the two rows by stepping rowptr and
          // negating rowstride.
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          // Advance src only while y is more than one row short of max_y.
          if ((y + 65536) < max_y) {
            src += src_stride;
          }
        }
      }
      if (filtering == kFilterLinear) {
        // Stride 0 and fraction 0: only the current temporary row is passed.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Top 8 bits of the fractional part of y.
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
  return 0;
}
1477
1478 // Scale plane, horizontally up by 2 times.
1479 // Uses linear filter horizontally, nearest vertically.
1480 // This is an optimized version for scaling up a plane to 2 times of
1481 // its original width, using linear interpolation.
1482 // This is used to scale U and V planes of I422 to I444.
ScalePlaneUp2_Linear(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr)1483 static void ScalePlaneUp2_Linear(int src_width,
1484 int src_height,
1485 int dst_width,
1486 int dst_height,
1487 int src_stride,
1488 int dst_stride,
1489 const uint8_t* src_ptr,
1490 uint8_t* dst_ptr) {
1491 void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
1492 ScaleRowUp2_Linear_Any_C;
1493 int i;
1494 int y;
1495 int dy;
1496
1497 (void)src_width;
1498 // This function can only scale up by 2 times horizontally.
1499 assert(src_width == ((dst_width + 1) / 2));
1500
1501 #ifdef HAS_SCALEROWUP2_LINEAR_SSE2
1502 if (TestCpuFlag(kCpuHasSSE2)) {
1503 ScaleRowUp = ScaleRowUp2_Linear_Any_SSE2;
1504 }
1505 #endif
1506
1507 #ifdef HAS_SCALEROWUP2_LINEAR_SSSE3
1508 if (TestCpuFlag(kCpuHasSSSE3)) {
1509 ScaleRowUp = ScaleRowUp2_Linear_Any_SSSE3;
1510 }
1511 #endif
1512
1513 #ifdef HAS_SCALEROWUP2_LINEAR_AVX2
1514 if (TestCpuFlag(kCpuHasAVX2)) {
1515 ScaleRowUp = ScaleRowUp2_Linear_Any_AVX2;
1516 }
1517 #endif
1518
1519 #ifdef HAS_SCALEROWUP2_LINEAR_NEON
1520 if (TestCpuFlag(kCpuHasNEON)) {
1521 ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
1522 }
1523 #endif
1524 #ifdef HAS_SCALEROWUP2_LINEAR_RVV
1525 if (TestCpuFlag(kCpuHasRVV)) {
1526 ScaleRowUp = ScaleRowUp2_Linear_RVV;
1527 }
1528 #endif
1529
1530 if (dst_height == 1) {
1531 ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1532 dst_width);
1533 } else {
1534 dy = FixedDiv(src_height - 1, dst_height - 1);
1535 y = (1 << 15) - 1;
1536 for (i = 0; i < dst_height; ++i) {
1537 ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1538 dst_ptr += dst_stride;
1539 y += dy;
1540 }
1541 }
1542 }
1543
1544 // Scale plane, up by 2 times.
1545 // This is an optimized version for scaling up a plane to 2 times of
1546 // its original size, using bilinear interpolation.
1547 // This is used to scale U and V planes of I420 to I444.
ScalePlaneUp2_Bilinear(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr)1548 static void ScalePlaneUp2_Bilinear(int src_width,
1549 int src_height,
1550 int dst_width,
1551 int dst_height,
1552 int src_stride,
1553 int dst_stride,
1554 const uint8_t* src_ptr,
1555 uint8_t* dst_ptr) {
1556 void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
1557 uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1558 ScaleRowUp2_Bilinear_Any_C;
1559 int x;
1560
1561 (void)src_width;
1562 // This function can only scale up by 2 times.
1563 assert(src_width == ((dst_width + 1) / 2));
1564 assert(src_height == ((dst_height + 1) / 2));
1565
1566 #ifdef HAS_SCALEROWUP2_BILINEAR_SSE2
1567 if (TestCpuFlag(kCpuHasSSE2)) {
1568 Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSE2;
1569 }
1570 #endif
1571
1572 #ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3
1573 if (TestCpuFlag(kCpuHasSSSE3)) {
1574 Scale2RowUp = ScaleRowUp2_Bilinear_Any_SSSE3;
1575 }
1576 #endif
1577
1578 #ifdef HAS_SCALEROWUP2_BILINEAR_AVX2
1579 if (TestCpuFlag(kCpuHasAVX2)) {
1580 Scale2RowUp = ScaleRowUp2_Bilinear_Any_AVX2;
1581 }
1582 #endif
1583
1584 #ifdef HAS_SCALEROWUP2_BILINEAR_NEON
1585 if (TestCpuFlag(kCpuHasNEON)) {
1586 Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
1587 }
1588 #endif
1589 #ifdef HAS_SCALEROWUP2_BILINEAR_RVV
1590 if (TestCpuFlag(kCpuHasRVV)) {
1591 Scale2RowUp = ScaleRowUp2_Bilinear_RVV;
1592 }
1593 #endif
1594
1595 Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1596 dst_ptr += dst_stride;
1597 for (x = 0; x < src_height - 1; ++x) {
1598 Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1599 src_ptr += src_stride;
1600 // TODO(fbarchard): Test performance of writing one row of destination at a
1601 // time.
1602 dst_ptr += 2 * dst_stride;
1603 }
1604 if (!(dst_height & 1)) {
1605 Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1606 }
1607 }
1608
1609 // Scale at most 14 bit plane, horizontally up by 2 times.
1610 // This is an optimized version for scaling up a plane to 2 times of
1611 // its original width, using linear interpolation.
1612 // stride is in count of uint16_t.
1613 // This is used to scale U and V planes of I210 to I410 and I212 to I412.
ScalePlaneUp2_12_Linear(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr)1614 static void ScalePlaneUp2_12_Linear(int src_width,
1615 int src_height,
1616 int dst_width,
1617 int dst_height,
1618 int src_stride,
1619 int dst_stride,
1620 const uint16_t* src_ptr,
1621 uint16_t* dst_ptr) {
1622 void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
1623 int dst_width) = ScaleRowUp2_Linear_16_Any_C;
1624 int i;
1625 int y;
1626 int dy;
1627
1628 (void)src_width;
1629 // This function can only scale up by 2 times horizontally.
1630 assert(src_width == ((dst_width + 1) / 2));
1631
1632 #ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3
1633 if (TestCpuFlag(kCpuHasSSSE3)) {
1634 ScaleRowUp = ScaleRowUp2_Linear_12_Any_SSSE3;
1635 }
1636 #endif
1637
1638 #ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2
1639 if (TestCpuFlag(kCpuHasAVX2)) {
1640 ScaleRowUp = ScaleRowUp2_Linear_12_Any_AVX2;
1641 }
1642 #endif
1643
1644 #ifdef HAS_SCALEROWUP2_LINEAR_12_NEON
1645 if (TestCpuFlag(kCpuHasNEON)) {
1646 ScaleRowUp = ScaleRowUp2_Linear_12_Any_NEON;
1647 }
1648 #endif
1649
1650 if (dst_height == 1) {
1651 ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1652 dst_width);
1653 } else {
1654 dy = FixedDiv(src_height - 1, dst_height - 1);
1655 y = (1 << 15) - 1;
1656 for (i = 0; i < dst_height; ++i) {
1657 ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1658 dst_ptr += dst_stride;
1659 y += dy;
1660 }
1661 }
1662 }
1663
1664 // Scale at most 12 bit plane, up by 2 times.
1665 // This is an optimized version for scaling up a plane to 2 times of
1666 // its original size, using bilinear interpolation.
1667 // stride is in count of uint16_t.
1668 // This is used to scale U and V planes of I010 to I410 and I012 to I412.
ScalePlaneUp2_12_Bilinear(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr)1669 static void ScalePlaneUp2_12_Bilinear(int src_width,
1670 int src_height,
1671 int dst_width,
1672 int dst_height,
1673 int src_stride,
1674 int dst_stride,
1675 const uint16_t* src_ptr,
1676 uint16_t* dst_ptr) {
1677 void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
1678 uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1679 ScaleRowUp2_Bilinear_16_Any_C;
1680 int x;
1681
1682 (void)src_width;
1683 // This function can only scale up by 2 times.
1684 assert(src_width == ((dst_width + 1) / 2));
1685 assert(src_height == ((dst_height + 1) / 2));
1686
1687 #ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3
1688 if (TestCpuFlag(kCpuHasSSSE3)) {
1689 Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_SSSE3;
1690 }
1691 #endif
1692
1693 #ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2
1694 if (TestCpuFlag(kCpuHasAVX2)) {
1695 Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_AVX2;
1696 }
1697 #endif
1698
1699 #ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON
1700 if (TestCpuFlag(kCpuHasNEON)) {
1701 Scale2RowUp = ScaleRowUp2_Bilinear_12_Any_NEON;
1702 }
1703 #endif
1704
1705 Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1706 dst_ptr += dst_stride;
1707 for (x = 0; x < src_height - 1; ++x) {
1708 Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1709 src_ptr += src_stride;
1710 dst_ptr += 2 * dst_stride;
1711 }
1712 if (!(dst_height & 1)) {
1713 Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1714 }
1715 }
1716
ScalePlaneUp2_16_Linear(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr)1717 static void ScalePlaneUp2_16_Linear(int src_width,
1718 int src_height,
1719 int dst_width,
1720 int dst_height,
1721 int src_stride,
1722 int dst_stride,
1723 const uint16_t* src_ptr,
1724 uint16_t* dst_ptr) {
1725 void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
1726 int dst_width) = ScaleRowUp2_Linear_16_Any_C;
1727 int i;
1728 int y;
1729 int dy;
1730
1731 (void)src_width;
1732 // This function can only scale up by 2 times horizontally.
1733 assert(src_width == ((dst_width + 1) / 2));
1734
1735 #ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2
1736 if (TestCpuFlag(kCpuHasSSE2)) {
1737 ScaleRowUp = ScaleRowUp2_Linear_16_Any_SSE2;
1738 }
1739 #endif
1740
1741 #ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2
1742 if (TestCpuFlag(kCpuHasAVX2)) {
1743 ScaleRowUp = ScaleRowUp2_Linear_16_Any_AVX2;
1744 }
1745 #endif
1746
1747 #ifdef HAS_SCALEROWUP2_LINEAR_16_NEON
1748 if (TestCpuFlag(kCpuHasNEON)) {
1749 ScaleRowUp = ScaleRowUp2_Linear_16_Any_NEON;
1750 }
1751 #endif
1752
1753 if (dst_height == 1) {
1754 ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
1755 dst_width);
1756 } else {
1757 dy = FixedDiv(src_height - 1, dst_height - 1);
1758 y = (1 << 15) - 1;
1759 for (i = 0; i < dst_height; ++i) {
1760 ScaleRowUp(src_ptr + (y >> 16) * (int64_t)src_stride, dst_ptr, dst_width);
1761 dst_ptr += dst_stride;
1762 y += dy;
1763 }
1764 }
1765 }
1766
ScalePlaneUp2_16_Bilinear(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr)1767 static void ScalePlaneUp2_16_Bilinear(int src_width,
1768 int src_height,
1769 int dst_width,
1770 int dst_height,
1771 int src_stride,
1772 int dst_stride,
1773 const uint16_t* src_ptr,
1774 uint16_t* dst_ptr) {
1775 void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
1776 uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
1777 ScaleRowUp2_Bilinear_16_Any_C;
1778 int x;
1779
1780 (void)src_width;
1781 // This function can only scale up by 2 times.
1782 assert(src_width == ((dst_width + 1) / 2));
1783 assert(src_height == ((dst_height + 1) / 2));
1784
1785 #ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2
1786 if (TestCpuFlag(kCpuHasSSE2)) {
1787 Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_SSE2;
1788 }
1789 #endif
1790
1791 #ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2
1792 if (TestCpuFlag(kCpuHasAVX2)) {
1793 Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_AVX2;
1794 }
1795 #endif
1796
1797 #ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON
1798 if (TestCpuFlag(kCpuHasNEON)) {
1799 Scale2RowUp = ScaleRowUp2_Bilinear_16_Any_NEON;
1800 }
1801 #endif
1802
1803 Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1804 dst_ptr += dst_stride;
1805 for (x = 0; x < src_height - 1; ++x) {
1806 Scale2RowUp(src_ptr, src_stride, dst_ptr, dst_stride, dst_width);
1807 src_ptr += src_stride;
1808 dst_ptr += 2 * dst_stride;
1809 }
1810 if (!(dst_height & 1)) {
1811 Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
1812 }
1813 }
1814
// 16 bit counterpart of ScalePlaneBilinearUp: scale a uint16_t plane up with
// bilinear interpolation.
//
// Source rows are first scaled horizontally by ScaleFilterCols into a pair of
// temporary rows of dst_width pixels. Each destination row is then produced
// by InterpolateRow from that pair, using the 8 bit fraction of y (or from a
// single temporary row when filtering is kFilterLinear). Strides are applied
// to uint16_t pointers, so they count elements rather than bytes.
// Returns 0 on success, or 1 if the temporary rows cannot be allocated.
static int ScalePlaneBilinearUp_16(int src_width,
                                   int src_height,
                                   int dst_width,
                                   int dst_height,
                                   int src_stride,
                                   int dst_stride,
                                   const uint16_t* src_ptr,
                                   uint16_t* dst_ptr,
                                   enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // y is clamped so its integer part never exceeds the last source row.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint16_t* dst_ptr, const uint16_t* src_ptr,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  void (*ScaleFilterCols)(uint16_t* dst_ptr, const uint16_t* src_ptr,
                          int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
             &dx, &dy);
  // ScaleSlope received the signed width; only the magnitude is used below.
  src_width = Abs(src_width);

  // InterpolateRow works on the temporary rows, which are dst_width wide, so
  // the alignment checks use dst_width. Later matches override earlier ones.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_16_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_16_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_16_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif

  // Very wide sources use the 64 bit column filter; the SIMD column filter
  // below is only selected when src_width < 32768.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale gets a dedicated column routine.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16_t* src = src_ptr + yi * (int64_t)src_stride;

    // Allocate 2 row buffers.
    // row_size is in uint16_t elements (dst_width rounded up to a multiple of
    // 32), so two rows take row_size * 4 bytes.
    const int row_size = (dst_width + 31) & ~31;
    align_buffer_64(row, row_size * 4);
    // rowptr is the temporary row matching source row lasty. rowstride is the
    // signed element offset from rowptr to the other temporary row.
    int rowstride = row_size;
    int lasty = yi;
    uint16_t* rowptr = (uint16_t*)row;
    if (!row)
      return 1;

    // Prime both temporary rows. src only advances while another source row
    // exists, so a short source reuses its last row.
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    if (src_height > 2) {
      src += src_stride;
    }

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Moved to a different source row. Clamp to the last row first and
        // recompute src from the clamped position.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * (int64_t)src_stride;
        }
        if (yi != lasty) {
          // Overwrite the older temporary row with the newly needed source
          // row, then swap the roles of the two rows by stepping rowptr and
          // negating rowstride.
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          // Advance src only while y is more than one row short of max_y.
          if ((y + 65536) < max_y) {
            src += src_stride;
          }
        }
      }
      if (filtering == kFilterLinear) {
        // Stride 0 and fraction 0: only the current temporary row is passed.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Top 8 bits of the fractional part of y.
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
  return 0;
}
1946
1947 // Scale Plane to/from any dimensions, without interpolation.
1948 // Fixed point math is used for performance: The upper 16 bits
1949 // of x and dx is the integer part of the source position and
1950 // the lower 16 bits are the fixed decimal part.
1951
ScalePlaneSimple(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8_t * src_ptr,uint8_t * dst_ptr)1952 static void ScalePlaneSimple(int src_width,
1953 int src_height,
1954 int dst_width,
1955 int dst_height,
1956 int src_stride,
1957 int dst_stride,
1958 const uint8_t* src_ptr,
1959 uint8_t* dst_ptr) {
1960 int i;
1961 void (*ScaleCols)(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width,
1962 int x, int dx) = ScaleCols_C;
1963 // Initial source x/y coordinate and step values as 16.16 fixed point.
1964 int x = 0;
1965 int y = 0;
1966 int dx = 0;
1967 int dy = 0;
1968 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
1969 &dx, &dy);
1970 src_width = Abs(src_width);
1971
1972 if (src_width * 2 == dst_width && x < 0x8000) {
1973 ScaleCols = ScaleColsUp2_C;
1974 #if defined(HAS_SCALECOLS_SSE2)
1975 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1976 ScaleCols = ScaleColsUp2_SSE2;
1977 }
1978 #endif
1979 }
1980
1981 for (i = 0; i < dst_height; ++i) {
1982 ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
1983 dx);
1984 dst_ptr += dst_stride;
1985 y += dy;
1986 }
1987 }
1988
ScalePlaneSimple_16(int src_width,int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16_t * src_ptr,uint16_t * dst_ptr)1989 static void ScalePlaneSimple_16(int src_width,
1990 int src_height,
1991 int dst_width,
1992 int dst_height,
1993 int src_stride,
1994 int dst_stride,
1995 const uint16_t* src_ptr,
1996 uint16_t* dst_ptr) {
1997 int i;
1998 void (*ScaleCols)(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width,
1999 int x, int dx) = ScaleCols_16_C;
2000 // Initial source x/y coordinate and step values as 16.16 fixed point.
2001 int x = 0;
2002 int y = 0;
2003 int dx = 0;
2004 int dy = 0;
2005 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y,
2006 &dx, &dy);
2007 src_width = Abs(src_width);
2008
2009 if (src_width * 2 == dst_width && x < 0x8000) {
2010 ScaleCols = ScaleColsUp2_16_C;
2011 #if defined(HAS_SCALECOLS_16_SSE2)
2012 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
2013 ScaleCols = ScaleColsUp2_16_SSE2;
2014 }
2015 #endif
2016 }
2017
2018 for (i = 0; i < dst_height; ++i) {
2019 ScaleCols(dst_ptr, src_ptr + (y >> 16) * (int64_t)src_stride, dst_width, x,
2020 dx);
2021 dst_ptr += dst_stride;
2022 y += dy;
2023 }
2024 }
2025
2026 // Scale a plane.
2027 // This function dispatches to a specialized scaler based on scale factor.
2028 LIBYUV_API
ScalePlane(const uint8_t * src,int src_stride,int src_width,int src_height,uint8_t * dst,int dst_stride,int dst_width,int dst_height,enum FilterMode filtering)2029 int ScalePlane(const uint8_t* src,
2030 int src_stride,
2031 int src_width,
2032 int src_height,
2033 uint8_t* dst,
2034 int dst_stride,
2035 int dst_width,
2036 int dst_height,
2037 enum FilterMode filtering) {
2038 // Simplify filtering when possible.
2039 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
2040 filtering);
2041
2042 // Negative height means invert the image.
2043 if (src_height < 0) {
2044 src_height = -src_height;
2045 src = src + (src_height - 1) * (int64_t)src_stride;
2046 src_stride = -src_stride;
2047 }
2048 // Use specialized scales to improve performance for common resolutions.
2049 // For example, all the 1/2 scalings will use ScalePlaneDown2()
2050 if (dst_width == src_width && dst_height == src_height) {
2051 // Straight copy.
2052 CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
2053 return 0;
2054 }
2055 if (dst_width == src_width && filtering != kFilterBox) {
2056 int dy = 0;
2057 int y = 0;
2058 // When scaling down, use the center 2 rows to filter.
2059 // When scaling up, last row of destination uses the last 2 source rows.
2060 if (dst_height <= src_height) {
2061 dy = FixedDiv(src_height, dst_height);
2062 y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter.
2063 } else if (src_height > 1 && dst_height > 1) {
2064 dy = FixedDiv1(src_height, dst_height);
2065 }
2066 // Arbitrary scale vertically, but unscaled horizontally.
2067 ScalePlaneVertical(src_height, dst_width, dst_height, src_stride,
2068 dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
2069 return 0;
2070 }
2071 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
2072 // Scale down.
2073 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
2074 // optimized, 3/4
2075 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride,
2076 dst_stride, src, dst, filtering);
2077 return 0;
2078 }
2079 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
2080 // optimized, 1/2
2081 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride,
2082 dst_stride, src, dst, filtering);
2083 return 0;
2084 }
2085 // 3/8 rounded up for odd sized chroma height.
2086 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
2087 // optimized, 3/8
2088 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride,
2089 dst_stride, src, dst, filtering);
2090 return 0;
2091 }
2092 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
2093 (filtering == kFilterBox || filtering == kFilterNone)) {
2094 // optimized, 1/4
2095 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride,
2096 dst_stride, src, dst, filtering);
2097 return 0;
2098 }
2099 }
2100 if (filtering == kFilterBox && dst_height * 2 < src_height) {
2101 return ScalePlaneBox(src_width, src_height, dst_width, dst_height,
2102 src_stride, dst_stride, src, dst);
2103 }
2104 if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
2105 ScalePlaneUp2_Linear(src_width, src_height, dst_width, dst_height,
2106 src_stride, dst_stride, src, dst);
2107 return 0;
2108 }
2109 if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
2110 (filtering == kFilterBilinear || filtering == kFilterBox)) {
2111 ScalePlaneUp2_Bilinear(src_width, src_height, dst_width, dst_height,
2112 src_stride, dst_stride, src, dst);
2113 return 0;
2114 }
2115 if (filtering && dst_height > src_height) {
2116 return ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
2117 src_stride, dst_stride, src, dst, filtering);
2118 }
2119 if (filtering) {
2120 return ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
2121 src_stride, dst_stride, src, dst, filtering);
2122 }
2123 ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride,
2124 dst_stride, src, dst);
2125 return 0;
2126 }
2127
2128 LIBYUV_API
ScalePlane_16(const uint16_t * src,int src_stride,int src_width,int src_height,uint16_t * dst,int dst_stride,int dst_width,int dst_height,enum FilterMode filtering)2129 int ScalePlane_16(const uint16_t* src,
2130 int src_stride,
2131 int src_width,
2132 int src_height,
2133 uint16_t* dst,
2134 int dst_stride,
2135 int dst_width,
2136 int dst_height,
2137 enum FilterMode filtering) {
2138 // Simplify filtering when possible.
2139 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
2140 filtering);
2141
2142 // Negative height means invert the image.
2143 if (src_height < 0) {
2144 src_height = -src_height;
2145 src = src + (src_height - 1) * (int64_t)src_stride;
2146 src_stride = -src_stride;
2147 }
2148 // Use specialized scales to improve performance for common resolutions.
2149 // For example, all the 1/2 scalings will use ScalePlaneDown2()
2150 if (dst_width == src_width && dst_height == src_height) {
2151 // Straight copy.
2152 CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
2153 return 0;
2154 }
2155 if (dst_width == src_width && filtering != kFilterBox) {
2156 int dy = 0;
2157 int y = 0;
2158 // When scaling down, use the center 2 rows to filter.
2159 // When scaling up, last row of destination uses the last 2 source rows.
2160 if (dst_height <= src_height) {
2161 dy = FixedDiv(src_height, dst_height);
2162 y = CENTERSTART(dy, -32768); // Subtract 0.5 (32768) to center filter.
2163 // When scaling up, ensure the last row of destination uses the last
2164 // source. Avoid divide by zero for dst_height but will do no scaling
2165 // later.
2166 } else if (src_height > 1 && dst_height > 1) {
2167 dy = FixedDiv1(src_height, dst_height);
2168 }
2169 // Arbitrary scale vertically, but unscaled horizontally.
2170 ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride,
2171 dst_stride, src, dst, 0, y, dy, /*bpp=*/1, filtering);
2172 return 0;
2173 }
2174 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
2175 // Scale down.
2176 if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) {
2177 // optimized, 3/4
2178 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
2179 src_stride, dst_stride, src, dst, filtering);
2180 return 0;
2181 }
2182 if (2 * dst_width == src_width && 2 * dst_height == src_height) {
2183 // optimized, 1/2
2184 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
2185 src_stride, dst_stride, src, dst, filtering);
2186 return 0;
2187 }
2188 // 3/8 rounded up for odd sized chroma height.
2189 if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) {
2190 // optimized, 3/8
2191 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
2192 src_stride, dst_stride, src, dst, filtering);
2193 return 0;
2194 }
2195 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
2196 (filtering == kFilterBox || filtering == kFilterNone)) {
2197 // optimized, 1/4
2198 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
2199 src_stride, dst_stride, src, dst, filtering);
2200 return 0;
2201 }
2202 }
2203 if (filtering == kFilterBox && dst_height * 2 < src_height) {
2204 return ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
2205 src_stride, dst_stride, src, dst);
2206 }
2207 if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
2208 ScalePlaneUp2_16_Linear(src_width, src_height, dst_width, dst_height,
2209 src_stride, dst_stride, src, dst);
2210 return 0;
2211 }
2212 if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
2213 (filtering == kFilterBilinear || filtering == kFilterBox)) {
2214 ScalePlaneUp2_16_Bilinear(src_width, src_height, dst_width, dst_height,
2215 src_stride, dst_stride, src, dst);
2216 return 0;
2217 }
2218 if (filtering && dst_height > src_height) {
2219 return ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
2220 src_stride, dst_stride, src, dst, filtering);
2221 }
2222 if (filtering) {
2223 return ScalePlaneBilinearDown_16(src_width, src_height, dst_width,
2224 dst_height, src_stride, dst_stride, src,
2225 dst, filtering);
2226 }
2227 ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride,
2228 dst_stride, src, dst);
2229 return 0;
2230 }
2231
2232 LIBYUV_API
ScalePlane_12(const uint16_t * src,int src_stride,int src_width,int src_height,uint16_t * dst,int dst_stride,int dst_width,int dst_height,enum FilterMode filtering)2233 int ScalePlane_12(const uint16_t* src,
2234 int src_stride,
2235 int src_width,
2236 int src_height,
2237 uint16_t* dst,
2238 int dst_stride,
2239 int dst_width,
2240 int dst_height,
2241 enum FilterMode filtering) {
2242 // Simplify filtering when possible.
2243 filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
2244 filtering);
2245
2246 // Negative height means invert the image.
2247 if (src_height < 0) {
2248 src_height = -src_height;
2249 src = src + (src_height - 1) * (int64_t)src_stride;
2250 src_stride = -src_stride;
2251 }
2252
2253 if ((dst_width + 1) / 2 == src_width && filtering == kFilterLinear) {
2254 ScalePlaneUp2_12_Linear(src_width, src_height, dst_width, dst_height,
2255 src_stride, dst_stride, src, dst);
2256 return 0;
2257 }
2258 if ((dst_height + 1) / 2 == src_height && (dst_width + 1) / 2 == src_width &&
2259 (filtering == kFilterBilinear || filtering == kFilterBox)) {
2260 ScalePlaneUp2_12_Bilinear(src_width, src_height, dst_width, dst_height,
2261 src_stride, dst_stride, src, dst);
2262 return 0;
2263 }
2264
2265 return ScalePlane_16(src, src_stride, src_width, src_height, dst, dst_stride,
2266 dst_width, dst_height, filtering);
2267 }
2268
2269 // Scale an I420 image.
2270 // This function in turn calls a scaling function for each plane.
2271
2272 LIBYUV_API
I420Scale(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,int src_width,int src_height,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2273 int I420Scale(const uint8_t* src_y,
2274 int src_stride_y,
2275 const uint8_t* src_u,
2276 int src_stride_u,
2277 const uint8_t* src_v,
2278 int src_stride_v,
2279 int src_width,
2280 int src_height,
2281 uint8_t* dst_y,
2282 int dst_stride_y,
2283 uint8_t* dst_u,
2284 int dst_stride_u,
2285 uint8_t* dst_v,
2286 int dst_stride_v,
2287 int dst_width,
2288 int dst_height,
2289 enum FilterMode filtering) {
2290 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2291 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2292 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2293 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2294 int r;
2295
2296 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2297 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2298 dst_width <= 0 || dst_height <= 0) {
2299 return -1;
2300 }
2301
2302 r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2303 dst_stride_y, dst_width, dst_height, filtering);
2304 if (r != 0) {
2305 return r;
2306 }
2307 r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2308 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2309 if (r != 0) {
2310 return r;
2311 }
2312 r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2313 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2314 return r;
2315 }
2316
2317 LIBYUV_API
I420Scale_16(const uint16_t * src_y,int src_stride_y,const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,int src_width,int src_height,uint16_t * dst_y,int dst_stride_y,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2318 int I420Scale_16(const uint16_t* src_y,
2319 int src_stride_y,
2320 const uint16_t* src_u,
2321 int src_stride_u,
2322 const uint16_t* src_v,
2323 int src_stride_v,
2324 int src_width,
2325 int src_height,
2326 uint16_t* dst_y,
2327 int dst_stride_y,
2328 uint16_t* dst_u,
2329 int dst_stride_u,
2330 uint16_t* dst_v,
2331 int dst_stride_v,
2332 int dst_width,
2333 int dst_height,
2334 enum FilterMode filtering) {
2335 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2336 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2337 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2338 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2339 int r;
2340
2341 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2342 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2343 dst_width <= 0 || dst_height <= 0) {
2344 return -1;
2345 }
2346
2347 r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
2348 dst_stride_y, dst_width, dst_height, filtering);
2349 if (r != 0) {
2350 return r;
2351 }
2352 r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2353 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2354 if (r != 0) {
2355 return r;
2356 }
2357 r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2358 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2359 return r;
2360 }
2361
2362 LIBYUV_API
I420Scale_12(const uint16_t * src_y,int src_stride_y,const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,int src_width,int src_height,uint16_t * dst_y,int dst_stride_y,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2363 int I420Scale_12(const uint16_t* src_y,
2364 int src_stride_y,
2365 const uint16_t* src_u,
2366 int src_stride_u,
2367 const uint16_t* src_v,
2368 int src_stride_v,
2369 int src_width,
2370 int src_height,
2371 uint16_t* dst_y,
2372 int dst_stride_y,
2373 uint16_t* dst_u,
2374 int dst_stride_u,
2375 uint16_t* dst_v,
2376 int dst_stride_v,
2377 int dst_width,
2378 int dst_height,
2379 enum FilterMode filtering) {
2380 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2381 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2382 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2383 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2384 int r;
2385
2386 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2387 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2388 dst_width <= 0 || dst_height <= 0) {
2389 return -1;
2390 }
2391
2392 r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2393 dst_stride_y, dst_width, dst_height, filtering);
2394 if (r != 0) {
2395 return r;
2396 }
2397 r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u,
2398 dst_stride_u, dst_halfwidth, dst_halfheight, filtering);
2399 if (r != 0) {
2400 return r;
2401 }
2402 r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v,
2403 dst_stride_v, dst_halfwidth, dst_halfheight, filtering);
2404 return r;
2405 }
2406
2407 // Scale an I444 image.
2408 // This function in turn calls a scaling function for each plane.
2409
2410 LIBYUV_API
I444Scale(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,int src_width,int src_height,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2411 int I444Scale(const uint8_t* src_y,
2412 int src_stride_y,
2413 const uint8_t* src_u,
2414 int src_stride_u,
2415 const uint8_t* src_v,
2416 int src_stride_v,
2417 int src_width,
2418 int src_height,
2419 uint8_t* dst_y,
2420 int dst_stride_y,
2421 uint8_t* dst_u,
2422 int dst_stride_u,
2423 uint8_t* dst_v,
2424 int dst_stride_v,
2425 int dst_width,
2426 int dst_height,
2427 enum FilterMode filtering) {
2428 int r;
2429
2430 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2431 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2432 dst_width <= 0 || dst_height <= 0) {
2433 return -1;
2434 }
2435
2436 r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2437 dst_stride_y, dst_width, dst_height, filtering);
2438 if (r != 0) {
2439 return r;
2440 }
2441 r = ScalePlane(src_u, src_stride_u, src_width, src_height, dst_u,
2442 dst_stride_u, dst_width, dst_height, filtering);
2443 if (r != 0) {
2444 return r;
2445 }
2446 r = ScalePlane(src_v, src_stride_v, src_width, src_height, dst_v,
2447 dst_stride_v, dst_width, dst_height, filtering);
2448 return r;
2449 }
2450
2451 LIBYUV_API
I444Scale_16(const uint16_t * src_y,int src_stride_y,const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,int src_width,int src_height,uint16_t * dst_y,int dst_stride_y,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2452 int I444Scale_16(const uint16_t* src_y,
2453 int src_stride_y,
2454 const uint16_t* src_u,
2455 int src_stride_u,
2456 const uint16_t* src_v,
2457 int src_stride_v,
2458 int src_width,
2459 int src_height,
2460 uint16_t* dst_y,
2461 int dst_stride_y,
2462 uint16_t* dst_u,
2463 int dst_stride_u,
2464 uint16_t* dst_v,
2465 int dst_stride_v,
2466 int dst_width,
2467 int dst_height,
2468 enum FilterMode filtering) {
2469 int r;
2470
2471 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2472 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2473 dst_width <= 0 || dst_height <= 0) {
2474 return -1;
2475 }
2476
2477 r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
2478 dst_stride_y, dst_width, dst_height, filtering);
2479 if (r != 0) {
2480 return r;
2481 }
2482 r = ScalePlane_16(src_u, src_stride_u, src_width, src_height, dst_u,
2483 dst_stride_u, dst_width, dst_height, filtering);
2484 if (r != 0) {
2485 return r;
2486 }
2487 r = ScalePlane_16(src_v, src_stride_v, src_width, src_height, dst_v,
2488 dst_stride_v, dst_width, dst_height, filtering);
2489 return r;
2490 }
2491
2492 LIBYUV_API
I444Scale_12(const uint16_t * src_y,int src_stride_y,const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,int src_width,int src_height,uint16_t * dst_y,int dst_stride_y,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2493 int I444Scale_12(const uint16_t* src_y,
2494 int src_stride_y,
2495 const uint16_t* src_u,
2496 int src_stride_u,
2497 const uint16_t* src_v,
2498 int src_stride_v,
2499 int src_width,
2500 int src_height,
2501 uint16_t* dst_y,
2502 int dst_stride_y,
2503 uint16_t* dst_u,
2504 int dst_stride_u,
2505 uint16_t* dst_v,
2506 int dst_stride_v,
2507 int dst_width,
2508 int dst_height,
2509 enum FilterMode filtering) {
2510 int r;
2511
2512 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2513 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2514 dst_width <= 0 || dst_height <= 0) {
2515 return -1;
2516 }
2517
2518 r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2519 dst_stride_y, dst_width, dst_height, filtering);
2520 if (r != 0) {
2521 return r;
2522 }
2523 r = ScalePlane_12(src_u, src_stride_u, src_width, src_height, dst_u,
2524 dst_stride_u, dst_width, dst_height, filtering);
2525 if (r != 0) {
2526 return r;
2527 }
2528 r = ScalePlane_12(src_v, src_stride_v, src_width, src_height, dst_v,
2529 dst_stride_v, dst_width, dst_height, filtering);
2530 return r;
2531 }
2532
2533 // Scale an I422 image.
2534 // This function in turn calls a scaling function for each plane.
2535
2536 LIBYUV_API
I422Scale(const uint8_t * src_y,int src_stride_y,const uint8_t * src_u,int src_stride_u,const uint8_t * src_v,int src_stride_v,int src_width,int src_height,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_u,int dst_stride_u,uint8_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2537 int I422Scale(const uint8_t* src_y,
2538 int src_stride_y,
2539 const uint8_t* src_u,
2540 int src_stride_u,
2541 const uint8_t* src_v,
2542 int src_stride_v,
2543 int src_width,
2544 int src_height,
2545 uint8_t* dst_y,
2546 int dst_stride_y,
2547 uint8_t* dst_u,
2548 int dst_stride_u,
2549 uint8_t* dst_v,
2550 int dst_stride_v,
2551 int dst_width,
2552 int dst_height,
2553 enum FilterMode filtering) {
2554 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2555 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2556 int r;
2557
2558 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2559 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2560 dst_width <= 0 || dst_height <= 0) {
2561 return -1;
2562 }
2563
2564 r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2565 dst_stride_y, dst_width, dst_height, filtering);
2566 if (r != 0) {
2567 return r;
2568 }
2569 r = ScalePlane(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2570 dst_stride_u, dst_halfwidth, dst_height, filtering);
2571 if (r != 0) {
2572 return r;
2573 }
2574 r = ScalePlane(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2575 dst_stride_v, dst_halfwidth, dst_height, filtering);
2576 return r;
2577 }
2578
2579 LIBYUV_API
I422Scale_16(const uint16_t * src_y,int src_stride_y,const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,int src_width,int src_height,uint16_t * dst_y,int dst_stride_y,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2580 int I422Scale_16(const uint16_t* src_y,
2581 int src_stride_y,
2582 const uint16_t* src_u,
2583 int src_stride_u,
2584 const uint16_t* src_v,
2585 int src_stride_v,
2586 int src_width,
2587 int src_height,
2588 uint16_t* dst_y,
2589 int dst_stride_y,
2590 uint16_t* dst_u,
2591 int dst_stride_u,
2592 uint16_t* dst_v,
2593 int dst_stride_v,
2594 int dst_width,
2595 int dst_height,
2596 enum FilterMode filtering) {
2597 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2598 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2599 int r;
2600
2601 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2602 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2603 dst_width <= 0 || dst_height <= 0) {
2604 return -1;
2605 }
2606
2607 r = ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y,
2608 dst_stride_y, dst_width, dst_height, filtering);
2609 if (r != 0) {
2610 return r;
2611 }
2612 r = ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2613 dst_stride_u, dst_halfwidth, dst_height, filtering);
2614 if (r != 0) {
2615 return r;
2616 }
2617 r = ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2618 dst_stride_v, dst_halfwidth, dst_height, filtering);
2619 return r;
2620 }
2621
2622 LIBYUV_API
I422Scale_12(const uint16_t * src_y,int src_stride_y,const uint16_t * src_u,int src_stride_u,const uint16_t * src_v,int src_stride_v,int src_width,int src_height,uint16_t * dst_y,int dst_stride_y,uint16_t * dst_u,int dst_stride_u,uint16_t * dst_v,int dst_stride_v,int dst_width,int dst_height,enum FilterMode filtering)2623 int I422Scale_12(const uint16_t* src_y,
2624 int src_stride_y,
2625 const uint16_t* src_u,
2626 int src_stride_u,
2627 const uint16_t* src_v,
2628 int src_stride_v,
2629 int src_width,
2630 int src_height,
2631 uint16_t* dst_y,
2632 int dst_stride_y,
2633 uint16_t* dst_u,
2634 int dst_stride_u,
2635 uint16_t* dst_v,
2636 int dst_stride_v,
2637 int dst_width,
2638 int dst_height,
2639 enum FilterMode filtering) {
2640 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2641 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2642 int r;
2643
2644 if (!src_y || !src_u || !src_v || src_width <= 0 || src_height == 0 ||
2645 src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v ||
2646 dst_width <= 0 || dst_height <= 0) {
2647 return -1;
2648 }
2649
2650 r = ScalePlane_12(src_y, src_stride_y, src_width, src_height, dst_y,
2651 dst_stride_y, dst_width, dst_height, filtering);
2652 if (r != 0) {
2653 return r;
2654 }
2655 r = ScalePlane_12(src_u, src_stride_u, src_halfwidth, src_height, dst_u,
2656 dst_stride_u, dst_halfwidth, dst_height, filtering);
2657 if (r != 0) {
2658 return r;
2659 }
2660 r = ScalePlane_12(src_v, src_stride_v, src_halfwidth, src_height, dst_v,
2661 dst_stride_v, dst_halfwidth, dst_height, filtering);
2662 return r;
2663 }
2664
2665 // Scale an NV12 image.
2666 // This function in turn calls a scaling function for each plane.
2667
2668 LIBYUV_API
NV12Scale(const uint8_t * src_y,int src_stride_y,const uint8_t * src_uv,int src_stride_uv,int src_width,int src_height,uint8_t * dst_y,int dst_stride_y,uint8_t * dst_uv,int dst_stride_uv,int dst_width,int dst_height,enum FilterMode filtering)2669 int NV12Scale(const uint8_t* src_y,
2670 int src_stride_y,
2671 const uint8_t* src_uv,
2672 int src_stride_uv,
2673 int src_width,
2674 int src_height,
2675 uint8_t* dst_y,
2676 int dst_stride_y,
2677 uint8_t* dst_uv,
2678 int dst_stride_uv,
2679 int dst_width,
2680 int dst_height,
2681 enum FilterMode filtering) {
2682 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
2683 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
2684 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
2685 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
2686 int r;
2687
2688 if (!src_y || !src_uv || src_width <= 0 || src_height == 0 ||
2689 src_width > 32768 || src_height > 32768 || !dst_y || !dst_uv ||
2690 dst_width <= 0 || dst_height <= 0) {
2691 return -1;
2692 }
2693
2694 r = ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y,
2695 dst_stride_y, dst_width, dst_height, filtering);
2696 if (r != 0) {
2697 return r;
2698 }
2699 r = UVScale(src_uv, src_stride_uv, src_halfwidth, src_halfheight, dst_uv,
2700 dst_stride_uv, dst_halfwidth, dst_halfheight, filtering);
2701 return r;
2702 }
2703
2704 // Deprecated api
2705 LIBYUV_API
Scale(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,int src_stride_y,int src_stride_u,int src_stride_v,int src_width,int src_height,uint8_t * dst_y,uint8_t * dst_u,uint8_t * dst_v,int dst_stride_y,int dst_stride_u,int dst_stride_v,int dst_width,int dst_height,LIBYUV_BOOL interpolate)2706 int Scale(const uint8_t* src_y,
2707 const uint8_t* src_u,
2708 const uint8_t* src_v,
2709 int src_stride_y,
2710 int src_stride_u,
2711 int src_stride_v,
2712 int src_width,
2713 int src_height,
2714 uint8_t* dst_y,
2715 uint8_t* dst_u,
2716 uint8_t* dst_v,
2717 int dst_stride_y,
2718 int dst_stride_u,
2719 int dst_stride_v,
2720 int dst_width,
2721 int dst_height,
2722 LIBYUV_BOOL interpolate) {
2723 return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v,
2724 src_stride_v, src_width, src_height, dst_y, dst_stride_y,
2725 dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width,
2726 dst_height, interpolate ? kFilterBox : kFilterNone);
2727 }
2728
2729 #ifdef __cplusplus
2730 } // extern "C"
2731 } // namespace libyuv
2732 #endif
2733