1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyARGB
18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
// Returns the magnitude of v: v itself when non-negative, otherwise -v.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
29
30 // CPU agnostic row functions
// Point-samples a row to half width: each output pixel is the second
// (odd-indexed) source pixel of a pair. src_stride is not used.
void ScaleRowDown2_C(const uint8* src_ptr,
                     ptrdiff_t src_stride,
                     uint8* dst,
                     int dst_width) {
  int remaining;
  (void)src_stride;
  // Two output pixels per pass.
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
  }
  // Odd width leaves one trailing pixel.
  if (dst_width & 1) {
    *dst = src_ptr[1];
  }
}
47
ScaleRowDown2_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)48 void ScaleRowDown2_16_C(const uint16* src_ptr,
49 ptrdiff_t src_stride,
50 uint16* dst,
51 int dst_width) {
52 int x;
53 (void)src_stride;
54 for (x = 0; x < dst_width - 1; x += 2) {
55 dst[0] = src_ptr[1];
56 dst[1] = src_ptr[3];
57 dst += 2;
58 src_ptr += 4;
59 }
60 if (dst_width & 1) {
61 dst[0] = src_ptr[1];
62 }
63 }
64
// Halves a row horizontally: each output is the rounded average of two
// adjacent source pixels. src_stride is not used.
void ScaleRowDown2Linear_C(const uint8* src_ptr,
                           ptrdiff_t src_stride,
                           uint8* dst,
                           int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = (src_ptr[0] + src_ptr[1] + 1) >> 1;
    *dst++ = (src_ptr[2] + src_ptr[3] + 1) >> 1;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    *dst = (src_ptr[0] + src_ptr[1] + 1) >> 1;
  }
}
82
ScaleRowDown2Linear_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)83 void ScaleRowDown2Linear_16_C(const uint16* src_ptr,
84 ptrdiff_t src_stride,
85 uint16* dst,
86 int dst_width) {
87 const uint16* s = src_ptr;
88 int x;
89 (void)src_stride;
90 for (x = 0; x < dst_width - 1; x += 2) {
91 dst[0] = (s[0] + s[1] + 1) >> 1;
92 dst[1] = (s[2] + s[3] + 1) >> 1;
93 dst += 2;
94 s += 4;
95 }
96 if (dst_width & 1) {
97 dst[0] = (s[0] + s[1] + 1) >> 1;
98 }
99 }
100
ScaleRowDown2Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst,int dst_width)101 void ScaleRowDown2Box_C(const uint8* src_ptr,
102 ptrdiff_t src_stride,
103 uint8* dst,
104 int dst_width) {
105 const uint8* s = src_ptr;
106 const uint8* t = src_ptr + src_stride;
107 int x;
108 for (x = 0; x < dst_width - 1; x += 2) {
109 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
110 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
111 dst += 2;
112 s += 4;
113 t += 4;
114 }
115 if (dst_width & 1) {
116 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
117 }
118 }
119
ScaleRowDown2Box_Odd_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst,int dst_width)120 void ScaleRowDown2Box_Odd_C(const uint8* src_ptr,
121 ptrdiff_t src_stride,
122 uint8* dst,
123 int dst_width) {
124 const uint8* s = src_ptr;
125 const uint8* t = src_ptr + src_stride;
126 int x;
127 dst_width -= 1;
128 for (x = 0; x < dst_width - 1; x += 2) {
129 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
130 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
131 dst += 2;
132 s += 4;
133 t += 4;
134 }
135 if (dst_width & 1) {
136 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
137 dst += 1;
138 s += 2;
139 t += 2;
140 }
141 dst[0] = (s[0] + t[0] + 1) >> 1;
142 }
143
ScaleRowDown2Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)144 void ScaleRowDown2Box_16_C(const uint16* src_ptr,
145 ptrdiff_t src_stride,
146 uint16* dst,
147 int dst_width) {
148 const uint16* s = src_ptr;
149 const uint16* t = src_ptr + src_stride;
150 int x;
151 for (x = 0; x < dst_width - 1; x += 2) {
152 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
153 dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
154 dst += 2;
155 s += 4;
156 t += 4;
157 }
158 if (dst_width & 1) {
159 dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
160 }
161 }
162
// Point-samples a row to quarter width: each output is the pixel at index 2
// of a group of four source pixels. src_stride is not used.
void ScaleRowDown4_C(const uint8* src_ptr,
                     ptrdiff_t src_stride,
                     uint8* dst,
                     int dst_width) {
  int remaining;
  (void)src_stride;
  for (remaining = dst_width; remaining > 1; remaining -= 2) {
    *dst++ = src_ptr[2];
    *dst++ = src_ptr[6];
    src_ptr += 8;
  }
  if (dst_width & 1) {
    *dst = src_ptr[2];
  }
}
179
ScaleRowDown4_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)180 void ScaleRowDown4_16_C(const uint16* src_ptr,
181 ptrdiff_t src_stride,
182 uint16* dst,
183 int dst_width) {
184 int x;
185 (void)src_stride;
186 for (x = 0; x < dst_width - 1; x += 2) {
187 dst[0] = src_ptr[2];
188 dst[1] = src_ptr[6];
189 dst += 2;
190 src_ptr += 8;
191 }
192 if (dst_width & 1) {
193 dst[0] = src_ptr[2];
194 }
195 }
196
ScaleRowDown4Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst,int dst_width)197 void ScaleRowDown4Box_C(const uint8* src_ptr,
198 ptrdiff_t src_stride,
199 uint8* dst,
200 int dst_width) {
201 intptr_t stride = src_stride;
202 int x;
203 for (x = 0; x < dst_width - 1; x += 2) {
204 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
205 src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
206 src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
207 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
208 src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
209 src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
210 src_ptr[stride * 3 + 3] + 8) >>
211 4;
212 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
213 src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
214 src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
215 src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
216 src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
217 src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
218 src_ptr[stride * 3 + 7] + 8) >>
219 4;
220 dst += 2;
221 src_ptr += 8;
222 }
223 if (dst_width & 1) {
224 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
225 src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
226 src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
227 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
228 src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
229 src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
230 src_ptr[stride * 3 + 3] + 8) >>
231 4;
232 }
233 }
234
ScaleRowDown4Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)235 void ScaleRowDown4Box_16_C(const uint16* src_ptr,
236 ptrdiff_t src_stride,
237 uint16* dst,
238 int dst_width) {
239 intptr_t stride = src_stride;
240 int x;
241 for (x = 0; x < dst_width - 1; x += 2) {
242 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
243 src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
244 src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
245 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
246 src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
247 src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
248 src_ptr[stride * 3 + 3] + 8) >>
249 4;
250 dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
251 src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
252 src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
253 src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
254 src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
255 src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
256 src_ptr[stride * 3 + 7] + 8) >>
257 4;
258 dst += 2;
259 src_ptr += 8;
260 }
261 if (dst_width & 1) {
262 dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
263 src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
264 src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
265 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
266 src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
267 src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
268 src_ptr[stride * 3 + 3] + 8) >>
269 4;
270 }
271 }
272
// Point-samples a row to 3/4 width: from every four source pixels, keeps
// those at index 0, 1 and 3. dst_width must be a positive multiple of 3.
// src_stride is not used.
void ScaleRowDown34_C(const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      uint8* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[1];
    *dst++ = src_ptr[3];
    src_ptr += 4;
  }
}
288
ScaleRowDown34_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)289 void ScaleRowDown34_16_C(const uint16* src_ptr,
290 ptrdiff_t src_stride,
291 uint16* dst,
292 int dst_width) {
293 int x;
294 (void)src_stride;
295 assert((dst_width % 3 == 0) && (dst_width > 0));
296 for (x = 0; x < dst_width; x += 3) {
297 dst[0] = src_ptr[0];
298 dst[1] = src_ptr[1];
299 dst[2] = src_ptr[3];
300 dst += 3;
301 src_ptr += 4;
302 }
303 }
304
305 // Filter rows 0 and 1 together, 3 : 1
ScaleRowDown34_0_Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * d,int dst_width)306 void ScaleRowDown34_0_Box_C(const uint8* src_ptr,
307 ptrdiff_t src_stride,
308 uint8* d,
309 int dst_width) {
310 const uint8* s = src_ptr;
311 const uint8* t = src_ptr + src_stride;
312 int x;
313 assert((dst_width % 3 == 0) && (dst_width > 0));
314 for (x = 0; x < dst_width; x += 3) {
315 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
316 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
317 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
318 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
319 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
320 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
321 d[0] = (a0 * 3 + b0 + 2) >> 2;
322 d[1] = (a1 * 3 + b1 + 2) >> 2;
323 d[2] = (a2 * 3 + b2 + 2) >> 2;
324 d += 3;
325 s += 4;
326 t += 4;
327 }
328 }
329
ScaleRowDown34_0_Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * d,int dst_width)330 void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr,
331 ptrdiff_t src_stride,
332 uint16* d,
333 int dst_width) {
334 const uint16* s = src_ptr;
335 const uint16* t = src_ptr + src_stride;
336 int x;
337 assert((dst_width % 3 == 0) && (dst_width > 0));
338 for (x = 0; x < dst_width; x += 3) {
339 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
340 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
341 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
342 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
343 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
344 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
345 d[0] = (a0 * 3 + b0 + 2) >> 2;
346 d[1] = (a1 * 3 + b1 + 2) >> 2;
347 d[2] = (a2 * 3 + b2 + 2) >> 2;
348 d += 3;
349 s += 4;
350 t += 4;
351 }
352 }
353
354 // Filter rows 1 and 2 together, 1 : 1
ScaleRowDown34_1_Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * d,int dst_width)355 void ScaleRowDown34_1_Box_C(const uint8* src_ptr,
356 ptrdiff_t src_stride,
357 uint8* d,
358 int dst_width) {
359 const uint8* s = src_ptr;
360 const uint8* t = src_ptr + src_stride;
361 int x;
362 assert((dst_width % 3 == 0) && (dst_width > 0));
363 for (x = 0; x < dst_width; x += 3) {
364 uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
365 uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
366 uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
367 uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
368 uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
369 uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
370 d[0] = (a0 + b0 + 1) >> 1;
371 d[1] = (a1 + b1 + 1) >> 1;
372 d[2] = (a2 + b2 + 1) >> 1;
373 d += 3;
374 s += 4;
375 t += 4;
376 }
377 }
378
ScaleRowDown34_1_Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * d,int dst_width)379 void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr,
380 ptrdiff_t src_stride,
381 uint16* d,
382 int dst_width) {
383 const uint16* s = src_ptr;
384 const uint16* t = src_ptr + src_stride;
385 int x;
386 assert((dst_width % 3 == 0) && (dst_width > 0));
387 for (x = 0; x < dst_width; x += 3) {
388 uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
389 uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
390 uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
391 uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
392 uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
393 uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
394 d[0] = (a0 + b0 + 1) >> 1;
395 d[1] = (a1 + b1 + 1) >> 1;
396 d[2] = (a2 + b2 + 1) >> 1;
397 d += 3;
398 s += 4;
399 t += 4;
400 }
401 }
402
403 // Scales a single row of pixels using point sampling.
ScaleCols_C(uint8 * dst_ptr,const uint8 * src_ptr,int dst_width,int x,int dx)404 void ScaleCols_C(uint8* dst_ptr,
405 const uint8* src_ptr,
406 int dst_width,
407 int x,
408 int dx) {
409 int j;
410 for (j = 0; j < dst_width - 1; j += 2) {
411 dst_ptr[0] = src_ptr[x >> 16];
412 x += dx;
413 dst_ptr[1] = src_ptr[x >> 16];
414 x += dx;
415 dst_ptr += 2;
416 }
417 if (dst_width & 1) {
418 dst_ptr[0] = src_ptr[x >> 16];
419 }
420 }
421
ScaleCols_16_C(uint16 * dst_ptr,const uint16 * src_ptr,int dst_width,int x,int dx)422 void ScaleCols_16_C(uint16* dst_ptr,
423 const uint16* src_ptr,
424 int dst_width,
425 int x,
426 int dx) {
427 int j;
428 for (j = 0; j < dst_width - 1; j += 2) {
429 dst_ptr[0] = src_ptr[x >> 16];
430 x += dx;
431 dst_ptr[1] = src_ptr[x >> 16];
432 x += dx;
433 dst_ptr += 2;
434 }
435 if (dst_width & 1) {
436 dst_ptr[0] = src_ptr[x >> 16];
437 }
438 }
439
440 // Scales a single row of pixels up by 2x using point sampling.
ScaleColsUp2_C(uint8 * dst_ptr,const uint8 * src_ptr,int dst_width,int x,int dx)441 void ScaleColsUp2_C(uint8* dst_ptr,
442 const uint8* src_ptr,
443 int dst_width,
444 int x,
445 int dx) {
446 int j;
447 (void)x;
448 (void)dx;
449 for (j = 0; j < dst_width - 1; j += 2) {
450 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
451 src_ptr += 1;
452 dst_ptr += 2;
453 }
454 if (dst_width & 1) {
455 dst_ptr[0] = src_ptr[0];
456 }
457 }
458
ScaleColsUp2_16_C(uint16 * dst_ptr,const uint16 * src_ptr,int dst_width,int x,int dx)459 void ScaleColsUp2_16_C(uint16* dst_ptr,
460 const uint16* src_ptr,
461 int dst_width,
462 int x,
463 int dx) {
464 int j;
465 (void)x;
466 (void)dx;
467 for (j = 0; j < dst_width - 1; j += 2) {
468 dst_ptr[1] = dst_ptr[0] = src_ptr[0];
469 src_ptr += 1;
470 dst_ptr += 2;
471 }
472 if (dst_width & 1) {
473 dst_ptr[0] = src_ptr[0];
474 }
475 }
476
// BLENDER(a, b, f): blends 8 bit values a and b, where f is a 16 bit fraction
// (the callers below pass x & 0xffff). The result is cast to uint8.
// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
// ARM build: uses the full 16 bit fraction; 0x8000 rounds before the >> 16.
#define BLENDER(a, b, f) \
  (uint8)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
// f >> 9 keeps the top 7 bits of the fraction; 0x40 rounds before the >> 7.
#define BLENDER(a, b, f) \
  (uint8)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif
486
ScaleFilterCols_C(uint8 * dst_ptr,const uint8 * src_ptr,int dst_width,int x,int dx)487 void ScaleFilterCols_C(uint8* dst_ptr,
488 const uint8* src_ptr,
489 int dst_width,
490 int x,
491 int dx) {
492 int j;
493 for (j = 0; j < dst_width - 1; j += 2) {
494 int xi = x >> 16;
495 int a = src_ptr[xi];
496 int b = src_ptr[xi + 1];
497 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
498 x += dx;
499 xi = x >> 16;
500 a = src_ptr[xi];
501 b = src_ptr[xi + 1];
502 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
503 x += dx;
504 dst_ptr += 2;
505 }
506 if (dst_width & 1) {
507 int xi = x >> 16;
508 int a = src_ptr[xi];
509 int b = src_ptr[xi + 1];
510 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
511 }
512 }
513
ScaleFilterCols64_C(uint8 * dst_ptr,const uint8 * src_ptr,int dst_width,int x32,int dx)514 void ScaleFilterCols64_C(uint8* dst_ptr,
515 const uint8* src_ptr,
516 int dst_width,
517 int x32,
518 int dx) {
519 int64 x = (int64)(x32);
520 int j;
521 for (j = 0; j < dst_width - 1; j += 2) {
522 int64 xi = x >> 16;
523 int a = src_ptr[xi];
524 int b = src_ptr[xi + 1];
525 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
526 x += dx;
527 xi = x >> 16;
528 a = src_ptr[xi];
529 b = src_ptr[xi + 1];
530 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
531 x += dx;
532 dst_ptr += 2;
533 }
534 if (dst_width & 1) {
535 int64 xi = x >> 16;
536 int a = src_ptr[xi];
537 int b = src_ptr[xi + 1];
538 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
539 }
540 }
541 #undef BLENDER
542
// 16 bit blender: a + f * (b - a) with f a 16 bit fraction, rounded (0x8000)
// and cast to uint16. Same formula as the 8 bit arm blender, but the product
// is formed in 64 bits: with 16 bit samples f * (b - a) can reach
// 65535 * 65535, which does not fit in a signed 32 bit int.
#define BLENDER(a, b, f)                                                  \
  (uint16)((int)(a) +                                                     \
           (int)((((int64)(f) * ((int64)(b) - (int)(a))) + 0x8000) >> 16))
546
ScaleFilterCols_16_C(uint16 * dst_ptr,const uint16 * src_ptr,int dst_width,int x,int dx)547 void ScaleFilterCols_16_C(uint16* dst_ptr,
548 const uint16* src_ptr,
549 int dst_width,
550 int x,
551 int dx) {
552 int j;
553 for (j = 0; j < dst_width - 1; j += 2) {
554 int xi = x >> 16;
555 int a = src_ptr[xi];
556 int b = src_ptr[xi + 1];
557 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
558 x += dx;
559 xi = x >> 16;
560 a = src_ptr[xi];
561 b = src_ptr[xi + 1];
562 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
563 x += dx;
564 dst_ptr += 2;
565 }
566 if (dst_width & 1) {
567 int xi = x >> 16;
568 int a = src_ptr[xi];
569 int b = src_ptr[xi + 1];
570 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
571 }
572 }
573
ScaleFilterCols64_16_C(uint16 * dst_ptr,const uint16 * src_ptr,int dst_width,int x32,int dx)574 void ScaleFilterCols64_16_C(uint16* dst_ptr,
575 const uint16* src_ptr,
576 int dst_width,
577 int x32,
578 int dx) {
579 int64 x = (int64)(x32);
580 int j;
581 for (j = 0; j < dst_width - 1; j += 2) {
582 int64 xi = x >> 16;
583 int a = src_ptr[xi];
584 int b = src_ptr[xi + 1];
585 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
586 x += dx;
587 xi = x >> 16;
588 a = src_ptr[xi];
589 b = src_ptr[xi + 1];
590 dst_ptr[1] = BLENDER(a, b, x & 0xffff);
591 x += dx;
592 dst_ptr += 2;
593 }
594 if (dst_width & 1) {
595 int64 xi = x >> 16;
596 int a = src_ptr[xi];
597 int b = src_ptr[xi + 1];
598 dst_ptr[0] = BLENDER(a, b, x & 0xffff);
599 }
600 }
601 #undef BLENDER
602
// Point-samples a row to 3/8 width: from every eight source pixels, keeps
// those at index 0, 3 and 6. dst_width must be a multiple of 3.
// src_stride is not used.
void ScaleRowDown38_C(const uint8* src_ptr,
                      ptrdiff_t src_stride,
                      uint8* dst,
                      int dst_width) {
  int remaining;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (remaining = dst_width; remaining > 0; remaining -= 3) {
    *dst++ = src_ptr[0];
    *dst++ = src_ptr[3];
    *dst++ = src_ptr[6];
    src_ptr += 8;
  }
}
618
ScaleRowDown38_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst,int dst_width)619 void ScaleRowDown38_16_C(const uint16* src_ptr,
620 ptrdiff_t src_stride,
621 uint16* dst,
622 int dst_width) {
623 int x;
624 (void)src_stride;
625 assert(dst_width % 3 == 0);
626 for (x = 0; x < dst_width; x += 3) {
627 dst[0] = src_ptr[0];
628 dst[1] = src_ptr[3];
629 dst[2] = src_ptr[6];
630 dst += 3;
631 src_ptr += 8;
632 }
633 }
634
635 // 8x3 -> 3x1
ScaleRowDown38_3_Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst_ptr,int dst_width)636 void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
637 ptrdiff_t src_stride,
638 uint8* dst_ptr,
639 int dst_width) {
640 intptr_t stride = src_stride;
641 int i;
642 assert((dst_width % 3 == 0) && (dst_width > 0));
643 for (i = 0; i < dst_width; i += 3) {
644 dst_ptr[0] =
645 (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
646 src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
647 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
648 (65536 / 9) >>
649 16;
650 dst_ptr[1] =
651 (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
652 src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
653 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
654 (65536 / 9) >>
655 16;
656 dst_ptr[2] =
657 (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
658 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
659 (65536 / 6) >>
660 16;
661 src_ptr += 8;
662 dst_ptr += 3;
663 }
664 }
665
ScaleRowDown38_3_Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst_ptr,int dst_width)666 void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
667 ptrdiff_t src_stride,
668 uint16* dst_ptr,
669 int dst_width) {
670 intptr_t stride = src_stride;
671 int i;
672 assert((dst_width % 3 == 0) && (dst_width > 0));
673 for (i = 0; i < dst_width; i += 3) {
674 dst_ptr[0] =
675 (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
676 src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
677 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
678 (65536 / 9) >>
679 16;
680 dst_ptr[1] =
681 (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
682 src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
683 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
684 (65536 / 9) >>
685 16;
686 dst_ptr[2] =
687 (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
688 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
689 (65536 / 6) >>
690 16;
691 src_ptr += 8;
692 dst_ptr += 3;
693 }
694 }
695
696 // 8x2 -> 3x1
ScaleRowDown38_2_Box_C(const uint8 * src_ptr,ptrdiff_t src_stride,uint8 * dst_ptr,int dst_width)697 void ScaleRowDown38_2_Box_C(const uint8* src_ptr,
698 ptrdiff_t src_stride,
699 uint8* dst_ptr,
700 int dst_width) {
701 intptr_t stride = src_stride;
702 int i;
703 assert((dst_width % 3 == 0) && (dst_width > 0));
704 for (i = 0; i < dst_width; i += 3) {
705 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
706 src_ptr[stride + 1] + src_ptr[stride + 2]) *
707 (65536 / 6) >>
708 16;
709 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
710 src_ptr[stride + 4] + src_ptr[stride + 5]) *
711 (65536 / 6) >>
712 16;
713 dst_ptr[2] =
714 (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
715 (65536 / 4) >>
716 16;
717 src_ptr += 8;
718 dst_ptr += 3;
719 }
720 }
721
ScaleRowDown38_2_Box_16_C(const uint16 * src_ptr,ptrdiff_t src_stride,uint16 * dst_ptr,int dst_width)722 void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr,
723 ptrdiff_t src_stride,
724 uint16* dst_ptr,
725 int dst_width) {
726 intptr_t stride = src_stride;
727 int i;
728 assert((dst_width % 3 == 0) && (dst_width > 0));
729 for (i = 0; i < dst_width; i += 3) {
730 dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
731 src_ptr[stride + 1] + src_ptr[stride + 2]) *
732 (65536 / 6) >>
733 16;
734 dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
735 src_ptr[stride + 4] + src_ptr[stride + 5]) *
736 (65536 / 6) >>
737 16;
738 dst_ptr[2] =
739 (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) *
740 (65536 / 4) >>
741 16;
742 src_ptr += 8;
743 dst_ptr += 3;
744 }
745 }
746
ScaleAddRow_C(const uint8 * src_ptr,uint16 * dst_ptr,int src_width)747 void ScaleAddRow_C(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
748 int x;
749 assert(src_width > 0);
750 for (x = 0; x < src_width - 1; x += 2) {
751 dst_ptr[0] += src_ptr[0];
752 dst_ptr[1] += src_ptr[1];
753 src_ptr += 2;
754 dst_ptr += 2;
755 }
756 if (src_width & 1) {
757 dst_ptr[0] += src_ptr[0];
758 }
759 }
760
// Accumulates a row of 16 bit samples into a row of 32 bit sums:
// dst_ptr[i] += src_ptr[i] for i in [0, src_width). src_width must be > 0.
void ScaleAddRow_16_C(const uint16* src_ptr, uint32* dst_ptr, int src_width) {
  int left = src_width;
  assert(src_width > 0);
  while (left > 1) {
    dst_ptr[0] = dst_ptr[0] + src_ptr[0];
    dst_ptr[1] = dst_ptr[1] + src_ptr[1];
    dst_ptr += 2;
    src_ptr += 2;
    left -= 2;
  }
  if (src_width & 1) {
    dst_ptr[0] = dst_ptr[0] + src_ptr[0];
  }
}
774
ScaleARGBRowDown2_C(const uint8 * src_argb,ptrdiff_t src_stride,uint8 * dst_argb,int dst_width)775 void ScaleARGBRowDown2_C(const uint8* src_argb,
776 ptrdiff_t src_stride,
777 uint8* dst_argb,
778 int dst_width) {
779 const uint32* src = (const uint32*)(src_argb);
780 uint32* dst = (uint32*)(dst_argb);
781 int x;
782 (void)src_stride;
783 for (x = 0; x < dst_width - 1; x += 2) {
784 dst[0] = src[1];
785 dst[1] = src[3];
786 src += 4;
787 dst += 2;
788 }
789 if (dst_width & 1) {
790 dst[0] = src[1];
791 }
792 }
793
// Halves an ARGB row horizontally: every output channel is the rounded
// average of the same channel in two adjacent source pixels.
// src_stride is not used.
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               uint8* dst_argb,
                               int dst_width) {
  int remaining;
  int c;
  (void)src_stride;
  for (remaining = dst_width; remaining > 0; --remaining) {
    // c walks the 4 channel bytes; c + 4 is the same channel one pixel on.
    for (c = 0; c < 4; ++c) {
      dst_argb[c] = (src_argb[c] + src_argb[c + 4] + 1) >> 1;
    }
    src_argb += 8;
    dst_argb += 4;
  }
}
809
ScaleARGBRowDown2Box_C(const uint8 * src_argb,ptrdiff_t src_stride,uint8 * dst_argb,int dst_width)810 void ScaleARGBRowDown2Box_C(const uint8* src_argb,
811 ptrdiff_t src_stride,
812 uint8* dst_argb,
813 int dst_width) {
814 int x;
815 for (x = 0; x < dst_width; ++x) {
816 dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
817 src_argb[src_stride + 4] + 2) >>
818 2;
819 dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
820 src_argb[src_stride + 5] + 2) >>
821 2;
822 dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
823 src_argb[src_stride + 6] + 2) >>
824 2;
825 dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
826 src_argb[src_stride + 7] + 2) >>
827 2;
828 src_argb += 8;
829 dst_argb += 4;
830 }
831 }
832
ScaleARGBRowDownEven_C(const uint8 * src_argb,ptrdiff_t src_stride,int src_stepx,uint8 * dst_argb,int dst_width)833 void ScaleARGBRowDownEven_C(const uint8* src_argb,
834 ptrdiff_t src_stride,
835 int src_stepx,
836 uint8* dst_argb,
837 int dst_width) {
838 const uint32* src = (const uint32*)(src_argb);
839 uint32* dst = (uint32*)(dst_argb);
840 (void)src_stride;
841 int x;
842 for (x = 0; x < dst_width - 1; x += 2) {
843 dst[0] = src[0];
844 dst[1] = src[src_stepx];
845 src += src_stepx * 2;
846 dst += 2;
847 }
848 if (dst_width & 1) {
849 dst[0] = src[0];
850 }
851 }
852
ScaleARGBRowDownEvenBox_C(const uint8 * src_argb,ptrdiff_t src_stride,int src_stepx,uint8 * dst_argb,int dst_width)853 void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
854 ptrdiff_t src_stride,
855 int src_stepx,
856 uint8* dst_argb,
857 int dst_width) {
858 int x;
859 for (x = 0; x < dst_width; ++x) {
860 dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
861 src_argb[src_stride + 4] + 2) >>
862 2;
863 dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
864 src_argb[src_stride + 5] + 2) >>
865 2;
866 dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
867 src_argb[src_stride + 6] + 2) >>
868 2;
869 dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
870 src_argb[src_stride + 7] + 2) >>
871 2;
872 src_argb += src_stepx * 4;
873 dst_argb += 4;
874 }
875 }
876
877 // Scales a single row of pixels using point sampling.
ScaleARGBCols_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x,int dx)878 void ScaleARGBCols_C(uint8* dst_argb,
879 const uint8* src_argb,
880 int dst_width,
881 int x,
882 int dx) {
883 const uint32* src = (const uint32*)(src_argb);
884 uint32* dst = (uint32*)(dst_argb);
885 int j;
886 for (j = 0; j < dst_width - 1; j += 2) {
887 dst[0] = src[x >> 16];
888 x += dx;
889 dst[1] = src[x >> 16];
890 x += dx;
891 dst += 2;
892 }
893 if (dst_width & 1) {
894 dst[0] = src[x >> 16];
895 }
896 }
897
ScaleARGBCols64_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x32,int dx)898 void ScaleARGBCols64_C(uint8* dst_argb,
899 const uint8* src_argb,
900 int dst_width,
901 int x32,
902 int dx) {
903 int64 x = (int64)(x32);
904 const uint32* src = (const uint32*)(src_argb);
905 uint32* dst = (uint32*)(dst_argb);
906 int j;
907 for (j = 0; j < dst_width - 1; j += 2) {
908 dst[0] = src[x >> 16];
909 x += dx;
910 dst[1] = src[x >> 16];
911 x += dx;
912 dst += 2;
913 }
914 if (dst_width & 1) {
915 dst[0] = src[x >> 16];
916 }
917 }
918
919 // Scales a single row of pixels up by 2x using point sampling.
ScaleARGBColsUp2_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x,int dx)920 void ScaleARGBColsUp2_C(uint8* dst_argb,
921 const uint8* src_argb,
922 int dst_width,
923 int x,
924 int dx) {
925 const uint32* src = (const uint32*)(src_argb);
926 uint32* dst = (uint32*)(dst_argb);
927 int j;
928 (void)x;
929 (void)dx;
930 for (j = 0; j < dst_width - 1; j += 2) {
931 dst[1] = dst[0] = src[0];
932 src += 1;
933 dst += 2;
934 }
935 if (dst_width & 1) {
936 dst[0] = src[0];
937 }
938 }
939
// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
// BLENDER1 blends one 8 bit channel with a 7 bit fraction f (0..127).
// BLENDERC extracts the channel at bit offset s from packed pixels a and b,
// blends it and shifts it back into place.
// BLENDER blends all four channels of two packed 32 bit pixels.
// Every parameter and every expansion is parenthesized so that expression
// arguments and surrounding operators cannot change the grouping.
#define BLENDER1(a, b, f) (((a) * (0x7f ^ (f)) + (b) * (f)) >> 7)
#define BLENDERC(a, b, f, s) \
  ((uint32)(BLENDER1(((a) >> (s)) & 255, ((b) >> (s)) & 255, f) << (s)))
#define BLENDER(a, b, f)                                                 \
  (BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
   BLENDERC(a, b, f, 0))
948
ScaleARGBFilterCols_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x,int dx)949 void ScaleARGBFilterCols_C(uint8* dst_argb,
950 const uint8* src_argb,
951 int dst_width,
952 int x,
953 int dx) {
954 const uint32* src = (const uint32*)(src_argb);
955 uint32* dst = (uint32*)(dst_argb);
956 int j;
957 for (j = 0; j < dst_width - 1; j += 2) {
958 int xi = x >> 16;
959 int xf = (x >> 9) & 0x7f;
960 uint32 a = src[xi];
961 uint32 b = src[xi + 1];
962 dst[0] = BLENDER(a, b, xf);
963 x += dx;
964 xi = x >> 16;
965 xf = (x >> 9) & 0x7f;
966 a = src[xi];
967 b = src[xi + 1];
968 dst[1] = BLENDER(a, b, xf);
969 x += dx;
970 dst += 2;
971 }
972 if (dst_width & 1) {
973 int xi = x >> 16;
974 int xf = (x >> 9) & 0x7f;
975 uint32 a = src[xi];
976 uint32 b = src[xi + 1];
977 dst[0] = BLENDER(a, b, xf);
978 }
979 }
980
ScaleARGBFilterCols64_C(uint8 * dst_argb,const uint8 * src_argb,int dst_width,int x32,int dx)981 void ScaleARGBFilterCols64_C(uint8* dst_argb,
982 const uint8* src_argb,
983 int dst_width,
984 int x32,
985 int dx) {
986 int64 x = (int64)(x32);
987 const uint32* src = (const uint32*)(src_argb);
988 uint32* dst = (uint32*)(dst_argb);
989 int j;
990 for (j = 0; j < dst_width - 1; j += 2) {
991 int64 xi = x >> 16;
992 int xf = (x >> 9) & 0x7f;
993 uint32 a = src[xi];
994 uint32 b = src[xi + 1];
995 dst[0] = BLENDER(a, b, xf);
996 x += dx;
997 xi = x >> 16;
998 xf = (x >> 9) & 0x7f;
999 a = src[xi];
1000 b = src[xi + 1];
1001 dst[1] = BLENDER(a, b, xf);
1002 x += dx;
1003 dst += 2;
1004 }
1005 if (dst_width & 1) {
1006 int64 xi = x >> 16;
1007 int xf = (x >> 9) & 0x7f;
1008 uint32 a = src[xi];
1009 uint32 b = src[xi + 1];
1010 dst[0] = BLENDER(a, b, xf);
1011 }
1012 }
1013 #undef BLENDER1
1014 #undef BLENDERC
1015 #undef BLENDER
1016
1017 // Scale plane vertically with bilinear interpolation.
ScalePlaneVertical(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint8 * src_argb,uint8 * dst_argb,int x,int y,int dy,int bpp,enum FilterMode filtering)1018 void ScalePlaneVertical(int src_height,
1019 int dst_width,
1020 int dst_height,
1021 int src_stride,
1022 int dst_stride,
1023 const uint8* src_argb,
1024 uint8* dst_argb,
1025 int x,
1026 int y,
1027 int dy,
1028 int bpp,
1029 enum FilterMode filtering) {
1030 // TODO(fbarchard): Allow higher bpp.
1031 int dst_width_bytes = dst_width * bpp;
1032 void (*InterpolateRow)(uint8 * dst_argb, const uint8* src_argb,
1033 ptrdiff_t src_stride, int dst_width,
1034 int source_y_fraction) = InterpolateRow_C;
1035 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1036 int j;
1037 assert(bpp >= 1 && bpp <= 4);
1038 assert(src_height != 0);
1039 assert(dst_width > 0);
1040 assert(dst_height > 0);
1041 src_argb += (x >> 16) * bpp;
1042 #if defined(HAS_INTERPOLATEROW_SSSE3)
1043 if (TestCpuFlag(kCpuHasSSSE3)) {
1044 InterpolateRow = InterpolateRow_Any_SSSE3;
1045 if (IS_ALIGNED(dst_width_bytes, 16)) {
1046 InterpolateRow = InterpolateRow_SSSE3;
1047 }
1048 }
1049 #endif
1050 #if defined(HAS_INTERPOLATEROW_AVX2)
1051 if (TestCpuFlag(kCpuHasAVX2)) {
1052 InterpolateRow = InterpolateRow_Any_AVX2;
1053 if (IS_ALIGNED(dst_width_bytes, 32)) {
1054 InterpolateRow = InterpolateRow_AVX2;
1055 }
1056 }
1057 #endif
1058 #if defined(HAS_INTERPOLATEROW_NEON)
1059 if (TestCpuFlag(kCpuHasNEON)) {
1060 InterpolateRow = InterpolateRow_Any_NEON;
1061 if (IS_ALIGNED(dst_width_bytes, 16)) {
1062 InterpolateRow = InterpolateRow_NEON;
1063 }
1064 }
1065 #endif
1066 #if defined(HAS_INTERPOLATEROW_DSPR2)
1067 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
1068 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
1069 IS_ALIGNED(dst_stride, 4)) {
1070 InterpolateRow = InterpolateRow_Any_DSPR2;
1071 if (IS_ALIGNED(dst_width_bytes, 4)) {
1072 InterpolateRow = InterpolateRow_DSPR2;
1073 }
1074 }
1075 #endif
1076 #if defined(HAS_INTERPOLATEROW_MSA)
1077 if (TestCpuFlag(kCpuHasMSA)) {
1078 InterpolateRow = InterpolateRow_Any_MSA;
1079 if (IS_ALIGNED(dst_width_bytes, 32)) {
1080 InterpolateRow = InterpolateRow_MSA;
1081 }
1082 }
1083 #endif
1084 for (j = 0; j < dst_height; ++j) {
1085 int yi;
1086 int yf;
1087 if (y > max_y) {
1088 y = max_y;
1089 }
1090 yi = y >> 16;
1091 yf = filtering ? ((y >> 8) & 255) : 0;
1092 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1093 dst_width_bytes, yf);
1094 dst_argb += dst_stride;
1095 y += dy;
1096 }
1097 }
ScalePlaneVertical_16(int src_height,int dst_width,int dst_height,int src_stride,int dst_stride,const uint16 * src_argb,uint16 * dst_argb,int x,int y,int dy,int wpp,enum FilterMode filtering)1098 void ScalePlaneVertical_16(int src_height,
1099 int dst_width,
1100 int dst_height,
1101 int src_stride,
1102 int dst_stride,
1103 const uint16* src_argb,
1104 uint16* dst_argb,
1105 int x,
1106 int y,
1107 int dy,
1108 int wpp,
1109 enum FilterMode filtering) {
1110 // TODO(fbarchard): Allow higher wpp.
1111 int dst_width_words = dst_width * wpp;
1112 void (*InterpolateRow)(uint16 * dst_argb, const uint16* src_argb,
1113 ptrdiff_t src_stride, int dst_width,
1114 int source_y_fraction) = InterpolateRow_16_C;
1115 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
1116 int j;
1117 assert(wpp >= 1 && wpp <= 2);
1118 assert(src_height != 0);
1119 assert(dst_width > 0);
1120 assert(dst_height > 0);
1121 src_argb += (x >> 16) * wpp;
1122 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1123 if (TestCpuFlag(kCpuHasSSE2)) {
1124 InterpolateRow = InterpolateRow_Any_16_SSE2;
1125 if (IS_ALIGNED(dst_width_bytes, 16)) {
1126 InterpolateRow = InterpolateRow_16_SSE2;
1127 }
1128 }
1129 #endif
1130 #if defined(HAS_INTERPOLATEROW_16_SSSE3)
1131 if (TestCpuFlag(kCpuHasSSSE3)) {
1132 InterpolateRow = InterpolateRow_Any_16_SSSE3;
1133 if (IS_ALIGNED(dst_width_bytes, 16)) {
1134 InterpolateRow = InterpolateRow_16_SSSE3;
1135 }
1136 }
1137 #endif
1138 #if defined(HAS_INTERPOLATEROW_16_AVX2)
1139 if (TestCpuFlag(kCpuHasAVX2)) {
1140 InterpolateRow = InterpolateRow_Any_16_AVX2;
1141 if (IS_ALIGNED(dst_width_bytes, 32)) {
1142 InterpolateRow = InterpolateRow_16_AVX2;
1143 }
1144 }
1145 #endif
1146 #if defined(HAS_INTERPOLATEROW_16_NEON)
1147 if (TestCpuFlag(kCpuHasNEON)) {
1148 InterpolateRow = InterpolateRow_Any_16_NEON;
1149 if (IS_ALIGNED(dst_width_bytes, 16)) {
1150 InterpolateRow = InterpolateRow_16_NEON;
1151 }
1152 }
1153 #endif
1154 #if defined(HAS_INTERPOLATEROW_16_DSPR2)
1155 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_argb, 4) &&
1156 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(dst_argb, 4) &&
1157 IS_ALIGNED(dst_stride, 4)) {
1158 InterpolateRow = InterpolateRow_Any_16_DSPR2;
1159 if (IS_ALIGNED(dst_width_bytes, 4)) {
1160 InterpolateRow = InterpolateRow_16_DSPR2;
1161 }
1162 }
1163 #endif
1164 for (j = 0; j < dst_height; ++j) {
1165 int yi;
1166 int yf;
1167 if (y > max_y) {
1168 y = max_y;
1169 }
1170 yi = y >> 16;
1171 yf = filtering ? ((y >> 8) & 255) : 0;
1172 InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
1173 dst_width_words, yf);
1174 dst_argb += dst_stride;
1175 y += dy;
1176 }
1177 }
1178
1179 // Simplify the filtering based on scale factors.
ScaleFilterReduce(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering)1180 enum FilterMode ScaleFilterReduce(int src_width,
1181 int src_height,
1182 int dst_width,
1183 int dst_height,
1184 enum FilterMode filtering) {
1185 if (src_width < 0) {
1186 src_width = -src_width;
1187 }
1188 if (src_height < 0) {
1189 src_height = -src_height;
1190 }
1191 if (filtering == kFilterBox) {
1192 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear.
1193 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) {
1194 filtering = kFilterBilinear;
1195 }
1196 }
1197 if (filtering == kFilterBilinear) {
1198 if (src_height == 1) {
1199 filtering = kFilterLinear;
1200 }
1201 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
1202 if (dst_height == src_height || dst_height * 3 == src_height) {
1203 filtering = kFilterLinear;
1204 }
1205 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
1206 // avoid reading 2 pixels horizontally that causes memory exception.
1207 if (src_width == 1) {
1208 filtering = kFilterNone;
1209 }
1210 }
1211 if (filtering == kFilterLinear) {
1212 if (src_width == 1) {
1213 filtering = kFilterNone;
1214 }
1215 // TODO(fbarchard): Detect any odd scale factor and reduce to None.
1216 if (dst_width == src_width || dst_width * 3 == src_width) {
1217 filtering = kFilterNone;
1218 }
1219 }
1220 return filtering;
1221 }
1222
1223 // Divide num by div and return as 16.16 fixed point result.
FixedDiv_C(int num,int div)1224 int FixedDiv_C(int num, int div) {
1225 return (int)(((int64)(num) << 16) / div);
1226 }
1227
1228 // Divide num by div and return as 16.16 fixed point result.
FixedDiv1_C(int num,int div)1229 int FixedDiv1_C(int num, int div) {
1230 return (int)((((int64)(num) << 16) - 0x00010001) / (div - 1));
1231 }
1232
1233 #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
1234
1235 // Compute slope values for stepping.
ScaleSlope(int src_width,int src_height,int dst_width,int dst_height,enum FilterMode filtering,int * x,int * y,int * dx,int * dy)1236 void ScaleSlope(int src_width,
1237 int src_height,
1238 int dst_width,
1239 int dst_height,
1240 enum FilterMode filtering,
1241 int* x,
1242 int* y,
1243 int* dx,
1244 int* dy) {
1245 assert(x != NULL);
1246 assert(y != NULL);
1247 assert(dx != NULL);
1248 assert(dy != NULL);
1249 assert(src_width != 0);
1250 assert(src_height != 0);
1251 assert(dst_width > 0);
1252 assert(dst_height > 0);
1253 // Check for 1 pixel and avoid FixedDiv overflow.
1254 if (dst_width == 1 && src_width >= 32768) {
1255 dst_width = src_width;
1256 }
1257 if (dst_height == 1 && src_height >= 32768) {
1258 dst_height = src_height;
1259 }
1260 if (filtering == kFilterBox) {
1261 // Scale step for point sampling duplicates all pixels equally.
1262 *dx = FixedDiv(Abs(src_width), dst_width);
1263 *dy = FixedDiv(src_height, dst_height);
1264 *x = 0;
1265 *y = 0;
1266 } else if (filtering == kFilterBilinear) {
1267 // Scale step for bilinear sampling renders last pixel once for upsample.
1268 if (dst_width <= Abs(src_width)) {
1269 *dx = FixedDiv(Abs(src_width), dst_width);
1270 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1271 } else if (dst_width > 1) {
1272 *dx = FixedDiv1(Abs(src_width), dst_width);
1273 *x = 0;
1274 }
1275 if (dst_height <= src_height) {
1276 *dy = FixedDiv(src_height, dst_height);
1277 *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter.
1278 } else if (dst_height > 1) {
1279 *dy = FixedDiv1(src_height, dst_height);
1280 *y = 0;
1281 }
1282 } else if (filtering == kFilterLinear) {
1283 // Scale step for bilinear sampling renders last pixel once for upsample.
1284 if (dst_width <= Abs(src_width)) {
1285 *dx = FixedDiv(Abs(src_width), dst_width);
1286 *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter.
1287 } else if (dst_width > 1) {
1288 *dx = FixedDiv1(Abs(src_width), dst_width);
1289 *x = 0;
1290 }
1291 *dy = FixedDiv(src_height, dst_height);
1292 *y = *dy >> 1;
1293 } else {
1294 // Scale step for point sampling duplicates all pixels equally.
1295 *dx = FixedDiv(Abs(src_width), dst_width);
1296 *dy = FixedDiv(src_height, dst_height);
1297 *x = CENTERSTART(*dx, 0);
1298 *y = CENTERSTART(*dy, 0);
1299 }
1300 // Negative src_width means horizontally mirror.
1301 if (src_width < 0) {
1302 *x += (dst_width - 1) * *dx;
1303 *dx = -*dx;
1304 // src_width = -src_width; // Caller must do this.
1305 }
1306 }
1307 #undef CENTERSTART
1308
1309 #ifdef __cplusplus
1310 } // extern "C"
1311 } // namespace libyuv
1312 #endif
1313