/*
 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#ifdef __cplusplus
#define STATIC_CAST(type, expr) static_cast<type>(expr)
#else
#define STATIC_CAST(type, expr) (type)(expr)
#endif

// TODO(fbarchard): make clamp255 preserve negative values.
static __inline int32_t clamp255(int32_t v) {
  return (-(v >= 255) | v) & 255;
}
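// Note: when v >= 255, -(v >= 255) is all ones, so the OR saturates the
// result to 255; smaller non-negative values pass through unchanged. Negative
// inputs currently wrap via the final & 255 instead of clamping to 0, which is
// what the TODO above refers to.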

// Use scale to convert lsb formats to msb, depending on how many bits there
// are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
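// Worked example of the table above: for 10 bit input the full-scale sample
// is 1023 and scale = 16384, so
//   C16TO8(1023, 16384) = clamp255((1023 * 16384) >> 16) = 255
// i.e. the scale factor maps the source maximum to 255 before clamping.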

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

// CPU agnostic row functions
void ScaleRowDown2_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}

void ScaleRowDown2_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}

void ScaleRowDown2_16To8_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width, int scale) {
  int x;
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
  }
}

void ScaleRowDown2_16To8_Odd_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width, int scale) {
  int x;
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  dst_width -= 1;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
    dst[1] = STATIC_CAST(uint8_t, C16TO8(src_ptr[3], scale));
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[1], scale));
    dst += 1;
    src_ptr += 2;
  }
  dst[0] = STATIC_CAST(uint8_t, C16TO8(src_ptr[0], scale));
}

void ScaleRowDown2Linear_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) {
  const uint8_t* s = src_ptr;
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}

void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) {
  const uint16_t* s = src_ptr;
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}

void ScaleRowDown2Linear_16To8_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width, int scale) {
  const uint16_t* s = src_ptr;
  int x;
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
  }
}

void ScaleRowDown2Linear_16To8_Odd_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width, int scale) {
  const uint16_t* s = src_ptr;
  int x;
  (void)src_stride;
  assert(scale >= 256);
  assert(scale <= 32768);
  dst_width -= 1;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + 1) >> 1, scale));
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + 1) >> 1, scale));
    dst += 1;
    s += 2;
  }
  dst[0] = STATIC_CAST(uint8_t, C16TO8(s[0], scale));
}

void ScaleRowDown2Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}

void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  dst_width -= 1;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst += 1;
    s += 2;
    t += 2;
  }
  dst[0] = (s[0] + t[0] + 1) >> 1;
}

void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}

void ScaleRowDown2Box_16To8_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width, int scale) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  assert(scale >= 256);
  assert(scale <= 32768);
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
  }
}

void ScaleRowDown2Box_16To8_Odd_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width, int scale) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  assert(scale >= 256);
  assert(scale <= 32768);
  dst_width -= 1;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
    dst[1] = STATIC_CAST(uint8_t, C16TO8((s[2] + s[3] + t[2] + t[3] + 2) >> 2, scale));
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + s[1] + t[0] + t[1] + 2) >> 2, scale));
    dst += 1;
    s += 2;
    t += 2;
  }
  dst[0] = STATIC_CAST(uint8_t, C16TO8((s[0] + t[0] + 1) >> 1, scale));
}

void ScaleRowDown4_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}

void ScaleRowDown4_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}

void ScaleRowDown4Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) {
  intptr_t stride = src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >> 4;
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
              src_ptr[stride * 3 + 7] + 8) >> 4;
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >> 4;
  }
}

void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) {
  intptr_t stride = src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >> 4;
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
              src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] +
              src_ptr[stride + 7] + src_ptr[stride * 2 + 4] +
              src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] +
              src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] +
              src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] +
              src_ptr[stride * 3 + 7] + 8) >> 4;
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
              src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] +
              src_ptr[stride + 3] + src_ptr[stride * 2 + 0] +
              src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] +
              src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] +
              src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] +
              src_ptr[stride * 3 + 3] + 8) >> 4;
  }
}

void ScaleRowDown34_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) {
  int x;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    dst += 3;
    src_ptr += 4;
  }
}

void ScaleRowDown34_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) {
  int x;
  (void)src_stride;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[1];
    dst[2] = src_ptr[3];
    dst += 3;
    src_ptr += 4;
  }
}

// Filter rows 0 and 1 together, 3 : 1
void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 * 3 + b0 + 2) >> 2;
    d[1] = (a1 * 3 + b1 + 2) >> 2;
    d[2] = (a2 * 3 + b2 + 2) >> 2;
    d += 3;
    s += 4;
    t += 4;
  }
}

void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* d, int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 * 3 + b0 + 2) >> 2;
    d[1] = (a1 * 3 + b1 + 2) >> 2;
    d[2] = (a2 * 3 + b2 + 2) >> 2;
    d += 3;
    s += 4;
    t += 4;
  }
}

// Filter rows 1 and 2 together, 1 : 1
void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 + b0 + 1) >> 1;
    d[1] = (a1 + b1 + 1) >> 1;
    d[2] = (a2 + b2 + 1) >> 1;
    d += 3;
    s += 4;
    t += 4;
  }
}

void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* d, int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    d[0] = (a0 + b0 + 1) >> 1;
    d[1] = (a1 + b1 + 1) >> 1;
    d[2] = (a2 + b2 + 1) >> 1;
    d += 3;
    s += 4;
    t += 4;
  }
}

// Sample position: (O is src sample position, X is dst sample position)
//
//     v dst_ptr at here               v stop at here
//  X O X   X O X   X O X   X O X   X O X
//    ^ src_ptr at here
void ScaleRowUp2_Linear_C(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) {
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
  }
}

// Sample position: (O is src sample position, X is dst sample position)
//
//      src_ptr at here
//  X v X   X   X   X   X   X   X   X   X
//    O     O     O     O     O
//  X   X   X   X   X   X   X   X   X   X
//  ^ dst_ptr at here           ^ stop at here
//  X   X   X   X   X   X   X   X   X   X
//    O     O     O     O     O
//  X   X   X   X   X   X   X   X   X   X
void ScaleRowUp2_Bilinear_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  uint8_t* d = dst_ptr;
  uint8_t* e = dst_ptr + dst_stride;
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    d[2 * x + 0] =
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
    d[2 * x + 1] =
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
    e[2 * x + 0] =
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
    e[2 * x + 1] =
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
  }
}

// Only suitable for at most 14 bit range.
void ScaleRowUp2_Linear_16_C(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) {
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    dst_ptr[2 * x + 0] = (src_ptr[x + 0] * 3 + src_ptr[x + 1] * 1 + 2) >> 2;
    dst_ptr[2 * x + 1] = (src_ptr[x + 0] * 1 + src_ptr[x + 1] * 3 + 2) >> 2;
  }
}

// Only suitable for at most 12 bit range.
void ScaleRowUp2_Bilinear_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  uint16_t* d = dst_ptr;
  uint16_t* e = dst_ptr + dst_stride;
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    d[2 * x + 0] =
        (s[x + 0] * 9 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 1 + 8) >> 4;
    d[2 * x + 1] =
        (s[x + 0] * 3 + s[x + 1] * 9 + t[x + 0] * 1 + t[x + 1] * 3 + 8) >> 4;
    e[2 * x + 0] =
        (s[x + 0] * 3 + s[x + 1] * 1 + t[x + 0] * 9 + t[x + 1] * 3 + 8) >> 4;
    e[2 * x + 1] =
        (s[x + 0] * 1 + s[x + 1] * 3 + t[x + 0] * 3 + t[x + 1] * 9 + 8) >> 4;
  }
}

// Scales a single row of pixels using point sampling.
void ScaleCols_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}

void ScaleCols_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[0] = src_ptr[x >> 16];
    x += dx;
    dst_ptr[1] = src_ptr[x >> 16];
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[x >> 16];
  }
}

// Scales a single row of pixels up by 2x using point sampling.
void ScaleColsUp2_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) {
  int j;
  (void)x;
  (void)dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    src_ptr += 1;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[0];
  }
}

void ScaleColsUp2_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) {
  int j;
  (void)x;
  (void)dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst_ptr[1] = dst_ptr[0] = src_ptr[0];
    src_ptr += 1;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    dst_ptr[0] = src_ptr[0];
  }
}

// (1-f)a + fb can be replaced with a + f(b-a)
#if defined(__arm__) || defined(__aarch64__)
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
#else
// Intel uses 7 bit math with rounding.
#define BLENDER(a, b, f) \
  (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7))
#endif
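// Worked example: blending a = 0 and b = 100 at a fraction of exactly one half
// (f = 0x8000) gives
//   ARM form:   0 + ((0x8000 * (100 - 0) + 0x8000) >> 16) = 50
//   Intel form: 0 + (((0x8000 >> 9) * (100 - 0) + 0x40) >> 7) = 50
// Both compute a + f * (b - a), with the fraction taken from the low 16 bits
// of the 16.16 x coordinate; the two forms may differ by 1 in the last place.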

void ScaleFilterCols_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}

void ScaleFilterCols64_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x32, int dx) {
  int64_t x = (int64_t)(x32);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
#undef BLENDER

// Same as the 8 bit ARM blender, but the result is cast to uint16_t.
#define BLENDER(a, b, f) \
  (uint16_t)(            \
      (int)(a) +         \
      (int)((((int64_t)((f)) * ((int64_t)(b) - (int)(a))) + 0x8000) >> 16))

void ScaleFilterCols_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}

void ScaleFilterCols64_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x32, int dx) {
  int64_t x = (int64_t)(x32);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
#undef BLENDER

void ScaleRowDown38_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) {
  int x;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    dst += 3;
    src_ptr += 8;
  }
}

void ScaleRowDown38_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) {
  int x;
  (void)src_stride;
  assert(dst_width % 3 == 0);
  for (x = 0; x < dst_width; x += 3) {
    dst[0] = src_ptr[0];
    dst[1] = src_ptr[3];
    dst[2] = src_ptr[6];
    dst += 3;
    src_ptr += 8;
  }
}

// 8x3 -> 3x1
void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] =
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * (65536 / 9) >> 16;
    dst_ptr[1] =
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * (65536 / 9) >> 16;
    dst_ptr[2] =
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * (65536 / 6) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] =
        (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
         src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
         src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * (65536u / 9u) >> 16;
    dst_ptr[1] =
        (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
         src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
         src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * (65536u / 9u) >> 16;
    dst_ptr[2] =
        (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] +
         src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * (65536u / 6u) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

// 8x2 -> 3x1
void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
                  src_ptr[stride + 1] + src_ptr[stride + 2]) * (65536 / 6) >> 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
                  src_ptr[stride + 4] + src_ptr[stride + 5]) * (65536 / 6) >> 16;
    dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] +
                  src_ptr[stride + 7]) * (65536 / 4) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] +
                  src_ptr[stride + 1] + src_ptr[stride + 2]) * (65536u / 6u) >> 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] +
                  src_ptr[stride + 4] + src_ptr[stride + 5]) * (65536u / 6u) >> 16;
    dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] +
                  src_ptr[stride + 7]) * (65536u / 4u) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}

void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
  int x;
  assert(src_width > 0);
  for (x = 0; x < src_width - 1; x += 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    src_ptr += 2;
    dst_ptr += 2;
  }
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}

void ScaleAddRow_16_C(const uint16_t* src_ptr, uint32_t* dst_ptr, int src_width) {
  int x;
  assert(src_width > 0);
  for (x = 0; x < src_width - 1; x += 2) {
    dst_ptr[0] += src_ptr[0];
    dst_ptr[1] += src_ptr[1];
    src_ptr += 2;
    dst_ptr += 2;
  }
  if (src_width & 1) {
    dst_ptr[0] += src_ptr[0];
  }
}

// ARGB scale row functions

void ScaleARGBRowDown2_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src[1];
    dst[1] = src[3];
    src += 4;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[1];
  }
}

void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;
    dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;
    dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;
    dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;
    src_argb += 8;
    dst_argb += 4;
  }
}

void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
                   src_argb[src_stride + 4] + 2) >> 2;
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
                   src_argb[src_stride + 5] + 2) >> 2;
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
                   src_argb[src_stride + 6] + 2) >> 2;
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
                   src_argb[src_stride + 7] + 2) >> 2;
    src_argb += 8;
    dst_argb += 4;
  }
}

void ScaleARGBRowDownEven_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  (void)src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src[0];
    dst[1] = src[src_stepx];
    src += src_stepx * 2;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[0];
  }
}

void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] +
                   src_argb[src_stride + 4] + 2) >> 2;
    dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] +
                   src_argb[src_stride + 5] + 2) >> 2;
    dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] +
                   src_argb[src_stride + 6] + 2) >> 2;
    dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] +
                   src_argb[src_stride + 7] + 2) >> 2;
    src_argb += src_stepx * 4;
    dst_argb += 4;
  }
}

// Scales a single row of pixels using point sampling.
void ScaleARGBCols_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[0] = src[x >> 16];
    x += dx;
    dst[1] = src[x >> 16];
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[x >> 16];
  }
}

void ScaleARGBCols64_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x32, int dx) {
  int64_t x = (int64_t)(x32);
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[0] = src[x >> 16];
    x += dx;
    dst[1] = src[x >> 16];
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[x >> 16];
  }
}

// Scales a single row of pixels up by 2x using point sampling.
void ScaleARGBColsUp2_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  (void)x;
  (void)dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[1] = dst[0] = src[0];
    src += 1;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[0];
  }
}

// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
#define BLENDERC(a, b, f, s) \
  (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f)                                                 \
  BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \
      BLENDERC(a, b, f, 0)

void ScaleARGBFilterCols_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) {
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32_t a = src[xi];
    uint32_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32_t a = src[xi];
    uint32_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}

void ScaleARGBFilterCols64_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x32, int dx) {
  int64_t x = (int64_t)(x32);
  const uint32_t* src = (const uint32_t*)(src_argb);
  uint32_t* dst = (uint32_t*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32_t a = src[xi];
    uint32_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32_t a = src[xi];
    uint32_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER

// UV scale row functions
// same as ARGB but 2 channels

void ScaleUVRowDown2_C(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = src_uv[2];  // Store the 2nd UV
    dst_uv[1] = src_uv[3];
    src_uv += 4;
    dst_uv += 2;
  }
}

void ScaleUVRowDown2Linear_C(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = (src_uv[0] + src_uv[2] + 1) >> 1;
    dst_uv[1] = (src_uv[1] + src_uv[3] + 1) >> 1;
    src_uv += 4;
    dst_uv += 2;
  }
}

void ScaleUVRowDown2Box_C(const uint8_t* src_uv, ptrdiff_t src_stride, uint8_t* dst_uv, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
                 src_uv[src_stride + 2] + 2) >> 2;
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
                 src_uv[src_stride + 3] + 2) >> 2;
    src_uv += 4;
    dst_uv += 2;
  }
}

void ScaleUVRowDownEven_C(const uint8_t* src_uv, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_uv, int dst_width) {
  const uint16_t* src = (const uint16_t*)(src_uv);
  uint16_t* dst = (uint16_t*)(dst_uv);
  (void)src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src[0];
    dst[1] = src[src_stepx];
    src += src_stepx * 2;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[0];
  }
}

void ScaleUVRowDownEvenBox_C(const uint8_t* src_uv, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_uv, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_uv[0] = (src_uv[0] + src_uv[2] + src_uv[src_stride] +
                 src_uv[src_stride + 2] + 2) >> 2;
    dst_uv[1] = (src_uv[1] + src_uv[3] + src_uv[src_stride + 1] +
                 src_uv[src_stride + 3] + 2) >> 2;
    src_uv += src_stepx * 2;
    dst_uv += 2;
  }
}

void ScaleUVRowUp2_Linear_C(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) {
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    dst_ptr[4 * x + 0] =
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
    dst_ptr[4 * x + 1] =
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
    dst_ptr[4 * x + 2] =
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
    dst_ptr[4 * x + 3] =
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
  }
}

void ScaleUVRowUp2_Bilinear_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  uint8_t* d = dst_ptr;
  uint8_t* e = dst_ptr + dst_stride;
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
                    t[2 * x + 2] * 1 + 8) >> 4;
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
                    t[2 * x + 3] * 1 + 8) >> 4;
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
                    t[2 * x + 2] * 3 + 8) >> 4;
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
                    t[2 * x + 3] * 3 + 8) >> 4;
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
                    t[2 * x + 2] * 3 + 8) >> 4;
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
                    t[2 * x + 3] * 3 + 8) >> 4;
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
                    t[2 * x + 2] * 9 + 8) >> 4;
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
                    t[2 * x + 3] * 9 + 8) >> 4;
  }
}

void ScaleUVRowUp2_Linear_16_C(const uint16_t* src_ptr, uint16_t* dst_ptr, int dst_width) {
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    dst_ptr[4 * x + 0] =
        (src_ptr[2 * x + 0] * 3 + src_ptr[2 * x + 2] * 1 + 2) >> 2;
    dst_ptr[4 * x + 1] =
        (src_ptr[2 * x + 1] * 3 + src_ptr[2 * x + 3] * 1 + 2) >> 2;
    dst_ptr[4 * x + 2] =
        (src_ptr[2 * x + 0] * 1 + src_ptr[2 * x + 2] * 3 + 2) >> 2;
    dst_ptr[4 * x + 3] =
        (src_ptr[2 * x + 1] * 1 + src_ptr[2 * x + 3] * 3 + 2) >> 2;
  }
}

void ScaleUVRowUp2_Bilinear_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  uint16_t* d = dst_ptr;
  uint16_t* e = dst_ptr + dst_stride;
  int src_width = dst_width >> 1;
  int x;
  assert((dst_width % 2 == 0) && (dst_width >= 0));
  for (x = 0; x < src_width; ++x) {
    d[4 * x + 0] = (s[2 * x + 0] * 9 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
                    t[2 * x + 2] * 1 + 8) >> 4;
    d[4 * x + 1] = (s[2 * x + 1] * 9 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
                    t[2 * x + 3] * 1 + 8) >> 4;
    d[4 * x + 2] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 9 + t[2 * x + 0] * 1 +
                    t[2 * x + 2] * 3 + 8) >> 4;
    d[4 * x + 3] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 9 + t[2 * x + 1] * 1 +
                    t[2 * x + 3] * 3 + 8) >> 4;
    e[4 * x + 0] = (s[2 * x + 0] * 3 + s[2 * x + 2] * 1 + t[2 * x + 0] * 9 +
                    t[2 * x + 2] * 3 + 8) >> 4;
    e[4 * x + 1] = (s[2 * x + 1] * 3 + s[2 * x + 3] * 1 + t[2 * x + 1] * 9 +
                    t[2 * x + 3] * 3 + 8) >> 4;
    e[4 * x + 2] = (s[2 * x + 0] * 1 + s[2 * x + 2] * 3 + t[2 * x + 0] * 3 +
                    t[2 * x + 2] * 9 + 8) >> 4;
    e[4 * x + 3] = (s[2 * x + 1] * 1 + s[2 * x + 3] * 3 + t[2 * x + 1] * 3 +
                    t[2 * x + 3] * 9 + 8) >> 4;
  }
}

// Scales a single row of pixels using point sampling.
void ScaleUVCols_C(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width, int x, int dx) {
  const uint16_t* src = (const uint16_t*)(src_uv);
  uint16_t* dst = (uint16_t*)(dst_uv);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[0] = src[x >> 16];
    x += dx;
    dst[1] = src[x >> 16];
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[x >> 16];
  }
}

void ScaleUVCols64_C(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width, int x32, int dx) {
  int64_t x = (int64_t)(x32);
  const uint16_t* src = (const uint16_t*)(src_uv);
  uint16_t* dst = (uint16_t*)(dst_uv);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[0] = src[x >> 16];
    x += dx;
    dst[1] = src[x >> 16];
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[x >> 16];
  }
}

// Scales a single row of pixels up by 2x using point sampling.
void ScaleUVColsUp2_C(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width, int x, int dx) {
  const uint16_t* src = (const uint16_t*)(src_uv);
  uint16_t* dst = (uint16_t*)(dst_uv);
  int j;
  (void)x;
  (void)dx;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[1] = dst[0] = src[0];
    src += 1;
    dst += 2;
  }
  if (dst_width & 1) {
    dst[0] = src[0];
  }
}

// TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607.
// Mimics SSSE3 blender
#define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7
#define BLENDERC(a, b, f, s) \
  (uint16_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s)
#define BLENDER(a, b, f) BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0)

void ScaleUVFilterCols_C(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width, int x, int dx) {
  const uint16_t* src = (const uint16_t*)(src_uv);
  uint16_t* dst = (uint16_t*)(dst_uv);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint16_t a = src[xi];
    uint16_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint16_t a = src[xi];
    uint16_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}

void ScaleUVFilterCols64_C(uint8_t* dst_uv, const uint8_t* src_uv, int dst_width, int x32, int dx) {
  int64_t x = (int64_t)(x32);
  const uint16_t* src = (const uint16_t*)(src_uv);
  uint16_t* dst = (uint16_t*)(dst_uv);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64_t xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint16_t a = src[xi];
    uint16_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  if (dst_width & 1) {
    int64_t xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint16_t a = src[xi];
    uint16_t b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}
#undef BLENDER1
#undef BLENDERC
#undef BLENDER

// Scale plane vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height,
                        int dst_width,
                        int dst_height,
                        int src_stride,
                        int dst_stride,
                        const uint8_t* src_argb,
                        uint8_t* dst_argb,
                        int x,
                        int y,
                        int dy,
                        int bpp,  // bytes per pixel. 4 for ARGB.
                        enum FilterMode filtering) {
  // TODO(fbarchard): Allow higher bpp.
  int dst_width_bytes = dst_width * bpp;
  void (*InterpolateRow)(uint8_t* dst_argb, const uint8_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_C;
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(bpp >= 1 && bpp <= 4);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_LSX)
  if (TestCpuFlag(kCpuHasLSX)) {
    InterpolateRow = InterpolateRow_Any_LSX;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_LSX;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_RVV)
  if (TestCpuFlag(kCpuHasRVV)) {
    InterpolateRow = InterpolateRow_RVV;
  }
#endif

  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;
    }
    yi = y >> 16;
    yf = filtering ? ((y >> 8) & 255) : 0;
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
                   dst_width_bytes, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}

void ScalePlaneVertical_16(int src_height,
                           int dst_width,
                           int dst_height,
                           int src_stride,
                           int dst_stride,
                           const uint16_t* src_argb,
                           uint16_t* dst_argb,
                           int x,
                           int y,
                           int dy,
                           int wpp, /* words per pixel. normally 1 */
                           enum FilterMode filtering) {
  // TODO(fbarchard): Allow higher wpp.
  int dst_width_words = dst_width * wpp;
  void (*InterpolateRow)(uint16_t* dst_argb, const uint16_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(wpp >= 1 && wpp <= 2);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_16_Any_SSE2;
    if (IS_ALIGNED(dst_width_words, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_16_Any_SSSE3;
    if (IS_ALIGNED(dst_width_words, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_16_Any_AVX2;
    if (IS_ALIGNED(dst_width_words, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_16_Any_NEON;
    if (IS_ALIGNED(dst_width_words, 8)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;
    }
    yi = y >> 16;
    yf = filtering ? ((y >> 8) & 255) : 0;
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
                   dst_width_words, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}

// Use scale to convert lsb formats to msb, depending on how many bits there
// are:
// 32768 = 9 bits
// 16384 = 10 bits
// 4096 = 12 bits
// 256 = 16 bits
// TODO(fbarchard): change scale to bits
void ScalePlaneVertical_16To8(int src_height,
                              int dst_width,
                              int dst_height,
                              int src_stride,
                              int dst_stride,
                              const uint16_t* src_argb,
                              uint8_t* dst_argb,
                              int x,
                              int y,
                              int dy,
                              int wpp, /* words per pixel. normally 1 */
                              int scale,
                              enum FilterMode filtering) {
  // TODO(fbarchard): Allow higher wpp.
  int dst_width_words = dst_width * wpp;
  // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
  void (*InterpolateRow_16To8)(uint8_t* dst_argb, const uint16_t* src_argb,
                               ptrdiff_t src_stride, int scale, int dst_width,
                               int source_y_fraction) = InterpolateRow_16To8_C;
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(wpp >= 1 && wpp <= 2);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * wpp;

#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
    }
  }
#endif
  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;
    }
    yi = y >> 16;
    yf = filtering ? ((y >> 8) & 255) : 0;
    InterpolateRow_16To8(dst_argb, src_argb + yi * src_stride, src_stride,
                         scale, dst_width_words, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}

// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width, int src_height, int dst_width, int dst_height, enum FilterMode filtering) {
  if (src_width < 0) {
    src_width = -src_width;
  }
  if (src_height < 0) {
    src_height = -src_height;
  }
  if (filtering == kFilterBox) {
    // If scaling either axis to 0.5 or larger, switch from Box to Bilinear.
    if (dst_width * 2 >= src_width || dst_height * 2 >= src_height) {
      filtering = kFilterBilinear;
    }
  }
  if (filtering == kFilterBilinear) {
    if (src_height == 1) {
      filtering = kFilterLinear;
    }
    // TODO(fbarchard): Detect any odd scale factor and reduce to Linear.
    if (dst_height == src_height || dst_height * 3 == src_height) {
      filtering = kFilterLinear;
    }
    // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to
    // avoid reading 2 pixels horizontally that causes memory exception.
    if (src_width == 1) {
      filtering = kFilterNone;
    }
  }
  if (filtering == kFilterLinear) {
    if (src_width == 1) {
      filtering = kFilterNone;
    }
    // TODO(fbarchard): Detect any odd scale factor and reduce to None.
    if (dst_width == src_width || dst_width * 3 == src_width) {
      filtering = kFilterNone;
    }
  }
  return filtering;
}

// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_C(int num, int div) {
  return (int)(((int64_t)(num) << 16) / div);
}
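// Example: FixedDiv(1, 2) = 0x8000, i.e. 0.5 in 16.16 fixed point; stepping a
// source coordinate by this amount advances half a source pixel per
// destination pixel.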

// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_C(int num, int div) {
  return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1));
}
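// Note: subtracting 0x00010001 makes the numerator (((num - 1) << 16) - 1), so
// this is (num - 1) / (div - 1) in 16.16 fixed point nudged down by at most
// one ulp; the last destination pixel then interpolates at the last source
// pixel without stepping past it.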

#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)
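// Example: CENTERSTART(dx, -32768) is dx / 2 - 32768 for positive dx, the
// half pixel offset used below to center the 2 tap filter on the source.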

// Compute slope values for stepping.
void ScaleSlope(int src_width,
                int src_height,
                int dst_width,
                int dst_height,
                enum FilterMode filtering,
                int* x,
                int* y,
                int* dx,
                int* dy) {
  assert(x != NULL);
  assert(y != NULL);
  assert(dx != NULL);
  assert(dy != NULL);
  assert(src_width != 0);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  // Check for 1 pixel and avoid FixedDiv overflow.
  if (dst_width == 1 && src_width >= 32768) {
    dst_width = src_width;
  }
  if (dst_height == 1 && src_height >= 32768) {
    dst_height = src_height;
  }
  if (filtering == kFilterBox) {
    // Scale step for point sampling duplicates all pixels equally.
    *dx = FixedDiv(Abs(src_width), dst_width);
    *dy = FixedDiv(src_height, dst_height);
    *x = 0;
    *y = 0;
  } else if (filtering == kFilterBilinear) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
      *dx = FixedDiv(Abs(src_width), dst_width);
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (src_width > 1 && dst_width > 1) {
      *dx = FixedDiv1(Abs(src_width), dst_width);
      *x = 0;
    }
    if (dst_height <= src_height) {
      *dy = FixedDiv(src_height, dst_height);
      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (src_height > 1 && dst_height > 1) {
      *dy = FixedDiv1(src_height, dst_height);
      *y = 0;
    }
  } else if (filtering == kFilterLinear) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
      *dx = FixedDiv(Abs(src_width), dst_width);
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (src_width > 1 && dst_width > 1) {
      *dx = FixedDiv1(Abs(src_width), dst_width);
      *x = 0;
    }
    *dy = FixedDiv(src_height, dst_height);
    *y = *dy >> 1;
  } else {
    // Scale step for point sampling duplicates all pixels equally.
    *dx = FixedDiv(Abs(src_width), dst_width);
    *dy = FixedDiv(src_height, dst_height);
    *x = CENTERSTART(*dx, 0);
    *y = CENTERSTART(*dy, 0);
  }
  // Negative src_width means horizontally mirror.
  if (src_width < 0) {
    *x += (dst_width - 1) * *dx;
    *dx = -*dx;
    // src_width = -src_width;  // Caller must do this.
  }
}
#undef CENTERSTART

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif