1 /* 2 * Copyright 2015 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <string.h> // For memset/memcpy 12 13 #include "libyuv/scale.h" 14 #include "libyuv/scale_row.h" 15 16 #include "libyuv/basic_types.h" 17 18 #ifdef __cplusplus 19 namespace libyuv { 20 extern "C" { 21 #endif 22 23 // Fixed scale down. 24 // Mask may be non-power of 2, so use MOD 25 #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ 26 void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ 27 int dst_width) { \ 28 int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ 29 int n = dst_width - r; \ 30 if (n > 0) { \ 31 SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ 32 } \ 33 SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ 34 dst_ptr + n * BPP, r); \ 35 } 36 37 // Fixed scale down for odd source width. Used by I420Blend subsampling. 38 // Since dst_width is (width + 1) / 2, this function scales one less pixel 39 // and copies the last pixel. 
40 #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ 41 void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ 42 int dst_width) { \ 43 int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ 44 int n = (dst_width - 1) - r; \ 45 if (n > 0) { \ 46 SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ 47 } \ 48 SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ 49 dst_ptr + n * BPP, r + 1); \ 50 } 51 52 #ifdef HAS_SCALEROWDOWN2_SSSE3 53 SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) 54 SDANY(ScaleRowDown2Linear_Any_SSSE3, 55 ScaleRowDown2Linear_SSSE3, 56 ScaleRowDown2Linear_C, 57 2, 58 1, 59 15) 60 SDANY(ScaleRowDown2Box_Any_SSSE3, 61 ScaleRowDown2Box_SSSE3, 62 ScaleRowDown2Box_C, 63 2, 64 1, 65 15) 66 SDODD(ScaleRowDown2Box_Odd_SSSE3, 67 ScaleRowDown2Box_SSSE3, 68 ScaleRowDown2Box_Odd_C, 69 2, 70 1, 71 15) 72 #endif 73 #ifdef HAS_SCALEUVROWDOWN2BOX_SSSE3 74 SDANY(ScaleUVRowDown2Box_Any_SSSE3, 75 ScaleUVRowDown2Box_SSSE3, 76 ScaleUVRowDown2Box_C, 77 2, 78 2, 79 3) 80 #endif 81 #ifdef HAS_SCALEUVROWDOWN2BOX_AVX2 82 SDANY(ScaleUVRowDown2Box_Any_AVX2, 83 ScaleUVRowDown2Box_AVX2, 84 ScaleUVRowDown2Box_C, 85 2, 86 2, 87 7) 88 #endif 89 #ifdef HAS_SCALEROWDOWN2_AVX2 90 SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) 91 SDANY(ScaleRowDown2Linear_Any_AVX2, 92 ScaleRowDown2Linear_AVX2, 93 ScaleRowDown2Linear_C, 94 2, 95 1, 96 31) 97 SDANY(ScaleRowDown2Box_Any_AVX2, 98 ScaleRowDown2Box_AVX2, 99 ScaleRowDown2Box_C, 100 2, 101 1, 102 31) 103 SDODD(ScaleRowDown2Box_Odd_AVX2, 104 ScaleRowDown2Box_AVX2, 105 ScaleRowDown2Box_Odd_C, 106 2, 107 1, 108 31) 109 #endif 110 #ifdef HAS_SCALEROWDOWN2_NEON 111 SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) 112 SDANY(ScaleRowDown2Linear_Any_NEON, 113 ScaleRowDown2Linear_NEON, 114 ScaleRowDown2Linear_C, 115 2, 116 1, 117 15) 118 SDANY(ScaleRowDown2Box_Any_NEON, 119 
ScaleRowDown2Box_NEON, 120 ScaleRowDown2Box_C, 121 2, 122 1, 123 15) 124 SDODD(ScaleRowDown2Box_Odd_NEON, 125 ScaleRowDown2Box_NEON, 126 ScaleRowDown2Box_Odd_C, 127 2, 128 1, 129 15) 130 #endif 131 #ifdef HAS_SCALEUVROWDOWN2_NEON 132 SDANY(ScaleUVRowDown2_Any_NEON, 133 ScaleUVRowDown2_NEON, 134 ScaleUVRowDown2_C, 135 2, 136 2, 137 7) 138 #endif 139 #ifdef HAS_SCALEUVROWDOWN2LINEAR_NEON 140 SDANY(ScaleUVRowDown2Linear_Any_NEON, 141 ScaleUVRowDown2Linear_NEON, 142 ScaleUVRowDown2Linear_C, 143 2, 144 2, 145 7) 146 #endif 147 #ifdef HAS_SCALEUVROWDOWN2BOX_NEON 148 SDANY(ScaleUVRowDown2Box_Any_NEON, 149 ScaleUVRowDown2Box_NEON, 150 ScaleUVRowDown2Box_C, 151 2, 152 2, 153 7) 154 #endif 155 156 #ifdef HAS_SCALEROWDOWN2_MSA 157 SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31) 158 SDANY(ScaleRowDown2Linear_Any_MSA, 159 ScaleRowDown2Linear_MSA, 160 ScaleRowDown2Linear_C, 161 2, 162 1, 163 31) 164 SDANY(ScaleRowDown2Box_Any_MSA, 165 ScaleRowDown2Box_MSA, 166 ScaleRowDown2Box_C, 167 2, 168 1, 169 31) 170 #endif 171 #ifdef HAS_SCALEROWDOWN2_LSX 172 SDANY(ScaleRowDown2_Any_LSX, ScaleRowDown2_LSX, ScaleRowDown2_C, 2, 1, 31) 173 SDANY(ScaleRowDown2Linear_Any_LSX, 174 ScaleRowDown2Linear_LSX, 175 ScaleRowDown2Linear_C, 176 2, 177 1, 178 31) 179 SDANY(ScaleRowDown2Box_Any_LSX, 180 ScaleRowDown2Box_LSX, 181 ScaleRowDown2Box_C, 182 2, 183 1, 184 31) 185 #endif 186 #ifdef HAS_SCALEROWDOWN4_SSSE3 187 SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) 188 SDANY(ScaleRowDown4Box_Any_SSSE3, 189 ScaleRowDown4Box_SSSE3, 190 ScaleRowDown4Box_C, 191 4, 192 1, 193 7) 194 #endif 195 #ifdef HAS_SCALEROWDOWN4_AVX2 196 SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15) 197 SDANY(ScaleRowDown4Box_Any_AVX2, 198 ScaleRowDown4Box_AVX2, 199 ScaleRowDown4Box_C, 200 4, 201 1, 202 15) 203 #endif 204 #ifdef HAS_SCALEROWDOWN4_NEON 205 SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7) 206 
// SDANY arguments: (wrapper name, SIMD kernel, C kernel, FACTOR, BPP, MASK).
// The wrapper gives the SIMD kernel a multiple of MASK + 1 destination pixels
// and the C kernel the remainder.
SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C, 4,
      1, 7)
#endif
#ifdef HAS_SCALEROWDOWN4_MSA
SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15)
SDANY(ScaleRowDown4Box_Any_MSA, ScaleRowDown4Box_MSA, ScaleRowDown4Box_C, 4, 1,
      15)
#endif
#ifdef HAS_SCALEROWDOWN4_LSX
SDANY(ScaleRowDown4_Any_LSX, ScaleRowDown4_LSX, ScaleRowDown4_C, 4, 1, 15)
SDANY(ScaleRowDown4Box_Any_LSX, ScaleRowDown4Box_LSX, ScaleRowDown4Box_C, 4, 1,
      15)
#endif

// FACTOR is written as 4 / 3 (no parentheses) on purpose: SDANY substitutes it
// textually into `n * FACTOR`, which becomes n * 4 / 3 in integer arithmetic.
// MASK + 1 is 24 or 48 here, i.e. a multiple of 3, so the division is exact.
#ifdef HAS_SCALEROWDOWN34_SSSE3
SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3, ScaleRowDown34_C, 4 / 3,
      1, 23)
SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3,
      ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3,
      ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
#endif
#ifdef HAS_SCALEROWDOWN34_NEON
SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON, ScaleRowDown34_C, 4 / 3, 1,
      23)
SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON,
      ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON,
      ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
#endif
#ifdef HAS_SCALEROWDOWN34_MSA
SDANY(ScaleRowDown34_Any_MSA, ScaleRowDown34_MSA, ScaleRowDown34_C, 4 / 3, 1,
      47)
SDANY(ScaleRowDown34_0_Box_Any_MSA, ScaleRowDown34_0_Box_MSA,
      ScaleRowDown34_0_Box_C, 4 / 3, 1, 47)
SDANY(ScaleRowDown34_1_Box_Any_MSA, ScaleRowDown34_1_Box_MSA,
      ScaleRowDown34_1_Box_C, 4 / 3, 1, 47)
#endif
#ifdef HAS_SCALEROWDOWN34_LSX
SDANY(ScaleRowDown34_Any_LSX, ScaleRowDown34_LSX, ScaleRowDown34_C, 4 / 3, 1,
      47)
SDANY(ScaleRowDown34_0_Box_Any_LSX, ScaleRowDown34_0_Box_LSX,
      ScaleRowDown34_0_Box_C, 4 / 3, 1, 47)
SDANY(ScaleRowDown34_1_Box_Any_LSX, ScaleRowDown34_1_Box_LSX,
      ScaleRowDown34_1_Box_C, 4 / 3, 1, 47)
#endif

// As above, FACTOR 8 / 3 must stay unparenthesized; it expands to n * 8 / 3.
// MASK + 1 is 12 or 6 here, i.e. a multiple of 3, so the division is exact.
#ifdef HAS_SCALEROWDOWN38_SSSE3
SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3, ScaleRowDown38_C, 8 / 3,
      1, 11)
SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3,
      ScaleRowDown38_3_Box_C, 8 / 3, 1, 5)
SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3,
      ScaleRowDown38_2_Box_C, 8 / 3, 1, 5)
#endif
#ifdef HAS_SCALEROWDOWN38_NEON
SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON, ScaleRowDown38_C, 8 / 3, 1,
      11)
SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON,
      ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON,
      ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
#endif
#ifdef HAS_SCALEROWDOWN38_MSA
SDANY(ScaleRowDown38_Any_MSA, ScaleRowDown38_MSA, ScaleRowDown38_C, 8 / 3, 1,
      11)
SDANY(ScaleRowDown38_3_Box_Any_MSA, ScaleRowDown38_3_Box_MSA,
      ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
SDANY(ScaleRowDown38_2_Box_Any_MSA, ScaleRowDown38_2_Box_MSA,
      ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
#endif
#ifdef HAS_SCALEROWDOWN38_LSX
SDANY(ScaleRowDown38_Any_LSX, ScaleRowDown38_LSX, ScaleRowDown38_C, 8 / 3, 1,
      11)
SDANY(ScaleRowDown38_3_Box_Any_LSX, ScaleRowDown38_3_Box_LSX,
      ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
SDANY(ScaleRowDown38_2_Box_Any_LSX, ScaleRowDown38_2_Box_LSX,
      ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
#endif

// ARGB wrappers pass BPP = 4, so pointer offsets advance 4 bytes per pixel.
#ifdef HAS_SCALEARGBROWDOWN2_SSE2
SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2, ScaleARGBRowDown2_C,
      2, 4, 3)
// SDANY arguments: (wrapper name, SIMD kernel, C kernel, FACTOR, BPP, MASK).
// All wrappers below use FACTOR = 2 and BPP = 4; only MASK varies per target.
SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2,
      ScaleARGBRowDown2Linear_C, 2, 4, 3)
SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2,
      ScaleARGBRowDown2Box_C, 2, 4, 3)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_NEON
SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON, ScaleARGBRowDown2_C,
      2, 4, 7)
SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
      ScaleARGBRowDown2Linear_C, 2, 4, 7)
SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
      ScaleARGBRowDown2Box_C, 2, 4, 7)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_MSA
SDANY(ScaleARGBRowDown2_Any_MSA, ScaleARGBRowDown2_MSA, ScaleARGBRowDown2_C, 2,
      4, 3)
SDANY(ScaleARGBRowDown2Linear_Any_MSA, ScaleARGBRowDown2Linear_MSA,
      ScaleARGBRowDown2Linear_C, 2, 4, 3)
SDANY(ScaleARGBRowDown2Box_Any_MSA, ScaleARGBRowDown2Box_MSA,
      ScaleARGBRowDown2Box_C, 2, 4, 3)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_LSX
SDANY(ScaleARGBRowDown2_Any_LSX, ScaleARGBRowDown2_LSX, ScaleARGBRowDown2_C, 2,
      4, 3)
SDANY(ScaleARGBRowDown2Linear_Any_LSX, ScaleARGBRowDown2Linear_LSX,
      ScaleARGBRowDown2Linear_C, 2, 4, 3)
SDANY(ScaleARGBRowDown2Box_Any_LSX, ScaleARGBRowDown2Box_LSX,
      ScaleARGBRowDown2Box_C, 2, 4, 3)
#endif
// SDANY is not used past this point.
#undef SDANY

// Scale down by even scale factor.
// Generates NAMEANY, a wrapper that accepts any dst_width.  The source step
// between sampled pixels (src_stepx) is a run-time argument.
//  - SCALEROWDOWN_SIMD gets the leading dst_width & ~MASK pixels (skipped when
//    that count is zero);
//  - SCALEROWDOWN_C gets the remaining dst_width & MASK pixels, starting
//    (n * src_stepx) * BPP into the source and n * BPP into the destination.
// MASK is applied with bitwise AND, so MASK + 1 must be a power of 2.
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK)       \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
               uint8_t* dst_ptr, int dst_width) {                           \
    int r = dst_width & MASK;                                               \
    int n = dst_width & ~MASK;                                              \
    if (n > 0) {                                                            \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n);        \
    }                                                                       \
    SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx,  \
                   dst_ptr + n * BPP, r);                                   \
  }

#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
       ScaleARGBRowDownEven_C, 4, 3)
SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
       ScaleARGBRowDownEvenBox_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
       ScaleARGBRowDownEven_C, 4, 3)
SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
       ScaleARGBRowDownEvenBox_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_MSA
SDAANY(ScaleARGBRowDownEven_Any_MSA, ScaleARGBRowDownEven_MSA,
       ScaleARGBRowDownEven_C, 4, 3)
SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, ScaleARGBRowDownEvenBox_MSA,
       ScaleARGBRowDownEvenBox_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_LSX
SDAANY(ScaleARGBRowDownEven_Any_LSX, ScaleARGBRowDownEven_LSX,
       ScaleARGBRowDownEven_C, 4, 3)
SDAANY(ScaleARGBRowDownEvenBox_Any_LSX, ScaleARGBRowDownEvenBox_LSX,
       ScaleARGBRowDownEvenBox_C, 4, 3)
#endif
#ifdef HAS_SCALEUVROWDOWNEVEN_NEON
SDAANY(ScaleUVRowDownEven_Any_NEON, ScaleUVRowDownEven_NEON,
       ScaleUVRowDownEven_C, 2, 3)
#endif

#ifdef SASIMDONLY
// This also works and uses memcpy and SIMD instead of C, but is slower on ARM

// Add rows box filter scale down.  Using macro from row_any
//
// The leading width & ~MASK pixels go straight to ANY_SIMD.  The remaining
// width & MASK pixels are staged through zero-filled temporaries so ANY_SIMD
// can run on a full MASK + 1 block, then only the valid part is copied back.
// SBPP and BPP are the byte sizes of one source / destination element.
// dst_ptr is uint16_t*, so the tail offset is applied on a byte pointer;
// `dst_ptr + n * BPP` would scale the offset by the element size a second
// time.
#define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK)                      \
  void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint16_t dst_temp[32]);                               \
    SIMD_ALIGNED(uint8_t src_temp[32]);                                \
    int r = width & MASK;                                              \
    int n = width & ~MASK;                                             \
    if (n > 0) {                                                       \
      ANY_SIMD(src_ptr, dst_ptr, n);                                   \
    }                                                                  \
    if (r > 0) {                                                       \
      uint8_t* dst_tail = (uint8_t*)dst_ptr + n * BPP;                 \
      memset(src_temp, 0, sizeof(src_temp)); /* for msan */            \
      memset(dst_temp, 0, sizeof(dst_temp)); /* for msan */            \
      memcpy(src_temp, src_ptr + n * SBPP, r * SBPP);                  \
      memcpy(dst_temp, dst_tail, r * BPP);                             \
      ANY_SIMD(src_temp, dst_temp, MASK + 1);                          \
      memcpy(dst_tail, dst_temp, r * BPP);                             \
    }                                                                  \
  }

#ifdef HAS_SCALEADDROW_SSE2
SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31)
#endif
#ifdef HAS_SCALEADDROW_NEON
SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_MSA
SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15)
#endif
#ifdef HAS_SCALEADDROW_LSX
SAROW(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, 1, 2, 15)
#endif
#undef SAROW

#else

// Add rows box filter scale down.
//
// SCALEADDROW_SIMD gets the leading src_width & ~MASK elements (skipped when
// zero); SCALEADDROW_C always gets the remaining src_width & MASK elements.
// Both pointers advance by n elements for the C call.
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK)              \
  void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
    int n = src_width & ~MASK;                                             \
    if (n > 0) {                                                           \
      SCALEADDROW_SIMD(src_ptr, dst_ptr, n);                               \
    }                                                                      \
    SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK);             \
  }

#ifdef HAS_SCALEADDROW_SSE2
SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
#endif
#ifdef HAS_SCALEADDROW_NEON
SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_LSX
SAANY(ScaleAddRow_Any_LSX, ScaleAddRow_LSX, ScaleAddRow_C, 15)
#endif
#undef SAANY

#endif  // SASIMDONLY

// Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
//
// TERP_SIMD gets the leading dst_width & ~MASK pixels (skipped when zero).
// TERP_C always gets the remaining dst_width & MASK pixels; it writes at
// dst_ptr + n * BPP and starts from position x + n * dx.  src_ptr itself is
// passed through unchanged.
#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK)                            \
  void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \
               int dx) {                                                       \
    int r = dst_width & MASK;                                                  \
    int n = dst_width & ~MASK;                                                 \
    if (n > 0) {                                                               \
      TERP_SIMD(dst_ptr, src_ptr, n, x, dx);                                   \
    }                                                                          \
    TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx);                     \
  }

#ifdef HAS_SCALEFILTERCOLS_NEON
CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
#endif
#ifdef HAS_SCALEFILTERCOLS_MSA
CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15)
#endif
#ifdef HAS_SCALEFILTERCOLS_LSX
CANY(ScaleFilterCols_Any_LSX, ScaleFilterCols_LSX, ScaleFilterCols_C, 1, 15)
#endif
#ifdef HAS_SCALEARGBCOLS_NEON
CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
#endif
#ifdef HAS_SCALEARGBCOLS_MSA
CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
#endif
#ifdef HAS_SCALEARGBCOLS_LSX 639 CANY(ScaleARGBCols_Any_LSX, ScaleARGBCols_LSX, ScaleARGBCols_C, 4, 3) 640 #endif 641 #ifdef HAS_SCALEARGBFILTERCOLS_NEON 642 CANY(ScaleARGBFilterCols_Any_NEON, 643 ScaleARGBFilterCols_NEON, 644 ScaleARGBFilterCols_C, 645 4, 646 3) 647 #endif 648 #ifdef HAS_SCALEARGBFILTERCOLS_MSA 649 CANY(ScaleARGBFilterCols_Any_MSA, 650 ScaleARGBFilterCols_MSA, 651 ScaleARGBFilterCols_C, 652 4, 653 7) 654 #endif 655 #ifdef HAS_SCALEARGBFILTERCOLS_LSX 656 CANY(ScaleARGBFilterCols_Any_LSX, 657 ScaleARGBFilterCols_LSX, 658 ScaleARGBFilterCols_C, 659 4, 660 7) 661 #endif 662 #undef CANY 663 664 // Scale up horizontally 2 times using linear filter. 665 #define SUH2LANY(NAME, SIMD, C, MASK, PTYPE) \ 666 void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ 667 int work_width = (dst_width - 1) & ~1; \ 668 int r = work_width & MASK; \ 669 int n = work_width & ~MASK; \ 670 dst_ptr[0] = src_ptr[0]; \ 671 if (work_width > 0) { \ 672 if (n != 0) { \ 673 SIMD(src_ptr, dst_ptr + 1, n); \ 674 } \ 675 C(src_ptr + (n / 2), dst_ptr + n + 1, r); \ 676 } \ 677 dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2]; \ 678 } 679 680 // Even the C versions need to be wrapped, because boundary pixels have to 681 // be handled differently 682 683 SUH2LANY(ScaleRowUp2_Linear_Any_C, 684 ScaleRowUp2_Linear_C, 685 ScaleRowUp2_Linear_C, 686 0, 687 uint8_t) 688 689 SUH2LANY(ScaleRowUp2_Linear_16_Any_C, 690 ScaleRowUp2_Linear_16_C, 691 ScaleRowUp2_Linear_16_C, 692 0, 693 uint16_t) 694 695 #ifdef HAS_SCALEROWUP2_LINEAR_SSE2 696 SUH2LANY(ScaleRowUp2_Linear_Any_SSE2, 697 ScaleRowUp2_Linear_SSE2, 698 ScaleRowUp2_Linear_C, 699 15, 700 uint8_t) 701 #endif 702 703 #ifdef HAS_SCALEROWUP2_LINEAR_SSSE3 704 SUH2LANY(ScaleRowUp2_Linear_Any_SSSE3, 705 ScaleRowUp2_Linear_SSSE3, 706 ScaleRowUp2_Linear_C, 707 15, 708 uint8_t) 709 #endif 710 711 #ifdef HAS_SCALEROWUP2_LINEAR_12_SSSE3 712 SUH2LANY(ScaleRowUp2_Linear_12_Any_SSSE3, 713 ScaleRowUp2_Linear_12_SSSE3, 714 
ScaleRowUp2_Linear_16_C, 715 15, 716 uint16_t) 717 #endif 718 719 #ifdef HAS_SCALEROWUP2_LINEAR_16_SSE2 720 SUH2LANY(ScaleRowUp2_Linear_16_Any_SSE2, 721 ScaleRowUp2_Linear_16_SSE2, 722 ScaleRowUp2_Linear_16_C, 723 7, 724 uint16_t) 725 #endif 726 727 #ifdef HAS_SCALEROWUP2_LINEAR_AVX2 728 SUH2LANY(ScaleRowUp2_Linear_Any_AVX2, 729 ScaleRowUp2_Linear_AVX2, 730 ScaleRowUp2_Linear_C, 731 31, 732 uint8_t) 733 #endif 734 735 #ifdef HAS_SCALEROWUP2_LINEAR_12_AVX2 736 SUH2LANY(ScaleRowUp2_Linear_12_Any_AVX2, 737 ScaleRowUp2_Linear_12_AVX2, 738 ScaleRowUp2_Linear_16_C, 739 31, 740 uint16_t) 741 #endif 742 743 #ifdef HAS_SCALEROWUP2_LINEAR_16_AVX2 744 SUH2LANY(ScaleRowUp2_Linear_16_Any_AVX2, 745 ScaleRowUp2_Linear_16_AVX2, 746 ScaleRowUp2_Linear_16_C, 747 15, 748 uint16_t) 749 #endif 750 751 #ifdef HAS_SCALEROWUP2_LINEAR_NEON 752 SUH2LANY(ScaleRowUp2_Linear_Any_NEON, 753 ScaleRowUp2_Linear_NEON, 754 ScaleRowUp2_Linear_C, 755 15, 756 uint8_t) 757 #endif 758 759 #ifdef HAS_SCALEROWUP2_LINEAR_12_NEON 760 SUH2LANY(ScaleRowUp2_Linear_12_Any_NEON, 761 ScaleRowUp2_Linear_12_NEON, 762 ScaleRowUp2_Linear_16_C, 763 15, 764 uint16_t) 765 #endif 766 767 #ifdef HAS_SCALEROWUP2_LINEAR_16_NEON 768 SUH2LANY(ScaleRowUp2_Linear_16_Any_NEON, 769 ScaleRowUp2_Linear_16_NEON, 770 ScaleRowUp2_Linear_16_C, 771 15, 772 uint16_t) 773 #endif 774 775 #undef SUH2LANY 776 777 // Scale up 2 times using bilinear filter. 778 // This function produces 2 rows at a time. 
779 #define SU2BLANY(NAME, SIMD, C, MASK, PTYPE) \ 780 void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \ 781 ptrdiff_t dst_stride, int dst_width) { \ 782 int work_width = (dst_width - 1) & ~1; \ 783 int r = work_width & MASK; \ 784 int n = work_width & ~MASK; \ 785 const PTYPE* sa = src_ptr; \ 786 const PTYPE* sb = src_ptr + src_stride; \ 787 PTYPE* da = dst_ptr; \ 788 PTYPE* db = dst_ptr + dst_stride; \ 789 da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ 790 db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ 791 if (work_width > 0) { \ 792 if (n != 0) { \ 793 SIMD(sa, sb - sa, da + 1, db - da, n); \ 794 } \ 795 C(sa + (n / 2), sb - sa, da + n + 1, db - da, r); \ 796 } \ 797 da[dst_width - 1] = \ 798 (3 * sa[(dst_width - 1) / 2] + sb[(dst_width - 1) / 2] + 2) >> 2; \ 799 db[dst_width - 1] = \ 800 (sa[(dst_width - 1) / 2] + 3 * sb[(dst_width - 1) / 2] + 2) >> 2; \ 801 } 802 803 SU2BLANY(ScaleRowUp2_Bilinear_Any_C, 804 ScaleRowUp2_Bilinear_C, 805 ScaleRowUp2_Bilinear_C, 806 0, 807 uint8_t) 808 809 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_C, 810 ScaleRowUp2_Bilinear_16_C, 811 ScaleRowUp2_Bilinear_16_C, 812 0, 813 uint16_t) 814 815 #ifdef HAS_SCALEROWUP2_BILINEAR_SSE2 816 SU2BLANY(ScaleRowUp2_Bilinear_Any_SSE2, 817 ScaleRowUp2_Bilinear_SSE2, 818 ScaleRowUp2_Bilinear_C, 819 15, 820 uint8_t) 821 #endif 822 823 #ifdef HAS_SCALEROWUP2_BILINEAR_12_SSSE3 824 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_SSSE3, 825 ScaleRowUp2_Bilinear_12_SSSE3, 826 ScaleRowUp2_Bilinear_16_C, 827 15, 828 uint16_t) 829 #endif 830 831 #ifdef HAS_SCALEROWUP2_BILINEAR_16_SSE2 832 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_SSE2, 833 ScaleRowUp2_Bilinear_16_SSE2, 834 ScaleRowUp2_Bilinear_16_C, 835 7, 836 uint16_t) 837 #endif 838 839 #ifdef HAS_SCALEROWUP2_BILINEAR_SSSE3 840 SU2BLANY(ScaleRowUp2_Bilinear_Any_SSSE3, 841 ScaleRowUp2_Bilinear_SSSE3, 842 ScaleRowUp2_Bilinear_C, 843 15, 844 uint8_t) 845 #endif 846 847 #ifdef HAS_SCALEROWUP2_BILINEAR_AVX2 848 SU2BLANY(ScaleRowUp2_Bilinear_Any_AVX2, 849 
ScaleRowUp2_Bilinear_AVX2, 850 ScaleRowUp2_Bilinear_C, 851 31, 852 uint8_t) 853 #endif 854 855 #ifdef HAS_SCALEROWUP2_BILINEAR_12_AVX2 856 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_AVX2, 857 ScaleRowUp2_Bilinear_12_AVX2, 858 ScaleRowUp2_Bilinear_16_C, 859 15, 860 uint16_t) 861 #endif 862 863 #ifdef HAS_SCALEROWUP2_BILINEAR_16_AVX2 864 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_AVX2, 865 ScaleRowUp2_Bilinear_16_AVX2, 866 ScaleRowUp2_Bilinear_16_C, 867 15, 868 uint16_t) 869 #endif 870 871 #ifdef HAS_SCALEROWUP2_BILINEAR_NEON 872 SU2BLANY(ScaleRowUp2_Bilinear_Any_NEON, 873 ScaleRowUp2_Bilinear_NEON, 874 ScaleRowUp2_Bilinear_C, 875 15, 876 uint8_t) 877 #endif 878 879 #ifdef HAS_SCALEROWUP2_BILINEAR_12_NEON 880 SU2BLANY(ScaleRowUp2_Bilinear_12_Any_NEON, 881 ScaleRowUp2_Bilinear_12_NEON, 882 ScaleRowUp2_Bilinear_16_C, 883 15, 884 uint16_t) 885 #endif 886 887 #ifdef HAS_SCALEROWUP2_BILINEAR_16_NEON 888 SU2BLANY(ScaleRowUp2_Bilinear_16_Any_NEON, 889 ScaleRowUp2_Bilinear_16_NEON, 890 ScaleRowUp2_Bilinear_16_C, 891 7, 892 uint16_t) 893 #endif 894 895 #undef SU2BLANY 896 897 // Scale bi-planar plane up horizontally 2 times using linear filter. 
898 #define SBUH2LANY(NAME, SIMD, C, MASK, PTYPE) \ 899 void NAME(const PTYPE* src_ptr, PTYPE* dst_ptr, int dst_width) { \ 900 int work_width = (dst_width - 1) & ~1; \ 901 int r = work_width & MASK; \ 902 int n = work_width & ~MASK; \ 903 dst_ptr[0] = src_ptr[0]; \ 904 dst_ptr[1] = src_ptr[1]; \ 905 if (work_width > 0) { \ 906 if (n != 0) { \ 907 SIMD(src_ptr, dst_ptr + 2, n); \ 908 } \ 909 C(src_ptr + n, dst_ptr + 2 * n + 2, r); \ 910 } \ 911 dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2]; \ 912 dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1]; \ 913 } 914 915 SBUH2LANY(ScaleUVRowUp2_Linear_Any_C, 916 ScaleUVRowUp2_Linear_C, 917 ScaleUVRowUp2_Linear_C, 918 0, 919 uint8_t) 920 921 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_C, 922 ScaleUVRowUp2_Linear_16_C, 923 ScaleUVRowUp2_Linear_16_C, 924 0, 925 uint16_t) 926 927 #ifdef HAS_SCALEUVROWUP2_LINEAR_SSSE3 928 SBUH2LANY(ScaleUVRowUp2_Linear_Any_SSSE3, 929 ScaleUVRowUp2_Linear_SSSE3, 930 ScaleUVRowUp2_Linear_C, 931 7, 932 uint8_t) 933 #endif 934 935 #ifdef HAS_SCALEUVROWUP2_LINEAR_AVX2 936 SBUH2LANY(ScaleUVRowUp2_Linear_Any_AVX2, 937 ScaleUVRowUp2_Linear_AVX2, 938 ScaleUVRowUp2_Linear_C, 939 15, 940 uint8_t) 941 #endif 942 943 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_SSE41 944 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_SSE41, 945 ScaleUVRowUp2_Linear_16_SSE41, 946 ScaleUVRowUp2_Linear_16_C, 947 3, 948 uint16_t) 949 #endif 950 951 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_AVX2 952 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_AVX2, 953 ScaleUVRowUp2_Linear_16_AVX2, 954 ScaleUVRowUp2_Linear_16_C, 955 7, 956 uint16_t) 957 #endif 958 959 #ifdef HAS_SCALEUVROWUP2_LINEAR_NEON 960 SBUH2LANY(ScaleUVRowUp2_Linear_Any_NEON, 961 ScaleUVRowUp2_Linear_NEON, 962 ScaleUVRowUp2_Linear_C, 963 15, 964 uint8_t) 965 #endif 966 967 #ifdef HAS_SCALEUVROWUP2_LINEAR_16_NEON 968 SBUH2LANY(ScaleUVRowUp2_Linear_16_Any_NEON, 969 ScaleUVRowUp2_Linear_16_NEON, 970 ScaleUVRowUp2_Linear_16_C, 971 15, 972 uint16_t) 973 #endif 974 975 #undef 
SBUH2LANY 976 977 // Scale bi-planar plane up 2 times using bilinear filter. 978 // This function produces 2 rows at a time. 979 #define SBU2BLANY(NAME, SIMD, C, MASK, PTYPE) \ 980 void NAME(const PTYPE* src_ptr, ptrdiff_t src_stride, PTYPE* dst_ptr, \ 981 ptrdiff_t dst_stride, int dst_width) { \ 982 int work_width = (dst_width - 1) & ~1; \ 983 int r = work_width & MASK; \ 984 int n = work_width & ~MASK; \ 985 const PTYPE* sa = src_ptr; \ 986 const PTYPE* sb = src_ptr + src_stride; \ 987 PTYPE* da = dst_ptr; \ 988 PTYPE* db = dst_ptr + dst_stride; \ 989 da[0] = (3 * sa[0] + sb[0] + 2) >> 2; \ 990 db[0] = (sa[0] + 3 * sb[0] + 2) >> 2; \ 991 da[1] = (3 * sa[1] + sb[1] + 2) >> 2; \ 992 db[1] = (sa[1] + 3 * sb[1] + 2) >> 2; \ 993 if (work_width > 0) { \ 994 if (n != 0) { \ 995 SIMD(sa, sb - sa, da + 2, db - da, n); \ 996 } \ 997 C(sa + n, sb - sa, da + 2 * n + 2, db - da, r); \ 998 } \ 999 da[2 * dst_width - 2] = (3 * sa[((dst_width + 1) & ~1) - 2] + \ 1000 sb[((dst_width + 1) & ~1) - 2] + 2) >> \ 1001 2; \ 1002 db[2 * dst_width - 2] = (sa[((dst_width + 1) & ~1) - 2] + \ 1003 3 * sb[((dst_width + 1) & ~1) - 2] + 2) >> \ 1004 2; \ 1005 da[2 * dst_width - 1] = (3 * sa[((dst_width + 1) & ~1) - 1] + \ 1006 sb[((dst_width + 1) & ~1) - 1] + 2) >> \ 1007 2; \ 1008 db[2 * dst_width - 1] = (sa[((dst_width + 1) & ~1) - 1] + \ 1009 3 * sb[((dst_width + 1) & ~1) - 1] + 2) >> \ 1010 2; \ 1011 } 1012 1013 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_C, 1014 ScaleUVRowUp2_Bilinear_C, 1015 ScaleUVRowUp2_Bilinear_C, 1016 0, 1017 uint8_t) 1018 1019 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_C, 1020 ScaleUVRowUp2_Bilinear_16_C, 1021 ScaleUVRowUp2_Bilinear_16_C, 1022 0, 1023 uint16_t) 1024 1025 #ifdef HAS_SCALEUVROWUP2_BILINEAR_SSSE3 1026 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_SSSE3, 1027 ScaleUVRowUp2_Bilinear_SSSE3, 1028 ScaleUVRowUp2_Bilinear_C, 1029 7, 1030 uint8_t) 1031 #endif 1032 1033 #ifdef HAS_SCALEUVROWUP2_BILINEAR_AVX2 1034 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_AVX2, 1035 
ScaleUVRowUp2_Bilinear_AVX2, 1036 ScaleUVRowUp2_Bilinear_C, 1037 15, 1038 uint8_t) 1039 #endif 1040 1041 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_SSE41 1042 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_SSE41, 1043 ScaleUVRowUp2_Bilinear_16_SSE41, 1044 ScaleUVRowUp2_Bilinear_16_C, 1045 7, 1046 uint16_t) 1047 #endif 1048 1049 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_AVX2 1050 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_AVX2, 1051 ScaleUVRowUp2_Bilinear_16_AVX2, 1052 ScaleUVRowUp2_Bilinear_16_C, 1053 7, 1054 uint16_t) 1055 #endif 1056 1057 #ifdef HAS_SCALEUVROWUP2_BILINEAR_NEON 1058 SBU2BLANY(ScaleUVRowUp2_Bilinear_Any_NEON, 1059 ScaleUVRowUp2_Bilinear_NEON, 1060 ScaleUVRowUp2_Bilinear_C, 1061 7, 1062 uint8_t) 1063 #endif 1064 1065 #ifdef HAS_SCALEUVROWUP2_BILINEAR_16_NEON 1066 SBU2BLANY(ScaleUVRowUp2_Bilinear_16_Any_NEON, 1067 ScaleUVRowUp2_Bilinear_16_NEON, 1068 ScaleUVRowUp2_Bilinear_16_C, 1069 7, 1070 uint16_t) 1071 #endif 1072 1073 #undef SBU2BLANY 1074 1075 #ifdef __cplusplus 1076 } // extern "C" 1077 } // namespace libyuv 1078 #endif 1079