1 /* 2 * Copyright 2015 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <string.h> // For memset/memcpy 12 13 #include "libyuv/scale.h" 14 #include "libyuv/scale_row.h" 15 16 #include "libyuv/basic_types.h" 17 18 #ifdef __cplusplus 19 namespace libyuv { 20 extern "C" { 21 #endif 22 23 // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols 24 #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ 25 void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \ 26 int dx) { \ 27 int r = dst_width & MASK; \ 28 int n = dst_width & ~MASK; \ 29 if (n > 0) { \ 30 TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ 31 } \ 32 TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ 33 } 34 35 #ifdef HAS_SCALEFILTERCOLS_NEON 36 CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) 37 #endif 38 #ifdef HAS_SCALEFILTERCOLS_MSA 39 CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15) 40 #endif 41 #ifdef HAS_SCALEARGBCOLS_NEON 42 CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) 43 #endif 44 #ifdef HAS_SCALEARGBCOLS_MSA 45 CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) 46 #endif 47 #ifdef HAS_SCALEARGBCOLS_MMI 48 CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C, 4, 0) 49 #endif 50 #ifdef HAS_SCALEARGBFILTERCOLS_NEON 51 CANY(ScaleARGBFilterCols_Any_NEON, 52 ScaleARGBFilterCols_NEON, 53 ScaleARGBFilterCols_C, 54 4, 55 3) 56 #endif 57 #ifdef HAS_SCALEARGBFILTERCOLS_MSA 58 CANY(ScaleARGBFilterCols_Any_MSA, 59 ScaleARGBFilterCols_MSA, 60 ScaleARGBFilterCols_C, 61 4, 62 7) 63 #endif 64 #undef CANY 65 66 // Fixed scale down. 67 // Mask may be non-power of 2, so use MOD 68 #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ 69 void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ 70 int dst_width) { \ 71 int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ 72 int n = dst_width - r; \ 73 if (n > 0) { \ 74 SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ 75 } \ 76 SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ 77 dst_ptr + n * BPP, r); \ 78 } 79 80 // Fixed scale down for odd source width. Used by I420Blend subsampling. 81 // Since dst_width is (width + 1) / 2, this function scales one less pixel 82 // and copies the last pixel. 83 #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ 84 void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ 85 int dst_width) { \ 86 int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ 87 int n = (dst_width - 1) - r; \ 88 if (n > 0) { \ 89 SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ 90 } \ 91 SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ 92 dst_ptr + n * BPP, r + 1); \ 93 } 94 95 #ifdef HAS_SCALEROWDOWN2_SSSE3 96 SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) 97 SDANY(ScaleRowDown2Linear_Any_SSSE3, 98 ScaleRowDown2Linear_SSSE3, 99 ScaleRowDown2Linear_C, 100 2, 101 1, 102 15) 103 SDANY(ScaleRowDown2Box_Any_SSSE3, 104 ScaleRowDown2Box_SSSE3, 105 ScaleRowDown2Box_C, 106 2, 107 1, 108 15) 109 SDODD(ScaleRowDown2Box_Odd_SSSE3, 110 ScaleRowDown2Box_SSSE3, 111 ScaleRowDown2Box_Odd_C, 112 2, 113 1, 114 15) 115 #endif 116 #ifdef HAS_SCALEROWDOWN2_AVX2 117 SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) 118 SDANY(ScaleRowDown2Linear_Any_AVX2, 119 ScaleRowDown2Linear_AVX2, 120 ScaleRowDown2Linear_C, 121 2, 122 1, 123 31) 124 SDANY(ScaleRowDown2Box_Any_AVX2, 125 ScaleRowDown2Box_AVX2, 126 ScaleRowDown2Box_C, 127 2, 128 1, 129 31) 130 SDODD(ScaleRowDown2Box_Odd_AVX2, 131 ScaleRowDown2Box_AVX2, 132 ScaleRowDown2Box_Odd_C, 133 2, 134 1, 135 31) 136 #endif 137 #ifdef HAS_SCALEROWDOWN2_NEON 138 SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) 139 SDANY(ScaleRowDown2Linear_Any_NEON, 140 ScaleRowDown2Linear_NEON, 141 ScaleRowDown2Linear_C, 142 2, 143 1, 144 15) 145 SDANY(ScaleRowDown2Box_Any_NEON, 146 ScaleRowDown2Box_NEON, 147 ScaleRowDown2Box_C, 148 2, 149 1, 150 15) 151 SDODD(ScaleRowDown2Box_Odd_NEON, 152 ScaleRowDown2Box_NEON, 153 ScaleRowDown2Box_Odd_C, 154 2, 155 1, 156 15) 157 #endif 158 #ifdef HAS_SCALEROWDOWN2_MSA 159 SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31) 160 SDANY(ScaleRowDown2Linear_Any_MSA, 161 ScaleRowDown2Linear_MSA, 162 ScaleRowDown2Linear_C, 163 2, 164 1, 165 31) 166 SDANY(ScaleRowDown2Box_Any_MSA, 167 ScaleRowDown2Box_MSA, 168 ScaleRowDown2Box_C, 169 2, 170 1, 171 31) 172 #endif 173 #ifdef HAS_SCALEROWDOWN2_MMI 174 SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C, 2, 1, 7) 175 SDANY(ScaleRowDown2Linear_Any_MMI, 176 ScaleRowDown2Linear_MMI, 177 ScaleRowDown2Linear_C, 178 2, 179 1, 180 7) 181 SDANY(ScaleRowDown2Box_Any_MMI, 182 ScaleRowDown2Box_MMI, 183 ScaleRowDown2Box_C, 184 2, 185 1, 186 7) 187 SDODD(ScaleRowDown2Box_Odd_MMI, 188 ScaleRowDown2Box_MMI, 189 ScaleRowDown2Box_Odd_C, 190 2, 191 1, 192 7) 193 #endif 194 #ifdef HAS_SCALEROWDOWN4_SSSE3 195 SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) 196 SDANY(ScaleRowDown4Box_Any_SSSE3, 197 ScaleRowDown4Box_SSSE3, 198 ScaleRowDown4Box_C, 199 4, 200 1, 201 7) 202 #endif 203 #ifdef HAS_SCALEROWDOWN4_AVX2 204 SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15) 205 SDANY(ScaleRowDown4Box_Any_AVX2, 206 ScaleRowDown4Box_AVX2, 207 ScaleRowDown4Box_C, 208 4, 209 1, 210 15) 211 #endif 212 #ifdef HAS_SCALEROWDOWN4_NEON 213 SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7) 214 SDANY(ScaleRowDown4Box_Any_NEON, 215 ScaleRowDown4Box_NEON, 216 ScaleRowDown4Box_C, 217 4, 218 1, 219 7) 220 #endif 221 #ifdef HAS_SCALEROWDOWN4_MSA 222 SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15) 223 SDANY(ScaleRowDown4Box_Any_MSA, 224 ScaleRowDown4Box_MSA, 225 ScaleRowDown4Box_C, 226 4, 227 1, 228 15) 229 #endif 230 #ifdef HAS_SCALEROWDOWN4_MMI 231 SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C, 4, 1, 7) 232 SDANY(ScaleRowDown4Box_Any_MMI, 233 ScaleRowDown4Box_MMI, 234 ScaleRowDown4Box_C, 235 4, 236 1, 237 7) 238 #endif 239 #ifdef HAS_SCALEROWDOWN34_SSSE3 240 SDANY(ScaleRowDown34_Any_SSSE3, 241 ScaleRowDown34_SSSE3, 242 ScaleRowDown34_C, 243 4 / 3, 244 1, 245 23) 246 SDANY(ScaleRowDown34_0_Box_Any_SSSE3, 247 ScaleRowDown34_0_Box_SSSE3, 248 ScaleRowDown34_0_Box_C, 249 4 / 3, 250 1, 251 23) 252 SDANY(ScaleRowDown34_1_Box_Any_SSSE3, 253 ScaleRowDown34_1_Box_SSSE3, 254 ScaleRowDown34_1_Box_C, 255 4 / 3, 256 1, 257 23) 258 #endif 259 #ifdef HAS_SCALEROWDOWN34_NEON 260 SDANY(ScaleRowDown34_Any_NEON, 261 ScaleRowDown34_NEON, 262 ScaleRowDown34_C, 263 4 / 3, 264 1, 265 23) 266 SDANY(ScaleRowDown34_0_Box_Any_NEON, 267 ScaleRowDown34_0_Box_NEON, 268 ScaleRowDown34_0_Box_C, 269 4 / 3, 270 1, 271 23) 272 SDANY(ScaleRowDown34_1_Box_Any_NEON, 273 ScaleRowDown34_1_Box_NEON, 274 ScaleRowDown34_1_Box_C, 275 4 / 3, 276 1, 277 23) 278 #endif 279 #ifdef HAS_SCALEROWDOWN34_MSA 280 SDANY(ScaleRowDown34_Any_MSA, 281 ScaleRowDown34_MSA, 282 ScaleRowDown34_C, 283 4 / 3, 284 1, 285 47) 286 SDANY(ScaleRowDown34_0_Box_Any_MSA, 287 ScaleRowDown34_0_Box_MSA, 288 ScaleRowDown34_0_Box_C, 289 4 / 3, 290 1, 291 47) 292 SDANY(ScaleRowDown34_1_Box_Any_MSA, 293 ScaleRowDown34_1_Box_MSA, 294 ScaleRowDown34_1_Box_C, 295 4 / 3, 296 1, 297 47) 298 #endif 299 #ifdef HAS_SCALEROWDOWN38_SSSE3 300 SDANY(ScaleRowDown38_Any_SSSE3, 301 ScaleRowDown38_SSSE3, 302 ScaleRowDown38_C, 303 8 / 3, 304 1, 305 11) 306 SDANY(ScaleRowDown38_3_Box_Any_SSSE3, 307 ScaleRowDown38_3_Box_SSSE3, 308 ScaleRowDown38_3_Box_C, 309 8 / 3, 310 1, 311 5) 312 SDANY(ScaleRowDown38_2_Box_Any_SSSE3, 313 ScaleRowDown38_2_Box_SSSE3, 314 ScaleRowDown38_2_Box_C, 315 8 / 3, 316 1, 317 5) 318 #endif 319 #ifdef HAS_SCALEROWDOWN38_NEON 320 SDANY(ScaleRowDown38_Any_NEON, 321 ScaleRowDown38_NEON, 322 ScaleRowDown38_C, 323 8 / 3, 324 1, 325 11) 326 SDANY(ScaleRowDown38_3_Box_Any_NEON, 327 ScaleRowDown38_3_Box_NEON, 328 ScaleRowDown38_3_Box_C, 329 8 / 3, 330 1, 331 11) 332 SDANY(ScaleRowDown38_2_Box_Any_NEON, 333 ScaleRowDown38_2_Box_NEON, 334 ScaleRowDown38_2_Box_C, 335 8 / 3, 336 1, 337 11) 338 #endif 339 #ifdef HAS_SCALEROWDOWN38_MSA 340 SDANY(ScaleRowDown38_Any_MSA, 341 ScaleRowDown38_MSA, 342 ScaleRowDown38_C, 343 8 / 3, 344 1, 345 11) 346 SDANY(ScaleRowDown38_3_Box_Any_MSA, 347 ScaleRowDown38_3_Box_MSA, 348 ScaleRowDown38_3_Box_C, 349 8 / 3, 350 1, 351 11) 352 SDANY(ScaleRowDown38_2_Box_Any_MSA, 353 ScaleRowDown38_2_Box_MSA, 354 ScaleRowDown38_2_Box_C, 355 8 / 3, 356 1, 357 11) 358 #endif 359 360 #ifdef HAS_SCALEARGBROWDOWN2_SSE2 361 SDANY(ScaleARGBRowDown2_Any_SSE2, 362 ScaleARGBRowDown2_SSE2, 363 ScaleARGBRowDown2_C, 364 2, 365 4, 366 3) 367 SDANY(ScaleARGBRowDown2Linear_Any_SSE2, 368 ScaleARGBRowDown2Linear_SSE2, 369 ScaleARGBRowDown2Linear_C, 370 2, 371 4, 372 3) 373 SDANY(ScaleARGBRowDown2Box_Any_SSE2, 374 ScaleARGBRowDown2Box_SSE2, 375 ScaleARGBRowDown2Box_C, 376 2, 377 4, 378 3) 379 #endif 380 #ifdef HAS_SCALEARGBROWDOWN2_NEON 381 SDANY(ScaleARGBRowDown2_Any_NEON, 382 ScaleARGBRowDown2_NEON, 383 ScaleARGBRowDown2_C, 384 2, 385 4, 386 7) 387 SDANY(ScaleARGBRowDown2Linear_Any_NEON, 388 ScaleARGBRowDown2Linear_NEON, 389 ScaleARGBRowDown2Linear_C, 390 2, 391 4, 392 7) 393 SDANY(ScaleARGBRowDown2Box_Any_NEON, 394 ScaleARGBRowDown2Box_NEON, 395 ScaleARGBRowDown2Box_C, 396 2, 397 4, 398 7) 399 #endif 400 #ifdef HAS_SCALEARGBROWDOWN2_MSA 401 SDANY(ScaleARGBRowDown2_Any_MSA, 402 ScaleARGBRowDown2_MSA, 403 ScaleARGBRowDown2_C, 404 2, 405 4, 406 3) 407 SDANY(ScaleARGBRowDown2Linear_Any_MSA, 408 ScaleARGBRowDown2Linear_MSA, 409 ScaleARGBRowDown2Linear_C, 410 2, 411 4, 412 3) 413 SDANY(ScaleARGBRowDown2Box_Any_MSA, 414 ScaleARGBRowDown2Box_MSA, 415 ScaleARGBRowDown2Box_C, 416 2, 417 4, 418 3) 419 #endif 420 #ifdef HAS_SCALEARGBROWDOWN2_MMI 421 SDANY(ScaleARGBRowDown2_Any_MMI, 422 ScaleARGBRowDown2_MMI, 423 ScaleARGBRowDown2_C, 424 2, 425 4, 426 1) 427 SDANY(ScaleARGBRowDown2Linear_Any_MMI, 428 ScaleARGBRowDown2Linear_MMI, 429 ScaleARGBRowDown2Linear_C, 430 2, 431 4, 432 1) 433 SDANY(ScaleARGBRowDown2Box_Any_MMI, 434 ScaleARGBRowDown2Box_MMI, 435 ScaleARGBRowDown2Box_C, 436 2, 437 4, 438 1) 439 #endif 440 #undef SDANY 441 442 // Scale down by even scale factor. 443 #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ 444 void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \ 445 uint8_t* dst_ptr, int dst_width) { \ 446 int r = dst_width & MASK; \ 447 int n = dst_width & ~MASK; \ 448 if (n > 0) { \ 449 SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ 450 } \ 451 SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ 452 dst_ptr + n * BPP, r); \ 453 } 454 455 #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 456 SDAANY(ScaleARGBRowDownEven_Any_SSE2, 457 ScaleARGBRowDownEven_SSE2, 458 ScaleARGBRowDownEven_C, 459 4, 460 3) 461 SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, 462 ScaleARGBRowDownEvenBox_SSE2, 463 ScaleARGBRowDownEvenBox_C, 464 4, 465 3) 466 #endif 467 #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON 468 SDAANY(ScaleARGBRowDownEven_Any_NEON, 469 ScaleARGBRowDownEven_NEON, 470 ScaleARGBRowDownEven_C, 471 4, 472 3) 473 SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, 474 ScaleARGBRowDownEvenBox_NEON, 475 ScaleARGBRowDownEvenBox_C, 476 4, 477 3) 478 #endif 479 #ifdef HAS_SCALEARGBROWDOWNEVEN_MSA 480 SDAANY(ScaleARGBRowDownEven_Any_MSA, 481 ScaleARGBRowDownEven_MSA, 482 ScaleARGBRowDownEven_C, 483 4, 484 3) 485 SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, 486 ScaleARGBRowDownEvenBox_MSA, 487 ScaleARGBRowDownEvenBox_C, 488 4, 489 3) 490 #endif 491 #ifdef HAS_SCALEARGBROWDOWNEVEN_MMI 492 SDAANY(ScaleARGBRowDownEven_Any_MMI, 493 ScaleARGBRowDownEven_MMI, 494 ScaleARGBRowDownEven_C, 495 4, 496 1) 497 SDAANY(ScaleARGBRowDownEvenBox_Any_MMI, 498 ScaleARGBRowDownEvenBox_MMI, 499 ScaleARGBRowDownEvenBox_C, 500 4, 501 1) 502 #endif 503 504 #ifdef SASIMDONLY 505 // This also works and uses memcpy and SIMD instead of C, but is slower on ARM 506 507 // Add rows box filter scale down. Using macro from row_any 508 #define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ 509 void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \ 510 SIMD_ALIGNED(uint16_t dst_temp[32]); \ 511 SIMD_ALIGNED(uint8_t src_temp[32]); \ 512 memset(dst_temp, 0, 32 * 2); /* for msan */ \ 513 int r = width & MASK; \ 514 int n = width & ~MASK; \ 515 if (n > 0) { \ 516 ANY_SIMD(src_ptr, dst_ptr, n); \ 517 } \ 518 memcpy(src_temp, src_ptr + n * SBPP, r * SBPP); \ 519 memcpy(dst_temp, dst_ptr + n * BPP, r * BPP); \ 520 ANY_SIMD(src_temp, dst_temp, MASK + 1); \ 521 memcpy(dst_ptr + n * BPP, dst_temp, r * BPP); \ 522 } 523 524 #ifdef HAS_SCALEADDROW_SSE2 525 SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15) 526 #endif 527 #ifdef HAS_SCALEADDROW_AVX2 528 SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31) 529 #endif 530 #ifdef HAS_SCALEADDROW_NEON 531 SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15) 532 #endif 533 #ifdef HAS_SCALEADDROW_MSA 534 SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15) 535 #endif 536 #ifdef HAS_SCALEADDROW_MMI 537 SAROW(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, 1, 2, 7) 538 #endif 539 #undef SAANY 540 541 #else 542 543 // Add rows box filter scale down. 544 #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ 545 void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ 546 int n = src_width & ~MASK; \ 547 if (n > 0) { \ 548 SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ 549 } \ 550 SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ 551 } 552 553 #ifdef HAS_SCALEADDROW_SSE2 554 SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15) 555 #endif 556 #ifdef HAS_SCALEADDROW_AVX2 557 SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31) 558 #endif 559 #ifdef HAS_SCALEADDROW_NEON 560 SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) 561 #endif 562 #ifdef HAS_SCALEADDROW_MSA 563 SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) 564 #endif 565 #ifdef HAS_SCALEADDROW_MMI 566 SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7) 567 #endif 568 #undef SAANY 569 570 #endif // SASIMDONLY 571 572 #ifdef __cplusplus 573 } // extern "C" 574 } // namespace libyuv 575 #endif 576