1 /* 2 * Copyright 2015 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/scale.h" 12 #include "libyuv/scale_row.h" 13 14 #include "libyuv/basic_types.h" 15 16 #ifdef __cplusplus 17 namespace libyuv { 18 extern "C" { 19 #endif 20 21 // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols 22 #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ 23 void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \ 24 int dx) { \ 25 int r = dst_width & MASK; \ 26 int n = dst_width & ~MASK; \ 27 if (n > 0) { \ 28 TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ 29 } \ 30 TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ 31 } 32 33 #ifdef HAS_SCALEFILTERCOLS_NEON 34 CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) 35 #endif 36 #ifdef HAS_SCALEFILTERCOLS_MSA 37 CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15) 38 #endif 39 #ifdef HAS_SCALEARGBCOLS_NEON 40 CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) 41 #endif 42 #ifdef HAS_SCALEARGBCOLS_MSA 43 CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) 44 #endif 45 #ifdef HAS_SCALEARGBFILTERCOLS_NEON 46 CANY(ScaleARGBFilterCols_Any_NEON, 47 ScaleARGBFilterCols_NEON, 48 ScaleARGBFilterCols_C, 49 4, 50 3) 51 #endif 52 #ifdef HAS_SCALEARGBFILTERCOLS_MSA 53 CANY(ScaleARGBFilterCols_Any_MSA, 54 ScaleARGBFilterCols_MSA, 55 ScaleARGBFilterCols_C, 56 4, 57 7) 58 #endif 59 #undef CANY 60 61 // Fixed scale down. 
// Mask may be non-power of 2, so use MOD
//
// SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) defines
// NAMEANY, a wrapper that accepts any dst_width:
//   - the leading part of the row, dst_width rounded down to a multiple of
//     MASK + 1, is produced by SCALEROWDOWN_SIMD (skipped when empty);
//   - the remaining dst_width % (MASK + 1) pixels are produced by
//     SCALEROWDOWN_C, which is always called.
// The C call starts BPP bytes further into dst_ptr, and FACTOR * BPP bytes
// further into src_ptr, for every pixel the SIMD call produced.
//
// FACTOR is substituted without parentheses on purpose: the fractional
// factors used below (4 / 3 and 8 / 3) must expand to n * 4 / 3 and
// n * 8 / 3, i.e. multiply first and divide second. Wrapping FACTOR in
// parentheses would truncate 4 / 3 to 1 and 8 / 3 to 2.
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK)   \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
               int dst_width) {                                                \
    const int tail_width =                                                     \
        (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */              \
    const int simd_width = dst_width - tail_width;                             \
    if (simd_width > 0) {                                                      \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, simd_width);             \
    }                                                                          \
    SCALEROWDOWN_C(src_ptr + (simd_width * FACTOR) * BPP, src_stride,          \
                   dst_ptr + simd_width * BPP, tail_width);                    \
  }

// Fixed scale down for odd source width.  Used by I420Blend subsampling.
// Since dst_width is (width + 1) / 2, this function scales one less pixel
// and copies the last pixel.
//
// The SIMD/C split is computed on dst_width - 1, and the C function receives
// one extra pixel (remainder + 1), so the final pixel of the row always goes
// through SCALEROWDOWN_C. dst_width must be at least 1: the remainder is
// taken on dst_width - 1 converted to unsigned, which wraps for smaller
// values.
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK)   \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
               int dst_width) {                                                \
    const int tail_width =                                                     \
        (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */        \
    const int simd_width = (dst_width - 1) - tail_width;                       \
    if (simd_width > 0) {                                                      \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, simd_width);             \
    }                                                                          \
    SCALEROWDOWN_C(src_ptr + (simd_width * FACTOR) * BPP, src_stride,          \
                   dst_ptr + simd_width * BPP, tail_width + 1);                \
  }

#ifdef HAS_SCALEROWDOWN2_SSSE3
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_SSSE3,
      ScaleRowDown2Linear_SSSE3,
      ScaleRowDown2Linear_C,
      2,
      1,
      15)
SDANY(ScaleRowDown2Box_Any_SSSE3,
      ScaleRowDown2Box_SSSE3,
      ScaleRowDown2Box_C,
      2,
      1,
      15)
SDODD(ScaleRowDown2Box_Odd_SSSE3,
      ScaleRowDown2Box_SSSE3,
      ScaleRowDown2Box_Odd_C,
      2,
      1,
      15)
#endif
#ifdef HAS_SCALEROWDOWN2_AVX2
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
SDANY(ScaleRowDown2Linear_Any_AVX2,
      ScaleRowDown2Linear_AVX2,
      ScaleRowDown2Linear_C,
      2,
      1,
      31)
SDANY(ScaleRowDown2Box_Any_AVX2,
      ScaleRowDown2Box_AVX2,
      ScaleRowDown2Box_C,
      2,
      1,
      31)
SDODD(ScaleRowDown2Box_Odd_AVX2,
      ScaleRowDown2Box_AVX2,
      ScaleRowDown2Box_Odd_C,
      2,
      1,
      31)
#endif
#ifdef HAS_SCALEROWDOWN2_NEON
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
SDANY(ScaleRowDown2Linear_Any_NEON,
      ScaleRowDown2Linear_NEON,
      ScaleRowDown2Linear_C,
      2,
      1,
      15)
SDANY(ScaleRowDown2Box_Any_NEON,
      ScaleRowDown2Box_NEON,
      ScaleRowDown2Box_C,
      2,
      1,
      15)
SDODD(ScaleRowDown2Box_Odd_NEON,
      ScaleRowDown2Box_NEON,
      ScaleRowDown2Box_Odd_C,
      2,
      1,
      15)
#endif
#ifdef HAS_SCALEROWDOWN2_MSA
SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31)
SDANY(ScaleRowDown2Linear_Any_MSA,
      ScaleRowDown2Linear_MSA,
      ScaleRowDown2Linear_C,
      2,
      1,
      31)
SDANY(ScaleRowDown2Box_Any_MSA,
      ScaleRowDown2Box_MSA,
      ScaleRowDown2Box_C,
      2,
      1,
      31)
#endif
#ifdef HAS_SCALEROWDOWN4_SSSE3
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_SSSE3,
      ScaleRowDown4Box_SSSE3,
      ScaleRowDown4Box_C,
      4,
      1,
      7)
#endif
#ifdef HAS_SCALEROWDOWN4_AVX2
SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
SDANY(ScaleRowDown4Box_Any_AVX2,
      ScaleRowDown4Box_AVX2,
      ScaleRowDown4Box_C,
      4,
      1,
      15)
#endif
#ifdef HAS_SCALEROWDOWN4_NEON
SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
SDANY(ScaleRowDown4Box_Any_NEON,
      ScaleRowDown4Box_NEON,
      ScaleRowDown4Box_C,
      4,
      1,
      7)
#endif
#ifdef HAS_SCALEROWDOWN4_MSA
SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15)
SDANY(ScaleRowDown4Box_Any_MSA,
      ScaleRowDown4Box_MSA,
      ScaleRowDown4Box_C,
      4,
      1,
      15)
#endif
#ifdef HAS_SCALEROWDOWN34_SSSE3
SDANY(ScaleRowDown34_Any_SSSE3,
      ScaleRowDown34_SSSE3,
      ScaleRowDown34_C,
      4 / 3,
      1,
      23)
SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
      ScaleRowDown34_0_Box_SSSE3,
      ScaleRowDown34_0_Box_C,
      4 / 3,
      1,
      23)
SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
      ScaleRowDown34_1_Box_SSSE3,
      ScaleRowDown34_1_Box_C,
      4 / 3,
      1,
      23)
#endif
#ifdef HAS_SCALEROWDOWN34_NEON
SDANY(ScaleRowDown34_Any_NEON,
      ScaleRowDown34_NEON,
      ScaleRowDown34_C,
      4 / 3,
      1,
      23)
SDANY(ScaleRowDown34_0_Box_Any_NEON,
      ScaleRowDown34_0_Box_NEON,
      ScaleRowDown34_0_Box_C,
      4 / 3,
      1,
      23)
SDANY(ScaleRowDown34_1_Box_Any_NEON,
      ScaleRowDown34_1_Box_NEON,
      ScaleRowDown34_1_Box_C,
      4 / 3,
      1,
      23)
#endif
#ifdef HAS_SCALEROWDOWN34_MSA
SDANY(ScaleRowDown34_Any_MSA,
      ScaleRowDown34_MSA,
      ScaleRowDown34_C,
      4 / 3,
      1,
      47)
SDANY(ScaleRowDown34_0_Box_Any_MSA,
      ScaleRowDown34_0_Box_MSA,
      ScaleRowDown34_0_Box_C,
      4 / 3,
      1,
      47)
SDANY(ScaleRowDown34_1_Box_Any_MSA,
      ScaleRowDown34_1_Box_MSA,
      ScaleRowDown34_1_Box_C,
      4 / 3,
      1,
      47)
#endif
#ifdef HAS_SCALEROWDOWN38_SSSE3
SDANY(ScaleRowDown38_Any_SSSE3,
      ScaleRowDown38_SSSE3,
      ScaleRowDown38_C,
      8 / 3,
      1,
      11)
SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
      ScaleRowDown38_3_Box_SSSE3,
      ScaleRowDown38_3_Box_C,
      8 / 3,
      1,
      5)
SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
      ScaleRowDown38_2_Box_SSSE3,
      ScaleRowDown38_2_Box_C,
      8 / 3,
      1,
      5)
#endif
#ifdef HAS_SCALEROWDOWN38_NEON
SDANY(ScaleRowDown38_Any_NEON,
      ScaleRowDown38_NEON,
      ScaleRowDown38_C,
      8 / 3,
      1,
      11)
SDANY(ScaleRowDown38_3_Box_Any_NEON,
      ScaleRowDown38_3_Box_NEON,
      ScaleRowDown38_3_Box_C,
      8 / 3,
      1,
      11)
SDANY(ScaleRowDown38_2_Box_Any_NEON,
      ScaleRowDown38_2_Box_NEON,
      ScaleRowDown38_2_Box_C,
      8 / 3,
      1,
      11)
#endif
#ifdef HAS_SCALEROWDOWN38_MSA
SDANY(ScaleRowDown38_Any_MSA,
      ScaleRowDown38_MSA,
      ScaleRowDown38_C,
      8 / 3,
      1,
      11)
SDANY(ScaleRowDown38_3_Box_Any_MSA,
      ScaleRowDown38_3_Box_MSA,
      ScaleRowDown38_3_Box_C,
      8 / 3,
      1,
      11)
SDANY(ScaleRowDown38_2_Box_Any_MSA,
      ScaleRowDown38_2_Box_MSA,
      ScaleRowDown38_2_Box_C,
      8 / 3,
      1,
      11)
#endif

#ifdef HAS_SCALEARGBROWDOWN2_SSE2
SDANY(ScaleARGBRowDown2_Any_SSE2,
      ScaleARGBRowDown2_SSE2,
      ScaleARGBRowDown2_C,
      2,
      4,
      3)
SDANY(ScaleARGBRowDown2Linear_Any_SSE2,
      ScaleARGBRowDown2Linear_SSE2,
      ScaleARGBRowDown2Linear_C,
      2,
      4,
      3)
SDANY(ScaleARGBRowDown2Box_Any_SSE2,
      ScaleARGBRowDown2Box_SSE2,
      ScaleARGBRowDown2Box_C,
      2,
      4,
      3)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_NEON
SDANY(ScaleARGBRowDown2_Any_NEON,
      ScaleARGBRowDown2_NEON,
      ScaleARGBRowDown2_C,
      2,
      4,
      7)
SDANY(ScaleARGBRowDown2Linear_Any_NEON,
      ScaleARGBRowDown2Linear_NEON,
      ScaleARGBRowDown2Linear_C,
      2,
      4,
      7)
SDANY(ScaleARGBRowDown2Box_Any_NEON,
      ScaleARGBRowDown2Box_NEON,
      ScaleARGBRowDown2Box_C,
      2,
      4,
      7)
#endif
#ifdef HAS_SCALEARGBROWDOWN2_MSA
SDANY(ScaleARGBRowDown2_Any_MSA,
      ScaleARGBRowDown2_MSA,
      ScaleARGBRowDown2_C,
      2,
      4,
      3)
SDANY(ScaleARGBRowDown2Linear_Any_MSA,
      ScaleARGBRowDown2Linear_MSA,
      ScaleARGBRowDown2Linear_C,
      2,
      4,
      3)
SDANY(ScaleARGBRowDown2Box_Any_MSA,
      ScaleARGBRowDown2Box_MSA,
      ScaleARGBRowDown2Box_C,
      2,
      4,
      3)
#endif
#undef SDANY
#undef SDODD

// Scale down by even scale factor.
// SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) defines
// NAMEANY, a wrapper that accepts any dst_width:
//   - the leading dst_width & ~MASK pixels are produced by SCALEROWDOWN_SIMD
//     (the call is skipped when that count is zero);
//   - the remaining dst_width & MASK pixels are produced by SCALEROWDOWN_C,
//     which is always called.
// For every pixel the SIMD call produced, the C call starts BPP bytes further
// into dst_ptr and src_stepx * BPP bytes further into src_ptr.
// MASK is applied with '&', so it must be one less than a power of 2.
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK)          \
  void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx,    \
               uint8_t* dst_ptr, int dst_width) {                              \
    const int simd_width = dst_width & ~MASK;                                  \
    const int tail_width = dst_width & MASK;                                   \
    if (simd_width > 0) {                                                      \
      SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, simd_width);  \
    }                                                                          \
    SCALEROWDOWN_C(src_ptr + (simd_width * src_stepx) * BPP, src_stride,       \
                   src_stepx, dst_ptr + simd_width * BPP, tail_width);         \
  }

#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
SDAANY(ScaleARGBRowDownEven_Any_SSE2,
       ScaleARGBRowDownEven_SSE2,
       ScaleARGBRowDownEven_C,
       4,
       3)
SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2,
       ScaleARGBRowDownEvenBox_SSE2,
       ScaleARGBRowDownEvenBox_C,
       4,
       3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
SDAANY(ScaleARGBRowDownEven_Any_NEON,
       ScaleARGBRowDownEven_NEON,
       ScaleARGBRowDownEven_C,
       4,
       3)
SDAANY(ScaleARGBRowDownEvenBox_Any_NEON,
       ScaleARGBRowDownEvenBox_NEON,
       ScaleARGBRowDownEvenBox_C,
       4,
       3)
#endif
#ifdef HAS_SCALEARGBROWDOWNEVEN_MSA
SDAANY(ScaleARGBRowDownEven_Any_MSA,
       ScaleARGBRowDownEven_MSA,
       ScaleARGBRowDownEven_C,
       4,
       3)
SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
       ScaleARGBRowDownEvenBox_MSA,
       ScaleARGBRowDownEvenBox_C,
       4,
       3)
#endif
#undef SDAANY

// Add rows box filter scale down.
438 #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ 439 void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \ 440 int n = src_width & ~MASK; \ 441 if (n > 0) { \ 442 SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ 443 } \ 444 SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ 445 } 446 447 #ifdef HAS_SCALEADDROW_SSE2 448 SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15) 449 #endif 450 #ifdef HAS_SCALEADDROW_AVX2 451 SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31) 452 #endif 453 #ifdef HAS_SCALEADDROW_NEON 454 SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) 455 #endif 456 #ifdef HAS_SCALEADDROW_MSA 457 SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) 458 #endif 459 #undef SAANY 460 461 #ifdef __cplusplus 462 } // extern "C" 463 } // namespace libyuv 464 #endif 465