1 /* 2 * Copyright 2015 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/scale.h" 12 #include "libyuv/scale_row.h" 13 14 #include "libyuv/basic_types.h" 15 16 #ifdef __cplusplus 17 namespace libyuv { 18 extern "C" { 19 #endif 20 21 // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols 22 #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ 23 void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, int dst_width, int x, \ 24 int dx) { \ 25 int n = dst_width & ~MASK; \ 26 if (n > 0) { \ 27 TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ 28 } \ 29 TERP_C(dst_ptr + n * BPP, src_ptr, dst_width & MASK, x + n * dx, dx); \ 30 } 31 32 #ifdef HAS_SCALEFILTERCOLS_NEON 33 CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) 34 #endif 35 #ifdef HAS_SCALEARGBCOLS_NEON 36 CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) 37 #endif 38 #ifdef HAS_SCALEARGBFILTERCOLS_NEON 39 CANY(ScaleARGBFilterCols_Any_NEON, 40 ScaleARGBFilterCols_NEON, 41 ScaleARGBFilterCols_C, 42 4, 43 3) 44 #endif 45 #undef CANY 46 47 // Fixed scale down. 48 #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ 49 void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, \ 50 int dst_width) { \ 51 int r = (int)((unsigned int)dst_width % (MASK + 1)); \ 52 int n = dst_width - r; \ 53 if (n > 0) { \ 54 SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ 55 } \ 56 SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ 57 dst_ptr + n * BPP, r); \ 58 } 59 60 // Fixed scale down for odd source width. Used by I420Blend subsampling. 61 // Since dst_width is (width + 1) / 2, this function scales one less pixel 62 // and copies the last pixel. 63 #define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ 64 void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, uint8* dst_ptr, \ 65 int dst_width) { \ 66 int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \ 67 int n = dst_width - r; \ 68 if (n > 0) { \ 69 SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ 70 } \ 71 SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ 72 dst_ptr + n * BPP, r); \ 73 } 74 75 #ifdef HAS_SCALEROWDOWN2_SSSE3 76 SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) 77 SDANY(ScaleRowDown2Linear_Any_SSSE3, 78 ScaleRowDown2Linear_SSSE3, 79 ScaleRowDown2Linear_C, 80 2, 81 1, 82 15) 83 SDANY(ScaleRowDown2Box_Any_SSSE3, 84 ScaleRowDown2Box_SSSE3, 85 ScaleRowDown2Box_C, 86 2, 87 1, 88 15) 89 SDODD(ScaleRowDown2Box_Odd_SSSE3, 90 ScaleRowDown2Box_SSSE3, 91 ScaleRowDown2Box_Odd_C, 92 2, 93 1, 94 15) 95 #endif 96 #ifdef HAS_SCALEROWDOWN2_AVX2 97 SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) 98 SDANY(ScaleRowDown2Linear_Any_AVX2, 99 ScaleRowDown2Linear_AVX2, 100 ScaleRowDown2Linear_C, 101 2, 102 1, 103 31) 104 SDANY(ScaleRowDown2Box_Any_AVX2, 105 ScaleRowDown2Box_AVX2, 106 ScaleRowDown2Box_C, 107 2, 108 1, 109 31) 110 SDODD(ScaleRowDown2Box_Odd_AVX2, 111 ScaleRowDown2Box_AVX2, 112 ScaleRowDown2Box_Odd_C, 113 2, 114 1, 115 31) 116 #endif 117 #ifdef HAS_SCALEROWDOWN2_NEON 118 SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) 119 SDANY(ScaleRowDown2Linear_Any_NEON, 120 ScaleRowDown2Linear_NEON, 121 ScaleRowDown2Linear_C, 122 2, 123 1, 124 15) 125 SDANY(ScaleRowDown2Box_Any_NEON, 126 ScaleRowDown2Box_NEON, 127 ScaleRowDown2Box_C, 128 2, 129 1, 130 15) 131 SDODD(ScaleRowDown2Box_Odd_NEON, 132 ScaleRowDown2Box_NEON, 133 ScaleRowDown2Box_Odd_C, 134 2, 135 1, 136 15) 137 #endif 138 #ifdef HAS_SCALEROWDOWN2_MSA 139 SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31) 140 SDANY(ScaleRowDown2Linear_Any_MSA, 141 ScaleRowDown2Linear_MSA, 142 ScaleRowDown2Linear_C, 143 2, 144 1, 145 31) 146 SDANY(ScaleRowDown2Box_Any_MSA, 147 ScaleRowDown2Box_MSA, 148 ScaleRowDown2Box_C, 149 2, 150 1, 151 31) 152 #endif 153 #ifdef HAS_SCALEROWDOWN4_SSSE3 154 SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) 155 SDANY(ScaleRowDown4Box_Any_SSSE3, 156 ScaleRowDown4Box_SSSE3, 157 ScaleRowDown4Box_C, 158 4, 159 1, 160 7) 161 #endif 162 #ifdef HAS_SCALEROWDOWN4_AVX2 163 SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15) 164 SDANY(ScaleRowDown4Box_Any_AVX2, 165 ScaleRowDown4Box_AVX2, 166 ScaleRowDown4Box_C, 167 4, 168 1, 169 15) 170 #endif 171 #ifdef HAS_SCALEROWDOWN4_NEON 172 SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7) 173 SDANY(ScaleRowDown4Box_Any_NEON, 174 ScaleRowDown4Box_NEON, 175 ScaleRowDown4Box_C, 176 4, 177 1, 178 7) 179 #endif 180 #ifdef HAS_SCALEROWDOWN4_MSA 181 SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15) 182 SDANY(ScaleRowDown4Box_Any_MSA, 183 ScaleRowDown4Box_MSA, 184 ScaleRowDown4Box_C, 185 4, 186 1, 187 15) 188 #endif 189 #ifdef HAS_SCALEROWDOWN34_SSSE3 190 SDANY(ScaleRowDown34_Any_SSSE3, 191 ScaleRowDown34_SSSE3, 192 ScaleRowDown34_C, 193 4 / 3, 194 1, 195 23) 196 SDANY(ScaleRowDown34_0_Box_Any_SSSE3, 197 ScaleRowDown34_0_Box_SSSE3, 198 ScaleRowDown34_0_Box_C, 199 4 / 3, 200 1, 201 23) 202 SDANY(ScaleRowDown34_1_Box_Any_SSSE3, 203 ScaleRowDown34_1_Box_SSSE3, 204 ScaleRowDown34_1_Box_C, 205 4 / 3, 206 1, 207 23) 208 #endif 209 #ifdef HAS_SCALEROWDOWN34_NEON 210 SDANY(ScaleRowDown34_Any_NEON, 211 ScaleRowDown34_NEON, 212 ScaleRowDown34_C, 213 4 / 3, 214 1, 215 23) 216 SDANY(ScaleRowDown34_0_Box_Any_NEON, 217 ScaleRowDown34_0_Box_NEON, 218 ScaleRowDown34_0_Box_C, 219 4 / 3, 220 1, 221 23) 222 SDANY(ScaleRowDown34_1_Box_Any_NEON, 223 ScaleRowDown34_1_Box_NEON, 224 ScaleRowDown34_1_Box_C, 225 4 / 3, 226 1, 227 23) 228 #endif 229 #ifdef HAS_SCALEROWDOWN38_SSSE3 230 SDANY(ScaleRowDown38_Any_SSSE3, 231 ScaleRowDown38_SSSE3, 232 ScaleRowDown38_C, 233 8 / 3, 234 1, 235 11) 236 SDANY(ScaleRowDown38_3_Box_Any_SSSE3, 237 ScaleRowDown38_3_Box_SSSE3, 238 ScaleRowDown38_3_Box_C, 239 8 / 3, 240 1, 241 5) 242 SDANY(ScaleRowDown38_2_Box_Any_SSSE3, 243 ScaleRowDown38_2_Box_SSSE3, 244 ScaleRowDown38_2_Box_C, 245 8 / 3, 246 1, 247 5) 248 #endif 249 #ifdef HAS_SCALEROWDOWN38_NEON 250 SDANY(ScaleRowDown38_Any_NEON, 251 ScaleRowDown38_NEON, 252 ScaleRowDown38_C, 253 8 / 3, 254 1, 255 11) 256 SDANY(ScaleRowDown38_3_Box_Any_NEON, 257 ScaleRowDown38_3_Box_NEON, 258 ScaleRowDown38_3_Box_C, 259 8 / 3, 260 1, 261 11) 262 SDANY(ScaleRowDown38_2_Box_Any_NEON, 263 ScaleRowDown38_2_Box_NEON, 264 ScaleRowDown38_2_Box_C, 265 8 / 3, 266 1, 267 11) 268 #endif 269 #ifdef HAS_SCALEROWDOWN38_MSA 270 SDANY(ScaleRowDown38_Any_MSA, 271 ScaleRowDown38_MSA, 272 ScaleRowDown38_C, 273 8 / 3, 274 1, 275 11) 276 SDANY(ScaleRowDown38_3_Box_Any_MSA, 277 ScaleRowDown38_3_Box_MSA, 278 ScaleRowDown38_3_Box_C, 279 8 / 3, 280 1, 281 11) 282 SDANY(ScaleRowDown38_2_Box_Any_MSA, 283 ScaleRowDown38_2_Box_MSA, 284 ScaleRowDown38_2_Box_C, 285 8 / 3, 286 1, 287 11) 288 #endif 289 290 #ifdef HAS_SCALEARGBROWDOWN2_SSE2 291 SDANY(ScaleARGBRowDown2_Any_SSE2, 292 ScaleARGBRowDown2_SSE2, 293 ScaleARGBRowDown2_C, 294 2, 295 4, 296 3) 297 SDANY(ScaleARGBRowDown2Linear_Any_SSE2, 298 ScaleARGBRowDown2Linear_SSE2, 299 ScaleARGBRowDown2Linear_C, 300 2, 301 4, 302 3) 303 SDANY(ScaleARGBRowDown2Box_Any_SSE2, 304 ScaleARGBRowDown2Box_SSE2, 305 ScaleARGBRowDown2Box_C, 306 2, 307 4, 308 3) 309 #endif 310 #ifdef HAS_SCALEARGBROWDOWN2_NEON 311 SDANY(ScaleARGBRowDown2_Any_NEON, 312 ScaleARGBRowDown2_NEON, 313 ScaleARGBRowDown2_C, 314 2, 315 4, 316 7) 317 SDANY(ScaleARGBRowDown2Linear_Any_NEON, 318 ScaleARGBRowDown2Linear_NEON, 319 ScaleARGBRowDown2Linear_C, 320 2, 321 4, 322 7) 323 SDANY(ScaleARGBRowDown2Box_Any_NEON, 324 ScaleARGBRowDown2Box_NEON, 325 ScaleARGBRowDown2Box_C, 326 2, 327 4, 328 7) 329 #endif 330 #ifdef HAS_SCALEARGBROWDOWN2_MSA 331 SDANY(ScaleARGBRowDown2_Any_MSA, 332 ScaleARGBRowDown2_MSA, 333 ScaleARGBRowDown2_C, 334 2, 335 4, 336 3) 337 SDANY(ScaleARGBRowDown2Linear_Any_MSA, 338 ScaleARGBRowDown2Linear_MSA, 339 ScaleARGBRowDown2Linear_C, 340 2, 341 4, 342 3) 343 SDANY(ScaleARGBRowDown2Box_Any_MSA, 344 ScaleARGBRowDown2Box_MSA, 345 ScaleARGBRowDown2Box_C, 346 2, 347 4, 348 3) 349 #endif 350 #undef SDANY 351 352 // Scale down by even scale factor. 353 #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ 354 void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \ 355 uint8* dst_ptr, int dst_width) { \ 356 int r = (int)((unsigned int)dst_width % (MASK + 1)); \ 357 int n = dst_width - r; \ 358 if (n > 0) { \ 359 SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ 360 } \ 361 SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ 362 dst_ptr + n * BPP, r); \ 363 } 364 365 #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 366 SDAANY(ScaleARGBRowDownEven_Any_SSE2, 367 ScaleARGBRowDownEven_SSE2, 368 ScaleARGBRowDownEven_C, 369 4, 370 3) 371 SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, 372 ScaleARGBRowDownEvenBox_SSE2, 373 ScaleARGBRowDownEvenBox_C, 374 4, 375 3) 376 #endif 377 #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON 378 SDAANY(ScaleARGBRowDownEven_Any_NEON, 379 ScaleARGBRowDownEven_NEON, 380 ScaleARGBRowDownEven_C, 381 4, 382 3) 383 SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, 384 ScaleARGBRowDownEvenBox_NEON, 385 ScaleARGBRowDownEvenBox_C, 386 4, 387 3) 388 #endif 389 #ifdef HAS_SCALEARGBROWDOWNEVEN_MSA 390 SDAANY(ScaleARGBRowDownEven_Any_MSA, 391 ScaleARGBRowDownEven_MSA, 392 ScaleARGBRowDownEven_C, 393 4, 394 3) 395 SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, 396 ScaleARGBRowDownEvenBox_MSA, 397 ScaleARGBRowDownEvenBox_C, 398 4, 399 3) 400 #endif 401 402 // Add rows box filter scale down. 403 #define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \ 404 void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \ 405 int n = src_width & ~MASK; \ 406 if (n > 0) { \ 407 SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \ 408 } \ 409 SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \ 410 } 411 412 #ifdef HAS_SCALEADDROW_SSE2 413 SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15) 414 #endif 415 #ifdef HAS_SCALEADDROW_AVX2 416 SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31) 417 #endif 418 #ifdef HAS_SCALEADDROW_NEON 419 SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15) 420 #endif 421 #ifdef HAS_SCALEADDROW_MSA 422 SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15) 423 #endif 424 #ifdef HAS_SCALEADDROW_DSPR2 425 SAANY(ScaleAddRow_Any_DSPR2, ScaleAddRow_DSPR2, ScaleAddRow_C, 15) 426 #endif 427 #undef SAANY 428 429 #ifdef __cplusplus 430 } // extern "C" 431 } // namespace libyuv 432 #endif 433