/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include <string.h>  // For memset.

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// memset for vin is meant to clear the source buffer so that
// SIMD that reads a full multiple of 16 bytes will not trigger msan errors.
// memset is not needed for production, as the garbage values are processed but
// not used, although there may be edge cases for subsampling.
// The size of the buffer is based on the largest read, which can be inferred
// by the source type (e.g. ARGB) and the mask (last parameter), or by
// examining the source code for how much the source pointers are advanced.

// Subsampled source width needs to be increased by 1 if not even.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

// Any 4 planes to 1
#define ANY41(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               int width) { \
    SIMD_ALIGNED(uint8_t vin[64 * 4]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, n); \
    } \
    memcpy(vin, y_buf + n, r); \
    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(vin + 192, a_buf + n, r); \
    ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
  }

#ifdef HAS_MERGEARGBROW_SSE2
ANY41(MergeARGBRow_Any_SSE2, MergeARGBRow_SSE2, 0, 0, 4, 7)
#endif
#ifdef HAS_MERGEARGBROW_AVX2
ANY41(MergeARGBRow_Any_AVX2, MergeARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_MERGEARGBROW_NEON
ANY41(MergeARGBRow_Any_NEON, MergeARGBRow_NEON, 0, 0, 4, 15)
#endif

// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
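// Illustrative sketch (excluded from the build): roughly what the
// ANY41(MergeARGBRow_Any_SSE2, MergeARGBRow_SSE2, 0, 0, 4, 7) instantiation
// above expands to, with UVSHIFT = DUVSHIFT = 0, BPP = 4 and MASK = 7.
// The SIMD kernel handles the largest multiple of MASK + 1 = 8 pixels in
// place; the remaining 0..7 pixels are copied into the zero-padded, aligned
// temporaries so the same kernel can safely read a full 8-pixel group, and
// only the valid output bytes are copied back. Kept under #if 0 as a
// reference expansion only.
#if 0
void MergeARGBRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* u_buf,
                           const uint8_t* v_buf, const uint8_t* a_buf,
                           uint8_t* dst_ptr, int width) {
  SIMD_ALIGNED(uint8_t vin[64 * 4]);
  SIMD_ALIGNED(uint8_t vout[64]);
  memset(vin, 0, sizeof(vin)); /* for msan */
  int r = width & 7;  /* leftover pixels (0..7) */
  int n = width & ~7; /* largest multiple of 8 */
  if (n > 0) {
    MergeARGBRow_SSE2(y_buf, u_buf, v_buf, a_buf, dst_ptr, n);
  }
  memcpy(vin, y_buf + n, r);
  memcpy(vin + 64, u_buf + n, r);
  memcpy(vin + 128, v_buf + n, r);
  memcpy(vin + 192, a_buf + n, r);
  MergeARGBRow_SSE2(vin, vin + 64, vin + 128, vin + 192, vout, 8);
  memcpy(dst_ptr + n * 4, vout, r * 4);
}
#endif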
// Any 4 planes to 1 with yuvconstants
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(uint8_t vin[64 * 4]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
    } \
    memcpy(vin, y_buf + n, r); \
    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(vin + 192, a_buf + n, r); \
    if (width & 1) { \
      vin[64 + SS(r, UVSHIFT)] = vin[64 + SS(r, UVSHIFT) - 1]; \
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \
    } \
    ANY_SIMD(vin, vin + 64, vin + 128, vin + 192, vout, yuvconstants, \
             MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I444ALPHATOARGBROW_SSSE3
ANY41C(I444AlphaToARGBRow_Any_SSSE3, I444AlphaToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I444ALPHATOARGBROW_AVX2
ANY41C(I444AlphaToARGBRow_Any_AVX2, I444AlphaToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_SSSE3
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_AVX2
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444ALPHATOARGBROW_NEON
ANY41C(I444AlphaToARGBRow_Any_NEON, I444AlphaToARGBRow_NEON, 0, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_NEON
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_I444ALPHATOARGBROW_MSA
ANY41C(I444AlphaToARGBRow_Any_MSA, I444AlphaToARGBRow_MSA, 0, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_LSX
ANY41C(I422AlphaToARGBRow_Any_LSX, I422AlphaToARGBRow_LSX, 1, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_LASX
ANY41C(I422AlphaToARGBRow_Any_LASX, I422AlphaToARGBRow_LASX, 1, 0, 4, 15)
#endif
#undef ANY41C

// Any 4 planes of 16 bit to 1 plane of 8 bit with yuvconstants
#define ANY41CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, const T* a_buf, \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
               int width) { \
    SIMD_ALIGNED(T vin[16 * 4]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
    } \
    memcpy(vin, y_buf + n, r * SBPP); \
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
    memcpy(vin + 48, a_buf + n, r * SBPP); \
    ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I210ALPHATOARGBROW_SSSE3
ANY41CT(I210AlphaToARGBRow_Any_SSSE3, I210AlphaToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif

#ifdef HAS_I210ALPHATOARGBROW_AVX2
ANY41CT(I210AlphaToARGBRow_Any_AVX2, I210AlphaToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif

#ifdef HAS_I410ALPHATOARGBROW_SSSE3
ANY41CT(I410AlphaToARGBRow_Any_SSSE3, I410AlphaToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif

#ifdef HAS_I410ALPHATOARGBROW_AVX2
ANY41CT(I410AlphaToARGBRow_Any_AVX2, I410AlphaToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif

#undef ANY41CT

// Any 4 planes to 1 plane with parameter
#define ANY41PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
               const STYPE* a_buf, DTYPE* dst_ptr, int depth, int width) { \
    SIMD_ALIGNED(STYPE vin[16 * 4]); \
    SIMD_ALIGNED(DTYPE vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(r_buf, g_buf, b_buf, a_buf, dst_ptr, depth, n); \
    } \
    memcpy(vin, r_buf + n, r * SBPP); \
    memcpy(vin + 16, g_buf + n, r * SBPP); \
    memcpy(vin + 32, b_buf + n, r * SBPP); \
    memcpy(vin + 48, a_buf + n, r * SBPP); \
    ANY_SIMD(vin, vin + 16, vin + 32, vin + 48, vout, depth, MASK + 1); \
    memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \
  }

#ifdef HAS_MERGEAR64ROW_AVX2
ANY41PT(MergeAR64Row_Any_AVX2, MergeAR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif

#ifdef HAS_MERGEAR64ROW_NEON
ANY41PT(MergeAR64Row_Any_NEON, MergeAR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif

#ifdef HAS_MERGEARGB16TO8ROW_AVX2
ANY41PT(MergeARGB16To8Row_Any_AVX2, MergeARGB16To8Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
#endif

#ifdef HAS_MERGEARGB16TO8ROW_NEON
ANY41PT(MergeARGB16To8Row_Any_NEON, MergeARGB16To8Row_NEON, uint16_t, 2, uint8_t, 4, 7)
#endif

#undef ANY41PT

// Any 3 planes to 1.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t vin[64 * 3]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
    } \
    memcpy(vin, y_buf + n, r); \
    memcpy(vin + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(vin + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    ANY_SIMD(vin, vin + 64, vin + 128, vout, MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
  }

// Merge functions.
#ifdef HAS_MERGERGBROW_SSSE3
ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
#endif
#ifdef HAS_MERGERGBROW_NEON
ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
#endif
#ifdef HAS_MERGEXRGBROW_SSE2
ANY31(MergeXRGBRow_Any_SSE2, MergeXRGBRow_SSE2, 0, 0, 4, 7)
#endif
#ifdef HAS_MERGEXRGBROW_AVX2
ANY31(MergeXRGBRow_Any_AVX2, MergeXRGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_MERGEXRGBROW_NEON
ANY31(MergeXRGBRow_Any_NEON, MergeXRGBRow_NEON, 0, 0, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_AVX2
ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOYUY2ROW_NEON
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_MSA
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOYUY2ROW_LSX
ANY31(I422ToYUY2Row_Any_LSX, I422ToYUY2Row_LSX, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_LASX
ANY31(I422ToYUY2Row_Any_LASX, I422ToYUY2Row_LASX, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOUYVYROW_MSA
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOUYVYROW_LSX
ANY31(I422ToUYVYRow_Any_LSX, I422ToUYVYRow_LSX, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOUYVYROW_LASX
ANY31(I422ToUYVYRow_Any_LASX, I422ToUYVYRow_LASX, 1, 1, 4, 31)
#endif
#ifdef HAS_BLENDPLANEROW_AVX2
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif
#ifdef HAS_BLENDPLANEROW_SSSE3
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif
#undef ANY31

// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
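// Worked example (illustrative, not part of the original source): for
// ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) below with
// width = 13:
//   n = 13 & ~7 = 8 pixels are converted directly from the caller's buffers;
//   r = 13 & 7  = 5 leftover pixels are copied into vin;
//   SS(5, 0) = 5 U and V samples are copied (SS(5, 1) would give 3 for 422).
// Because width is odd, vin[128 + 5] = vin[128 + 4] (and likewise for V at
// offset 256) duplicates the last chroma sample, so a NEON kernel that folds
// 444 down to 422 internally reads a defined value for the extra slot.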
// Any 3 planes to 1 with yuvconstants
#define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(uint8_t vin[128 * 3]); \
    SIMD_ALIGNED(uint8_t vout[128]); \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
    } \
    memcpy(vin, y_buf + n, r); \
    memcpy(vin + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(vin + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    if (width & 1) { \
      vin[128 + SS(r, UVSHIFT)] = vin[128 + SS(r, UVSHIFT) - 1]; \
      vin[256 + SS(r, UVSHIFT)] = vin[256 + SS(r, UVSHIFT) - 1]; \
    } \
    ANY_SIMD(vin, vin + 128, vin + 256, vout, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I422TOARGBROW_SSSE3
ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TORGBAROW_SSSE3
ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOARGB4444ROW_SSSE3
ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGB1555ROW_SSSE3
ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB565ROW_SSSE3
ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TORGB24ROW_SSSE3
ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15)
#endif
#ifdef HAS_I422TOAR30ROW_SSSE3
ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422TOAR30ROW_AVX2
ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_SSSE3
ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7)
#endif
#ifdef HAS_I444TORGB24ROW_SSSE3
ANY31C(I444ToRGB24Row_Any_SSSE3, I444ToRGB24Row_SSSE3, 0, 0, 3, 15)
#endif
#ifdef HAS_I422TORGB24ROW_AVX2
ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31)
#endif
#ifdef HAS_I422TOARGBROW_AVX2
ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422TOARGBROW_AVX512BW
ANY31C(I422ToARGBRow_Any_AVX512BW, I422ToARGBRow_AVX512BW, 1, 0, 4, 31)
#endif
#ifdef HAS_I422TORGBAROW_AVX2
ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I444TOARGBROW_AVX2
ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15)
#endif
#ifdef HAS_I444TORGB24ROW_AVX2
ANY31C(I444ToRGB24Row_Any_AVX2, I444ToRGB24Row_AVX2, 0, 0, 3, 31)
#endif
#ifdef HAS_I422TOARGB4444ROW_AVX2
ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TOARGB1555ROW_AVX2
ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TORGB565ROW_AVX2
ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15)
#endif
#ifdef HAS_I444TORGB24ROW_NEON
ANY31C(I444ToRGB24Row_Any_NEON, I444ToRGB24Row_NEON, 0, 0, 3, 7)
#endif
#ifdef HAS_I422TOARGBROW_NEON
ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7)
ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_MSA
ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7)
ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7)
ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15)
ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7)
ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7)
#endif
#ifdef HAS_I422TOARGBROW_LSX
ANY31C(I422ToARGBRow_Any_LSX, I422ToARGBRow_LSX, 1, 0, 4, 15)
ANY31C(I422ToRGBARow_Any_LSX, I422ToRGBARow_LSX, 1, 0, 4, 15)
ANY31C(I422ToRGB24Row_Any_LSX, I422ToRGB24Row_LSX, 1, 0, 3, 15)
ANY31C(I422ToRGB565Row_Any_LSX, I422ToRGB565Row_LSX, 1, 0, 2, 15)
ANY31C(I422ToARGB4444Row_Any_LSX, I422ToARGB4444Row_LSX, 1, 0, 2, 15)
ANY31C(I422ToARGB1555Row_Any_LSX, I422ToARGB1555Row_LSX, 1, 0, 2, 15)
#endif
#ifdef HAS_I422TOARGBROW_LASX
ANY31C(I422ToARGBRow_Any_LASX, I422ToARGBRow_LASX, 1, 0, 4, 31)
ANY31C(I422ToRGBARow_Any_LASX, I422ToRGBARow_LASX, 1, 0, 4, 31)
ANY31C(I422ToRGB24Row_Any_LASX, I422ToRGB24Row_LASX, 1, 0, 3, 31)
ANY31C(I422ToRGB565Row_Any_LASX, I422ToRGB565Row_LASX, 1, 0, 2, 31)
ANY31C(I422ToARGB4444Row_Any_LASX, I422ToARGB4444Row_LASX, 1, 0, 2, 31)
ANY31C(I422ToARGB1555Row_Any_LASX, I422ToARGB1555Row_LASX, 1, 0, 2, 31)
#endif
#ifdef HAS_I444TOARGBROW_LSX
ANY31C(I444ToARGBRow_Any_LSX, I444ToARGBRow_LSX, 0, 0, 4, 15)
#endif
#undef ANY31C

// Any 3 planes of 16 bit to 1 with yuvconstants
// TODO(fbarchard): consider sharing this code with ANY31C
#define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
  void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \
               uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \
               int width) { \
    SIMD_ALIGNED(T vin[16 * 3]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \
    } \
    memcpy(vin, y_buf + n, r * SBPP); \
    memcpy(vin + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
    memcpy(vin + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \
    ANY_SIMD(vin, vin + 16, vin + 32, vout, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I210TOAR30ROW_SSSE3
ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I210TOARGBROW_SSSE3
ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I210TOARGBROW_AVX2
ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I210TOAR30ROW_AVX2
ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_SSSE3
ANY31CT(I410ToAR30Row_Any_SSSE3, I410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_SSSE3
ANY31CT(I410ToARGBRow_Any_SSSE3, I410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I410TOARGBROW_AVX2
ANY31CT(I410ToARGBRow_Any_AVX2, I410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I410TOAR30ROW_AVX2
ANY31CT(I410ToAR30Row_Any_AVX2, I410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I212TOAR30ROW_SSSE3
ANY31CT(I212ToAR30Row_Any_SSSE3, I212ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOARGBROW_SSSE3
ANY31CT(I212ToARGBRow_Any_SSSE3, I212ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_I212TOARGBROW_AVX2
ANY31CT(I212ToARGBRow_Any_AVX2, I212ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_I212TOAR30ROW_AVX2
ANY31CT(I212ToAR30Row_Any_AVX2, I212ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#undef ANY31CT

// Any 3 planes to 1 plane with parameter
#define ANY31PT(NAMEANY, ANY_SIMD, STYPE, SBPP, DTYPE, BPP, MASK) \
  void NAMEANY(const STYPE* r_buf, const STYPE* g_buf, const STYPE* b_buf, \
               DTYPE* dst_ptr, int depth, int width) { \
    SIMD_ALIGNED(STYPE vin[16 * 3]); \
    SIMD_ALIGNED(DTYPE vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(r_buf, g_buf, b_buf, dst_ptr, depth, n); \
    } \
    memcpy(vin, r_buf + n, r * SBPP); \
    memcpy(vin + 16, g_buf + n, r * SBPP); \
    memcpy(vin + 32, b_buf + n, r * SBPP); \
    ANY_SIMD(vin, vin + 16, vin + 32, vout, depth, MASK + 1); \
    memcpy((uint8_t*)dst_ptr + n * BPP, vout, r * BPP); \
  }

#ifdef HAS_MERGEXR30ROW_AVX2
ANY31PT(MergeXR30Row_Any_AVX2, MergeXR30Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
#endif

#ifdef HAS_MERGEXR30ROW_NEON
ANY31PT(MergeXR30Row_Any_NEON, MergeXR30Row_NEON, uint16_t, 2, uint8_t, 4, 3)
ANY31PT(MergeXR30Row_10_Any_NEON, MergeXR30Row_10_NEON, uint16_t, 2, uint8_t, 4, 3)
#endif

#ifdef HAS_MERGEXR64ROW_AVX2
ANY31PT(MergeXR64Row_Any_AVX2, MergeXR64Row_AVX2, uint16_t, 2, uint16_t, 8, 15)
#endif

#ifdef HAS_MERGEXR64ROW_NEON
ANY31PT(MergeXR64Row_Any_NEON, MergeXR64Row_NEON, uint16_t, 2, uint16_t, 8, 7)
#endif

#ifdef HAS_MERGEXRGB16TO8ROW_AVX2
ANY31PT(MergeXRGB16To8Row_Any_AVX2, MergeXRGB16To8Row_AVX2, uint16_t, 2, uint8_t, 4, 15)
#endif

#ifdef HAS_MERGEXRGB16TO8ROW_NEON
ANY31PT(MergeXRGB16To8Row_Any_NEON, MergeXRGB16To8Row_NEON, uint16_t, 2, uint8_t, 4, 7)
#endif

#undef ANY31PT

// Any 2 planes to 1.
#define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               int width) { \
    SIMD_ALIGNED(uint8_t vin[128 * 2]); \
    SIMD_ALIGNED(uint8_t vout[128]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \
    } \
    memcpy(vin, y_buf + n * SBPP, r * SBPP); \
    memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
           SS(r, UVSHIFT) * SBPP2); \
    ANY_SIMD(vin, vin + 128, vout, MASK + 1); \
    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
  }

// Merge functions.
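// Illustrative usage (excluded from the build): callers elsewhere in libyuv
// typically select the _Any_ wrapper when the row width is not a multiple of
// the kernel's pixel count, and the unpadded kernel when it is. The call
// site below is a sketch of that dispatch pattern, not a copy of library
// code.
#if 0
  void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v,
                     uint8_t* dst_uv, int width) = MergeUVRow_C;
#if defined(HAS_MERGEUVROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    MergeUVRow = MergeUVRow_Any_SSE2; /* handles any width */
    if (IS_ALIGNED(width, 16)) {
      MergeUVRow = MergeUVRow_SSE2; /* width is a multiple of 16 */
    }
  }
#endif
#endif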
#ifdef HAS_MERGEUVROW_SSE2
ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_AVX2
ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_AVX512BW
ANY21(MergeUVRow_Any_AVX512BW, MergeUVRow_AVX512BW, 0, 1, 1, 2, 31)
#endif
#ifdef HAS_MERGEUVROW_NEON
ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_MSA
ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_MERGEUVROW_LSX
ANY21(MergeUVRow_Any_LSX, MergeUVRow_LSX, 0, 1, 1, 2, 15)
#endif
#ifdef HAS_NV21TOYUV24ROW_NEON
ANY21(NV21ToYUV24Row_Any_NEON, NV21ToYUV24Row_NEON, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV21TOYUV24ROW_SSSE3
ANY21(NV21ToYUV24Row_Any_SSSE3, NV21ToYUV24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV21TOYUV24ROW_AVX2
ANY21(NV21ToYUV24Row_Any_AVX2, NV21ToYUV24Row_AVX2, 1, 1, 2, 3, 31)
#endif
// Math functions.
#ifdef HAS_ARGBMULTIPLYROW_SSE2
ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBADDROW_SSE2
ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBSUBTRACTROW_SSE2
ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_AVX2
ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_AVX2
ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_AVX2
ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_NEON
ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_NEON
ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_NEON
ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBMULTIPLYROW_MSA
ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_LSX
ANY21(ARGBMultiplyRow_Any_LSX, ARGBMultiplyRow_LSX, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBMULTIPLYROW_LASX
ANY21(ARGBMultiplyRow_Any_LASX, ARGBMultiplyRow_LASX, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_MSA
ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBADDROW_LSX
ANY21(ARGBAddRow_Any_LSX, ARGBAddRow_LSX, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBADDROW_LASX
ANY21(ARGBAddRow_Any_LASX, ARGBAddRow_LASX, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_MSA
ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_ARGBSUBTRACTROW_LSX
ANY21(ARGBSubtractRow_Any_LSX, ARGBSubtractRow_LSX, 0, 4, 4, 4, 3)
#endif
#ifdef HAS_ARGBSUBTRACTROW_LASX
ANY21(ARGBSubtractRow_Any_LASX, ARGBSubtractRow_LASX, 0, 4, 4, 4, 7)
#endif
#ifdef HAS_SOBELROW_SSE2
ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_NEON
ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELROW_MSA
ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELROW_LSX
ANY21(SobelRow_Any_LSX, SobelRow_LSX, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_SSE2
ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_NEON
ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15)
#endif
#ifdef HAS_SOBELTOPLANEROW_MSA
ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31)
#endif
#ifdef HAS_SOBELTOPLANEROW_LSX
ANY21(SobelToPlaneRow_Any_LSX, SobelToPlaneRow_LSX, 0, 1, 1, 1, 31)
#endif
#ifdef HAS_SOBELXYROW_SSE2
ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_NEON
ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7)
#endif
#ifdef HAS_SOBELXYROW_MSA
ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15)
#endif
#ifdef HAS_SOBELXYROW_LSX
ANY21(SobelXYRow_Any_LSX, SobelXYRow_LSX, 0, 1, 1, 4, 15)
#endif
#undef ANY21

// Any 2 planes to 1 with stride
// width is measured in source pixels. 4 bytes of YUY2 contain 2 pixels, so
// awidth = (width + 1) / 2 rounds an odd width up to a whole number of pairs.
#define ANY21S(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_uv, \
               int width) { \
    SIMD_ALIGNED(uint8_t vin[32 * 2]); \
    SIMD_ALIGNED(uint8_t vout[32]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int awidth = (width + 1) / 2; \
    int r = awidth & MASK; \
    int n = awidth & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_yuy2, stride_yuy2, dst_uv, n * 2); \
    } \
    memcpy(vin, src_yuy2 + n * SBPP, r * SBPP); \
    memcpy(vin + 32, src_yuy2 + stride_yuy2 + n * SBPP, r * SBPP); \
    ANY_SIMD(vin, 32, vout, MASK + 1); \
    memcpy(dst_uv + n * BPP, vout, r * BPP); \
  }

#ifdef HAS_YUY2TONVUVROW_NEON
ANY21S(YUY2ToNVUVRow_Any_NEON, YUY2ToNVUVRow_NEON, 4, 2, 7)
#endif
#ifdef HAS_YUY2TONVUVROW_SSE2
ANY21S(YUY2ToNVUVRow_Any_SSE2, YUY2ToNVUVRow_SSE2, 4, 2, 7)
#endif
#ifdef HAS_YUY2TONVUVROW_AVX2
ANY21S(YUY2ToNVUVRow_Any_AVX2, YUY2ToNVUVRow_AVX2, 4, 2, 15)
#endif

// Any 2 planes to 1 with yuvconstants
#define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(uint8_t vin[128 * 2]); \
    SIMD_ALIGNED(uint8_t vout[128]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
    } \
    memcpy(vin, y_buf + n * SBPP, r * SBPP); \
    memcpy(vin + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \
           SS(r, UVSHIFT) * SBPP2); \
    ANY_SIMD(vin, vin + 128, vout, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
  }

// Biplanar to RGB.
#ifdef HAS_NV12TOARGBROW_SSSE3
ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_AVX2
ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV12TOARGBROW_NEON
ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_MSA
ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_LSX
ANY21C(NV12ToARGBRow_Any_LSX, NV12ToARGBRow_LSX, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV12TOARGBROW_LASX
ANY21C(NV12ToARGBRow_Any_LASX, NV12ToARGBRow_LASX, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV21TOARGBROW_SSSE3
ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_AVX2
ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV21TOARGBROW_NEON
ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_MSA
ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_LSX
ANY21C(NV21ToARGBRow_Any_LSX, NV21ToARGBRow_LSX, 1, 1, 2, 4, 7)
#endif
#ifdef HAS_NV21TOARGBROW_LASX
ANY21C(NV21ToARGBRow_Any_LASX, NV21ToARGBRow_LASX, 1, 1, 2, 4, 15)
#endif
#ifdef HAS_NV12TORGB24ROW_NEON
ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV21TORGB24ROW_NEON
ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7)
#endif
#ifdef HAS_NV12TORGB24ROW_SSSE3
ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV21TORGB24ROW_SSSE3
ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15)
#endif
#ifdef HAS_NV12TORGB24ROW_AVX2
ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
#ifdef HAS_NV21TORGB24ROW_AVX2
ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31)
#endif
#ifdef HAS_NV12TORGB565ROW_SSSE3
ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_AVX2
ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15)
#endif
#ifdef HAS_NV12TORGB565ROW_NEON
ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_MSA
ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_LSX
ANY21C(NV12ToRGB565Row_Any_LSX, NV12ToRGB565Row_LSX, 1, 1, 2, 2, 7)
#endif
#ifdef HAS_NV12TORGB565ROW_LASX
ANY21C(NV12ToRGB565Row_Any_LASX, NV12ToRGB565Row_LASX, 1, 1, 2, 2, 15)
#endif
#undef ANY21C

// Any 2 planes of 16 bit to 1 with yuvconstants
#define ANY21CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \
  void NAMEANY(const T* y_buf, const T* uv_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(T vin[16 * 2]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \
    } \
    memcpy(vin, y_buf + n, r * SBPP); \
    memcpy(vin + 16, uv_buf + 2 * (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP * 2); \
    ANY_SIMD(vin, vin + 16, vout, yuvconstants, MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, vout, SS(r, DUVSHIFT) * BPP); \
  }

#ifdef HAS_P210TOAR30ROW_SSSE3
ANY21CT(P210ToAR30Row_Any_SSSE3, P210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_SSSE3
ANY21CT(P210ToARGBRow_Any_SSSE3, P210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P210TOARGBROW_AVX2
ANY21CT(P210ToARGBRow_Any_AVX2, P210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P210TOAR30ROW_AVX2
ANY21CT(P210ToAR30Row_Any_AVX2, P210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_SSSE3
ANY21CT(P410ToAR30Row_Any_SSSE3, P410ToAR30Row_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_SSSE3
ANY21CT(P410ToARGBRow_Any_SSSE3, P410ToARGBRow_SSSE3, 0, 0, uint16_t, 2, 4, 7)
#endif
#ifdef HAS_P410TOARGBROW_AVX2
ANY21CT(P410ToARGBRow_Any_AVX2, P410ToARGBRow_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif
#ifdef HAS_P410TOAR30ROW_AVX2
ANY21CT(P410ToAR30Row_Any_AVX2, P410ToAR30Row_AVX2, 0, 0, uint16_t, 2, 4, 15)
#endif

#undef ANY21CT

// Any 2 16 bit planes with parameter to 1
#define ANY21PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \
  void NAMEANY(const T* src_u, const T* src_v, T* dst_uv, int depth, \
               int width) { \
    SIMD_ALIGNED(T vin[16 * 2]); \
    SIMD_ALIGNED(T vout[16]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_u, src_v, dst_uv, depth, n); \
    } \
    memcpy(vin, src_u + n, r * BPP); \
    memcpy(vin + 16, src_v + n, r * BPP); \
    ANY_SIMD(vin, vin + 16, vout, depth, MASK + 1); \
    memcpy(dst_uv + n * 2, vout, r * BPP * 2); \
  }

#ifdef HAS_MERGEUVROW_16_AVX2
ANY21PT(MergeUVRow_16_Any_AVX2, MergeUVRow_16_AVX2, uint16_t, 2, 7)
#endif
#ifdef HAS_MERGEUVROW_16_NEON
ANY21PT(MergeUVRow_16_Any_NEON, MergeUVRow_16_NEON, uint16_t, 2, 7)
#endif

#undef ANY21PT

// Any 1 to 1.
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t vin[128]); \
    SIMD_ALIGNED(uint8_t vout[128]); \
    memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, n); \
    } \
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
    ANY_SIMD(vin, vout, MASK + 1); \
    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
  }

#ifdef HAS_COPYROW_AVX
ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63)
#endif
#ifdef HAS_COPYROW_SSE2
ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31)
#endif
#ifdef HAS_COPYROW_NEON
ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31)
#endif
#if defined(HAS_ARGBTORGB24ROW_SSSE3)
ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3)
ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX2)
ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORGB24ROW_AVX512VBMI)
ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORAWROW_AVX2)
ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31)
#endif
#if defined(HAS_ARGBTORGB565ROW_AVX2)
ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_ARGBTOARGB4444ROW_AVX2)
ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7)
#endif
#if defined(HAS_ABGRTOAR30ROW_SSSE3)
ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3)
#endif
#if defined(HAS_ARGBTOAR30ROW_SSSE3)
ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3)
#endif
#if defined(HAS_ABGRTOAR30ROW_AVX2)
ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7)
#endif
#if defined(HAS_ARGBTOAR30ROW_AVX2)
ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_SSE2)
ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7)
#endif
#if defined(HAS_J400TOARGBROW_AVX2)
ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15)
#endif
#if defined(HAS_RGB24TOARGBROW_SSSE3)
ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15)
ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7)
ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7)
#endif
#if defined(HAS_RAWTORGBAROW_SSSE3)
ANY11(RAWToRGBARow_Any_SSSE3, RAWToRGBARow_SSSE3, 0, 3, 4, 15)
#endif
#if defined(HAS_RAWTORGB24ROW_SSSE3)
ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7)
#endif
#if defined(HAS_RGB565TOARGBROW_AVX2)
ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB1555TOARGBROW_AVX2)
ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGB4444TOARGBROW_AVX2)
ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15)
#endif
#if defined(HAS_ARGBTORGB24ROW_NEON)
ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7)
ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7)
#endif
#if defined(HAS_ARGBTORGB24ROW_MSA)
ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7)
ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15)
#endif
#if defined(HAS_ARGBTORGB24ROW_LSX)
ANY11(ARGBToRGB24Row_Any_LSX, ARGBToRGB24Row_LSX, 0, 4, 3, 15)
ANY11(ARGBToRAWRow_Any_LSX, ARGBToRAWRow_LSX, 0, 4, 3, 15)
ANY11(ARGBToRGB565Row_Any_LSX, ARGBToRGB565Row_LSX, 0, 4, 2, 7)
ANY11(ARGBToARGB1555Row_Any_LSX, ARGBToARGB1555Row_LSX, 0, 4, 2, 7)
ANY11(ARGBToARGB4444Row_Any_LSX, ARGBToARGB4444Row_LSX, 0, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB24ROW_LASX)
ANY11(ARGBToRGB24Row_Any_LASX, ARGBToRGB24Row_LASX, 0, 4, 3, 31)
ANY11(ARGBToRAWRow_Any_LASX, ARGBToRAWRow_LASX, 0, 4, 3, 31)
ANY11(ARGBToRGB565Row_Any_LASX, ARGBToRGB565Row_LASX, 0, 4, 2, 15)
ANY11(ARGBToARGB1555Row_Any_LASX, ARGBToARGB1555Row_LASX, 0, 4, 2, 15)
ANY11(ARGBToARGB4444Row_Any_LASX, ARGBToARGB4444Row_LASX, 0, 4, 2, 15)
#endif
#if defined(HAS_J400TOARGBROW_LSX)
ANY11(J400ToARGBRow_Any_LSX, J400ToARGBRow_LSX, 0, 1, 4, 15)
#endif
#if defined(HAS_RAWTORGB24ROW_NEON)
ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7)
#endif
#if defined(HAS_RAWTORGB24ROW_MSA)
ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15)
#endif
#if defined(HAS_RAWTORGB24ROW_LSX)
ANY11(RAWToRGB24Row_Any_LSX, RAWToRGB24Row_LSX, 0, 3, 3, 15)
#endif
#ifdef HAS_ARGBTOYROW_AVX2
ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_AVX2
ANY11(ABGRToYRow_Any_AVX2, ABGRToYRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_AVX2
ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYJROW_AVX2
ANY11(ABGRToYJRow_Any_AVX2, ABGRToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_RGBATOYJROW_AVX2
ANY11(RGBAToYJRow_Any_AVX2, RGBAToYJRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_AVX2
ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31)
#endif
#ifdef HAS_YUY2TOYROW_AVX2
ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYROW_SSSE3
ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_SSSE3
ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15)
ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15)
ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOYROW_SSE2
ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15)
ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_SSSE3
ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYJROW_SSSE3
ANY11(ABGRToYJRow_Any_SSSE3, ABGRToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_SSSE3
ANY11(RGBAToYJRow_Any_SSSE3, RGBAToYJRow_SSSE3, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_NEON
ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_MSA
ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_LSX
ANY11(ARGBToYRow_Any_LSX, ARGBToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYROW_LASX
ANY11(ARGBToYRow_Any_LASX, ARGBToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_NEON
ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYJROW_NEON
ANY11(ABGRToYJRow_Any_NEON, ABGRToYJRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_NEON
ANY11(RGBAToYJRow_Any_NEON, RGBAToYJRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_MSA
ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBTOYJROW_LSX
ANY11(ARGBToYJRow_Any_LSX, ARGBToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_LSX
ANY11(RGBAToYJRow_Any_LSX, RGBAToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYJROW_LSX
ANY11(ABGRToYJRow_Any_LSX, ABGRToYJRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYJROW_LASX
ANY11(RGBAToYJRow_Any_LASX, RGBAToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBTOYJROW_LASX
ANY11(ARGBToYJRow_Any_LASX, ARGBToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYJROW_LASX
ANY11(ABGRToYJRow_Any_LASX, ABGRToYJRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_BGRATOYROW_NEON
ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_MSA
ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_LSX
ANY11(BGRAToYRow_Any_LSX, BGRAToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_BGRATOYROW_LASX
ANY11(BGRAToYRow_Any_LASX, BGRAToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_ABGRTOYROW_NEON
ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYROW_MSA
ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7)
#endif
#ifdef HAS_ABGRTOYROW_LSX
ANY11(ABGRToYRow_Any_LSX, ABGRToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_ABGRTOYROW_LASX
ANY11(ABGRToYRow_Any_LASX, ABGRToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_RGBATOYROW_NEON
ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYROW_MSA
ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYROW_LSX
ANY11(RGBAToYRow_Any_LSX, RGBAToYRow_LSX, 0, 4, 1, 15)
#endif
#ifdef HAS_RGBATOYROW_LASX
ANY11(RGBAToYRow_Any_LASX, RGBAToYRow_LASX, 0, 4, 1, 31)
#endif
#ifdef HAS_RGB24TOYROW_NEON
ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_AVX2
ANY11(RGB24ToYJRow_Any_AVX2, RGB24ToYJRow_AVX2, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB24TOYJROW_SSSE3
ANY11(RGB24ToYJRow_Any_SSSE3, RGB24ToYJRow_SSSE3, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_NEON
ANY11(RGB24ToYJRow_Any_NEON, RGB24ToYJRow_NEON, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYROW_MSA
ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYROW_LSX
ANY11(RGB24ToYRow_Any_LSX, RGB24ToYRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_LSX
ANY11(RGB24ToYJRow_Any_LSX, RGB24ToYJRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RGB24TOYJROW_LASX
ANY11(RGB24ToYJRow_Any_LASX, RGB24ToYJRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB24TOYROW_LASX
ANY11(RGB24ToYRow_Any_LASX, RGB24ToYRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RAWTOYROW_NEON
ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYJROW_AVX2
ANY11(RAWToYJRow_Any_AVX2, RAWToYJRow_AVX2, 0, 3, 1, 31)
#endif
#ifdef HAS_RAWTOYJROW_SSSE3
ANY11(RAWToYJRow_Any_SSSE3, RAWToYJRow_SSSE3, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYJROW_NEON
ANY11(RAWToYJRow_Any_NEON, RAWToYJRow_NEON, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYROW_MSA
ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYROW_LSX
ANY11(RAWToYRow_Any_LSX, RAWToYRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYROW_LASX
ANY11(RAWToYRow_Any_LASX, RAWToYRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RAWTOYJROW_LSX
ANY11(RAWToYJRow_Any_LSX, RAWToYJRow_LSX, 0, 3, 1, 15)
#endif
#ifdef HAS_RAWTOYJROW_LASX
ANY11(RAWToYJRow_Any_LASX, RAWToYJRow_LASX, 0, 3, 1, 31)
#endif
#ifdef HAS_RGB565TOYROW_NEON
ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_RGB565TOYROW_MSA
ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_RGB565TOYROW_LSX
ANY11(RGB565ToYRow_Any_LSX, RGB565ToYRow_LSX, 0, 2, 1, 15)
#endif
#ifdef HAS_RGB565TOYROW_LASX
ANY11(RGB565ToYRow_Any_LASX, RGB565ToYRow_LASX, 0, 2, 1, 31)
#endif
#ifdef HAS_ARGB1555TOYROW_NEON
ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_ARGB1555TOYROW_MSA
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15)
#endif
#ifdef HAS_ARGB1555TOYROW_LSX
ANY11(ARGB1555ToYRow_Any_LSX, ARGB1555ToYRow_LSX, 0, 2, 1, 15)
#endif
#ifdef HAS_ARGB1555TOYROW_LASX
ANY11(ARGB1555ToYRow_Any_LASX, ARGB1555ToYRow_LASX, 0, 2, 1, 31)
#endif
#ifdef HAS_ARGB4444TOYROW_NEON
ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7)
#endif
#ifdef HAS_YUY2TOYROW_NEON
ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_UYVYTOYROW_NEON
ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOYROW_MSA
ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_YUY2TOYROW_LSX
ANY11(YUY2ToYRow_Any_LSX, YUY2ToYRow_LSX, 1, 4, 1, 15)
#endif
#ifdef HAS_YUY2TOYROW_LASX
ANY11(YUY2ToYRow_Any_LASX, YUY2ToYRow_LASX, 1, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_MSA
ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31)
#endif
#ifdef HAS_UYVYTOYROW_LSX
ANY11(UYVYToYRow_Any_LSX, UYVYToYRow_LSX, 1, 4, 1, 15)
#endif
#ifdef HAS_UYVYTOYROW_LASX
ANY11(UYVYToYRow_Any_LASX, UYVYToYRow_LASX, 1, 4, 1, 31)
#endif
#ifdef HAS_AYUVTOYROW_NEON
ANY11(AYUVToYRow_Any_NEON, AYUVToYRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_SWAPUVROW_SSSE3
ANY11(SwapUVRow_Any_SSSE3, SwapUVRow_SSSE3, 0, 2, 2, 15)
#endif
#ifdef HAS_SWAPUVROW_AVX2
ANY11(SwapUVRow_Any_AVX2, SwapUVRow_AVX2, 0, 2, 2, 31)
#endif
#ifdef HAS_SWAPUVROW_NEON
ANY11(SwapUVRow_Any_NEON, SwapUVRow_NEON, 0, 2, 2, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_NEON
ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RGB24TOARGBROW_MSA
ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_LSX
ANY11(RGB24ToARGBRow_Any_LSX, RGB24ToARGBRow_LSX, 0, 3, 4, 15)
#endif
#ifdef HAS_RGB24TOARGBROW_LASX
ANY11(RGB24ToARGBRow_Any_LASX, RGB24ToARGBRow_LASX, 0, 3, 4, 31)
#endif
#ifdef HAS_RAWTOARGBROW_NEON
ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTORGBAROW_NEON
ANY11(RAWToRGBARow_Any_NEON, RAWToRGBARow_NEON, 0, 3, 4, 7)
#endif
#ifdef HAS_RAWTOARGBROW_MSA
ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15)
#endif
#ifdef HAS_RAWTOARGBROW_LSX
ANY11(RAWToARGBRow_Any_LSX, RAWToARGBRow_LSX, 0, 3, 4, 15)
#endif
#ifdef HAS_RAWTOARGBROW_LASX
ANY11(RAWToARGBRow_Any_LASX, RAWToARGBRow_LASX, 0, 3, 4, 31)
#endif
#ifdef HAS_RGB565TOARGBROW_NEON
ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_RGB565TOARGBROW_MSA
ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_RGB565TOARGBROW_LSX
ANY11(RGB565ToARGBRow_Any_LSX, RGB565ToARGBRow_LSX, 0, 2, 4, 15)
#endif
#ifdef HAS_RGB565TOARGBROW_LASX
ANY11(RGB565ToARGBRow_Any_LASX, RGB565ToARGBRow_LASX, 0, 2, 4, 31)
#endif
#ifdef HAS_ARGB1555TOARGBROW_NEON
ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB1555TOARGBROW_MSA
ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB1555TOARGBROW_LSX
ANY11(ARGB1555ToARGBRow_Any_LSX, ARGB1555ToARGBRow_LSX, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB1555TOARGBROW_LASX
ANY11(ARGB1555ToARGBRow_Any_LASX, ARGB1555ToARGBRow_LASX, 0, 2, 4, 31)
#endif
#ifdef HAS_ARGB4444TOARGBROW_NEON
ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7)
#endif
#ifdef HAS_ARGB4444TOARGBROW_MSA
ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB4444TOARGBROW_LSX
ANY11(ARGB4444ToARGBRow_Any_LSX, ARGB4444ToARGBRow_LSX, 0, 2, 4, 15)
#endif
#ifdef HAS_ARGB4444TOARGBROW_LASX
ANY11(ARGB4444ToARGBRow_Any_LASX, ARGB4444ToARGBRow_LASX, 0, 2, 4, 31)
#endif
#ifdef HAS_ARGBATTENUATEROW_SSSE3
ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_SSE2
ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3)
#endif
#ifdef HAS_ARGBATTENUATEROW_AVX2
ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBUNATTENUATEROW_AVX2
ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_NEON
ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_MSA
ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_LSX
ANY11(ARGBAttenuateRow_Any_LSX, ARGBAttenuateRow_LSX, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBATTENUATEROW_LASX
ANY11(ARGBAttenuateRow_Any_LASX, ARGBAttenuateRow_LASX, 0, 4, 4, 15)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_NEON
ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_MSA
ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15)
#endif
#ifdef HAS_ARGBEXTRACTALPHAROW_LSX
ANY11(ARGBExtractAlphaRow_Any_LSX, ARGBExtractAlphaRow_LSX, 0, 4, 1, 15)
#endif
#undef ANY11

// Any 1 to 1 blended. Destination is read, modify, write.
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t vin[64]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    memset(vout, 0, sizeof(vout)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, n); \
    } \
    memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \
    memcpy(vout, dst_ptr + n * BPP, r * BPP); \
    ANY_SIMD(vin, vout, MASK + 1); \
    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
  }

#ifdef HAS_ARGBCOPYALPHAROW_AVX2
ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15)
#endif
#ifdef HAS_ARGBCOPYALPHAROW_SSE2
ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2
ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15)
#endif
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7)
#endif
#undef ANY11B

// Any 1 to 1 with parameter.
#define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \
  void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \
    SIMD_ALIGNED(uint8_t vin[64]); \
    SIMD_ALIGNED(uint8_t vout[64]); \
    memset(vin, 0, sizeof(vin)); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(src_ptr, dst_ptr, param, n); \
    } \
    memcpy(vin, src_ptr + n * SBPP, r * SBPP); \
    ANY_SIMD(vin, vout, param, MASK + 1); \
    memcpy(dst_ptr + n * BPP, vout, r * BPP); \
  }

#if defined(HAS_I400TOARGBROW_SSE2)
ANY11P(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, const struct YuvConstants*, 1, 4, 7)
#endif
#if defined(HAS_I400TOARGBROW_AVX2)
ANY11P(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, const struct YuvConstants*, 1, 4, 15)
#endif
#if defined(HAS_I400TOARGBROW_NEON)
ANY11P(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, const struct YuvConstants*, 1, 4, 7)
#endif
#if defined(HAS_I400TOARGBROW_MSA)
ANY11P(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, const struct YuvConstants*, 1, 4, 15)
#endif
#if defined(HAS_I400TOARGBROW_LSX)
ANY11P(I400ToARGBRow_Any_LSX, I400ToARGBRow_LSX, const struct YuvConstants*, 1, 4, 15)
#endif

#if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2, const uint32_t, 4, 2, 3)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_AVX2)
ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, const uint32_t, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_NEON)
ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON, const uint32_t, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_MSA)
ANY11P(ARGBToRGB565DitherRow_Any_MSA, ARGBToRGB565DitherRow_MSA, const uint32_t, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LSX)
ANY11P(ARGBToRGB565DitherRow_Any_LSX, ARGBToRGB565DitherRow_LSX, const uint32_t, 4, 2, 7)
#endif
#if defined(HAS_ARGBTORGB565DITHERROW_LASX)
ANY11P(ARGBToRGB565DitherRow_Any_LASX, ARGBToRGB565DitherRow_LASX, const uint32_t, 4, 2, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_SSSE3
ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_AVX2
ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15)
#endif
#ifdef HAS_ARGBSHUFFLEROW_NEON
ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3)
#endif
#ifdef HAS_ARGBSHUFFLEROW_MSA
ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_LSX
ANY11P(ARGBShuffleRow_Any_LSX, ARGBShuffleRow_LSX, const uint8_t*, 4, 4, 7)
#endif
#ifdef HAS_ARGBSHUFFLEROW_LASX
ANY11P(ARGBShuffleRow_Any_LASX, ARGBShuffleRow_LASX, const uint8_t*, 4, 4, 15)
#endif
#undef ANY11P

// Any 1 to 1 with type
#define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \
  void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t vin[(MASK + 1) * SBPP]); \
SBPP]); \ 1533 SIMD_ALIGNED(uint8_t vout[(MASK + 1) * BPP]); \ 1534 memset(vin, 0, sizeof(vin)); /* for msan */ \ 1535 int r = width & MASK; \ 1536 int n = width & ~MASK; \ 1537 if (n > 0) { \ 1538 ANY_SIMD(src_ptr, dst_ptr, n); \ 1539 } \ 1540 memcpy(vin, (uint8_t*)(src_ptr) + n * SBPP, r * SBPP); \ 1541 ANY_SIMD((STYPE*)vin, (DTYPE*)vout, MASK + 1); \ 1542 memcpy((uint8_t*)(dst_ptr) + n * BPP, vout, r * BPP); \ 1543 } 1544 1545 #ifdef HAS_ARGBTOAR64ROW_SSSE3 1546 ANY11T(ARGBToAR64Row_Any_SSSE3, ARGBToAR64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3) 1547 #endif 1548 1549 #ifdef HAS_ARGBTOAB64ROW_SSSE3 1550 ANY11T(ARGBToAB64Row_Any_SSSE3, ARGBToAB64Row_SSSE3, 4, 8, uint8_t, uint16_t, 3) 1551 #endif 1552 1553 #ifdef HAS_AR64TOARGBROW_SSSE3 1554 ANY11T(AR64ToARGBRow_Any_SSSE3, AR64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3) 1555 #endif 1556 1557 #ifdef HAS_AB64TOARGBROW_SSSE3 1558 ANY11T(AB64ToARGBRow_Any_SSSE3, AB64ToARGBRow_SSSE3, 8, 4, uint16_t, uint8_t, 3) 1559 #endif 1560 1561 #ifdef HAS_ARGBTOAR64ROW_AVX2 1562 ANY11T(ARGBToAR64Row_Any_AVX2, ARGBToAR64Row_AVX2, 4, 8, uint8_t, uint16_t, 7) 1563 #endif 1564 1565 #ifdef HAS_ARGBTOAB64ROW_AVX2 1566 ANY11T(ARGBToAB64Row_Any_AVX2, ARGBToAB64Row_AVX2, 4, 8, uint8_t, uint16_t, 7) 1567 #endif 1568 1569 #ifdef HAS_AR64TOARGBROW_AVX2 1570 ANY11T(AR64ToARGBRow_Any_AVX2, AR64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7) 1571 #endif 1572 1573 #ifdef HAS_AB64TOARGBROW_AVX2 1574 ANY11T(AB64ToARGBRow_Any_AVX2, AB64ToARGBRow_AVX2, 8, 4, uint16_t, uint8_t, 7) 1575 #endif 1576 1577 #ifdef HAS_ARGBTOAR64ROW_NEON 1578 ANY11T(ARGBToAR64Row_Any_NEON, ARGBToAR64Row_NEON, 4, 8, uint8_t, uint16_t, 7) 1579 #endif 1580 1581 #ifdef HAS_ARGBTOAB64ROW_NEON 1582 ANY11T(ARGBToAB64Row_Any_NEON, ARGBToAB64Row_NEON, 4, 8, uint8_t, uint16_t, 7) 1583 #endif 1584 1585 #ifdef HAS_AR64TOARGBROW_NEON 1586 ANY11T(AR64ToARGBRow_Any_NEON, AR64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7) 1587 #endif 1588 1589 #ifdef HAS_AB64TOARGBROW_NEON 1590 ANY11T(AB64ToARGBRow_Any_NEON, AB64ToARGBRow_NEON, 8, 4, uint16_t, uint8_t, 7) 1591 #endif 1592 1593 #undef ANY11T 1594 1595 // Any 1 to 1 with scale parameter, for 8/16 bit samples. SBPP and BPP are in bytes.
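//
// All of these Any wrappers share one pattern: the bulk of the row,
// n = width & ~MASK pixels, is handled by a single call to the SIMD kernel on
// the caller's buffers, and the remaining r = width & MASK pixels are staged
// through the SIMD_ALIGNED temporaries so the kernel can run one more full
// step without reading or writing past the caller's row.
// Illustrative walk-through, assuming the AVX2 kernel below is compiled in:
//   Convert16To8Row_Any_AVX2(src, dst, scale, /*width=*/100);
//     // MASK = 31, so n = 96 and r = 4.
//     // 1. Convert16To8Row_AVX2(src, dst, scale, 96) converts the first 96.
//     // 2. The last 4 uint16_t values are memcpy'd into vin.
//     // 3. Convert16To8Row_AVX2(vin, vout, scale, 32) runs on the temporaries.
//     // 4. Only the 4 valid bytes of vout are memcpy'd to dst + 96.
// Callers normally pick the Any variant for arbitrary widths and the bare
// kernel only when width is already a multiple of the step, e.g. (sketch of
// the usual dispatch, not part of this file):
//   Convert16To8Row = Convert16To8Row_Any_AVX2;
//   if (IS_ALIGNED(width, 32)) Convert16To8Row = Convert16To8Row_AVX2;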
1596 #define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \ 1597 void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \ 1598 SIMD_ALIGNED(STYPE vin[32]); \ 1599 SIMD_ALIGNED(DTYPE vout[32]); \ 1600 memset(vin, 0, sizeof(vin)); /* for msan */ \ 1601 int r = width & MASK; \ 1602 int n = width & ~MASK; \ 1603 if (n > 0) { \ 1604 ANY_SIMD(src_ptr, dst_ptr, scale, n); \ 1605 } \ 1606 memcpy(vin, src_ptr + n, r * SBPP); \ 1607 ANY_SIMD(vin, vout, scale, MASK + 1); \ 1608 memcpy(dst_ptr + n, vout, r * BPP); \ 1609 } 1610 1611 #ifdef HAS_CONVERT16TO8ROW_SSSE3 1612 ANY11C(Convert16To8Row_Any_SSSE3, 1613 Convert16To8Row_SSSE3, 1614 2, 1615 1, 1616 uint16_t, 1617 uint8_t, 1618 15) 1619 #endif 1620 #ifdef HAS_CONVERT16TO8ROW_AVX2 1621 ANY11C(Convert16To8Row_Any_AVX2, 1622 Convert16To8Row_AVX2, 1623 2, 1624 1, 1625 uint16_t, 1626 uint8_t, 1627 31) 1628 #endif 1629 #ifdef HAS_CONVERT16TO8ROW_NEON 1630 ANY11C(Convert16To8Row_Any_NEON, 1631 Convert16To8Row_NEON, 1632 2, 1633 1, 1634 uint16_t, 1635 uint8_t, 1636 15) 1637 #endif 1638 #ifdef HAS_CONVERT8TO16ROW_SSE2 1639 ANY11C(Convert8To16Row_Any_SSE2, 1640 Convert8To16Row_SSE2, 1641 1, 1642 2, 1643 uint8_t, 1644 uint16_t, 1645 15) 1646 #endif 1647 #ifdef HAS_CONVERT8TO16ROW_AVX2 1648 ANY11C(Convert8To16Row_Any_AVX2, 1649 Convert8To16Row_AVX2, 1650 1, 1651 2, 1652 uint8_t, 1653 uint16_t, 1654 31) 1655 #endif 1656 #ifdef HAS_MULTIPLYROW_16_AVX2 1657 ANY11C(MultiplyRow_16_Any_AVX2, 1658 MultiplyRow_16_AVX2, 1659 2, 1660 2, 1661 uint16_t, 1662 uint16_t, 1663 31) 1664 #endif 1665 #ifdef HAS_MULTIPLYROW_16_NEON 1666 ANY11C(MultiplyRow_16_Any_NEON, 1667 MultiplyRow_16_NEON, 1668 2, 1669 2, 1670 uint16_t, 1671 uint16_t, 1672 15) 1673 #endif 1674 #ifdef HAS_DIVIDEROW_16_AVX2 1675 ANY11C(DivideRow_16_Any_AVX2, DivideRow_16_AVX2, 2, 2, uint16_t, uint16_t, 31) 1676 #endif 1677 #ifdef HAS_DIVIDEROW_16_NEON 1678 ANY11C(DivideRow_16_Any_NEON, DivideRow_16_NEON, 2, 2, uint16_t, uint16_t, 15) 1679 #endif 1680 #undef ANY11C 1681 1682 // Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts. 
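//
// For reference, ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t,
// uint16_t, 2, 2, 7) below expands to roughly the following (illustration
// only; the macro text is authoritative):
//   void HalfFloatRow_Any_NEON(const uint16_t* src_ptr, uint16_t* dst_ptr,
//                              float param, int width) {
//     SIMD_ALIGNED(uint16_t vin[32]);
//     SIMD_ALIGNED(uint16_t vout[32]);
//     memset(vin, 0, sizeof(vin)); /* for msan */
//     int r = width & 7;
//     int n = width & ~7;
//     if (n > 0) {
//       HalfFloatRow_NEON(src_ptr, dst_ptr, param, n);
//     }
//     memcpy(vin, src_ptr + n, r * 2);
//     HalfFloatRow_NEON(vin, vout, param, 8);
//     memcpy(dst_ptr + n, vout, r * 2);
//   }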
1683 #define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK) \ 1684 void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \ 1685 SIMD_ALIGNED(ST vin[32]); \ 1686 SIMD_ALIGNED(T vout[32]); \ 1687 memset(vin, 0, sizeof(vin)); /* for msan */ \ 1688 int r = width & MASK; \ 1689 int n = width & ~MASK; \ 1690 if (n > 0) { \ 1691 ANY_SIMD(src_ptr, dst_ptr, param, n); \ 1692 } \ 1693 memcpy(vin, src_ptr + n, r * SBPP); \ 1694 ANY_SIMD(vin, vout, param, MASK + 1); \ 1695 memcpy(dst_ptr + n, vout, r * BPP); \ 1696 } 1697 1698 #ifdef HAS_HALFFLOATROW_SSE2 1699 ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7) 1700 #endif 1701 #ifdef HAS_HALFFLOATROW_AVX2 1702 ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15) 1703 #endif 1704 #ifdef HAS_HALFFLOATROW_F16C 1705 ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15) 1706 ANY11P16(HalfFloat1Row_Any_F16C, 1707 HalfFloat1Row_F16C, 1708 uint16_t, 1709 uint16_t, 1710 2, 1711 2, 1712 15) 1713 #endif 1714 #ifdef HAS_HALFFLOATROW_NEON 1715 ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7) 1716 ANY11P16(HalfFloat1Row_Any_NEON, 1717 HalfFloat1Row_NEON, 1718 uint16_t, 1719 uint16_t, 1720 2, 1721 2, 1722 7) 1723 #endif 1724 #ifdef HAS_HALFFLOATROW_MSA 1725 ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31) 1726 #endif 1727 #ifdef HAS_BYTETOFLOATROW_NEON 1728 ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 3, 7) 1729 #endif 1730 #ifdef HAS_HALFFLOATROW_LSX 1731 ANY11P16(HalfFloatRow_Any_LSX, HalfFloatRow_LSX, uint16_t, uint16_t, 2, 2, 31) 1732 #endif 1733 #undef ANY11P16 1734 1735 // Any 1 to 1 with yuvconstants 1736 #define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ 1737 void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \ 1738 const struct YuvConstants* yuvconstants, int width) { \ 1739 SIMD_ALIGNED(uint8_t vin[128]); \ 1740 SIMD_ALIGNED(uint8_t vout[128]); \ 1741 memset(vin, 0, sizeof(vin)); /* for YUY2 and msan */ \ 1742 int r = width & MASK; \ 1743 int n = width & ~MASK; \ 1744 if (n > 0) { \ 1745 ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ 1746 } \ 1747 memcpy(vin, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ 1748 ANY_SIMD(vin, vout, yuvconstants, MASK + 1); \ 1749 memcpy(dst_ptr + n * BPP, vout, r * BPP); \ 1750 } 1751 1752 #if defined(HAS_YUY2TOARGBROW_SSSE3) 1753 ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) 1754 ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) 1755 #endif 1756 #if defined(HAS_YUY2TOARGBROW_AVX2) 1757 ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) 1758 ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) 1759 #endif 1760 #if defined(HAS_YUY2TOARGBROW_NEON) 1761 ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) 1762 ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) 1763 #endif 1764 #if defined(HAS_YUY2TOARGBROW_MSA) 1765 ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7) 1766 ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7) 1767 #endif 1768 #if defined(HAS_YUY2TOARGBROW_LSX) 1769 ANY11C(YUY2ToARGBRow_Any_LSX, YUY2ToARGBRow_LSX, 1, 4, 4, 7) 1770 ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7) 1771 #endif 1772 #undef ANY11C 1773 1774 // Any 1 to 1 interpolate. Takes 2 rows of source via stride. 
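//
// The interpolate wrappers keep the same n/r split but stage the remainder of
// both source rows in one temporary: row 0 at vin and row 1 at vin + 64, so
// the kernel is re-run on the temporaries with a fixed stride of 64. The
// second row is only copied when source_y_fraction is non-zero; a zero
// fraction means the output is just row 0. Worked example, assuming the AVX2
// kernel below: InterpolateRow_Any_AVX2 with MASK = 31 and width = 70 gives
// n = 64 and r = 6; 64 pixels are interpolated directly into dst_ptr, then the
// 6 leftover pixels of each row are copied to vin and vin + 64, one 32-pixel
// kernel call fills vout, and 6 bytes are copied back to dst_ptr + 64.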
1775 #define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \ 1776 void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \ 1777 int width, int source_y_fraction) { \ 1778 SIMD_ALIGNED(TS vin[64 * 2]); \ 1779 SIMD_ALIGNED(TD vout[64]); \ 1780 memset(vin, 0, sizeof(vin)); /* for msan */ \ 1781 int r = width & MASK; \ 1782 int n = width & ~MASK; \ 1783 if (n > 0) { \ 1784 ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction); \ 1785 } \ 1786 memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \ 1787 if (source_y_fraction) { \ 1788 memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \ 1789 r * SBPP * sizeof(TS)); \ 1790 } \ 1791 ANY_SIMD(vout, vin, 64, MASK + 1, source_y_fraction); \ 1792 memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \ 1793 } 1794 1795 #ifdef HAS_INTERPOLATEROW_AVX2 1796 ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31) 1797 #endif 1798 #ifdef HAS_INTERPOLATEROW_SSSE3 1799 ANY11I(InterpolateRow_Any_SSSE3, 1800 InterpolateRow_SSSE3, 1801 uint8_t, 1802 uint8_t, 1803 1, 1804 1, 1805 15) 1806 #endif 1807 #ifdef HAS_INTERPOLATEROW_NEON 1808 ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, uint8_t, 1, 1, 15) 1809 #endif 1810 #ifdef HAS_INTERPOLATEROW_MSA 1811 ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, uint8_t, 1, 1, 31) 1812 #endif 1813 #ifdef HAS_INTERPOLATEROW_LSX 1814 ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, uint8_t, 1, 1, 31) 1815 #endif 1816 1817 #ifdef HAS_INTERPOLATEROW_16_NEON 1818 ANY11I(InterpolateRow_16_Any_NEON, 1819 InterpolateRow_16_NEON, 1820 uint16_t, 1821 uint16_t, 1822 1, 1823 1, 1824 7) 1825 #endif 1826 #undef ANY11I 1827 1828 // Any 1 to 1 interpolate with scale param 1829 #define ANY11IS(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK) \ 1830 void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \ 1831 int scale, int width, int source_y_fraction) { \ 1832 SIMD_ALIGNED(TS vin[64 * 2]); \ 1833 SIMD_ALIGNED(TD vout[64]); \ 1834 memset(vin, 0, sizeof(vin)); /* for msan */ \ 1835 int r = width & MASK; \ 1836 int n = width & ~MASK; \ 1837 if (n > 0) { \ 1838 ANY_SIMD(dst_ptr, src_ptr, src_stride, scale, n, source_y_fraction); \ 1839 } \ 1840 memcpy(vin, src_ptr + n * SBPP, r * SBPP * sizeof(TS)); \ 1841 if (source_y_fraction) { \ 1842 memcpy(vin + 64, src_ptr + src_stride + n * SBPP, \ 1843 r * SBPP * sizeof(TS)); \ 1844 } \ 1845 ANY_SIMD(vout, vin, 64, scale, MASK + 1, source_y_fraction); \ 1846 memcpy(dst_ptr + n * BPP, vout, r * BPP * sizeof(TD)); \ 1847 } 1848 1849 #ifdef HAS_INTERPOLATEROW_16TO8_NEON 1850 ANY11IS(InterpolateRow_16To8_Any_NEON, 1851 InterpolateRow_16To8_NEON, 1852 uint8_t, 1853 uint16_t, 1854 1, 1855 1, 1856 7) 1857 #endif 1858 #ifdef HAS_INTERPOLATEROW_16TO8_AVX2 1859 ANY11IS(InterpolateRow_16To8_Any_AVX2, 1860 InterpolateRow_16To8_AVX2, 1861 uint8_t, 1862 uint16_t, 1863 1, 1864 1, 1865 31) 1866 #endif 1867 1868 #undef ANY11IS 1869 1870 // Any 1 to 1 mirror. 
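//
// Mirroring reverses the row, so the wrapper differs from the ones above: the
// main SIMD call reads the last n source pixels (starting at src_ptr + r * BPP)
// and writes the first n destination pixels, while the first r source pixels
// are mirrored through the temporaries into the last r destination pixels.
// The temp result is read at offset (MASK + 1 - r) * BPP because the kernel
// mirrors a full block. Worked example, assuming MirrorRow_Any_AVX2 below
// (MASK = 31, BPP = 1) with width = 100, so r = 4 and n = 96:
//   MirrorRow_AVX2(src_ptr + 4, dst_ptr, 96);  // dst[0..95] = src[99..4]
//   memcpy(vin, src_ptr, 4);                   // vin[0..3]  = src[0..3]
//   MirrorRow_AVX2(vin, vout, 32);             // vout[28..31] = src[3..0]
//   memcpy(dst_ptr + 96, vout + 28, 4);        // dst[96..99]  = src[3..0]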
1871 #define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ 1872 void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ 1873 SIMD_ALIGNED(uint8_t vin[64]); \ 1874 SIMD_ALIGNED(uint8_t vout[64]); \ 1875 memset(vin, 0, sizeof(vin)); /* for msan */ \ 1876 int r = width & MASK; \ 1877 int n = width & ~MASK; \ 1878 if (n > 0) { \ 1879 ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \ 1880 } \ 1881 memcpy(vin, src_ptr, r* BPP); \ 1882 ANY_SIMD(vin, vout, MASK + 1); \ 1883 memcpy(dst_ptr + n * BPP, vout + (MASK + 1 - r) * BPP, r * BPP); \ 1884 } 1885 1886 #ifdef HAS_MIRRORROW_AVX2 1887 ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31) 1888 #endif 1889 #ifdef HAS_MIRRORROW_SSSE3 1890 ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15) 1891 #endif 1892 #ifdef HAS_MIRRORROW_NEON 1893 ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 31) 1894 #endif 1895 #ifdef HAS_MIRRORROW_MSA 1896 ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) 1897 #endif 1898 #ifdef HAS_MIRRORROW_LSX 1899 ANY11M(MirrorRow_Any_LSX, MirrorRow_LSX, 1, 31) 1900 #endif 1901 #ifdef HAS_MIRRORROW_LASX 1902 ANY11M(MirrorRow_Any_LASX, MirrorRow_LASX, 1, 63) 1903 #endif 1904 #ifdef HAS_MIRRORUVROW_AVX2 1905 ANY11M(MirrorUVRow_Any_AVX2, MirrorUVRow_AVX2, 2, 15) 1906 #endif 1907 #ifdef HAS_MIRRORUVROW_SSSE3 1908 ANY11M(MirrorUVRow_Any_SSSE3, MirrorUVRow_SSSE3, 2, 7) 1909 #endif 1910 #ifdef HAS_MIRRORUVROW_NEON 1911 ANY11M(MirrorUVRow_Any_NEON, MirrorUVRow_NEON, 2, 31) 1912 #endif 1913 #ifdef HAS_MIRRORUVROW_MSA 1914 ANY11M(MirrorUVRow_Any_MSA, MirrorUVRow_MSA, 2, 7) 1915 #endif 1916 #ifdef HAS_MIRRORUVROW_LSX 1917 ANY11M(MirrorUVRow_Any_LSX, MirrorUVRow_LSX, 2, 7) 1918 #endif 1919 #ifdef HAS_MIRRORUVROW_LASX 1920 ANY11M(MirrorUVRow_Any_LASX, MirrorUVRow_LASX, 2, 15) 1921 #endif 1922 #ifdef HAS_ARGBMIRRORROW_AVX2 1923 ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) 1924 #endif 1925 #ifdef HAS_ARGBMIRRORROW_SSE2 1926 ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3) 1927 #endif 1928 #ifdef HAS_ARGBMIRRORROW_NEON 1929 ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 7) 1930 #endif 1931 #ifdef HAS_ARGBMIRRORROW_MSA 1932 ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) 1933 #endif 1934 #ifdef HAS_ARGBMIRRORROW_LSX 1935 ANY11M(ARGBMirrorRow_Any_LSX, ARGBMirrorRow_LSX, 4, 7) 1936 #endif 1937 #ifdef HAS_ARGBMIRRORROW_LASX 1938 ANY11M(ARGBMirrorRow_Any_LASX, ARGBMirrorRow_LASX, 4, 15) 1939 #endif 1940 #ifdef HAS_RGB24MIRRORROW_SSSE3 1941 ANY11M(RGB24MirrorRow_Any_SSSE3, RGB24MirrorRow_SSSE3, 3, 15) 1942 #endif 1943 #ifdef HAS_RGB24MIRRORROW_NEON 1944 ANY11M(RGB24MirrorRow_Any_NEON, RGB24MirrorRow_NEON, 3, 15) 1945 #endif 1946 #undef ANY11M 1947 1948 // Any 1 plane. 
(memset) 1949 #define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ 1950 void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \ 1951 SIMD_ALIGNED(uint8_t vout[64]); \ 1952 int r = width & MASK; \ 1953 int n = width & ~MASK; \ 1954 if (n > 0) { \ 1955 ANY_SIMD(dst_ptr, v32, n); \ 1956 } \ 1957 ANY_SIMD(vout, v32, MASK + 1); \ 1958 memcpy(dst_ptr + n * BPP, vout, r * BPP); \ 1959 } 1960 1961 #ifdef HAS_SETROW_X86 1962 ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3) 1963 #endif 1964 #ifdef HAS_SETROW_NEON 1965 ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15) 1966 #endif 1967 #ifdef HAS_SETROW_LSX 1968 ANY1(SetRow_Any_LSX, SetRow_LSX, uint8_t, 1, 15) 1969 #endif 1970 #ifdef HAS_ARGBSETROW_NEON 1971 ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3) 1972 #endif 1973 #ifdef HAS_ARGBSETROW_MSA 1974 ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3) 1975 #endif 1976 #ifdef HAS_ARGBSETROW_LSX 1977 ANY1(ARGBSetRow_Any_LSX, ARGBSetRow_LSX, uint32_t, 4, 3) 1978 #endif 1979 #undef ANY1 1980 1981 // Any 1 to 2. Outputs UV planes. 1982 #define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ 1983 void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ 1984 int width) { \ 1985 SIMD_ALIGNED(uint8_t vin[128]); \ 1986 SIMD_ALIGNED(uint8_t vout[128 * 2]); \ 1987 memset(vin, 0, sizeof(vin)); /* for msan */ \ 1988 int r = width & MASK; \ 1989 int n = width & ~MASK; \ 1990 if (n > 0) { \ 1991 ANY_SIMD(src_ptr, dst_u, dst_v, n); \ 1992 } \ 1993 memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ 1994 ANY_SIMD(vin, vout, vout + 128, MASK + 1); \ 1995 memcpy(dst_u + (n >> DUVSHIFT), vout, SS(r, DUVSHIFT)); \ 1996 memcpy(dst_v + (n >> DUVSHIFT), vout + 128, SS(r, DUVSHIFT)); \ 1997 } 1998 1999 #ifdef HAS_SPLITUVROW_SSE2 2000 ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15) 2001 #endif 2002 #ifdef HAS_SPLITUVROW_AVX2 2003 ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) 2004 #endif 2005 #ifdef HAS_SPLITUVROW_NEON 2006 ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) 2007 #endif 2008 #ifdef HAS_SPLITUVROW_MSA 2009 ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31) 2010 #endif 2011 #ifdef HAS_SPLITUVROW_LSX 2012 ANY12(SplitUVRow_Any_LSX, SplitUVRow_LSX, 0, 2, 0, 31) 2013 #endif 2014 #ifdef HAS_ARGBTOUV444ROW_SSSE3 2015 ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) 2016 #endif 2017 #ifdef HAS_YUY2TOUV422ROW_AVX2 2018 ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31) 2019 ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31) 2020 #endif 2021 #ifdef HAS_YUY2TOUV422ROW_SSE2 2022 ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15) 2023 ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) 2024 #endif 2025 #ifdef HAS_YUY2TOUV422ROW_NEON 2026 ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) 2027 ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) 2028 ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) 2029 #endif 2030 #ifdef HAS_YUY2TOUV422ROW_MSA 2031 ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15) 2032 ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31) 2033 ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31) 2034 #endif 2035 #ifdef HAS_YUY2TOUV422ROW_LSX 2036 ANY12(ARGBToUV444Row_Any_LSX, ARGBToUV444Row_LSX, 0, 4, 0, 15) 2037 ANY12(YUY2ToUV422Row_Any_LSX, YUY2ToUV422Row_LSX, 1, 4, 1, 15) 2038 ANY12(UYVYToUV422Row_Any_LSX, UYVYToUV422Row_LSX, 1, 4, 1, 15) 2039 #endif 2040 #ifdef HAS_YUY2TOUV422ROW_LASX 
2041 ANY12(ARGBToUV444Row_Any_LASX, ARGBToUV444Row_LASX, 0, 4, 0, 31) 2042 ANY12(YUY2ToUV422Row_Any_LASX, YUY2ToUV422Row_LASX, 1, 4, 1, 31) 2043 ANY12(UYVYToUV422Row_Any_LASX, UYVYToUV422Row_LASX, 1, 4, 1, 31) 2044 #endif 2045 #undef ANY12 2046 2047 // Any 1 plane of 16 bit to 2 planes with parameter. Outputs U and V planes. 2048 #define ANY12PT(NAMEANY, ANY_SIMD, T, BPP, MASK) \ 2049 void NAMEANY(const T* src_uv, T* dst_u, T* dst_v, int depth, int width) { \ 2050 SIMD_ALIGNED(T vin[16 * 2]); \ 2051 SIMD_ALIGNED(T vout[16 * 2]); \ 2052 memset(vin, 0, sizeof(vin)); /* for msan */ \ 2053 int r = width & MASK; \ 2054 int n = width & ~MASK; \ 2055 if (n > 0) { \ 2056 ANY_SIMD(src_uv, dst_u, dst_v, depth, n); \ 2057 } \ 2058 memcpy(vin, src_uv + n * 2, r * BPP * 2); \ 2059 ANY_SIMD(vin, vout, vout + 16, depth, MASK + 1); \ 2060 memcpy(dst_u + n, vout, r * BPP); \ 2061 memcpy(dst_v + n, vout + 16, r * BPP); \ 2062 } 2063 2064 #ifdef HAS_SPLITUVROW_16_AVX2 2065 ANY12PT(SplitUVRow_16_Any_AVX2, SplitUVRow_16_AVX2, uint16_t, 2, 15) 2066 #endif 2067 2068 #ifdef HAS_SPLITUVROW_16_NEON 2069 ANY12PT(SplitUVRow_16_Any_NEON, SplitUVRow_16_NEON, uint16_t, 2, 7) 2070 #endif 2071 2072 #undef ANY12PT 2073 2074 // Any 1 to 3. Outputs RGB planes. 2075 #define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \ 2076 void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ 2077 uint8_t* dst_b, int width) { \ 2078 SIMD_ALIGNED(uint8_t vin[16 * 3]); \ 2079 SIMD_ALIGNED(uint8_t vout[16 * 3]); \ 2080 memset(vin, 0, sizeof(vin)); /* for msan */ \ 2081 int r = width & MASK; \ 2082 int n = width & ~MASK; \ 2083 if (n > 0) { \ 2084 ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \ 2085 } \ 2086 memcpy(vin, src_ptr + n * BPP, r * BPP); \ 2087 ANY_SIMD(vin, vout, vout + 16, vout + 32, MASK + 1); \ 2088 memcpy(dst_r + n, vout, r); \ 2089 memcpy(dst_g + n, vout + 16, r); \ 2090 memcpy(dst_b + n, vout + 32, r); \ 2091 } 2092 2093 #ifdef HAS_SPLITRGBROW_SSSE3 2094 ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15) 2095 #endif 2096 #ifdef HAS_SPLITRGBROW_NEON 2097 ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15) 2098 #endif 2099 #ifdef HAS_SPLITXRGBROW_SSE2 2100 ANY13(SplitXRGBRow_Any_SSE2, SplitXRGBRow_SSE2, 4, 7) 2101 #endif 2102 #ifdef HAS_SPLITXRGBROW_SSSE3 2103 ANY13(SplitXRGBRow_Any_SSSE3, SplitXRGBRow_SSSE3, 4, 7) 2104 #endif 2105 #ifdef HAS_SPLITXRGBROW_AVX2 2106 ANY13(SplitXRGBRow_Any_AVX2, SplitXRGBRow_AVX2, 4, 15) 2107 #endif 2108 #ifdef HAS_SPLITXRGBROW_NEON 2109 ANY13(SplitXRGBRow_Any_NEON, SplitXRGBRow_NEON, 4, 15) 2110 #endif 2111 2112 // Any 1 to 4. Outputs ARGB planes.
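//
// As with ANY13 above, the temporary output planes live at fixed 16 byte
// offsets inside vout, so these split wrappers assume kernels whose step
// (MASK + 1) is at most 16 pixels. Illustrative remainder handling, assuming
// SplitARGBRow_Any_AVX2 below (MASK = 15) with width = 50, i.e. n = 48, r = 2:
//   SplitARGBRow_AVX2(src_ptr, dst_r, dst_g, dst_b, dst_a, 48);
//   memcpy(vin, src_ptr + 48 * 4, 2 * 4);  // last 2 ARGB pixels (8 bytes)
//   SplitARGBRow_AVX2(vin, vout, vout + 16, vout + 32, vout + 48, 16);
//   // then 2 bytes of each temp plane are copied to dst_r/g/b/a + 48.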
2113 #define ANY14(NAMEANY, ANY_SIMD, BPP, MASK) \ 2114 void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ 2115 uint8_t* dst_b, uint8_t* dst_a, int width) { \ 2116 SIMD_ALIGNED(uint8_t vin[16 * 4]); \ 2117 SIMD_ALIGNED(uint8_t vout[16 * 4]); \ 2118 memset(vin, 0, sizeof(vin)); /* for msan */ \ 2119 int r = width & MASK; \ 2120 int n = width & ~MASK; \ 2121 if (n > 0) { \ 2122 ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, dst_a, n); \ 2123 } \ 2124 memcpy(vin, src_ptr + n * BPP, r * BPP); \ 2125 ANY_SIMD(vin, vout, vout + 16, vout + 32, vout + 48, MASK + 1); \ 2126 memcpy(dst_r + n, vout, r); \ 2127 memcpy(dst_g + n, vout + 16, r); \ 2128 memcpy(dst_b + n, vout + 32, r); \ 2129 memcpy(dst_a + n, vout + 48, r); \ 2130 } 2131 2132 #ifdef HAS_SPLITARGBROW_SSE2 2133 ANY14(SplitARGBRow_Any_SSE2, SplitARGBRow_SSE2, 4, 7) 2134 #endif 2135 #ifdef HAS_SPLITARGBROW_SSSE3 2136 ANY14(SplitARGBRow_Any_SSSE3, SplitARGBRow_SSSE3, 4, 7) 2137 #endif 2138 #ifdef HAS_SPLITARGBROW_AVX2 2139 ANY14(SplitARGBRow_Any_AVX2, SplitARGBRow_AVX2, 4, 15) 2140 #endif 2141 #ifdef HAS_SPLITARGBROW_NEON 2142 ANY14(SplitARGBRow_Any_NEON, SplitARGBRow_NEON, 4, 15) 2143 #endif 2144 2145 // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. 2146 // 128 byte row allows for 32 avx ARGB pixels. 2147 #define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ 2148 void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_u, \ 2149 uint8_t* dst_v, int width) { \ 2150 SIMD_ALIGNED(uint8_t vin[128 * 2]); \ 2151 SIMD_ALIGNED(uint8_t vout[128 * 2]); \ 2152 memset(vin, 0, sizeof(vin)); /* for msan */ \ 2153 int r = width & MASK; \ 2154 int n = width & ~MASK; \ 2155 if (n > 0) { \ 2156 ANY_SIMD(src_ptr, src_stride, dst_u, dst_v, n); \ 2157 } \ 2158 memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ 2159 memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \ 2160 SS(r, UVSHIFT) * BPP); \ 2161 if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \ 2162 memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \ 2163 BPP); \ 2164 memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \ 2165 vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ 2166 } \ 2167 ANY_SIMD(vin, 128, vout, vout + 128, MASK + 1); \ 2168 memcpy(dst_u + (n >> 1), vout, SS(r, 1)); \ 2169 memcpy(dst_v + (n >> 1), vout + 128, SS(r, 1)); \ 2170 } 2171 2172 #ifdef HAS_ARGBTOUVROW_AVX2 2173 ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31) 2174 #endif 2175 #ifdef HAS_ABGRTOUVROW_AVX2 2176 ANY12S(ABGRToUVRow_Any_AVX2, ABGRToUVRow_AVX2, 0, 4, 31) 2177 #endif 2178 #ifdef HAS_ARGBTOUVJROW_AVX2 2179 ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31) 2180 #endif 2181 #ifdef HAS_ABGRTOUVJROW_AVX2 2182 ANY12S(ABGRToUVJRow_Any_AVX2, ABGRToUVJRow_AVX2, 0, 4, 31) 2183 #endif 2184 #ifdef HAS_ARGBTOUVJROW_SSSE3 2185 ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15) 2186 #endif 2187 #ifdef HAS_ABGRTOUVJROW_SSSE3 2188 ANY12S(ABGRToUVJRow_Any_SSSE3, ABGRToUVJRow_SSSE3, 0, 4, 15) 2189 #endif 2190 #ifdef HAS_ARGBTOUVROW_SSSE3 2191 ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15) 2192 ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15) 2193 ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15) 2194 ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15) 2195 #endif 2196 #ifdef HAS_YUY2TOUVROW_AVX2 2197 ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31) 2198 ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31) 2199 #endif 2200 #ifdef HAS_YUY2TOUVROW_SSE2 2201 
ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15) 2202 ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15) 2203 #endif 2204 #ifdef HAS_ARGBTOUVROW_NEON 2205 ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15) 2206 #endif 2207 #ifdef HAS_ARGBTOUVROW_MSA 2208 ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31) 2209 #endif 2210 #ifdef HAS_ARGBTOUVROW_LSX 2211 ANY12S(ARGBToUVRow_Any_LSX, ARGBToUVRow_LSX, 0, 4, 15) 2212 #endif 2213 #ifdef HAS_ARGBTOUVROW_LASX 2214 ANY12S(ARGBToUVRow_Any_LASX, ARGBToUVRow_LASX, 0, 4, 31) 2215 #endif 2216 #ifdef HAS_ARGBTOUVJROW_NEON 2217 ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15) 2218 #endif 2219 #ifdef HAS_ABGRTOUVJROW_NEON 2220 ANY12S(ABGRToUVJRow_Any_NEON, ABGRToUVJRow_NEON, 0, 4, 15) 2221 #endif 2222 #ifdef HAS_ARGBTOUVJROW_MSA 2223 ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31) 2224 #endif 2225 #ifdef HAS_ARGBTOUVJROW_LSX 2226 ANY12S(ARGBToUVJRow_Any_LSX, ARGBToUVJRow_LSX, 0, 4, 15) 2227 #endif 2228 #ifdef HAS_ARGBTOUVJROW_LASX 2229 ANY12S(ARGBToUVJRow_Any_LASX, ARGBToUVJRow_LASX, 0, 4, 31) 2230 #endif 2231 #ifdef HAS_BGRATOUVROW_NEON 2232 ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15) 2233 #endif 2234 #ifdef HAS_BGRATOUVROW_MSA 2235 ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 15) 2236 #endif 2237 #ifdef HAS_BGRATOUVROW_LSX 2238 ANY12S(BGRAToUVRow_Any_LSX, BGRAToUVRow_LSX, 0, 4, 15) 2239 #endif 2240 #ifdef HAS_ABGRTOUVROW_NEON 2241 ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15) 2242 #endif 2243 #ifdef HAS_ABGRTOUVROW_MSA 2244 ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 15) 2245 #endif 2246 #ifdef HAS_ABGRTOUVROW_LSX 2247 ANY12S(ABGRToUVRow_Any_LSX, ABGRToUVRow_LSX, 0, 4, 15) 2248 #endif 2249 #ifdef HAS_RGBATOUVROW_NEON 2250 ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15) 2251 #endif 2252 #ifdef HAS_RGBATOUVROW_MSA 2253 ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 15) 2254 #endif 2255 #ifdef HAS_RGBATOUVROW_LSX 2256 ANY12S(RGBAToUVRow_Any_LSX, RGBAToUVRow_LSX, 0, 4, 15) 2257 #endif 2258 #ifdef HAS_RGB24TOUVROW_NEON 2259 ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15) 2260 #endif 2261 #ifdef HAS_RGB24TOUVJROW_NEON 2262 ANY12S(RGB24ToUVJRow_Any_NEON, RGB24ToUVJRow_NEON, 0, 3, 15) 2263 #endif 2264 #ifdef HAS_RGB24TOUVROW_MSA 2265 ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15) 2266 #endif 2267 #ifdef HAS_RGB24TOUVROW_LSX 2268 ANY12S(RGB24ToUVRow_Any_LSX, RGB24ToUVRow_LSX, 0, 3, 15) 2269 #endif 2270 #ifdef HAS_RGB24TOUVROW_LASX 2271 ANY12S(RGB24ToUVRow_Any_LASX, RGB24ToUVRow_LASX, 0, 3, 31) 2272 #endif 2273 #ifdef HAS_RAWTOUVROW_NEON 2274 ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15) 2275 #endif 2276 #ifdef HAS_RAWTOUVJROW_NEON 2277 ANY12S(RAWToUVJRow_Any_NEON, RAWToUVJRow_NEON, 0, 3, 15) 2278 #endif 2279 #ifdef HAS_RAWTOUVROW_MSA 2280 ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15) 2281 #endif 2282 #ifdef HAS_RAWTOUVROW_LSX 2283 ANY12S(RAWToUVRow_Any_LSX, RAWToUVRow_LSX, 0, 3, 15) 2284 #endif 2285 #ifdef HAS_RAWTOUVROW_LASX 2286 ANY12S(RAWToUVRow_Any_LASX, RAWToUVRow_LASX, 0, 3, 31) 2287 #endif 2288 #ifdef HAS_RGB565TOUVROW_NEON 2289 ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15) 2290 #endif 2291 #ifdef HAS_RGB565TOUVROW_MSA 2292 ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15) 2293 #endif 2294 #ifdef HAS_RGB565TOUVROW_LSX 2295 ANY12S(RGB565ToUVRow_Any_LSX, RGB565ToUVRow_LSX, 0, 2, 15) 2296 #endif 2297 #ifdef HAS_RGB565TOUVROW_LASX 2298 ANY12S(RGB565ToUVRow_Any_LASX, RGB565ToUVRow_LASX, 0, 2, 31) 2299 #endif 
2300 #ifdef HAS_ARGB1555TOUVROW_NEON 2301 ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15) 2302 #endif 2303 #ifdef HAS_ARGB1555TOUVROW_MSA 2304 ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15) 2305 #endif 2306 #ifdef HAS_ARGB1555TOUVROW_LSX 2307 ANY12S(ARGB1555ToUVRow_Any_LSX, ARGB1555ToUVRow_LSX, 0, 2, 15) 2308 #endif 2309 #ifdef HAS_ARGB1555TOUVROW_LASX 2310 ANY12S(ARGB1555ToUVRow_Any_LASX, ARGB1555ToUVRow_LASX, 0, 2, 31) 2311 #endif 2312 #ifdef HAS_ARGB4444TOUVROW_NEON 2313 ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15) 2314 #endif 2315 #ifdef HAS_YUY2TOUVROW_NEON 2316 ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15) 2317 #endif 2318 #ifdef HAS_UYVYTOUVROW_NEON 2319 ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15) 2320 #endif 2321 #ifdef HAS_YUY2TOUVROW_MSA 2322 ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31) 2323 #endif 2324 #ifdef HAS_YUY2TOUVROW_LSX 2325 ANY12S(YUY2ToUVRow_Any_LSX, YUY2ToUVRow_LSX, 1, 4, 15) 2326 #endif 2327 #ifdef HAS_YUY2TOUVROW_LASX 2328 ANY12S(YUY2ToUVRow_Any_LASX, YUY2ToUVRow_LASX, 1, 4, 31) 2329 #endif 2330 #ifdef HAS_UYVYTOUVROW_MSA 2331 ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31) 2332 #endif 2333 #ifdef HAS_UYVYTOUVROW_LSX 2334 ANY12S(UYVYToUVRow_Any_LSX, UYVYToUVRow_LSX, 1, 4, 15) 2335 #endif 2336 #ifdef HAS_UYVYTOUVROW_LASX 2337 ANY12S(UYVYToUVRow_Any_LASX, UYVYToUVRow_LASX, 1, 4, 31) 2338 #endif 2339 #undef ANY12S 2340 2341 // Any 1 to 1 with source stride (2 rows of source). Outputs UV plane. 2342 // 128 byte row allows for 32 avx ARGB pixels. 2343 #define ANY11S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ 2344 void NAMEANY(const uint8_t* src_ptr, int src_stride, uint8_t* dst_vu, \ 2345 int width) { \ 2346 SIMD_ALIGNED(uint8_t vin[128 * 2]); \ 2347 SIMD_ALIGNED(uint8_t vout[128]); \ 2348 memset(vin, 0, sizeof(vin)); /* for msan */ \ 2349 int r = width & MASK; \ 2350 int n = width & ~MASK; \ 2351 if (n > 0) { \ 2352 ANY_SIMD(src_ptr, src_stride, dst_vu, n); \ 2353 } \ 2354 memcpy(vin, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ 2355 memcpy(vin + 128, src_ptr + src_stride + (n >> UVSHIFT) * BPP, \ 2356 SS(r, UVSHIFT) * BPP); \ 2357 if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \ 2358 memcpy(vin + SS(r, UVSHIFT) * BPP, vin + SS(r, UVSHIFT) * BPP - BPP, \ 2359 BPP); \ 2360 memcpy(vin + 128 + SS(r, UVSHIFT) * BPP, \ 2361 vin + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ 2362 } \ 2363 ANY_SIMD(vin, 128, vout, MASK + 1); \ 2364 memcpy(dst_vu + (n >> 1) * 2, vout, SS(r, 1) * 2); \ 2365 } 2366 2367 #ifdef HAS_AYUVTOVUROW_NEON 2368 ANY11S(AYUVToUVRow_Any_NEON, AYUVToUVRow_NEON, 0, 4, 15) 2369 ANY11S(AYUVToVURow_Any_NEON, AYUVToVURow_NEON, 0, 4, 15) 2370 #endif 2371 #undef ANY11S 2372 2373 #define ANYDETILE(NAMEANY, ANY_SIMD, T, BPP, MASK) \ 2374 void NAMEANY(const T* src, ptrdiff_t src_tile_stride, T* dst, int width) { \ 2375 SIMD_ALIGNED(T vin[16]); \ 2376 SIMD_ALIGNED(T vout[16]); \ 2377 memset(vin, 0, sizeof(vin)); /* for msan */ \ 2378 int r = width & MASK; \ 2379 int n = width & ~MASK; \ 2380 if (n > 0) { \ 2381 ANY_SIMD(src, src_tile_stride, dst, n); \ 2382 } \ 2383 memcpy(vin, src + (n / 16) * src_tile_stride, r * BPP); \ 2384 ANY_SIMD(vin, src_tile_stride, vout, MASK + 1); \ 2385 memcpy(dst + n, vout, r * BPP); \ 2386 } 2387 2388 #ifdef HAS_DETILEROW_NEON 2389 ANYDETILE(DetileRow_Any_NEON, DetileRow_NEON, uint8_t, 1, 15) 2390 #endif 2391 #ifdef HAS_DETILEROW_SSE2 2392 ANYDETILE(DetileRow_Any_SSE2, DetileRow_SSE2, uint8_t, 1, 15) 2393 #endif 
2394 #ifdef HAS_DETILEROW_16_NEON 2395 ANYDETILE(DetileRow_16_Any_NEON, DetileRow_16_NEON, uint16_t, 2, 15) 2396 #endif 2397 #ifdef HAS_DETILEROW_16_SSE2 2398 ANYDETILE(DetileRow_16_Any_SSE2, DetileRow_16_SSE2, uint16_t, 2, 15) 2399 #endif 2400 #ifdef HAS_DETILEROW_16_AVX 2401 ANYDETILE(DetileRow_16_Any_AVX, DetileRow_16_AVX, uint16_t, 2, 15) 2402 #endif 2403 2404 // DetileSplitUVRow width is in bytes 2405 #define ANYDETILESPLITUV(NAMEANY, ANY_SIMD, MASK) \ 2406 void NAMEANY(const uint8_t* src_uv, ptrdiff_t src_tile_stride, \ 2407 uint8_t* dst_u, uint8_t* dst_v, int width) { \ 2408 SIMD_ALIGNED(uint8_t vin[16]); \ 2409 SIMD_ALIGNED(uint8_t vout[8 * 2]); \ 2410 memset(vin, 0, sizeof(vin)); /* for msan */ \ 2411 int r = width & MASK; \ 2412 int n = width & ~MASK; \ 2413 if (n > 0) { \ 2414 ANY_SIMD(src_uv, src_tile_stride, dst_u, dst_v, n); \ 2415 } \ 2416 memcpy(vin, src_uv + (n / 16) * src_tile_stride, r); \ 2417 ANY_SIMD(vin, src_tile_stride, vout, vout + 8, r); \ 2418 memcpy(dst_u + n / 2, vout, (r + 1) / 2); \ 2419 memcpy(dst_v + n / 2, vout + 8, (r + 1) / 2); \ 2420 } 2421 2422 #ifdef HAS_DETILESPLITUVROW_NEON 2423 ANYDETILESPLITUV(DetileSplitUVRow_Any_NEON, DetileSplitUVRow_NEON, 15) 2424 #endif 2425 #ifdef HAS_DETILESPLITUVROW_SSSE3 2426 ANYDETILESPLITUV(DetileSplitUVRow_Any_SSSE3, DetileSplitUVRow_SSSE3, 15) 2427 #endif 2428 2429 #define ANYDETILEMERGE(NAMEANY, ANY_SIMD, MASK) \ 2430 void NAMEANY(const uint8_t* src_y, ptrdiff_t src_y_tile_stride, \ 2431 const uint8_t* src_uv, ptrdiff_t src_uv_tile_stride, \ 2432 uint8_t* dst_yuy2, int width) { \ 2433 SIMD_ALIGNED(uint8_t vin[16 * 2]); \ 2434 SIMD_ALIGNED(uint8_t vout[16 * 2]); \ 2435 memset(vin, 0, sizeof(vin)); /* for msan */ \ 2436 int r = width & MASK; \ 2437 int n = width & ~MASK; \ 2438 if (n > 0) { \ 2439 ANY_SIMD(src_y, src_y_tile_stride, src_uv, src_uv_tile_stride, dst_yuy2, \ 2440 n); \ 2441 } \ 2442 memcpy(vin, src_y + (n / 16) * src_y_tile_stride, r); \ 2443 memcpy(vin + 16, src_uv + (n / 16) * src_uv_tile_stride, r); \ 2444 ANY_SIMD(vin, src_y_tile_stride, vin + 16, src_uv_tile_stride, vout, r); \ 2445 memcpy(dst_yuy2 + 2 * n, vout, 2 * r); \ 2446 } 2447 2448 #ifdef HAS_DETILETOYUY2_NEON 2449 ANYDETILEMERGE(DetileToYUY2_Any_NEON, DetileToYUY2_NEON, 15) 2450 #endif 2451 2452 #ifdef HAS_DETILETOYUY2_SSE2 2453 ANYDETILEMERGE(DetileToYUY2_Any_SSE2, DetileToYUY2_SSE2, 15) 2454 #endif 2455 2456 #ifdef __cplusplus 2457 } // extern "C" 2458 } // namespace libyuv 2459 #endif 2460