/*!
 * \copy
 *     Copyright (c) 2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */

#ifdef HAVE_NEON_AARCH64
#include "arm_arch64_common_macro.S"

.macro CALC_AND_STORE_SAD
    saddlv s2, v2.8h
    fmov w0, s2
.endm

.macro CALC_AND_STORE_SAD_FOUR
    saddlv s28, v28.8h
    saddlv s29, v29.8h
    saddlv s30, v30.8h
    saddlv s31, v31.8h
    st4 {v28.s, v29.s, v30.s, v31.s}[0], [x4]
.endm

.macro LOAD_8X8_1
    ld1 {v0.8b}, [x0], x1
    ld1 {v1.8b}, [x0], x1
    ld1 {v2.8b}, [x0], x1
    ld1 {v3.8b}, [x0], x1
    ld1 {v4.8b}, [x0], x1
    ld1 {v5.8b}, [x0], x1
    ld1 {v6.8b}, [x0], x1
    ld1 {v7.8b}, [x0], x1
.endm

.macro LOAD_16X8_1
    ld1 {v0.16b}, [x0], x1
    ld1 {v1.16b}, [x0], x1
    ld1 {v2.16b}, [x0], x1
    ld1 {v3.16b}, [x0], x1
    ld1 {v4.16b}, [x0], x1
    ld1 {v5.16b}, [x0], x1
    ld1 {v6.16b}, [x0], x1
    ld1 {v7.16b}, [x0], x1
.endm

.macro LOAD_8X8_2 arg0
    ld1 {v16.8b}, [\arg0], x3
    ld1 {v17.8b}, [\arg0], x3
    ld1 {v18.8b}, [\arg0], x3
    ld1 {v19.8b}, [\arg0], x3
    ld1 {v20.8b}, [\arg0], x3
    ld1 {v21.8b}, [\arg0], x3
    ld1 {v22.8b}, [\arg0], x3
    ld1 {v23.8b}, [\arg0], x3
.endm

.macro CALC_ABS_8X8_1 arg0, arg1
    uab\arg1\()l \arg0, v0.8b, v16.8b
    uabal \arg0, v1.8b, v17.8b
    uabal \arg0, v2.8b, v18.8b
    uabal \arg0, v3.8b, v19.8b
    uabal \arg0, v4.8b, v20.8b
    uabal \arg0, v5.8b, v21.8b
    uabal \arg0, v6.8b, v22.8b
    uabal \arg0, v7.8b, v23.8b
.endm

.macro CALC_ABS_8X8_2 arg0
    uab\arg0\()l v29.8h, v0.8b, v18.8b
    uabal v29.8h, v1.8b, v19.8b
    uabal v29.8h, v2.8b, v20.8b
    uabal v29.8h, v3.8b, v21.8b
    uabal v29.8h, v4.8b, v22.8b
    uabal v29.8h, v5.8b, v23.8b
    uabal v29.8h, v6.8b, v24.8b
    uabal v29.8h, v7.8b, v25.8b
.endm

.macro LOAD_16X8_2 arg0
    ld1 {v16.16b}, [\arg0], x3
    ld1 {v17.16b}, [\arg0], x3
    ld1 {v18.16b}, [\arg0], x3
    ld1 {v19.16b}, [\arg0], x3
    ld1 {v20.16b}, [\arg0], x3
    ld1 {v21.16b}, [\arg0], x3
    ld1 {v22.16b}, [\arg0], x3
    ld1 {v23.16b}, [\arg0], x3
.endm
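
// CALC_ABS_16X8_1/_2 below mirror the 8x8 variants for 16-byte rows,
// pairing each uabal with a uabal2 so both halves of each 16-byte row
// are accumulated. The trailing "d"/"a" macro argument selects the first
// instruction: "d" expands to uabdl (start a fresh accumulator),
// "a" to uabal (accumulate on top of an earlier pass).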
.macro CALC_ABS_16X8_1 arg0, arg1
    uab\arg1\()l \arg0, v0.8b, v16.8b
    uabal2 \arg0, v0.16b, v16.16b
    uabal \arg0, v1.8b, v17.8b
    uabal2 \arg0, v1.16b, v17.16b
    uabal \arg0, v2.8b, v18.8b
    uabal2 \arg0, v2.16b, v18.16b
    uabal \arg0, v3.8b, v19.8b
    uabal2 \arg0, v3.16b, v19.16b
    uabal \arg0, v4.8b, v20.8b
    uabal2 \arg0, v4.16b, v20.16b
    uabal \arg0, v5.8b, v21.8b
    uabal2 \arg0, v5.16b, v21.16b
    uabal \arg0, v6.8b, v22.8b
    uabal2 \arg0, v6.16b, v22.16b
    uabal \arg0, v7.8b, v23.8b
    uabal2 \arg0, v7.16b, v23.16b
.endm

.macro CALC_ABS_16X8_2 arg0
    uab\arg0\()l v29.8h, v0.8b, v18.8b
    uabal2 v29.8h, v0.16b, v18.16b
    uabal v29.8h, v1.8b, v19.8b
    uabal2 v29.8h, v1.16b, v19.16b
    uabal v29.8h, v2.8b, v20.8b
    uabal2 v29.8h, v2.16b, v20.16b
    uabal v29.8h, v3.8b, v21.8b
    uabal2 v29.8h, v3.16b, v21.16b
    uabal v29.8h, v4.8b, v22.8b
    uabal2 v29.8h, v4.16b, v22.16b
    uabal v29.8h, v5.8b, v23.8b
    uabal2 v29.8h, v5.16b, v23.16b
    uabal v29.8h, v6.8b, v24.8b
    uabal2 v29.8h, v6.16b, v24.16b
    uabal v29.8h, v7.8b, v25.8b
    uabal2 v29.8h, v7.16b, v25.16b
.endm

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad4x4_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    ld1 {v0.s}[0], [x0], x1
    ld1 {v1.s}[0], [x2], x3
    uabdl v2.8h, v0.8b, v1.8b
.rept 3
    ld1 {v0.s}[0], [x0], x1
    ld1 {v1.s}[0], [x2], x3
    uabal v2.8h, v0.8b, v1.8b
.endr
    saddlv s2, v2.4h
    fmov w0, s2
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad8x8_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    ld1 {v0.8b}, [x0], x1
    ld1 {v1.8b}, [x2], x3
    uabdl v2.8h, v0.8b, v1.8b
.rept 7
    ld1 {v0.8b}, [x0], x1
    ld1 {v1.8b}, [x2], x3
    uabal v2.8h, v0.8b, v1.8b
.endr
    CALC_AND_STORE_SAD
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad8x16_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    ld1 {v0.8b}, [x0], x1
    ld1 {v1.8b}, [x2], x3
    uabdl v2.8h, v0.8b, v1.8b
.rept 15
    ld1 {v0.8b}, [x0], x1
    ld1 {v1.8b}, [x2], x3
    uabal v2.8h, v0.8b, v1.8b
.endr
    CALC_AND_STORE_SAD
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad16x8_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    ld1 {v0.16b}, [x0], x1
    ld1 {v1.16b}, [x2], x3
    uabdl v2.8h, v0.8b, v1.8b
    uabal2 v2.8h, v0.16b, v1.16b
.rept 7
    ld1 {v0.16b}, [x0], x1
    ld1 {v1.16b}, [x2], x3
    uabal v2.8h, v0.8b, v1.8b
    uabal2 v2.8h, v0.16b, v1.16b
.endr
    CALC_AND_STORE_SAD
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSad16x16_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    ld1 {v0.16b}, [x0], x1
    ld1 {v1.16b}, [x2], x3
    uabdl v2.8h, v0.8b, v1.8b
    uabal2 v2.8h, v0.16b, v1.16b
.rept 15
    ld1 {v0.16b}, [x0], x1
    ld1 {v1.16b}, [x2], x3
    uabal v2.8h, v0.8b, v1.8b
    uabal2 v2.8h, v0.16b, v1.16b
.endr
    CALC_AND_STORE_SAD
WELS_ASM_AARCH64_FUNC_END
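
// The SadFour routines below evaluate four neighboring reference
// candidates in one pass: the block shifted up one row (x2 - x3), down
// one row, left one pixel (x2 - 1) and right one pixel (x2 + 1). The
// four sums accumulate in v28-v31, and CALC_AND_STORE_SAD_FOUR reduces
// them and stores the four 32-bit results to the array pointed to by x4.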
WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour4x4_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    ld1 {v0.s}[0], [x0], x1
    ld1 {v0.s}[1], [x0], x1
    ld1 {v1.s}[0], [x0], x1
    ld1 {v1.s}[1], [x0]
    sub x0, x2, x3
    ld1 {v2.s}[0], [x0], x3
    ld1 {v2.s}[1], [x0], x3
    ld1 {v3.s}[0], [x0], x3
    ld1 {v3.s}[1], [x0], x3
    ld1 {v4.s}[0], [x0], x3
    ld1 {v4.s}[1], [x0], x3

    uabdl v28.8h, v0.8b, v2.8b
    uabal v28.8h, v1.8b, v3.8b

    uabdl v29.8h, v0.8b, v3.8b
    uabal v29.8h, v1.8b, v4.8b

    sub x0, x2, #1
    ld1 {v2.s}[0], [x0], x3
    ld1 {v2.s}[1], [x0], x3
    ld1 {v3.s}[0], [x0], x3
    ld1 {v3.s}[1], [x0]
    uabdl v30.8h, v0.8b, v2.8b
    uabal v30.8h, v1.8b, v3.8b

    add x0, x2, #1
    ld1 {v2.s}[0], [x0], x3
    ld1 {v2.s}[1], [x0], x3
    ld1 {v3.s}[0], [x0], x3
    ld1 {v3.s}[1], [x0]
    uabdl v31.8h, v0.8b, v2.8b
    uabal v31.8h, v1.8b, v3.8b

    CALC_AND_STORE_SAD_FOUR
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour8x8_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    LOAD_8X8_1
    sub x0, x2, x3
    LOAD_8X8_2 x0
    ld1 {v24.8b}, [x0], x3
    ld1 {v25.8b}, [x0]

    CALC_ABS_8X8_1 v28.8h, d
    CALC_ABS_8X8_2 d

    sub x0, x2, #1
    LOAD_8X8_2 x0
    CALC_ABS_8X8_1 v30.8h, d

    add x0, x2, #1
    LOAD_8X8_2 x0
    CALC_ABS_8X8_1 v31.8h, d

    CALC_AND_STORE_SAD_FOUR
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour8x16_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    LOAD_8X8_1
    sub x5, x2, x3
    LOAD_8X8_2 x5
    ld1 {v24.8b}, [x5], x3
    ld1 {v25.8b}, [x5], x3

    CALC_ABS_8X8_1 v28.8h, d
    CALC_ABS_8X8_2 d

    sub x6, x2, #1
    LOAD_8X8_2 x6
    CALC_ABS_8X8_1 v30.8h, d

    add x7, x2, #1
    LOAD_8X8_2 x7
    CALC_ABS_8X8_1 v31.8h, d

    LOAD_8X8_1
    sub x5, x5, x3
    sub x5, x5, x3
    LOAD_8X8_2 x5
    ld1 {v24.8b}, [x5], x3
    ld1 {v25.8b}, [x5]

    CALC_ABS_8X8_1 v28.8h, a
    CALC_ABS_8X8_2 a

    LOAD_8X8_2 x6
    CALC_ABS_8X8_1 v30.8h, a

    LOAD_8X8_2 x7
    CALC_ABS_8X8_1 v31.8h, a

    CALC_AND_STORE_SAD_FOUR
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour16x8_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    LOAD_16X8_1
    sub x0, x2, x3
    LOAD_16X8_2 x0
    ld1 {v24.16b}, [x0], x3
    ld1 {v25.16b}, [x0]

    CALC_ABS_16X8_1 v28.8h, d
    CALC_ABS_16X8_2 d

    sub x0, x2, #1
    LOAD_16X8_2 x0
    CALC_ABS_16X8_1 v30.8h, d

    add x0, x2, #1
    LOAD_16X8_2 x0
    CALC_ABS_16X8_1 v31.8h, d

    CALC_AND_STORE_SAD_FOUR
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSadFour16x16_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3

    LOAD_16X8_1
    sub x5, x2, x3
    LOAD_16X8_2 x5
    ld1 {v24.16b}, [x5], x3
    ld1 {v25.16b}, [x5], x3

    CALC_ABS_16X8_1 v28.8h, d
    CALC_ABS_16X8_2 d

    sub x6, x2, #1
    LOAD_16X8_2 x6
    CALC_ABS_16X8_1 v30.8h, d

    add x7, x2, #1
    LOAD_16X8_2 x7
    CALC_ABS_16X8_1 v31.8h, d

    LOAD_16X8_1
    sub x5, x5, x3
    sub x5, x5, x3
    LOAD_16X8_2 x5
    ld1 {v24.16b}, [x5], x3
    ld1 {v25.16b}, [x5]

    CALC_ABS_16X8_1 v28.8h, a
    CALC_ABS_16X8_2 a

    LOAD_16X8_2 x6
    CALC_ABS_16X8_1 v30.8h, a

    LOAD_16X8_2 x7
    CALC_ABS_16X8_1 v31.8h, a

    CALC_AND_STORE_SAD_FOUR
WELS_ASM_AARCH64_FUNC_END
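
// SATD routines: form the residual between the two blocks, apply a 4x4
// Hadamard transform vertically and horizontally, and sum the absolute
// transform coefficients. Register usage matches the SAD functions
// above: x0/w1 and x2/w3 hold the two sample pointers and strides, and
// the result is returned in w0.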
WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd4x4_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    ld1 {v0.s}[0], [x0], x1
    ld1 {v0.s}[1], [x0], x1
    ld1 {v1.s}[0], [x0], x1
    ld1 {v1.s}[1], [x0]

    ld1 {v2.s}[0], [x2], x3
    ld1 {v2.s}[1], [x2], x3
    ld1 {v3.s}[0], [x2], x3
    ld1 {v3.s}[1], [x2]
    usubl v4.8h, v0.8b, v2.8b //{0,1,2,3,4,5,6,7}
    usubl v5.8h, v1.8b, v3.8b //{8,9,10,11,12,13,14,15}

    //Do the vertical transform
    add v6.8h, v4.8h, v5.8h //{0,4,8,12,1,5,9,13}
    sub v7.8h, v4.8h, v5.8h //{2,6,10,14,3,7,11,15}
    mov x4, v6.d[1]
    mov v6.d[1], v7.d[0]
    ins v7.d[0], x4
    add v4.8h, v6.8h, v7.8h
    sub v5.8h, v6.8h, v7.8h

    //Do the horizontal transform
    trn1 v6.4s, v4.4s, v5.4s
    trn2 v7.4s, v4.4s, v5.4s
    add v4.8h, v6.8h, v7.8h
    sub v5.8h, v6.8h, v7.8h
    trn1 v6.8h, v4.8h, v5.8h
    trn2 v7.8h, v4.8h, v5.8h
    add v4.8h, v6.8h, v7.8h
    abs v4.8h, v4.8h
    saba v4.8h, v6.8h, v7.8h
    uaddlv s4, v4.8h
    fmov w0, s4
    add w0, w0, #1
    lsr w0, w0, #1

WELS_ASM_AARCH64_FUNC_END

.macro SATD_8x4
    ld1 {v0.8b}, [x0], x1
    ld1 {v1.8b}, [x2], x3
    ld1 {v2.8b}, [x0], x1
    usubl v16.8h, v0.8b, v1.8b

    ld1 {v3.8b}, [x2], x3
    usubl v17.8h, v2.8b, v3.8b
    ld1 {v4.8b}, [x0], x1
    ld1 {v5.8b}, [x2], x3

    add v25.8h, v16.8h, v17.8h
    usubl v18.8h, v4.8b, v5.8b

    ld1 {v6.8b}, [x0], x1
    ld1 {v7.8b}, [x2], x3

    usubl v19.8h, v6.8b, v7.8b
    sub v26.8h, v16.8h, v17.8h

    add v27.8h, v18.8h, v19.8h
    sub v28.8h, v18.8h, v19.8h

    add v0.8h, v25.8h, v27.8h
    sub v1.8h, v25.8h, v27.8h

    add v2.8h, v26.8h, v28.8h
    sub v3.8h, v26.8h, v28.8h

    trn1 v4.8h, v0.8h, v1.8h
    trn2 v5.8h, v0.8h, v1.8h
    trn1 v6.8h, v2.8h, v3.8h
    trn2 v7.8h, v2.8h, v3.8h

    add v16.8h, v4.8h, v5.8h
    sabd v17.8h, v4.8h, v5.8h
    abs v16.8h, v16.8h
    add v18.8h, v6.8h, v7.8h
    sabd v19.8h, v6.8h, v7.8h
    abs v18.8h, v18.8h

    trn1 v4.4s, v16.4s, v17.4s
    trn2 v5.4s, v16.4s, v17.4s
    trn1 v6.4s, v18.4s, v19.4s
    trn2 v7.4s, v18.4s, v19.4s

    smax v0.8h, v4.8h, v5.8h
    smax v1.8h, v6.8h, v7.8h
.endm

.macro SATD_16x4
    ld1 {v0.16b}, [x0], x1
    ld1 {v1.16b}, [x2], x3
    ld1 {v2.16b}, [x0], x1
    usubl v16.8h, v0.8b, v1.8b
    usubl2 v24.8h, v0.16b, v1.16b

    ld1 {v3.16b}, [x2], x3
    usubl v17.8h, v2.8b, v3.8b
    usubl2 v25.8h, v2.16b, v3.16b

    ld1 {v4.16b}, [x0], x1
    ld1 {v5.16b}, [x2], x3
    usubl v18.8h, v4.8b, v5.8b
    usubl2 v26.8h, v4.16b, v5.16b

    ld1 {v6.16b}, [x0], x1
    ld1 {v7.16b}, [x2], x3
    usubl v19.8h, v6.8b, v7.8b
    usubl2 v27.8h, v6.16b, v7.16b

    add v0.8h, v16.8h, v17.8h
    sub v1.8h, v16.8h, v17.8h
    add v2.8h, v18.8h, v19.8h
    sub v3.8h, v18.8h, v19.8h

    add v4.8h, v24.8h, v25.8h
    sub v5.8h, v24.8h, v25.8h
    add v6.8h, v26.8h, v27.8h
    sub v7.8h, v26.8h, v27.8h

    add v16.8h, v0.8h, v2.8h
    sub v18.8h, v0.8h, v2.8h
    add v17.8h, v4.8h, v6.8h
    sub v19.8h, v4.8h, v6.8h

    add v0.8h, v1.8h, v3.8h
    sub v2.8h, v1.8h, v3.8h
    add v1.8h, v5.8h, v7.8h
    sub v3.8h, v5.8h, v7.8h

    trn1 v4.8h, v16.8h, v18.8h
    trn2 v6.8h, v16.8h, v18.8h
    trn1 v5.8h, v17.8h, v19.8h
    trn2 v7.8h, v17.8h, v19.8h

    add v16.8h, v4.8h, v6.8h
    sabd v18.8h, v4.8h, v6.8h
    add v17.8h, v5.8h, v7.8h
    sabd v19.8h, v5.8h, v7.8h
    abs v16.8h, v16.8h
    abs v17.8h, v17.8h

    trn1 v4.8h, v0.8h, v2.8h
    trn2 v6.8h, v0.8h, v2.8h
    trn1 v5.8h, v1.8h, v3.8h
    trn2 v7.8h, v1.8h, v3.8h

    add v0.8h, v4.8h, v6.8h
    sabd v2.8h, v4.8h, v6.8h
    add v1.8h, v5.8h, v7.8h
    sabd v3.8h, v5.8h, v7.8h
    abs v0.8h, v0.8h
    abs v1.8h, v1.8h

    trn1 v4.4s, v16.4s, v18.4s
    trn2 v6.4s, v16.4s, v18.4s
    trn1 v5.4s, v17.4s, v19.4s
    trn2 v7.4s, v17.4s, v19.4s

    trn1 v16.4s, v0.4s, v2.4s
    trn2 v18.4s, v0.4s, v2.4s
    trn1 v17.4s, v1.4s, v3.4s
    trn2 v19.4s, v1.4s, v3.4s

    smax v0.8h, v4.8h, v6.8h
    smax v1.8h, v5.8h, v7.8h
    smax v2.8h, v16.8h, v18.8h
    smax v3.8h, v17.8h, v19.8h
    add v0.8h, v0.8h, v1.8h
    add v2.8h, v2.8h, v3.8h
.endm
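
// The wrappers below call SATD_8x4 / SATD_16x4 once per group of four
// rows. Each call leaves per-lane partial sums in v0/v1 (8-wide) or
// v0/v2 (16-wide); the wrappers accumulate these into v31 and reduce
// with uaddlv to produce the scalar SATD in w0.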
WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd16x16_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    SATD_16x4
    add v31.8h, v0.8h, v2.8h
.rept 3
    SATD_16x4
    add v31.8h, v31.8h, v0.8h
    add v31.8h, v31.8h, v2.8h
.endr
    uaddlv s4, v31.8h
    fmov w0, s4
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd16x8_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    SATD_16x4
    add v31.8h, v0.8h, v2.8h

    SATD_16x4
    add v31.8h, v31.8h, v0.8h
    add v31.8h, v31.8h, v2.8h

    uaddlv s4, v31.8h
    fmov w0, s4
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd8x16_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    SATD_8x4
    add v31.8h, v0.8h, v1.8h
.rept 3
    SATD_8x4
    add v31.8h, v31.8h, v0.8h
    add v31.8h, v31.8h, v1.8h
.endr
    uaddlv s4, v31.8h
    fmov w0, s4
WELS_ASM_AARCH64_FUNC_END

WELS_ASM_AARCH64_FUNC_BEGIN WelsSampleSatd8x8_AArch64_neon
    sxtw x1, w1
    sxtw x3, w3
    SATD_8x4
    add v31.8h, v0.8h, v1.8h

    SATD_8x4
    add v31.8h, v31.8h, v0.8h
    add v31.8h, v31.8h, v1.8h
    uaddlv s4, v31.8h
    fmov w0, s4
WELS_ASM_AARCH64_FUNC_END

#endif