///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/

// Target: AArch64, GNU assembler syntax ("//" starts a comment).

//------------------------------------------------------------------------------
// push_v_regs
//   Pushes d8-d15, x8-x17 and x29/x30 as ten 16-byte pairs (160 bytes of
//   stack, pre-decrement addressing so sp stays 16-byte aligned).
//------------------------------------------------------------------------------
.macro push_v_regs
    stp     d8, d9, [sp, #-16]!
    stp     d10, d11, [sp, #-16]!
    stp     d12, d13, [sp, #-16]!
    stp     d14, d15, [sp, #-16]!
    stp     x8, x9, [sp, #-16]!
    stp     x10, x11, [sp, #-16]!
    stp     x12, x13, [sp, #-16]!
    stp     x14, x15, [sp, #-16]!
    stp     x16, x17, [sp, #-16]!
    stp     x29, x30, [sp, #-16]!
.endm

//------------------------------------------------------------------------------
// pop_v_regs
//   Exact mirror of push_v_regs: pops the same pairs in reverse order.
//------------------------------------------------------------------------------
.macro pop_v_regs
    ldp     x29, x30, [sp], #16
    ldp     x16, x17, [sp], #16
    ldp     x14, x15, [sp], #16
    ldp     x12, x13, [sp], #16
    ldp     x10, x11, [sp], #16
    ldp     x8, x9, [sp], #16
    ldp     d14, d15, [sp], #16
    ldp     d12, d13, [sp], #16
    ldp     d10, d11, [sp], #16
    ldp     d8, d9, [sp], #16
.endm

//------------------------------------------------------------------------------
// swp first, second
//   Exchanges two operands by bouncing the first one through x16.
//   Clobbers x16. In this file it is only invoked with 64-bit vector
//   elements (vN.d[i]).
//   NOTE(review): the macro name coincides with the ARMv8.1 SWP mnemonic;
//   this file relies on the macro definition taking precedence.
//------------------------------------------------------------------------------
.macro swp first, second
    mov     x16, \first
    mov     \first, \second
    mov     \second, x16
.endm

.text
.p2align 2
.global ixheaacd_sbr_imdct_using_fft

//------------------------------------------------------------------------------
// ixheaacd_sbr_imdct_using_fft
//
// Registers read on entry (as used by the code below):
//   x0  base address of the 16-bit pair table read with LD2 {..}.h in the
//       second stage (presumably the twiddle factors - confirm with caller)
//   x1  size selector: 0x10 takes the radix-4 first stage, every other
//       value takes the radix-8 first stage
//   x2  base address of the input; elements are addressed as 8-byte pairs
//       of 32-bit words
//   x3  destination buffer; written by the first stage, then read and
//       rewritten in place by the second stage
//   x7  pointer to a table of byte indices (copied into x4); four bytes are
//       consumed per first-stage iteration, each scaled by 8 and added to x2
//
// Saved/restored here: x8-x17, x29, x30, d8-d15.
// Modified and not restored: x1-x7, v0-v7, v16-v31, the upper 64 bits of
// v8-v15, and the condition flags.
//
// NOTE(review): all loops test their counter after the body (subs/bne), so
// a size that yields a zero iteration count is not handled.
//------------------------------------------------------------------------------
ixheaacd_sbr_imdct_using_fft:
    push_v_regs

    // x1 == 0x10 -> radix-4 first stage, one pass of the outer loop (x8 = 1).
COND_6:
    cmp     x1, #0x10
    bne     COND_7
    mov     x8, #1
    mov     x4, x7                      // x4 = index byte table
    b       RADIX_4_FIRST_START

    // NOTE(review): the flags set by this compare are never consumed; every
    // size other than 0x10 falls through to the radix-8 first stage.
COND_7:
    cmp     x1, #0x20

    mov     x8, #1
    mov     x4, x7                      // x4 = index byte table

//------------------------------------------------------------------------------
// First stage, radix-8 variant.
//   w9 = x1 >> 5 iterations; x1 is doubled and used as the byte stride.
//   Each iteration gathers four columns (one per vector lane, selected by
//   four index bytes), combines them with add/sub butterflies and stores
//   eight 32-byte groups to [x3].
//------------------------------------------------------------------------------
RADIX_8_FIRST_START:
    lsr     w9, w1, #5
    lsl     w1, w1, #1

RADIX_8_FIRST_LOOP:
    mov     x5, x2
    mov     x6, x2
    mov     x7, x2
    mov     x11, x2

    // Lane 0: x5 = x2 + 8 * index[0]
    ldrb    w12, [x4]
    add     x5, x5, x12, lsl #3
    ld2     {v0.s, v1.s}[0], [x5], x1
    add     x5, x5, x1
    ld2     {v4.s, v5.s}[0], [x5], x1
    sub     x5, x5, x1, lsl #1
    ld2     {v2.s, v3.s}[0], [x5], x1
    add     x5, x5, x1
    ld2     {v6.s, v7.s}[0], [x5], x1
    sub     x5, x5, x1, lsl #2

    // Lane 1: x6 = x2 + 8 * index[1]
    ldrb    w12, [x4, #1]
    add     x6, x6, x12, lsl #3
    ld2     {v0.s, v1.s}[1], [x6], x1
    add     x6, x6, x1
    ld2     {v4.s, v5.s}[1], [x6], x1
    sub     x6, x6, x1, lsl #1
    ld2     {v2.s, v3.s}[1], [x6], x1
    add     x6, x6, x1
    ld2     {v6.s, v7.s}[1], [x6], x1
    sub     x6, x6, x1, lsl #2

    // Lane 2: x7 = x2 + 8 * index[2] (second half of the loads is
    // interleaved with the arithmetic further down)
    ldrb    w12, [x4, #2]
    add     x7, x7, x12, lsl #3
    ld2     {v0.s, v1.s}[2], [x7], x1
    add     x7, x7, x1
    ld2     {v4.s, v5.s}[2], [x7], x1
    sub     x7, x7, x1, lsl #1

    // Lane 3: x11 = x2 + 8 * index[3]
    ldrb    w12, [x4, #3]
    add     x11, x11, x12, lsl #3
    ld2     {v0.s, v1.s}[3], [x11], x1
    add     x11, x11, x1
    ld2     {v4.s, v5.s}[3], [x11], x1
    sub     x11, x11, x1, lsl #1

    // Butterflies on the first four gathered rows, interleaved with the
    // remaining lane 2 / lane 3 loads.
    add     v8.4s, v0.4s, v4.4s
    ld2     {v2.s, v3.s}[2], [x7], x1
    add     x7, x7, x1

    sub     v9.4s, v0.4s, v4.4s
    ld2     {v6.s, v7.s}[2], [x7], x1
    sub     x7, x7, x1, lsl #2

    add     v0.4s, v1.4s, v5.4s
    ld2     {v2.s, v3.s}[3], [x11], x1
    add     x11, x11, x1

    sub     v4.4s, v1.4s, v5.4s
    ld2     {v6.s, v7.s}[3], [x11], x1
    sub     x11, x11, x1, lsl #2

    add     x4, x4, #4                  // consume four index bytes

    // Step every column pointer by half the (doubled) stride for the
    // second group of four rows.
    add     x5, x5, x1, lsr #1
    add     x6, x6, x1, lsr #1
    add     x7, x7, x1, lsr #1
    add     x11, x11, x1, lsr #1

    add     v1.4s, v2.4s, v6.4s
    ld2     {v14.s, v15.s}[0], [x5], x1

    sub     v5.4s, v2.4s, v6.4s
    ld2     {v10.s, v11.s}[0], [x5], x1

    add     v2.4s, v3.4s, v7.4s
    ld2     {v12.s, v13.s}[0], [x5], x1

    sub     v6.4s, v3.4s, v7.4s
    ld2     {v14.s, v15.s}[1], [x6], x1

    add     v3.4s, v9.4s, v6.4s
    ld2     {v10.s, v11.s}[1], [x6], x1

    sub     v7.4s, v9.4s, v6.4s
    ld2     {v12.s, v13.s}[1], [x6], x1

    sub     v6.4s, v4.4s, v5.4s
    ld2     {v14.s, v15.s}[2], [x7], x1

    add     v9.4s, v4.4s, v5.4s
    ld2     {v10.s, v11.s}[2], [x7], x1

    add     v4.4s, v8.4s, v1.4s
    ld2     {v12.s, v13.s}[2], [x7], x1

    sub     v5.4s, v8.4s, v1.4s
    ld2     {v14.s, v15.s}[3], [x11], x1

    add     v8.4s, v0.4s, v2.4s
    ld2     {v10.s, v11.s}[3], [x11], x1

    sub     v0.4s, v0.4s, v2.4s
    ld2     {v12.s, v13.s}[3], [x11], x1

    ld2     {v1.s, v2.s}[0], [x5], x1

    add     v17.4s, v14.4s, v12.4s

    ld2     {v1.s, v2.s}[1], [x6], x1

    sub     v16.4s, v14.4s, v12.4s

    ld2     {v1.s, v2.s}[2], [x7], x1

    add     v14.4s, v15.4s, v13.4s

    ld2     {v1.s, v2.s}[3], [x11], x1

    sub     v12.4s, v15.4s, v13.4s

    add     v15.4s, v10.4s, v1.4s
    sub     v13.4s, v10.4s, v1.4s
    add     v10.4s, v11.4s, v2.4s
    sub     v1.4s, v11.4s, v2.4s

    add     v11.4s, v17.4s, v15.4s
    sub     v2.4s, v17.4s, v15.4s
    add     v17.4s, v14.4s, v10.4s
    sub     v15.4s, v14.4s, v10.4s

    add     v14.4s, v16.4s, v12.4s
    sub     v10.4s, v16.4s, v12.4s
    add     v16.4s, v13.4s, v1.4s
    sub     v12.4s, v13.4s, v1.4s

    add     v1.4s, v14.4s, v12.4s
    sub     v13.4s, v14.4s, v12.4s
    sub     v12.4s, v16.4s, v10.4s

    // Split the 32-bit terms into low (uzp1) and high (uzp2) halfwords so
    // they can be multiplied by the 16-bit constant below.
    uzp1    v22.8h, v1.8h, v1.8h
    uzp2    v23.8h, v1.8h, v1.8h
    add     v14.4s, v16.4s, v10.4s

    uzp1    v26.8h, v13.8h, v13.8h
    uzp2    v27.8h, v13.8h, v13.8h
    add     v16.4s, v4.4s, v11.4s

    uzp1    v24.8h, v12.8h, v12.8h
    uzp2    v25.8h, v12.8h, v12.8h
    sub     v10.4s, v4.4s, v11.4s

    uzp1    v28.8h, v14.8h, v14.8h
    uzp2    v29.8h, v14.8h, v14.8h
    add     v4.4s, v8.4s, v17.4s

    // 0x5a82 = 23170, about 0.7071 * 2^15 (presumably sqrt(1/2) in Q15).
    mov     w14, #0x5a82

    sub     v11.4s, v8.4s, v17.4s

    add     v8.4s, v5.4s, v15.4s
    sub     v17.4s, v5.4s, v15.4s
    sub     v5.4s, v0.4s, v2.4s
    add     v15.4s, v0.4s, v2.4s

    dup     v31.4h, w14

    // 32x16 multiply built from halves: (low * c) >> 15, then a saturating
    // doubling multiply-accumulate of (high * c).
    umull   v19.4s, v26.4h, v31.4h
    umull   v18.4s, v28.4h, v31.4h
    sshr    v19.4s, v19.4s, #15
    sshr    v18.4s, v18.4s, #15

    sqdmlal v19.4s, v27.4h, v31.4h
    sqdmlal v18.4s, v29.4h, v31.4h

    umull   v13.4s, v24.4h, v31.4h
    umull   v14.4s, v22.4h, v31.4h

    add     v20.4s, v3.4s, v19.4s
    sub     v21.4s, v3.4s, v19.4s
    add     v30.4s, v6.4s, v18.4s
    sub     v6.4s, v6.4s, v18.4s

    sshr    v13.4s, v13.4s, #15
    sshr    v14.4s, v14.4s, #15

    sqdmlal v13.4s, v25.4h, v31.4h
    sqdmlal v14.4s, v23.4h, v31.4h

    add     v3.4s, v7.4s, v13.4s
    sub     v19.4s, v7.4s, v13.4s
    add     v1.4s, v9.4s, v14.4s
    sub     v18.4s, v9.4s, v14.4s

    // Exchange v17 <-> v8 and v4 <-> v16 (both 64-bit halves each).
    swp     v17.d[0], v8.d[0]
    swp     v17.d[1], v8.d[1]
    swp     v4.d[0], v16.d[0]
    swp     v4.d[1], v16.d[1]

    // Transpose pairs of result vectors (trn1/trn2) and double every
    // element (shl #1).
    trn1    v12.4s, v4.4s, v20.4s
    trn2    v22.4s, v4.4s, v20.4s

    shl     v12.4s, v12.4s, #1
    trn1    v9.4s, v17.4s, v3.4s
    trn2    v2.4s, v17.4s, v3.4s
    shl     v22.4s, v22.4s, #1

    shl     v9.4s, v9.4s, #1
    trn1    v24.4s, v10.4s, v21.4s
    trn2    v7.4s, v10.4s, v21.4s
    shl     v2.4s, v2.4s, #1

    shl     v24.4s, v24.4s, #1
    trn1    v13.4s, v16.4s, v6.4s
    trn2    v23.4s, v16.4s, v6.4s
    shl     v7.4s, v7.4s, #1

    shl     v13.4s, v13.4s, #1
    trn1    v10.4s, v5.4s, v18.4s
    trn2    v3.4s, v5.4s, v18.4s
    shl     v23.4s, v23.4s, #1

    shl     v10.4s, v10.4s, #1
    trn1    v26.4s, v8.4s, v19.4s
    trn2    v4.4s, v8.4s, v19.4s
    shl     v3.4s, v3.4s, #1

    shl     v26.4s, v26.4s, #1
    trn1    v25.4s, v11.4s, v30.4s
    trn2    v8.4s, v11.4s, v30.4s
    shl     v4.4s, v4.4s, #1

    shl     v25.4s, v25.4s, #1
    trn1    v27.4s, v15.4s, v1.4s
    trn2    v5.4s, v15.4s, v1.4s
    shl     v8.4s, v8.4s, #1

    shl     v27.4s, v27.4s, #1
    swp     v9.d[0], v12.d[1]
    shl     v5.4s, v5.4s, #1
    swp     v2.d[0], v22.d[1]

    swp     v24.d[1], v26.d[0]
    swp     v7.d[1], v4.d[0]
    swp     v10.d[0], v13.d[1]
    swp     v3.d[0], v23.d[1]
    swp     v27.d[0], v25.d[1]
    swp     v5.d[0], v8.d[1]

    // Store eight interleaved 32-byte groups, advancing x3 by 32 each time.
    mov     x15, #32
    st2     {v12.4s, v13.4s}, [x3], x15
    st2     {v24.4s, v25.4s}, [x3], x15
    st2     {v22.4s, v23.4s}, [x3], x15
    st2     {v7.4s, v8.4s}, [x3], x15
    st2     {v9.4s, v10.4s}, [x3], x15
    st2     {v26.4s, v27.4s}, [x3], x15
    st2     {v2.4s, v3.4s}, [x3], x15
    st2     {v4.4s, v5.4s}, [x3], x15

    subs    x9, x9, #1
    bne     RADIX_8_FIRST_LOOP

    // Undo the doubling of x1, rewind x3 by 8 * x1 bytes and set up the
    // second-stage parameters for the radix-8 case.
    lsr     x1, x1, #1
    lsl     x15, x1, #3
    sub     x3, x3, x15

    mov     x5, #8
    mov     x4, #32
    lsr     x15, x1, #5
    mov     x6, x15
    b       RADIX_4_FIRST_ENDS

RADIX_8_FIRST_ENDS:

//------------------------------------------------------------------------------
// First stage, radix-4 variant.
//   w9 = x1 >> 4 iterations; x1 is doubled and used as the byte stride.
//   Each iteration gathers four columns (one per lane), applies add/sub
//   butterflies and stores four 32-byte groups to [x3].
//------------------------------------------------------------------------------
RADIX_4_FIRST_START:
    lsr     w9, w1, #4
    lsl     w1, w1, #1

RADIX_4_LOOP:
    mov     x5, x2
    mov     x6, x2
    mov     x7, x2
    mov     x11, x2

    // Lane 0: x5 = x2 + 8 * index[0]
    ldrb    w12, [x4, #0]
    add     x5, x5, x12, lsl #3

    ld2     {v0.s, v1.s}[0], [x5], x1
    add     x5, x5, x1
    ld2     {v8.s, v9.s}[0], [x5], x1
    sub     x5, x5, x1, lsl #1
    ld2     {v4.s, v5.s}[0], [x5], x1
    add     x5, x5, x1
    ld2     {v12.s, v13.s}[0], [x5], x1

    // Lane 1: x6 = x2 + 8 * index[1]
    ldrb    w12, [x4, #1]
    add     x6, x6, x12, lsl #3
    ld2     {v0.s, v1.s}[1], [x6], x1
    add     x6, x6, x1
    ld2     {v8.s, v9.s}[1], [x6], x1
    sub     x6, x6, x1, lsl #1
    ld2     {v4.s, v5.s}[1], [x6], x1
    add     x6, x6, x1
    ld2     {v12.s, v13.s}[1], [x6], x1

    // Lane 2: x7 = x2 + 8 * index[2]
    ldrb    w12, [x4, #2]
    add     x7, x7, x12, lsl #3

    ld2     {v0.s, v1.s}[2], [x7], x1
    add     x7, x7, x1
    ld2     {v8.s, v9.s}[2], [x7], x1

    // Lane 3: x11 = x2 + 8 * index[3]
    ldrb    w12, [x4, #3]
    add     x11, x11, x12, lsl #3

    ld2     {v0.s, v1.s}[3], [x11], x1
    add     x11, x11, x1
    ld2     {v8.s, v9.s}[3], [x11], x1

    // Remaining lane 2 / lane 3 loads interleaved with the first sums.
    sub     x7, x7, x1, lsl #1
    add     v16.4s, v0.4s, v8.4s
    ld2     {v4.s, v5.s}[2], [x7], x1
    add     x7, x7, x1
    add     v18.4s, v1.4s, v9.4s
    ld2     {v12.s, v13.s}[2], [x7], x1

    sub     x11, x11, x1, lsl #1
    sub     v20.4s, v0.4s, v8.4s
    ld2     {v4.s, v5.s}[3], [x11], x1
    add     x11, x11, x1
    sub     v22.4s, v1.4s, v9.4s
    ld2     {v12.s, v13.s}[3], [x11], x1

    add     x4, x4, #4                  // consume four index bytes

    add     v24.4s, v4.4s, v12.4s
    add     v26.4s, v5.4s, v13.4s
    sub     v28.4s, v4.4s, v12.4s
    sub     v30.4s, v5.4s, v13.4s

    add     v17.4s, v16.4s, v24.4s
    add     v11.4s, v18.4s, v26.4s
    sub     v19.4s, v16.4s, v24.4s
    sub     v15.4s, v18.4s, v26.4s

    add     v8.4s, v20.4s, v30.4s
    sub     v9.4s, v22.4s, v28.4s
    add     v13.4s, v22.4s, v28.4s
    sub     v12.4s, v20.4s, v30.4s

    // Transpose pairs of result vectors and double every element.
    trn1    v0.4s, v17.4s, v8.4s
    trn2    v8.4s, v17.4s, v8.4s

    shl     v0.4s, v0.4s, #1
    trn1    v4.4s, v19.4s, v12.4s
    trn2    v12.4s, v19.4s, v12.4s
    shl     v8.4s, v8.4s, #1

    shl     v4.4s, v4.4s, #1
    trn1    v1.4s, v11.4s, v9.4s
    trn2    v9.4s, v11.4s, v9.4s
    shl     v12.4s, v12.4s, #1

    shl     v1.4s, v1.4s, #1
    trn1    v5.4s, v15.4s, v13.4s
    trn2    v13.4s, v15.4s, v13.4s
    shl     v9.4s, v9.4s, #1

    shl     v5.4s, v5.4s, #1
    swp     v4.d[0], v0.d[1]
    shl     v13.4s, v13.4s, #1

    swp     v12.d[0], v8.d[1]

    swp     v5.d[0], v1.d[1]
    swp     v13.d[0], v9.d[1]

    // Store four interleaved 32-byte groups, advancing x3 by 32 each time.
    mov     x15, #32
    st2     {v0.4s, v1.4s}, [x3], x15
    st2     {v8.4s, v9.4s}, [x3], x15
    st2     {v4.4s, v5.4s}, [x3], x15
    st2     {v12.4s, v13.4s}, [x3], x15

    subs    w9, w9, #1
    bne     RADIX_4_LOOP

    // Undo the doubling of x1, rewind x3 by 8 * x1 bytes and set up the
    // second-stage parameters for the radix-4 case.
    lsr     x1, x1, #1
    sub     x3, x3, x1, lsl #3
    mov     x5, #4
    mov     x4, #64
    lsr     x6, x1, #4

//------------------------------------------------------------------------------
// Second stage: three nested loops working in place on the buffer at x30.
//   x8  outer-loop count (set to 1 above)
//   x5  middle-loop count (first-stage value >> 2); x12 = 32 * x5 is the
//       byte distance between the four rows combined in one butterfly
//   x6  inner-loop count
//   x4  step used to advance the offsets into the table at x0 (x4 << 2)
//------------------------------------------------------------------------------
RADIX_4_FIRST_ENDS:
    mov     x30, x3                     // x30 = start of the working buffer
    lsr     x5, x5, #2

OUTER_LOOP_R4:
    mov     x14, x30

    mov     x7, x5
    mov     x2, #0
    mov     x9, x0
    lsl     x12, x5, #5

MIDDLE_LOOP_R4:
    // Gather three 16-bit pairs per lane from the table at x0. Lane k reads
    // at offsets k', 2k', 3k' where the per-lane offset grows by x4 << 2.
    ld2     {v20.h, v21.h}[0], [x9], x2
    ld2     {v22.h, v23.h}[0], [x9], x2
    add     x11, x2, x4, lsl #2
    ld2     {v24.h, v25.h}[0], [x9]
    add     x10, x0, x11

    ld2     {v20.h, v21.h}[1], [x10], x11
    ld2     {v22.h, v23.h}[1], [x10], x11
    add     x2, x11, x4, lsl #2
    ld2     {v24.h, v25.h}[1], [x10]
    add     x9, x0, x2

    ld2     {v20.h, v21.h}[2], [x9], x2
    ld2     {v22.h, v23.h}[2], [x9], x2
    add     x11, x2, x4, lsl #2
    ld2     {v24.h, v25.h}[2], [x9]
    add     x10, x0, x11

    ld2     {v20.h, v21.h}[3], [x10], x11
    ld2     {v22.h, v23.h}[3], [x10], x11
    add     x2, x11, x4, lsl #2
    ld2     {v24.h, v25.h}[3], [x10]
    add     x9, x0, x2

    mov     x10, x6

INNER_LOOP_R4:
    // Row 0 is loaded as 32-bit pairs and halved; rows 1-3 are loaded with
    // LD4 so each 32-bit word is split into low/high halfwords for the
    // 32x16 products. Low halves are shifted right by one as unsigned.
    ld2     {v30.4s, v31.4s}, [x14], x12
    sshr    v30.4s, v30.4s, #1
    ld4     {v16.4h, v17.4h, v18.4h, v19.4h}, [x14], x12
    sshr    v31.4s, v31.4s, #1

    ushr    v16.4h, v16.4h, #1
    ld4     {v26.4h, v27.4h, v28.4h, v29.4h}, [x14], x12
    ushr    v18.4h, v18.4h, #1

    smull   v11.4s, v16.4h, v20.4h
    smlsl   v11.4s, v18.4h, v21.4h
    ld4     {v0.4h, v1.4h, v2.4h, v3.4h}, [x14], x12
    smull   v12.4s, v16.4h, v21.4h
    smlal   v12.4s, v18.4h, v20.4h

    ushr    v26.4h, v26.4h, #1
    ushr    v28.4h, v28.4h, #1

    lsl     x29, x12, #2                // x29 = 4 * row distance
    sub     x14, x14, x12, lsl #2       // back to row 0

    ushr    v0.4h, v0.4h, #1
    ushr    v2.4h, v2.4h, #1

    smull   v13.4s, v26.4h, v22.4h
    smlsl   v13.4s, v28.4h, v23.4h

    sshr    v11.4s, v11.4s, #15

    smull   v14.4s, v26.4h, v23.4h
    smlal   v14.4s, v28.4h, v22.4h

    smull   v15.4s, v0.4h, v24.4h
    smlsl   v15.4s, v2.4h, v25.4h

    smlal   v11.4s, v17.4h, v20.4h
    smlsl   v11.4s, v19.4h, v21.4h

    sshr    v12.4s, v12.4s, #15
    sshr    v13.4s, v13.4s, #15
    sshr    v14.4s, v14.4s, #15
    sshr    v15.4s, v15.4s, #15

    smlal   v12.4s, v17.4h, v21.4h
    smlal   v12.4s, v19.4h, v20.4h

    smull   v5.4s, v0.4h, v25.4h
    smlal   v5.4s, v2.4h, v24.4h

    smlal   v13.4s, v27.4h, v22.4h
    smlsl   v13.4s, v29.4h, v23.4h

    smlal   v14.4s, v27.4h, v23.4h
    smlal   v14.4s, v29.4h, v22.4h

    smlal   v15.4s, v1.4h, v24.4h
    smlsl   v15.4s, v3.4h, v25.4h

    sshr    v5.4s, v5.4s, #15

    smlal   v5.4s, v1.4h, v25.4h
    smlal   v5.4s, v3.4h, v24.4h

    // Only while x7 == x5 (the first pass of the middle loop): lane 0 of
    // the six products is replaced by the raw buffer word shifted right by
    // one, bypassing the multiply for that lane.
    subs    x17, x7, x5
    bne     BYPASS_IF

    add     x14, x14, x12               // row 1

    ldr     w3, [x14]
    add     x14, x14, x12
    asr     w3, w3, #1
    mov     v11.s[0], w3

    ldr     w3, [x14]
    add     x14, x14, x12
    asr     w3, w3, #1
    mov     v13.s[0], w3

    ldr     w3, [x14]
    asr     w3, w3, #1
    mov     v15.s[0], w3

    sub     x14, x14, x12, lsl #1       // back to row 1 ...
    add     x14, x14, #4                // ... second word of the pair

    ldr     w3, [x14]
    add     x14, x14, x12
    asr     w3, w3, #1
    mov     v12.s[0], w3

    ldr     w3, [x14]
    add     x14, x14, x12
    asr     w3, w3, #1
    mov     v14.s[0], w3

    ldr     w3, [x14]
    add     x14, x14, x12
    asr     w3, w3, #1
    mov     v5.s[0], w3

    sub     x14, x14, #4

    sub     x14, x14, x29               // back to row 0

BYPASS_IF:
    // Combine the four rows with add/sub butterflies and write them back
    // over the rows they were read from.
    add     v6.4s, v30.4s, v13.4s
    add     v7.4s, v31.4s, v14.4s
    sub     v30.4s, v30.4s, v13.4s
    sub     v31.4s, v31.4s, v14.4s
    add     v8.4s, v11.4s, v15.4s
    add     v9.4s, v12.4s, v5.4s

    sub     v15.4s, v11.4s, v15.4s
    sub     v14.4s, v12.4s, v5.4s

    add     v10.4s, v6.4s, v8.4s
    add     v11.4s, v7.4s, v9.4s
    add     v12.4s, v30.4s, v14.4s
    sub     v13.4s, v31.4s, v15.4s

    sub     v6.4s, v6.4s, v8.4s
    st2     {v10.4s, v11.4s}, [x14], x12
    sub     v7.4s, v7.4s, v9.4s

    sub     v8.4s, v30.4s, v14.4s
    st2     {v12.4s, v13.4s}, [x14], x12
    add     v9.4s, v31.4s, v15.4s

    st2     {v6.4s, v7.4s}, [x14], x12
    st2     {v8.4s, v9.4s}, [x14], x12
    subs    x10, x10, #1
    bne     INNER_LOOP_R4

    // Rewind by 8 * x1 bytes and step 32 bytes to the next column group.
    sub     x14, x14, x1, lsl #3
    add     x14, x14, #32

    subs    x7, x7, #1
    bne     MIDDLE_LOOP_R4

    // Rescale the loop parameters for the next outer pass.
    lsr     x4, x4, #2
    lsl     x5, x5, #2
    lsr     x6, x6, #2
    subs    x8, x8, #1
    bne     OUTER_LOOP_R4

END_LOOPS:
    pop_v_regs
    ret