///****************************************************************************** // * // * Copyright (C) 2018 The Android Open Source Project // * // * Licensed under the Apache License, Version 2.0 (the "License"); // * you may not use this file except in compliance with the License. // * You may obtain a copy of the License at: // * // * http://www.apache.org/licenses/LICENSE-2.0 // * // * Unless required by applicable law or agreed to in writing, software // * distributed under the License is distributed on an "AS IS" BASIS, // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // * See the License for the specific language governing permissions and // * limitations under the License. // * // ***************************************************************************** // * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore //*/ .macro push_v_regs stp q8, q9, [sp, #-32]! stp q10, q11, [sp, #-32]! stp q12, q13, [sp, #-32]! stp q14, q15, [sp, #-32]! stp x21, x22, [sp, #-16]! stp x23, x24, [sp, #-16]! .endm .macro pop_v_regs ldp x23, x24, [sp], #16 ldp x21, x22, [sp], #16 ldp q14, q15, [sp], #32 ldp q12, q13, [sp], #32 ldp q10, q11, [sp], #32 ldp q8, q9, [sp], #32 .endm .macro swp reg1, reg2 MOV X16, \reg1 MOV \reg1, \reg2 MOV \reg2, x16 .endm .text .global ixheaacd_sbr_qmfsyn64_winadd ixheaacd_sbr_qmfsyn64_winadd: push_v_regs MOV w7, #0x8000 LD1 {v0.4h}, [x0], #8 MOV x12, x2 dup v30.4s, w7 LD1 {v1.4h}, [x2], #8 dup v22.4s, w4 MOV x10, x0 MOV x11, x2 ADD x0, x0, #504 ADD x2, x2, #248 NEG v28.4s, v22.4s sshL v20.4s, v30.4s, v28.4s MOV x6, #64 LSL x6, x6, #1 ADD x12, x12, x6 MOV x7, #128 LSL x9, x7, #1 ADD x1, x1, x9 MOV x6, #16 MOV x7, #128 LSL x9, x7, #1 MOV x7, #256 LSL x8, x7, #1 LSL x5, x5, #1 LD1 {v2.4h}, [x0], x8 mov v26.16b, v20.16b sMLAL v26.4s, v0.4h, v1.4h LD1 {v3.4h}, [x2], x9 LD1 {v4.4h}, [x0], x8 sMLAL v26.4s, v2.4h, v3.4h LD1 {v5.4h}, [x2], x9 LD1 {v6.4h}, [x0], x8 sMLAL v26.4s, v5.4h, v4.4h LD1 {v7.4h}, [x2], x9 LD1 {v8.4h}, [x0], x8 sMLAL v26.4s, v7.4h, v6.4h LD1 {v9.4h}, [x2], x9 MOV x0, x10 MOV x2, x11 LD1 {v10.4h}, [x1], #8 sMLAL v26.4s, v9.4h, v8.4h MOV x10, x1 LD1 {v11.4h}, [x12], #8 ADD x1, x1, #504 MOV x11, x12 LD1 {v12.4h}, [x1], x8 ADD x12, x12, #248 sMLAL v26.4s, v10.4h, v11.4h LD1 {v13.4h}, [x12], x9 LD1 {v14.4h}, [x1], x8 sMLAL v26.4s, v12.4h, v13.4h LD1 {v15.4h}, [x12], x9 LD1 {v16.4h}, [x1], x8 sMLAL v26.4s, v15.4h, v14.4h LD1 {v17.4h}, [x12], x9 LD1 {v18.4h}, [x1], x8 sMLAL v26.4s, v17.4h, v16.4h LD1 {v19.4h}, [x12], x9 sMLAL v26.4s, v19.4h, v18.4h LD1 {v0.4h}, [x0], #8 MOV x12, x11 MOV x1, x10 LD1 {v1.4h}, [x2], #8 MOV x10, x0 sQshL v26.4s, v26.4s, v22.4s ADD x0, x0, #504 MOV x11, x2 LD1 {v2.4h}, [x0], x8 ADD x2, x2, #248 sshR v28.4s, v26.4s, #16 LD1 {v3.4h}, [x2], x9 UZP2 v29.8h, v28.8h, v28.8h UZP1 v28.8h, v28.8h, v28.8h mov v26.16b, v20.16b LD1 {v4.4h}, [x0], x8 LD1 {v5.4h}, [x2], x9 LD1 {v6.4h}, [x0], x8 LD1 {v7.4h}, [x2], x9 LD1 {v8.4h}, [x0], x8 LD1 {v9.4h}, [x2], x9 MOV x0, x10 MOV x2, x11 LD1 {v10.4h}, [x1], #8 MOV x10, x1 LD1 {v11.4h}, [x12], #8 ADD x1, x1, #504 MOV x11, x12 LD1 {v12.4h}, [x1], x8 ADD x12, x12, #248 LD1 {v13.4h}, [x12], x9 LD1 {v14.4h}, [x1], x8 LD1 {v15.4h}, [x12], x9 LD1 {v16.4h}, [x1], x8 LD1 {v17.4h}, [x12], x9 LD1 {v18.4h}, [x1], x8 SUB x6, x6, #2 LD1 {v19.4h}, [x12], x9 MOV x1, x10 MOV x12, x11 LOOP_1: sMLAL v26.4s, v0.4h, v1.4h ST1 {v28.h}[0], [x3], x5 sMLAL v26.4s, v2.4h, v3.4h LD1 {v0.4h}, [x0], #8 sMLAL v26.4s, v5.4h, v4.4h sMLAL v26.4s, v7.4h, v6.4h ST1 {v28.h}[1], [x3], x5 MOV x10, x0 LD1 {v1.4h}, [x2], #8 ADD x0, x0, #504 sMLAL v26.4s, v9.4h, v8.4h ST1 {v28.h}[2], [x3], x5 sMLAL v26.4s, v10.4h, v11.4h ST1 {v28.h}[3], [x3], x5 MOV x11, x2 LD1 {v2.4h}, [x0], x8 ADD x2, x2, #248 sMLAL v26.4s, v12.4h, v13.4h LD1 {v3.4h}, [x2], x9 sMLAL v26.4s, v15.4h, v14.4h sMLAL v26.4s, v17.4h, v16.4h LD1 {v4.4h}, [x0], x8 sMLAL v26.4s, v19.4h, v18.4h LD1 {v5.4h}, [x2], x9 LD1 {v6.4h}, [x0], x8 sQshL v26.4s, v26.4s, v22.4s sshR v28.4s, v26.4s, #16 LD1 {v7.4h}, [x2], x9 mov v26.16b, v20.16b UZP2 v29.8h, v28.8h, v28.8h UZP1 v28.8h, v28.8h, v28.8h sMLAL v26.4s, v0.4h, v1.4h sMLAL v26.4s, v2.4h, v3.4h LD1 {v8.4h}, [x0], x8 sMLAL v26.4s, v5.4h, v4.4h sMLAL v26.4s, v7.4h, v6.4h LD1 {v9.4h}, [x2], x9 LD1 {v10.4h}, [x1], #8 sMLAL v26.4s, v9.4h, v8.4h MOV x2, x11 LD1 {v11.4h}, [x12], #8 MOV x0, x10 MOV x10, x1 ADD x1, x1, #504 MOV x11, x12 LD1 {v12.4h}, [x1], x8 ADD x12, x12, #248 LD1 {v13.4h}, [x12], x9 sMLAL v26.4s, v10.4h, v11.4h LD1 {v14.4h}, [x1], x8 sMLAL v26.4s, v12.4h, v13.4h LD1 {v15.4h}, [x12], x9 LD1 {v16.4h}, [x1], x8 sMLAL v26.4s, v15.4h, v14.4h LD1 {v17.4h}, [x12], x9 LD1 {v18.4h}, [x1], x8 sMLAL v26.4s, v17.4h, v16.4h LD1 {v19.4h}, [x12], x9 MOV x1, x10 sMLAL v26.4s, v19.4h, v18.4h ST1 {v28.h}[0], [x3], x5 MOV x12, x11 LD1 {v0.4h}, [x0], #8 LD1 {v1.4h}, [x2], #8 sQshL v26.4s, v26.4s, v22.4s ST1 {v28.h}[1], [x3], x5 MOV x10, x0 ST1 {v28.h}[2], [x3], x5 ADD x0, x0, #504 ST1 {v28.h}[3], [x3], x5 MOV x11, x2 sshR v28.4s, v26.4s, #16 LD1 {v2.4h}, [x0], x8 ADD x2, x2, #248 LD1 {v3.4h}, [x2], x9 LD1 {v4.4h}, [x0], x8 LD1 {v5.4h}, [x2], x9 LD1 {v6.4h}, [x0], x8 LD1 {v7.4h}, [x2], x9 LD1 {v8.4h}, [x0], x8 LD1 {v9.4h}, [x2], x9 UZP2 v29.8h, v28.8h, v28.8h UZP1 v28.8h, v28.8h, v28.8h mov v26.16b, v20.16b MOV x0, x10 LD1 {v10.4h}, [x1], #8 MOV x2, x11 MOV x10, x1 LD1 {v11.4h}, [x12], #8 ADD x1, x1, #504 MOV x11, x12 LD1 {v12.4h}, [x1], x8 ADD x12, x12, #248 LD1 {v13.4h}, [x12], x9 LD1 {v14.4h}, [x1], x8 LD1 {v15.4h}, [x12], x9 LD1 {v16.4h}, [x1], x8 LD1 {v17.4h}, [x12], x9 SUBS x6, x6, #2 LD1 {v18.4h}, [x1], x8 MOV x1, x10 LD1 {v19.4h}, [x12], x9 MOV x12, x11 BGT LOOP_1 sMLAL v26.4s, v0.4h, v1.4h ST1 {v28.h}[0], [x3], x5 sMLAL v26.4s, v2.4h, v3.4h sMLAL v26.4s, v5.4h, v4.4h ST1 {v28.h}[1], [x3], x5 sMLAL v26.4s, v7.4h, v6.4h sMLAL v26.4s, v9.4h, v8.4h ST1 {v28.h}[2], [x3], x5 sMLAL v26.4s, v10.4h, v11.4h sMLAL v26.4s, v12.4h, v13.4h ST1 {v28.h}[3], [x3], x5 sMLAL v26.4s, v15.4h, v14.4h sMLAL v26.4s, v17.4h, v16.4h sMLAL v26.4s, v19.4h, v18.4h sQshL v26.4s, v26.4s, v22.4s sshR v28.4s, v26.4s, #16 UZP2 v29.8h, v28.8h, v28.8h UZP1 v28.8h, v28.8h, v28.8h ST1 {v28.h}[0], [x3], x5 ST1 {v28.h}[1], [x3], x5 ST1 {v28.h}[2], [x3], x5 ST1 {v28.h}[3], [x3], x5 pop_v_regs ret