@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@ Syntax: GNU as, ARM (AArch32) instruction set with NEON; `@` starts a comment.

.text
.p2align 2

    @ Byte distance from the block addressed by R0 to the second block that is
    @ processed alongside it (256 bytes = 64 32-bit words).
    .equ            SECOND_BLOCK_OFFSET, 256

@------------------------------------------------------------------------------
@ ixheaacd_esbr_cos_sin_mod_loop2
@
@ In-place pass over two blocks of 32-bit words: block A at R0 and block B at
@ R0 + SECOND_BLOCK_OFFSET.  Each block is walked with one cursor moving up
@ from word 0 and one moving down from word 2*M - 1.  The two outermost words
@ of each block are halved (one of them also negated); every other word pair
@ is combined with a coefficient pair (w0, w1) read from R1 using 32x32->64
@ multiplies, a 64-bit add / saturating subtract, and a narrowing >> 32.
@ NEON lane 0 carries block A, lane 1 carries block B.
@
@ In:    R0 = base of block A (words [0, 2*M) of A and of B are accessed)
@        R1 = coefficient table, read sequentially as pairs of 32-bit words:
@             one pair before the loop plus one pair per loop iteration
@        R2 = M  (loop runs M/2 - 1 times)
@ Out:   nothing in registers; both blocks are rewritten in place
@ Saved: r4-r12, lr and d8-d15 are pushed on entry and restored on exit
@ Clobb: R0, R1, R3, d0-d7, d16-d17, condition flags; the saturating
@        instructions may set FPSCR.QC / APSR.Q
@
@ NOTE(review): the pointer arithmetic only tiles the range exactly when M is
@ even; presumably callers guarantee that -- confirm against the C caller.
@------------------------------------------------------------------------------
    .global         ixheaacd_esbr_cos_sin_mod_loop2
    .type           ixheaacd_esbr_cos_sin_mod_loop2, %function
ixheaacd_esbr_cos_sin_mod_loop2:

    STMFD           sp!, {r4-r12, r14}
    VPUSH           {D8-D15}                @ q4-q7 are used as scratch below

    @ ---- cursor setup --------------------------------------------------------
    @ R0  : block A, ascending      R3  : block A, descending
    @ R10 : block B, ascending      R11 : block B, descending
    ADD             R3, R0, R2, LSL #3      @ psubband1 = &subband[2 * M - 1];
    SUB             R3, R3, #4
    ADD             R10, R0, #SECOND_BLOCK_OFFSET
    ADD             R11, R10, R2, LSL #3
    SUB             R11, R11, #4
    MOV             R8, #-4                 @ post-decrement step for lane stores
    LDR             R6, [R0]
    MOV             R4, R2, ASR #1          @ M_2 = ixheaacd_shr32(M, 1);
    SUB             R4, R4, #1              @ R4 = loop trip count (M_2 - 1)

    @ ---- prologue, block A: outermost words ---------------------------------
    ASR             R6, R6, #1              @ *psubband = *psubband >> 1;
    VLD1.32         {D2[0]}, [R3]           @ grab A[2M-1] before it is overwritten

    STR             R6, [R0], #4            @ psubband++;
    LDR             R7, [R0]
    ASR             R7, R7, #1
    RSB             R6, R7, #0              @ -(A[1] >> 1)
    STR             R6, [R3], #-4           @ A[2M-1] = -(A[1] >> 1)
    VLD1.32         {D3[0]}, [R3]           @ im = *psubband1;   (A[2M-2])

    VLD2.32         {D0[0], D1[0]}, [R1]!   @ first coefficient pair: w0 -> D0, w1 -> D1
    VDUP.32         D0, D0[0]               @ broadcast w0 to both lanes
    VDUP.32         D1, D1[0]               @ broadcast w1 to both lanes

    @ ---- prologue, block B: outermost words ---------------------------------
    VLD1.32         {D2[1]}, [R11]          @ re = *psubband12;  (B[2M-1])

    LDR             R6, [R10]
    ASR             R7, R6, #1
    MOV             R9, #0
    QSUB            R7, R9, R7              @ saturating -(B[0] >> 1)

    STR             R7, [R11], #-4          @ B[2M-1] = -(B[0] >> 1)

    LDR             R6, [R10, #4]
    ASR             R6, R6, #1
    STR             R6, [R10], #4           @ B[0] = B[1] >> 1

    VLD1.32         {D3[1]}, [R11]          @ B[2M-2]

    @ D2 = {A[2M-1], B[2M-1]}, D3 = {A[2M-2], B[2M-2]}
    VMULL.S32       q2, d0, d2              @ w0 * D2
    VMULL.S32       q3, d0, d3              @ w0 * D3
    VMULL.S32       q4, d1, d2              @ w1 * D2
    VMULL.S32       q5, d1, d3              @ w1 * D3

    VADD.I64        q6, q4, q3              @ w1*D2 + w0*D3 (non-saturating)
    VQSUB.S64       Q7, Q5, Q2              @ w1*D3 - w0*D2
    VQSUB.S64       Q8, Q2, Q5              @ w0*D2 - w1*D3

    VSHRN.I64       D12, Q6, #32            @ keep the high 32 bits of each lane
    VSHRN.I64       D14, Q7, #32
    VSHRN.I64       D16, Q8, #32

    VST1.32         {D12[0]}, [R3], R8      @ A, descending cursor

    VST1.32         {D14[0]}, [R0]!         @ A, ascending cursor

    VQNEG.S32       D12, D12                @ block B takes the negated sum

    VST1.32         {D12[1]}, [R10]!        @ B, ascending cursor

    VST1.32         {D16[1]}, [R11], R8     @ B, descending cursor

    @ The loop below is bottom-tested, so it would run once even for a zero
    @ trip count and re-process words the prologue has already finalised
    @ (and read an extra coefficient pair).  Skip it when nothing is left.
    CMP             R4, #0
    BLE             .Lcos_sin_mod_loop2_done

    @ Each iteration consumes two words from each end of both blocks.
.Lcos_sin_mod_loop2_body:
    @ ---- first half: pair at the ascending cursors, current (w0, w1) ---------
    VLD1.32         {D2}, [R0]              @ two consecutive words of A
    VLD1.32         {D3}, [R10]             @ two consecutive words of B
    LDR             R5, [R3]                @RE2  (A, descending; used in second half)
    LDR             R6, [R11]               @RE3  (B, descending; used in second half)
    VTRN.32         D2, D3                  @ D2 = {A[k], B[k]}, D3 = {A[k+1], B[k+1]}

    VMULL.S32       q2, d0, d2              @ w0 * D2
    VMULL.S32       q3, d0, d3              @ w0 * D3
    VMULL.S32       q4, d1, d2              @ w1 * D2
    VMULL.S32       q5, d1, d3              @ w1 * D3

    VADD.I64        q6, q4, q3              @ w1*D2 + w0*D3 (non-saturating)
    VQSUB.S64       Q7, Q2, Q5              @ w0*D2 - w1*D3
    VQSUB.S64       Q8, Q5, Q2              @ w1*D3 - w0*D2

    VSHRN.I64       D12, Q6, #32
    VSHRN.I64       D14, Q7, #32
    VSHRN.I64       D16, Q8, #32

    VST1.32         {D12[0]}, [R0]!         @ A, ascending cursor
    VST1.32         {D14[0]}, [R3], R8      @ A, descending cursor (old value is in R5)
    VQNEG.S32       D12, D12

    VST1.32         {D12[1]}, [R11], R8     @ B, descending cursor (old value is in R6)
    VST1.32         {D16[1]}, [R10]!        @ B, ascending cursor

    @ ---- second half: pair at the descending cursors, next (w0, w1) ----------
    VLD2.32         {D0[0], D1[0]}, [R1]!   @ next coefficient pair
    VDUP.32         D0, D0[0]
    VDUP.32         D1, D1[0]

    VMOV            D3, R5, R6              @ D3 = {saved A word, saved B word}
    VLD1.32         {D2[0]}, [R3]           @ D2 = {next-lower A word,
    VLD1.32         {D2[1]}, [R11]          @       next-lower B word}

    VMULL.S32       q2, d0, d2              @ w0 * D2
    VMULL.S32       q3, d0, d3              @ w0 * D3
    VMULL.S32       q4, d1, d2              @ w1 * D2
    VMULL.S32       q5, d1, d3              @ w1 * D3

    VADD.I64        q6, q2, q5              @ w0*D2 + w1*D3 (non-saturating)
    VQSUB.S64       Q7, Q4, Q3              @ w1*D2 - w0*D3
    VQSUB.S64       Q8, Q3, Q4              @ w0*D3 - w1*D2

    VSHRN.I64       D12, Q6, #32
    VSHRN.I64       D14, Q7, #32
    VSHRN.I64       D16, Q8, #32

    VST1.32         {D12[0]}, [R3], R8      @ A, descending cursor
    VST1.32         {D14[0]}, [R0]!         @ A, ascending cursor

    VQNEG.S32       D12, D12

    SUBS            R4, R4, #1              @ flags consumed by BGT; NEON stores leave them alone
    VST1.32         {D12[1]}, [R10]!        @ B, ascending cursor
    VST1.32         {D16[1]}, [R11], R8     @ B, descending cursor

    BGT             .Lcos_sin_mod_loop2_body

.Lcos_sin_mod_loop2_done:
    VPOP            {D8-D15}
    LDMFD           sp!, {r4-r12, r15}      @ restore and return (saved lr -> pc)

    .size           ixheaacd_esbr_cos_sin_mod_loop2, . - ixheaacd_esbr_cos_sin_mod_loop2