// ---------------------------------------------------------------------------
// Target: AArch64, GNU assembler syntax, AAPCS64 calling convention.
// ---------------------------------------------------------------------------

// push_v_regs / pop_v_regs
//   Spill and reload d8-d15, x8-x17, x29 and x30 (10 pairs, 160 bytes, sp
//   stays 16-byte aligned).  Only d8-d15 are callee-saved under AAPCS64; the
//   general-purpose registers are saved conservatively, which means this
//   routine leaves x8-x17 intact for its caller.  The two macros must be used
//   as a matched pair because pop_v_regs reloads in exact reverse order.
.macro push_v_regs
    stp     d8,  d9,  [sp, #-16]!
    stp     d10, d11, [sp, #-16]!
    stp     d12, d13, [sp, #-16]!
    stp     d14, d15, [sp, #-16]!
    stp     x8,  x9,  [sp, #-16]!
    stp     x10, x11, [sp, #-16]!
    stp     x12, x13, [sp, #-16]!
    stp     x14, x15, [sp, #-16]!
    stp     x16, x17, [sp, #-16]!
    stp     x29, x30, [sp, #-16]!
.endm

.macro pop_v_regs
    ldp     x29, x30, [sp], #16
    ldp     x16, x17, [sp], #16
    ldp     x14, x15, [sp], #16
    ldp     x12, x13, [sp], #16
    ldp     x10, x11, [sp], #16
    ldp     x8,  x9,  [sp], #16
    ldp     d14, d15, [sp], #16
    ldp     d12, d13, [sp], #16
    ldp     d10, d11, [sp], #16
    ldp     d8,  d9,  [sp], #16
.endm

// copy4_reversed
//   Read four 16-bit samples from [x6], stepping x6 by x9 bytes after each
//   one, and store them at descending halfword addresses below x5
//   (pre-decrement), so the first sample read lands at the highest address.
//   Clobbers w8, w11, w12, w13; updates x5 and x6.
.macro copy4_reversed
    ldrsh   w13, [x6]
    add     x6, x6, x9
    ldrsh   w8,  [x6]
    add     x6, x6, x9
    ldrsh   w11, [x6]
    add     x6, x6, x9
    ldrsh   w12, [x6]
    add     x6, x6, x9

    strh    w13, [x5, #-2]!
    strh    w8,  [x5, #-2]!
    strh    w11, [x5, #-2]!
    strh    w12, [x5, #-2]!
.endm

.text
.p2align 2
    .global ixheaacd_sbr_qmfanal32_winadds

// ---------------------------------------------------------------------------
// ixheaacd_sbr_qmfanal32_winadds
//
// In (register roles; the names winAdd / filterStates / timeIn / stride are
// taken from the comments of the earlier port this file was derived from):
//   x0  first  16-bit input array    (read at halfword index n + 64*k)
//   x1  second 16-bit input array    (read at halfword index n + 64*k)
//   x2  first  16-bit coefficient table (used at halfword index 2*n + 128*k)
//   x3  second 16-bit coefficient table (used at halfword index 2*n + 128*k)
//   x4  winAdd        32-bit output array, 64 words are written
//   x5  filterStates  16-bit array, elements [0..31] are written
//   x6  timeIn        16-bit input samples
//   w7  stride        distance between consecutive timeIn samples, in
//                     halfwords.  NOTE(review): treated as a signed 32-bit
//                     value; the C prototype is not visible here - confirm.
//
// Effect, as pseudo-C:
//   for (k = 0; k < 32; k++) filterStates[31 - k] = timeIn[k * stride];
//   for (n = 0; n < 32; n++) {
//       winAdd[n]      = sum(k = 0..4) x0[n + 64*k] * x2[2*n + 128*k];
//       winAdd[32 + n] = sum(k = 0..4) x1[n + 64*k] * x3[2*n + 128*k];
//   }
//   Products are signed 16x16 -> 32 bit and accumulate in 32-bit lanes
//   (SMULL / SMLAL, no saturation).
//
// Memory touched: x0/x1 up to halfword 287; x2/x3 up to halfword 575 (LD2
// fetches the odd-indexed coefficients as well and discards them).
//
// Clobbers: x0-x6, v0-v7, v16-v20, v30, NZCV.  x8-x13 are used as scratch but
// restored by pop_v_regs, as are d8-d15.  No stack beyond the 160-byte spill.
// ---------------------------------------------------------------------------
ixheaacd_sbr_qmfanal32_winadds:
    push_v_regs

    // ---- Stage 1: reversed, strided copy of 32 samples into filterStates --
    // AAPCS64 leaves bits 63:32 of a 32-bit register argument unspecified, so
    // the stride is sign-extended from w7 before it is used in 64-bit address
    // arithmetic.
    sxtw    x9, w7
    lsl     x9, x9, #1                  // x9 = byte step between samples
    add     x5, x5, #64                 // x5 = &filterStates[32]; stores pre-decrement
    mov     w10, #3                     // 4 passes (3..0) x 8 samples = 32

LOOP:
    copy4_reversed
    copy4_reversed
    subs    w10, w10, #1
    bpl     LOOP

    // ---- Stage 2: two 5-tap windowed sums, 4 outputs per group ------------
    // The code is software-pipelined: the operands of group g+1 are loaded
    // while the results of group g are computed and stored.  Group 0 of both
    // operand sets is primed here, the loop handles groups 1..7 and the tail
    // after the loop flushes the last results.
    //
    // Register roles from here on:
    //   x0/x2  running pointers into first input / first coefficient table
    //   x1/x3  running pointers into second input / second coefficient table
    //   x4     output pointer for winAdd[0..31]
    //   x11    output pointer for winAdd[32..63]
    //   x6     128-byte tap step for the input arrays (64 halfwords)
    //   x9/x10 temporaries holding "start of next group" while walking taps
    //   x5     remaining loop trips
    // In every LD2 pair only the first register (even-indexed coefficients)
    // is used; the second one is overwritten by the following LD1.
    mov     x5, #7
    mov     x6, #128
    add     x11, x4, #128               // second output half starts 32 words in

    // Prime group 0, first operand set -> v30.
    ld1     {v0.4h}, [x0], #8
    ld2     {v1.4h, v2.4h}, [x2], #16
    mov     x9, x0                      // next group's input start
    add     x0, x0, #120                // 8 + 120 = 128 bytes to tap 1
    ld1     {v2.4h}, [x0], x6
    mov     x10, x2                     // next group's coefficient start
    add     x2, x2, #240                // 16 + 240 = 256 bytes to tap 1

    smull   v30.4s, v0.4h, v1.4h
    ld2     {v3.4h, v4.4h}, [x2], #16
    add     x2, x2, #240
    ld1     {v4.4h}, [x0], x6
    smlal   v30.4s, v2.4h, v3.4h

    ld2     {v5.4h, v6.4h}, [x2], #16
    add     x2, x2, #240
    ld1     {v6.4h}, [x0], x6
    smlal   v30.4s, v4.4h, v5.4h

    ld2     {v7.4h, v8.4h}, [x2], #16
    add     x2, x2, #240
    ld1     {v8.4h}, [x0], x6
    smlal   v30.4s, v6.4h, v7.4h

    mov     x0, x9
    ld2     {v9.4h, v10.4h}, [x2], #16
    mov     x2, x10
    smlal   v30.4s, v8.4h, v9.4h

    // Prime group 0, second operand set -> v10..v19 (multiplied in the loop).
    ld1     {v10.4h}, [x1], #8
    mov     x9, x1
    ld2     {v11.4h, v12.4h}, [x3], #16
    add     x1, x1, #120
    ld1     {v12.4h}, [x1], x6
    mov     x10, x3
    add     x3, x3, #240

    ld2     {v13.4h, v14.4h}, [x3], #16
    add     x3, x3, #240
    ld1     {v14.4h}, [x1], x6

    ld2     {v15.4h, v16.4h}, [x3], #16
    add     x3, x3, #240
    ld1     {v16.4h}, [x1], x6

    ld2     {v17.4h, v18.4h}, [x3], #16
    add     x3, x3, #240
    ld1     {v18.4h}, [x1], x6

    mov     x1, x9
    ld2     {v19.4h, v20.4h}, [x3], #16
    mov     x3, x10

LOOP_1:
    // Start loading the next group of the first operand set; v30 still holds
    // the finished first-set result of the previous group.
    ld1     {v0.4h}, [x0], #8
    mov     x9, x0
    ld2     {v1.4h, v2.4h}, [x2], #16
    add     x0, x0, #120
    mov     x10, x2
    st1     {v30.4s}, [x4], #16         // first-set result, previous group
    add     x2, x2, #240

    // Second-set result of the previous group; must consume v10..v19 before
    // they are reloaded further down.
    smull   v30.4s, v10.4h, v11.4h
    ld1     {v2.4h}, [x0], x6
    smlal   v30.4s, v12.4h, v13.4h
    smlal   v30.4s, v14.4h, v15.4h
    ld2     {v3.4h, v4.4h}, [x2], #16
    smlal   v30.4s, v16.4h, v17.4h
    smlal   v30.4s, v18.4h, v19.4h
    ld1     {v4.4h}, [x0], x6
    add     x2, x2, #240
    st1     {v30.4s}, [x11], #16        // second-set result, previous group

    // First-set result of the current group.
    smull   v30.4s, v0.4h, v1.4h
    ld2     {v5.4h, v6.4h}, [x2], #16
    smlal   v30.4s, v2.4h, v3.4h
    add     x2, x2, #240
    ld1     {v6.4h}, [x0], x6
    smlal   v30.4s, v4.4h, v5.4h

    ld2     {v7.4h, v8.4h}, [x2], #16
    add     x2, x2, #240
    ld1     {v8.4h}, [x0], x6
    smlal   v30.4s, v6.4h, v7.4h

    mov     x0, x9
    ld2     {v9.4h, v10.4h}, [x2], #16
    mov     x2, x10
    smlal   v30.4s, v8.4h, v9.4h

    // Load the current group of the second operand set for the next trip.
    ld1     {v10.4h}, [x1], #8
    mov     x9, x1
    ld2     {v11.4h, v12.4h}, [x3], #16
    add     x1, x1, #120
    ld1     {v12.4h}, [x1], x6
    mov     x10, x3
    add     x3, x3, #240

    ld2     {v13.4h, v14.4h}, [x3], #16
    add     x3, x3, #240
    ld1     {v14.4h}, [x1], x6

    ld2     {v15.4h, v16.4h}, [x3], #16
    add     x3, x3, #240
    ld1     {v16.4h}, [x1], x6

    ld2     {v17.4h, v18.4h}, [x3], #16
    add     x3, x3, #240
    ld1     {v18.4h}, [x1], x6

    subs    x5, x5, #1                  // nothing below touches NZCV before BGT
    mov     x1, x9
    ld2     {v19.4h, v20.4h}, [x3], #16
    mov     x3, x10
    bgt     LOOP_1

    // Tail: flush the last group (first set already in v30, second set still
    // to be multiplied).
    st1     {v30.4s}, [x4], #16
    smull   v30.4s, v10.4h, v11.4h
    smlal   v30.4s, v12.4h, v13.4h
    smlal   v30.4s, v14.4h, v15.4h
    smlal   v30.4s, v16.4h, v17.4h
    smlal   v30.4s, v18.4h, v19.4h
    st1     {v30.4s}, [x11], #16

    pop_v_regs
    ret