.text
.p2align 2
.global ixheaacd_complex_ifft_p2_asm

ixheaacd_complex_ifft_p2_asm:
    STMFD sp!, {r0-r12, lr}
    SUB sp, sp, #0x28
    LDR r0, [sp, #0x2c]
@LDR r12,[sp,#0x5c+4]
    EOR r0, r0, r0, ASR #31
    CLZ r0, r0
    SUB r12, r0, #16        @dig_rev_shift = norm32(npoints) + 1 - 16@
    SUB r0, r0, #1
    RSB r0, r0, #0x1e
    AND r1, r0, #1
    STR r1, [sp, #0x14]
    MOV r1, r0, ASR #1
    LDR r0, [sp, #0x2c]     @npoints
    STR r1, [sp, #-4]!
    MOV lr, r0, LSL #1      @(npoints >> 1) * 4
    MOV r0, #0

FIRST_STAGE_R4:
    LDR r4, =0x33333333
    LDR r5, =0x0F0F0F0F
    AND r6, r4, r0
    AND r7, r4, r0, LSR #2
    ORR r4, r7, r6, LSL #2
    AND r6, r5, r4
    AND r7, r5, r4, LSR #4
    ORR r4, r7, r6, LSL #4
    BIC r6, r4, #0x0000FF00
    BIC r7, r4, #0x00FF0000
    MOV r7, r7, LSR #8
    ORR r4, r7, r6, LSL #8
    LDR r5, [sp, #0x18]
    MOV r10, r4, LSR r12
    CMP r5, #0
    ADDNE r10, r10, #1
    BICNE r10, r10, #1

    ADD r1, r2, r10, LSL #2
    LDRD r4, [r1]           @r4=x0r, r5=x0i
    ADD r1, r1, lr
    LDRD r8, [r1]           @r8=x1r, r9=x1i
    ADD r1, r1, lr
    LDRD r6, [r1]           @r6=x2r, r7=x2i
    ADD r1, r1, lr
    LDRD r10, [r1]          @r10=x3r, r11=x3i
    ADD r0, r0, #4
    CMP r0, lr, ASR #1

    ADD r4, r4, r6          @x0r = x0r + x2r@
    ADD r5, r5, r7          @x0i = x0i + x2i@
    SUB r6, r4, r6, lsl#1   @x2r = x0r - (x2r << 1)@
    SUB r7, r5, r7, lsl#1   @x2i = x0i - (x2i << 1)@
    ADD r8, r8, r10         @x1r = x1r + x3r@
    ADD r9, r9, r11         @x1i = x1i + x3i@
    SUB r1, r8, r10, lsl#1  @x3r = x1r - (x3r << 1)@
    SUB r11, r9, r11, lsl#1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r8          @x0r = x0r + x1r@
    ADD r5, r5, r9          @x0i = x0i + x1i@
    SUB r8, r4, r8, lsl#1   @x1r = x0r - (x1r << 1)@
    SUB r9, r5, r9, lsl#1   @x1i = x0i - (x1i << 1)@
    SUB r6, r6, r11         @x2r = x2r - x3i@
    ADD r7, r7, r1          @x2i = x2i + x3r@
    ADD r10, r6, r11, lsl#1 @x3i = x2r + (x3i << 1)@
    SUB r11, r7, r1, lsl#1  @x3r = x2i - (x3r << 1)@

    STMIA r3!, {r4-r11}
    BLT FIRST_STAGE_R4
    LDR r1, [sp], #4
    LDR r0, [sp, #0x2c]
    MOV r12, #0x40          @nodespacing = 64@
    STR r12, [sp, #0x1c]
    LDR r12, [sp, #0x2c]
    SUB r3, r3, r0, LSL #3
    SUBS r1, r1, #1
    STR r3, [sp, #0x34]
    MOV r4, r12, ASR #4
    MOV r0, #4
    STR r4, [sp, #0x18]
    STR r1, [sp, #0x20]
    BLE RADIX2
OUTER_LOOP:
    LDR r1, [sp, #0x28]
    LDR r12, [sp, #0x34]    @WORD32 *data = ptr_y@
    STR r1, [sp, #0x10]
    LDR r1, [sp, #0x18]

    MOV r0, r0, LSL #3      @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE:
    LDRD r4, [r12]          @r4=x0r, r5=x0i
    ADD r12, r12, r0
    LDRD r6, [r12]          @r6=x1r, r7=x1i
    ADD r12, r12, r0
    LDRD r8, [r12]          @r8=x2r, r9=x2i
    ADD r12, r12, r0
    LDRD r10, [r12]         @r10=x3r, r11=x3i

@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
@MOV r6,r6,ASR #1
@MOV r7,r7,ASR #1
@MOV r8,r8,ASR #1
@MOV r9,r9,ASR #1
@MOV r10,r10,ASR #1
@MOV r11,r11,ASR #1

    ADD r4, r4, r8          @x0r = x0r + x2r@
    ADD r5, r5, r9          @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1  @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1  @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10         @x1r = x1r + x3r@
    ADD r7, r7, r11         @x1i = x1i + x3i@
    SUB r2, r6, r10, lsl #1 @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6          @x0r = x0r + x1r@
    ADD r5, r5, r7          @x0i = x0i + x1i@
@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
    SUB r6, r4, r6, lsl #1  @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1  @x1i = x0i - (x1i << 1)@
    SUB r8, r8, r11         @x2r = x2r - x3i@
    ADD r9, r9, r2          @x2i = x2i + x3r@
    ADD r10, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
    SUB r11, r9, r2, lsl#1  @x3r = x2i - (x3r << 1)@

    STRD r10, [r12]         @r10=x3r, r11=x3i
    SUB r12, r12, r0
    STRD r6, [r12]          @r6=x1r, r7=x1i
    SUB r12, r12, r0
    STRD r8, [r12]          @r8=x2r, r9=x2i
    SUB r12, r12, r0
    STRD r4, [r12]          @r4=x0r, r5=x0i
    ADD r12, r12, r0, lsl #2

    SUBS r1, r1, #1
    BNE LOOP_TRIVIAL_TWIDDLE

    MOV r0, r0, ASR #3
    LDR r4, [sp, #0x1c]
    LDR r3, [sp, #0x34]
    MUL r1, r0, r4
    ADD r12, r3, #8
    STR r1, [sp, #0x24]
    MOV r3, r1, ASR #2
    ADD r3, r3, r1, ASR #3
    SUB r3, r3, r1, ASR #4
    ADD r3, r3, r1, ASR #5
    SUB r3, r3, r1, ASR #6
    ADD r3, r3, r1, ASR #7
    SUB r3, r3, r1, ASR #8
    STR r3, [sp, #-4]!
SECOND_LOOP:
    LDR r3, [sp, #0x10+4]
    LDR r14, [sp, #0x18+4]
    MOV r0, r0, LSL #3          @(del<<1) * 4
    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #4]            @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #4]            @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #4]            @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #8+4]
    STR r1, [sp, #-4]
    STR r2, [sp, #-8]
    STR r5, [sp, #-12]
    STR r6, [sp, #-16]
    STR r7, [sp, #-20]
    STR r8, [sp, #-24]

RADIX4_BFLY:

    LDRD r6, [r12, r0]!     @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!     @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]     @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #-4]
    LDR r2, [sp, #-8]

    SMULL r3, r4, r6, r2    @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1    @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1    @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2    @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5          @

    LDR r1, [sp, #-12]
    LDR r2, [sp, #-16]

    SMULL r3, r4, r8, r2    @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1    @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1    @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2    @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r9, r9, r8
    ADD r8, r4, r5          @

    LDR r1, [sp, #-20]
    LDR r2, [sp, #-24]

    SMULL r3, r4, r10, r2   @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1  @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1   @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2  @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r11, r11, r10
    ADD r10, r4, r5         @

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]          @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!     @
    LDR r5, [r12, #4]

    ADD r4, r8, r4          @x0r = x0r + x2r@
    ADD r5, r9, r5          @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1   @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1   @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10         @x1r = x1r + x3r@
    ADD r7, r7, r11         @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6          @x0r = x0r + x1r@
    ADD r5, r5, r7          @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1   @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1   @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]          @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11         @x2r = x2r - x3i@
    ADD r9, r9, r10         @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1  @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1  @x3r = x2i - (x3r << 1)@

    STRD r8, [r12]          @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]          @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]          @r10=x3r, r11=x3i
    ADD r12, r12, r0

    BNE RADIX4_BFLY
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x2c+4]
    LDR r4, [sp, #8+4]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x1c+4]
    ADD r12, r1, #8
    LDR r7, [sp, #0]
    ADD r4, r4, r6
    CMP r4, r7
    BLE SECOND_LOOP

SECOND_LOOP_2:
    LDR r3, [sp, #0x10+4]
    LDR r14, [sp, #0x18+4]
    MOV r0, r0, LSL #3          @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #4]            @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #4]            @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @512 * 4
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #4]            @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #8+4]

    STR r1, [sp, #-4]
    STR r2, [sp, #-8]
    STR r5, [sp, #-12]
    STR r6, [sp, #-16]
    STR r7, [sp, #-20]
    STR r8, [sp, #-24]

RADIX4_BFLY_2:
    LDRD r6, [r12, r0]!     @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!     @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]     @r10=x3r, r11=x3i
    SUBS r14, r14, #1
    LDR r1, [sp, #-4]
    LDR r2, [sp, #-8]

    SMULL r3, r4, r6, r2    @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1    @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1    @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2    @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5          @

    LDR r1, [sp, #-12]
    LDR r2, [sp, #-16]

    SMULL r3, r4, r8, r2    @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1    @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1    @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2    @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r9, r9, r8
    ADD r8, r4, r5          @

    LDR r1, [sp, #-20]
    LDR r2, [sp, #-24]

    SMULL r3, r4, r10, r2   @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1  @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1   @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2  @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r10, r10, r11
    ADD r11, r5, r4         @

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]          @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!     @
    LDR r5, [r12, #4]

    ADD r4, r8, r4          @x0r = x0r + x2r@
    ADD r5, r9, r5          @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1   @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1   @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10         @x1r = x1r + x3r@
    ADD r7, r7, r11         @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6          @x0r = x0r + x1r@
    ADD r5, r5, r7          @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1   @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1   @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]          @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11         @x2r = x2r - x3i@
    ADD r9, r9, r10         @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1  @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1  @x3r = x2i - (x3r << 1)@

    STRD r8, [r12]          @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]          @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]          @r10=x3r, r11=x3i
    ADD r12, r12, r0

    BNE RADIX4_BFLY_2
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x2c+4]
    LDR r4, [sp, #8+4]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x1c+4]
    ADD r12, r1, #8
    LDR r7, [sp, #0x24+4]
    ADD r4, r4, r6
    CMP r4, r7, ASR #1
    BLE SECOND_LOOP_2
    LDR r7, [sp, #0]
    CMP r4, r7, LSL #1
    BGT SECOND_LOOP_4

SECOND_LOOP_3:
    LDR r3, [sp, #0x10+4]
    LDR r14, [sp, #0x18+4]
    MOV r0, r0, LSL #3          @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #4]            @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @512 * 4
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #4]            @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #4]            @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #8+4]
    STR r1, [sp, #-4]
    STR r2, [sp, #-8]
    STR r5, [sp, #-12]
    STR r6, [sp, #-16]
    STR r7, [sp, #-20]
    STR r8, [sp, #-24]


RADIX4_BFLY_3:
    LDRD r6, [r12, r0]!     @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!     @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]     @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #-4]
    LDR r2, [sp, #-8]

    SMULL r3, r4, r6, r2    @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1    @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1    @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2    @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5          @

    LDR r1, [sp, #-12]
    LDR r2, [sp, #-16]

    SMULL r3, r4, r8, r2    @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1    @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1    @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2    @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r8, r8, r9
    ADD r9, r5, r4          @

    LDR r1, [sp, #-20]
    LDR r2, [sp, #-24]

    SMULL r3, r4, r10, r2   @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1  @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1   @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2  @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r10, r10, r11
    ADD r11, r5, r4         @

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]          @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!     @
    LDR r5, [r12, #4]

    ADD r4, r8, r4          @x0r = x0r + x2r@
    ADD r5, r9, r5          @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1   @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1   @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10         @x1r = x1r + x3r@
    ADD r7, r7, r11         @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6          @x0r = x0r + x1r@
    ADD r5, r5, r7          @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1   @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1   @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]          @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11         @x2r = x2r - x3i@
    ADD r9, r9, r10         @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1  @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1  @x3r = x2i - (x3r << 1)@

    STRD r8, [r12]          @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]          @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]          @r10=x3r, r11=x3i
    ADD r12, r12, r0

    BNE RADIX4_BFLY_3
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x2c+4]
    LDR r4, [sp, #8+4]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x1c+4]
    ADD r12, r1, #8
    LDR r7, [sp, #0]
    ADD r4, r4, r6
    CMP r4, r7, LSL #1
    BLE SECOND_LOOP_3

SECOND_LOOP_4:
    LDR r3, [sp, #0x10+4]
    LDR r14, [sp, #0x18+4]
    MOV r0, r0, LSL #3          @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #4]            @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @512 * 4
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #4]            @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @512 * 4
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #4]            @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@


    STR r4, [sp, #8+4]
    STR r1, [sp, #-4]
    STR r2, [sp, #-8]
    STR r5, [sp, #-12]
    STR r6, [sp, #-16]
    STR r7, [sp, #-20]
    STR r8, [sp, #-24]

RADIX4_BFLY_4:
    LDRD r6, [r12, r0]!     @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!     @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]     @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #-4]
    LDR r2, [sp, #-8]

    SMULL r3, r4, r6, r2    @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1    @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1    @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2    @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5          @

    LDR r1, [sp, #-12]
    LDR r2, [sp, #-16]

    SMULL r3, r4, r8, r2    @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1    @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1    @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2    @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r8, r8, r9
    ADD r9, r5, r4          @

    LDR r1, [sp, #-20]
    LDR r2, [sp, #-24]

    SMULL r3, r4, r10, r2   @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1  @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1   @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2  @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r11, r11, r10
    ADD r10, r5, r4         @
    RSB r10, r10, #0

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]          @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!     @
    LDR r5, [r12, #4]

    ADD r4, r8, r4          @x0r = x0r + x2r@
    ADD r5, r9, r5          @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1   @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1   @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10         @x1r = x1r + x3r@
    SUB r7, r7, r11         @x1i = x1i - x3i@
    SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
    ADD r11, r7, r11, lsl#1 @x3i = x1i + (x3i << 1)@

    ADD r4, r4, r6          @x0r = x0r + x1r@
    ADD r5, r5, r7          @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1   @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1   @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]          @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11         @x2r = x2r - x3i@
    ADD r9, r9, r10         @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1  @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1  @x3r = x2i - (x3r << 1)@

    STRD r8, [r12]          @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]          @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]          @r10=x3r, r11=x3i
    ADD r12, r12, r0

    BNE RADIX4_BFLY_4
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x2c+4]
    LDR r4, [sp, #8+4]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x1c+4]
    ADD r12, r1, #8
    LDR r7, [sp, #0x24+4]
    ADD r4, r4, r6
    CMP r4, r7
    BLT SECOND_LOOP_4
    ADD sp, sp, #4

    LDR r1, [sp, #0x1c]
    MOV r0, r0, LSL #2
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x1c]
    LDR r1, [sp, #0x18]
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x18]
    LDR r1, [sp, #0x20]
    SUBS r1, r1, #1
    STR r1, [sp, #0x20]
    BGT OUTER_LOOP

RADIX2:
    LDR r1, [sp, #0x14]
    CMP r1, #0
    BEQ EXIT
    LDR r12, [sp, #0x1c]
    LDR r1, [sp, #0x28]
    CMP r12, #0
    LDRNE r12, [sp, #0x1c]
    MOVEQ r4, #1
    MOVNE r4, r12, LSL #1
    MOVS r3, r0
    BEQ EXIT

    MOV r3, r3, ASR #1
    LDR r5, [sp, #0x34]
    MOV r0, r0, LSL #3      @(del<<1) * 4
    STR r1, [sp, #-4]
RADIX2_BFLY:
    LDR r1, [sp, #-4]
    LDRD r6, [r5]           @r6 = x0r
    ADD r5, r5, r0
    LDRD r8, [r5]           @r8 = x1r

    LDR r2, [r1]
    SUBS r3, r3, #1

    SMULL r1, r11, r8, r2   @mult32x16hin32(x1r,W1h)
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2   @mult32x16hin32(x1i,W1h)
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1

    LDR r1, [sp, #-4]
    LDR r2, [r1, #4]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #-4]

    SMULL r1, r8, r8, r2    @ixheaacd_mult32(x1r,w1l)
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2    @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1

    ADD r8, r8, r10
    SUB r9, r9, r11

    ASR r8, r8, #1
    ASR r6, r6, #1
    ASR r9, r9, #1
    ASR r7, r7, #1
    ADD r10, r8, r6         @(x0r/2) + (x1r/2)
    ADD r11, r9, r7         @(x0i/2) + (x1i/2)@
    SUB r8, r6, r8          @(x0r/2) - (x1r/2)
    SUB r9, r7, r9          @(x0i/2) - (x1i/2)@

    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8

    BNE RADIX2_BFLY

    LDR r1, [sp, #0x28]
    MOV r3, r0, ASR #4
    STR r1, [sp, #-4]
RADIX2_BFLY_2:
    LDR r1, [sp, #-4]
    LDRD r6, [r5]           @r6 = x0r
    ADD r5, r5, r0
    LDRD r8, [r5]           @r8 = x1r

    LDR r2, [r1]
    SUBS r3, r3, #1

    SMULL r1, r11, r8, r2   @mult32x16hin32(x1r,W1h)
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2   @mult32x16hin32(x1i,W1h)
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1

    LDR r1, [sp, #-4]
    LDR r2, [r1, #4]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #-4]

    SMULL r1, r8, r8, r2    @ixheaacd_mult32(x1r,w1l)
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2    @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1

    SUB r11, r11, r9
    ADD r9, r10, r8         @
    MOV r8, r11

    ASR r8, r8, #1
    ASR r6, r6, #1
    ASR r9, r9, #1
    ASR r7, r7, #1
    ADD r10, r8, r6         @(x0r>>1) + (x1r)
    ADD r11, r9, r7         @(x0i>>1) + (x1i)@
    SUB r8, r6, r8          @(x0r>>1) - (x1r)
    SUB r9, r7, r9          @(x0i>>1) - (x1i)@

    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8

    BNE RADIX2_BFLY_2

EXIT:
    ADD sp, sp, #0x38
    LDMFD sp!, {r4-r12, pc}
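
@ Note: the recurring SMULL / LSR #31 / ORR ..., LSL #1 sequences above keep
@ bits [62:31] of the 64-bit product, i.e. they compute (a*b) >> 31 in 32 bits.
@ A minimal C sketch of that idiom is given below for reference only; the
@ helper name mult32_shl1 is hypothetical and not part of this file or its
@ calling code.
@
@     #include <stdint.h>
@
@     /* top 32 bits of (a*b) << 1, equivalently ((int64_t)a * b) >> 31 */
@     static inline int32_t mult32_shl1(int32_t a, int32_t b) {
@         return (int32_t)(((int64_t)a * (int64_t)b) >> 31);
@     }
@
@     /* e.g. "SMULL r3, r4, r6, r2" followed by the LSR/ORR pair corresponds
@        to x1r_w1l = mult32_shl1(x1r, w1l); */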