.text
.p2align 2
.global ixheaacd_fft_15_ld_armv7

ixheaacd_fft_15_ld_armv7:

    STMFD r13!, {r4 - r12, r14}     @
    STR r1, [r13, #-4]!             @ save r1 (reloaded into r11 before LOOP_FFT3)
    STR r3, [r13, #-4]!             @ save r3 (reloaded into r10 before LOOP_FFT3)
    MOV lr, r2                      @ lr - fft3out
    MOV r12, #384                   @ input stride in bytes
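
@ 15-point FFT, computed as three radix-5 butterflies (LOOP_FFT5)
@ followed by five radix-3 butterflies (LOOP_FFT3). The code is fully
@ unrolled; the LOOP_* labels mark the two stages, nothing branches
@ back to them. The MOVW/MOVT pairs materialize packed 16-bit twiddle
@ constants (C51..C55 in the comments below); SMULWT/SMULWB select the
@ top or bottom halfword for the 32x16 multiplies.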
LOOP_FFT5:
    LDRD r2, [r0]               @ r2 = buf1a[0] and r3 = buf1a[1]
    ADD r0, r0, r12
    LDRD r4, [r0]               @ r4 = buf1a[2] and r5 = buf1a[3]
    ADD r0, r0, r12
    LDRD r6, [r0]               @ r6 = buf1a[4] and r7 = buf1a[5]
    ADD r0, r0, r12
    LDRD r8, [r0]               @ r8 = buf1a[6] and r9 = buf1a[7]
    ADD r0, r0, r12
    LDRD r10, [r0]              @ r10 = buf1a[8] and r11 = buf1a[9]

    ADD r1, r4, r10             @ r1 = buf1a[2] + buf1a[8]
    SUB r4, r4, r10             @ r4 = buf1a[2] - buf1a[8]
    MOVW r10, #0xB000
    MOVT r10, #0x478E           @ r10 = C54 (top) : C55 (bottom)
    ADD r12, r6, r8             @ r3 = buf1a[4] + buf1a[6]
    SUB r8, r6, r8              @ r2 = buf1a[4] - buf1a[6]

    SUB r6, r1, r12             @ (r1 - r3)
    SMULWT r6, r6, r10          @ t = mult32x16in32_shl((r1 - r3), C54)
    ADD r1, r1, r12             @ r1 = r1 + r3
    ADD r2, r2, r1              @ temp1 = inp[0] + r1
    SMULWB r1, r1, r10          @ mult32_shl(r1, C55)
    ADD r1, r2, r1, lsl #2      @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)
    MOVW r10, #0x9D84
    MOVT r10, #0x79BC           @ r10 = C51 (top) : C52 (bottom)
    STR r2, [lr], #4            @ *buf2++ = temp1

    SUB r12, r1, r6, LSL #1     @ r3 = r1 - t
    ADD r1, r1, r6, LSL #1      @ r1 = r1 + t

    ADD r2, r4, r8              @ (r4 + r2)
    SMULWT r2, r2, r10          @ t = mult32_shl((r4 + r2), C51)
    MOV r2, r2, LSL #1

    SMULWB r4, r4, r10          @ mult32_shl(r4, C52)
    MOVW r10, #0xD180
    MOVT r10, #0xFFFF           @ r10 = C53 (sign-extended)
    ADD r4, r2, r4, LSL #2      @ r4 = t + (mult32_shl(r4, C52) << 1)

    SMULWB r8, r8, r10          @ mult32_shl(r2, C53)
    ADD r2, r2, r8, LSL #1      @ r2 = t + mult32_shl(r2, C53)

    ADD r6, r5, r11             @ s1 = buf1a[3] + buf1a[9]
    SUB r8, r5, r11             @ s4 = buf1a[3] - buf1a[9]
    MOVW r10, #0xB000
    MOVT r10, #0x478E           @ r10 = C54 (top) : C55 (bottom)
    ADD r5, r7, r9              @ s3 = buf1a[5] + buf1a[7]
    SUB r7, r7, r9              @ s2 = buf1a[5] - buf1a[7]

    SUB r9, r6, r5              @ (s1 - s3)
    SMULWT r9, r9, r10          @ t = mult32x16in32_shl((s1 - s3), C54)
    ADD r6, r6, r5              @ s1 = s1 + s3
    ADD r3, r3, r6              @ temp2 = buf1a[1] + s1
    SMULWB r6, r6, r10          @ mult32_shl(s1, C55)
    ADD r6, r3, r6, lsl #2      @ s1 = temp2 + ((mult32_shl(s1, C55)) << 1)
    MOVW r10, #0x9D84
    MOVT r10, #0x79BC           @ r10 = C51 (top) : C52 (bottom)
    STR r3, [lr], #4            @ *buf2++ = temp2

    SUB r5, r6, r9, LSL #1      @ s3 = s1 - t
    ADD r6, r6, r9, LSL #1      @ s1 = s1 + t
    SUB r0, r0, #896            @ r0 -> inp[160]

    ADD r11, r7, r8             @ (s4 + s2)
    SMULWT r11, r11, r10        @ t = mult32_shl((s4 + s2), C51)
    MOV r11, r11, LSL #1

    SMULWB r8, r8, r10          @ mult32_shl(s4, C52)
    MOVW r10, #0xD180
    MOVT r10, #0xFFFF           @ r10 = C53 (sign-extended)
    ADD r8, r11, r8, LSL #2     @ s4 = t + (mult32_shl(s4, C52) << 1)

    SMULWB r7, r7, r10          @ mult32_shl(s2, C53)
    ADD r7, r11, r7, LSL #1     @ s2 = t + mult32_shl(s2, C53)

    ADD r3, r1, r7              @ buf2[2] = r1 + s2
    SUB r9, r6, r2              @ buf2[3] = s1 - r2
    SUB r10, r12, r8            @ buf2[4] = r3 - s4
    ADD r11, r5, r4             @ buf2[5] = s3 + r4
    ADD r12, r12, r8            @ buf2[6] = r3 + s4
    SUB r4, r5, r4              @ buf2[7] = s3 - r4
    SUB r5, r1, r7              @ buf2[8] = r1 - s2
    ADD r6, r6, r2              @ buf2[9] = s1 + r2
    STMIA lr!, {r3, r9-r12}     @ store buf2[2..6]

    MOV r12, #384               @
    MOVW r1, #0xFA00
    MOVT r1, #0xFFFF            @ r1 = -1536

    STMIA lr!, {r4-r6}          @ store buf2[7..9]

    @ second radix-5 butterfly
    LDRD r2, [r0]               @ r2 = buf1a[0] and r3 = buf1a[1]
    ADD r0, r0, r12
    LDRD r4, [r0]               @ r4 = buf1a[2] and r5 = buf1a[3]
    ADD r0, r0, r12
    LDRD r6, [r0]               @ r6 = buf1a[4] and r7 = buf1a[5]
    ADD r0, r0, r12
    LDRD r8, [r0]               @ r8 = buf1a[6] and r9 = buf1a[7]
    ADD r0, r0, r1              @ step back 1536 bytes
    LDRD r10, [r0]              @ r10 = buf1a[8] and r11 = buf1a[9]
    ADD r0, r0, #1024           @ r0 -> inp[320]

    ADD r1, r4, r10             @ r1 = buf1a[2] + buf1a[8]
    SUB r4, r4, r10             @ r4 = buf1a[2] - buf1a[8]
    MOVW r10, #0xB000
    MOVT r10, #0x478E           @ r10 = C54 (top) : C55 (bottom)
    ADD r12, r6, r8             @ r3 = buf1a[4] + buf1a[6]
    SUB r8, r6, r8              @ r2 = buf1a[4] - buf1a[6]

    SUB r6, r1, r12             @ (r1 - r3)
    SMULWT r6, r6, r10          @ t = mult32x16in32_shl((r1 - r3), C54)
    ADD r1, r1, r12             @ r1 = r1 + r3
    ADD r2, r2, r1              @ temp1 = inp[0] + r1
    SMULWB r1, r1, r10          @ mult32_shl(r1, C55)
    ADD r1, r2, r1, lsl #2      @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)
    MOVW r10, #0x9D84
    MOVT r10, #0x79BC           @ r10 = C51 (top) : C52 (bottom)
    STR r2, [lr], #4            @ *buf2++ = temp1

    SUB r12, r1, r6, LSL #1     @ r3 = r1 - t
    ADD r1, r1, r6, LSL #1      @ r1 = r1 + t

    ADD r2, r4, r8              @ (r4 + r2)
    SMULWT r2, r2, r10          @ t = mult32_shl((r4 + r2), C51)
    MOV r2, r2, LSL #1

    SMULWB r4, r4, r10          @ mult32_shl(r4, C52)
    MOVW r10, #0xD180
    MOVT r10, #0xFFFF           @ r10 = C53 (sign-extended)
    ADD r4, r2, r4, LSL #2      @ r4 = t + (mult32_shl(r4, C52) << 1)

    SMULWB r8, r8, r10          @ mult32_shl(r2, C53)
    ADD r2, r2, r8, LSL #1      @ r2 = t + mult32_shl(r2, C53)

    ADD r6, r5, r11             @ s1 = buf1a[3] + buf1a[9]
    SUB r8, r5, r11             @ s4 = buf1a[3] - buf1a[9]
    MOVW r10, #0xB000
    MOVT r10, #0x478E           @ r10 = C54 (top) : C55 (bottom)
    ADD r5, r7, r9              @ s3 = buf1a[5] + buf1a[7]
    SUB r7, r7, r9              @ s2 = buf1a[5] - buf1a[7]

    SUB r9, r6, r5              @ (s1 - s3)
    SMULWT r9, r9, r10          @ t = mult32x16in32_shl((s1 - s3), C54)
    ADD r6, r6, r5              @ s1 = s1 + s3
    ADD r3, r3, r6              @ temp2 = buf1a[1] + s1
    SMULWB r6, r6, r10          @ mult32_shl(s1, C55)
    ADD r6, r3, r6, lsl #2      @ s1 = temp2 + ((mult32_shl(s1, C55)) << 1)
    MOVW r10, #0x9D84
    MOVT r10, #0x79BC           @ r10 = C51 (top) : C52 (bottom)
    STR r3, [lr], #4            @ *buf2++ = temp2

    SUB r5, r6, r9, LSL #1      @ s3 = s1 - t
    ADD r6, r6, r9, LSL #1      @ s1 = s1 + t

    ADD r11, r7, r8             @ (s4 + s2)
    SMULWT r11, r11, r10        @ t = mult32_shl((s4 + s2), C51)
    MOV r11, r11, LSL #1

    SMULWB r8, r8, r10          @ mult32_shl(s4, C52)
    MOVW r10, #0xD180
    MOVT r10, #0xFFFF           @ r10 = C53 (sign-extended)
    ADD r8, r11, r8, LSL #2     @ s4 = t + (mult32_shl(s4, C52) << 1)

    SMULWB r7, r7, r10          @ mult32_shl(s2, C53)
    ADD r7, r11, r7, LSL #1     @ s2 = t + mult32_shl(s2, C53)

    ADD r3, r1, r7              @ buf2[2] = r1 + s2
    SUB r9, r6, r2              @ buf2[3] = s1 - r2
    SUB r10, r12, r8            @ buf2[4] = r3 - s4
    ADD r11, r5, r4             @ buf2[5] = s3 + r4
    ADD r12, r12, r8            @ buf2[6] = r3 + s4
    SUB r4, r5, r4              @ buf2[7] = s3 - r4
    SUB r5, r1, r7              @ buf2[8] = r1 - s2
    ADD r6, r6, r2              @ buf2[9] = s1 + r2
    MOVW r1, #0xFA00
    MOVT r1, #0xFFFF            @ r1 = -1536

    STMIA lr!, {r3, r9-r12}     @ store buf2[2..6]
    MOV r12, #384               @
    STMIA lr!, {r4-r6}          @ store buf2[7..9]
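
    @ third radix-5 butterfly; this pass takes its -1536 step (r1)
    @ after the second LDRD, then walks forward in 384-byte strides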
    LDRD r2, [r0]               @ r2 = buf1a[0] and r3 = buf1a[1]
    ADD r0, r0, r12
    LDRD r4, [r0]               @ r4 = buf1a[2] and r5 = buf1a[3]
    ADD r0, r0, r1              @ step back 1536 bytes
    LDRD r6, [r0]               @ r6 = buf1a[4] and r7 = buf1a[5]
    ADD r0, r0, r12
    LDRD r8, [r0]               @ r8 = buf1a[6] and r9 = buf1a[7]
    ADD r0, r0, r12
    LDRD r10, [r0]              @ r10 = buf1a[8] and r11 = buf1a[9]
    ADD r0, r0, r12

    ADD r1, r4, r10             @ r1 = buf1a[2] + buf1a[8]
    SUB r4, r4, r10             @ r4 = buf1a[2] - buf1a[8]
    MOVW r10, #0xB000
    MOVT r10, #0x478E           @ r10 = C54 (top) : C55 (bottom)
    ADD r12, r6, r8             @ r3 = buf1a[4] + buf1a[6]
    SUB r8, r6, r8              @ r2 = buf1a[4] - buf1a[6]

    SUB r6, r1, r12             @ (r1 - r3)
    SMULWT r6, r6, r10          @ t = mult32x16in32_shl((r1 - r3), C54)
    ADD r1, r1, r12             @ r1 = r1 + r3
    ADD r2, r2, r1              @ temp1 = inp[0] + r1
    SMULWB r1, r1, r10          @ mult32_shl(r1, C55)
    ADD r1, r2, r1, lsl #2      @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)
    MOVW r10, #0x9D84
    MOVT r10, #0x79BC           @ r10 = C51 (top) : C52 (bottom)
    STR r2, [lr], #4            @ *buf2++ = temp1

    SUB r12, r1, r6, LSL #1     @ r3 = r1 - t
    ADD r1, r1, r6, LSL #1      @ r1 = r1 + t

    ADD r2, r4, r8              @ (r4 + r2)
    SMULWT r2, r2, r10          @ t = mult32_shl((r4 + r2), C51)
    MOV r2, r2, LSL #1

    SMULWB r4, r4, r10          @ mult32_shl(r4, C52)
    MOVW r10, #0xD180
    MOVT r10, #0xFFFF           @ r10 = C53 (sign-extended)
    ADD r4, r2, r4, LSL #2      @ r4 = t + (mult32_shl(r4, C52) << 1)

    SMULWB r8, r8, r10          @ mult32_shl(r2, C53)
    ADD r2, r2, r8, LSL #1      @ r2 = t + mult32_shl(r2, C53)

    ADD r6, r5, r11             @ s1 = buf1a[3] + buf1a[9]
    SUB r8, r5, r11             @ s4 = buf1a[3] - buf1a[9]
    MOVW r10, #0xB000
    MOVT r10, #0x478E           @ r10 = C54 (top) : C55 (bottom)
    ADD r5, r7, r9              @ s3 = buf1a[5] + buf1a[7]
    SUB r7, r7, r9              @ s2 = buf1a[5] - buf1a[7]

    SUB r9, r6, r5              @ (s1 - s3)
    SMULWT r9, r9, r10          @ t = mult32x16in32_shl((s1 - s3), C54)
    ADD r6, r6, r5              @ s1 = s1 + s3
    ADD r3, r3, r6              @ temp2 = buf1a[1] + s1
    SMULWB r6, r6, r10          @ mult32_shl(s1, C55)
    ADD r6, r3, r6, lsl #2      @ s1 = temp2 + ((mult32_shl(s1, C55)) << 1)
    MOVW r10, #0x9D84
    MOVT r10, #0x79BC           @ r10 = C51 (top) : C52 (bottom)
    STR r3, [lr], #4            @ *buf2++ = temp2

    SUB r5, r6, r9, LSL #1      @ s3 = s1 - t
    ADD r6, r6, r9, LSL #1      @ s1 = s1 + t

    ADD r11, r7, r8             @ (s4 + s2)
    SMULWT r11, r11, r10        @ t = mult32_shl((s4 + s2), C51)
    MOV r11, r11, LSL #1

    SMULWB r8, r8, r10          @ mult32_shl(s4, C52)
    MOVW r10, #0xD180
    MOVT r10, #0xFFFF           @ r10 = C53 (sign-extended)
    ADD r8, r11, r8, LSL #2     @ s4 = t + (mult32_shl(s4, C52) << 1)

    SMULWB r7, r7, r10          @ mult32_shl(s2, C53)
    ADD r7, r11, r7, LSL #1     @ s2 = t + mult32_shl(s2, C53)

    ADD r3, r1, r7              @ buf2[2] = r1 + s2
    SUB r9, r6, r2              @ buf2[3] = s1 - r2
    SUB r10, r12, r8            @ buf2[4] = r3 - s4
    ADD r11, r5, r4             @ buf2[5] = s3 + r4
    ADD r12, r12, r8            @ buf2[6] = r3 + s4
    SUB r4, r5, r4              @ buf2[7] = s3 - r4
    SUB r5, r1, r7              @ buf2[8] = r1 - s2
    ADD r6, r6, r2              @ buf2[9] = s1 + r2

    STMIA lr!, {r3, r9-r12}     @ store buf2[2..6]
    STMIA lr!, {r4-r6}          @ store buf2[7..9]

    SUB lr, lr, #120            @ rewind lr to the start of fft3out
    MOVW r12, #28378            @ r12 = sinmu
    LDMFD r13!, {r10, r11}      @ r10 = re-ordering table, r11 = output base
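
@ Radix-3 stage: five unrolled butterflies over the 30 words just
@ written to fft3out. Each butterfly reads complex inputs 10 words
@ apart (offsets 0, 40 and 80 bytes) and scatters its three complex
@ outputs through the byte re-ordering table in r10; each table entry
@ indexes one complex pair (8 bytes) from the output base in r11.
@ sinmu = 28378 is sin(2*pi/3) ~= 0.866 in Q15.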

LOOP_FFT3:
    LDRD r0, [lr]               @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
    LDRD r2, [lr, #40]          @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
    LDRD r4, [lr, #80]          @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
    ADD lr, lr, #8              @

    ADD r6, r0, r2              @ X01r = add32(buf1[0], buf1[2])
    ADD r7, r1, r3              @ X01i = add32(buf1[1], buf1[3])

    ADD r8, r2, r4              @ add_r = add32(buf1[2], buf1[4])
    ADD r9, r3, r5              @ add_i = add32(buf1[3], buf1[5])

    SUB r2, r2, r4              @ sub_r = sub32(buf1[2], buf1[4])
    SUB r3, r3, r5              @ sub_i = sub32(buf1[3], buf1[5])

    MOV r8, r8, ASR #1          @ p1 = add_r >> 1
    MOV r9, r9, ASR #1          @ p4 = add_i >> 1

    SMULWB r3, r3, r12          @ p2 = mult32x16in32_shl(sub_i, sinmu)
    SMULWB r2, r2, r12          @ p3 = mult32x16in32_shl(sub_r, sinmu)

    SUB r0, r0, r8              @ temp = sub32(buf1a[0], p1)
    ADD r8, r1, r2, LSL #1      @ temp1 = add32(buf1a[1], p3)
    SUB r2, r1, r2, LSL #1      @ temp2 = sub32(buf1a[1], p3)

    ADD r4, r6, r4              @ add32(X01r, buf1a[4])
    ADD r5, r7, r5              @ add32(X01i, buf1a[5])
    ADD r6, r0, r3, LSL #1      @ add32(temp, p2)
    SUB r7, r2, r9              @ sub32(temp2, p4)
    SUB r9, r8, r9              @ sub32(temp1, p4)
    SUB r8, r0, r3, LSL #1      @ sub32(temp, p2)

    MOV r3, r11                 @
    LDRB r0, [r10], #1          @ next three output indices
    LDRB r1, [r10], #1          @
    LDRB r2, [r10], #1          @
    ADD r0, r11, r0, lsl #3     @
    ADD r1, r11, r1, lsl #3     @
    ADD r2, r11, r2, lsl #3     @
    STRD r4, [r0]               @
    STRD r6, [r1]               @
    STRD r8, [r2]               @

    @ second radix-3 butterfly
    LDRD r0, [lr]               @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
    LDRD r2, [lr, #40]          @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
    LDRD r4, [lr, #80]          @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
    ADD lr, lr, #8              @

    ADD r6, r0, r2              @ X01r = add32(buf1[0], buf1[2])
    ADD r7, r1, r3              @ X01i = add32(buf1[1], buf1[3])

    ADD r8, r2, r4              @ add_r = add32(buf1[2], buf1[4])
    ADD r9, r3, r5              @ add_i = add32(buf1[3], buf1[5])

    SUB r2, r2, r4              @ sub_r = sub32(buf1[2], buf1[4])
    SUB r3, r3, r5              @ sub_i = sub32(buf1[3], buf1[5])

    MOV r8, r8, ASR #1          @ p1 = add_r >> 1
    MOV r9, r9, ASR #1          @ p4 = add_i >> 1

    SMULWB r3, r3, r12          @ p2 = mult32x16in32_shl(sub_i, sinmu)
    SMULWB r2, r2, r12          @ p3 = mult32x16in32_shl(sub_r, sinmu)

    SUB r0, r0, r8              @ temp = sub32(buf1a[0], p1)
    ADD r8, r1, r2, LSL #1      @ temp1 = add32(buf1a[1], p3)
    SUB r2, r1, r2, LSL #1      @ temp2 = sub32(buf1a[1], p3)

    ADD r4, r6, r4              @ add32(X01r, buf1a[4])
    ADD r5, r7, r5              @ add32(X01i, buf1a[5])
    ADD r6, r0, r3, LSL #1      @ add32(temp, p2)
    SUB r7, r2, r9              @ sub32(temp2, p4)
    SUB r9, r8, r9              @ sub32(temp1, p4)
    SUB r8, r0, r3, LSL #1      @ sub32(temp, p2)

    LDRB r0, [r10], #1          @ next three output indices
    LDRB r1, [r10], #1          @
    LDRB r2, [r10], #1          @
    ADD r0, r11, r0, lsl #3     @
    ADD r1, r11, r1, lsl #3     @
    ADD r2, r11, r2, lsl #3     @
    STRD r4, [r0]               @
    STRD r6, [r1]               @
    STRD r8, [r2]               @

    @ third radix-3 butterfly
    LDRD r0, [lr]               @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
    LDRD r2, [lr, #40]          @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
    LDRD r4, [lr, #80]          @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
    ADD lr, lr, #8              @

    ADD r6, r0, r2              @ X01r = add32(buf1[0], buf1[2])
    ADD r7, r1, r3              @ X01i = add32(buf1[1], buf1[3])

    ADD r8, r2, r4              @ add_r = add32(buf1[2], buf1[4])
    ADD r9, r3, r5              @ add_i = add32(buf1[3], buf1[5])

    SUB r2, r2, r4              @ sub_r = sub32(buf1[2], buf1[4])
    SUB r3, r3, r5              @ sub_i = sub32(buf1[3], buf1[5])

    MOV r8, r8, ASR #1          @ p1 = add_r >> 1
    MOV r9, r9, ASR #1          @ p4 = add_i >> 1

    SMULWB r3, r3, r12          @ p2 = mult32x16in32_shl(sub_i, sinmu)
    SMULWB r2, r2, r12          @ p3 = mult32x16in32_shl(sub_r, sinmu)

    SUB r0, r0, r8              @ temp = sub32(buf1a[0], p1)
    ADD r8, r1, r2, LSL #1      @ temp1 = add32(buf1a[1], p3)
    SUB r2, r1, r2, LSL #1      @ temp2 = sub32(buf1a[1], p3)

    ADD r4, r6, r4              @ add32(X01r, buf1a[4])
    ADD r5, r7, r5              @ add32(X01i, buf1a[5])
    ADD r6, r0, r3, LSL #1      @ add32(temp, p2)
    SUB r7, r2, r9              @ sub32(temp2, p4)
    SUB r9, r8, r9              @ sub32(temp1, p4)
    SUB r8, r0, r3, LSL #1      @ sub32(temp, p2)

    LDRB r0, [r10], #1          @ next three output indices
    LDRB r1, [r10], #1          @
    LDRB r2, [r10], #1          @
    ADD r0, r11, r0, lsl #3     @
    ADD r1, r11, r1, lsl #3     @
    ADD r2, r11, r2, lsl #3     @
    STRD r4, [r0]               @
    STRD r6, [r1]               @
    STRD r8, [r2]               @
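
    @ fourth and fifth radix-3 butterflies; the fifth leaves lr
    @ unchanged since no further input is read after it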
    LDRD r0, [lr]               @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
    LDRD r2, [lr, #40]          @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
    LDRD r4, [lr, #80]          @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
    ADD lr, lr, #8              @

    ADD r6, r0, r2              @ X01r = add32(buf1[0], buf1[2])
    ADD r7, r1, r3              @ X01i = add32(buf1[1], buf1[3])

    ADD r8, r2, r4              @ add_r = add32(buf1[2], buf1[4])
    ADD r9, r3, r5              @ add_i = add32(buf1[3], buf1[5])

    SUB r2, r2, r4              @ sub_r = sub32(buf1[2], buf1[4])
    SUB r3, r3, r5              @ sub_i = sub32(buf1[3], buf1[5])

    MOV r8, r8, ASR #1          @ p1 = add_r >> 1
    MOV r9, r9, ASR #1          @ p4 = add_i >> 1

    SMULWB r3, r3, r12          @ p2 = mult32x16in32_shl(sub_i, sinmu)
    SMULWB r2, r2, r12          @ p3 = mult32x16in32_shl(sub_r, sinmu)

    SUB r0, r0, r8              @ temp = sub32(buf1a[0], p1)
    ADD r8, r1, r2, LSL #1      @ temp1 = add32(buf1a[1], p3)
    SUB r2, r1, r2, LSL #1      @ temp2 = sub32(buf1a[1], p3)

    ADD r4, r6, r4              @ add32(X01r, buf1a[4])
    ADD r5, r7, r5              @ add32(X01i, buf1a[5])
    ADD r6, r0, r3, LSL #1      @ add32(temp, p2)
    SUB r7, r2, r9              @ sub32(temp2, p4)
    SUB r9, r8, r9              @ sub32(temp1, p4)
    SUB r8, r0, r3, LSL #1      @ sub32(temp, p2)

    LDRB r0, [r10], #1          @ next three output indices
    LDRB r1, [r10], #1          @
    LDRB r2, [r10], #1          @
    ADD r0, r11, r0, lsl #3     @
    ADD r1, r11, r1, lsl #3     @
    ADD r2, r11, r2, lsl #3     @
    STRD r4, [r0]               @
    STRD r6, [r1]               @
    STRD r8, [r2]               @

    @ fifth (final) radix-3 butterfly
    LDRD r0, [lr]               @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
    LDRD r2, [lr, #40]          @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
    LDRD r4, [lr, #80]          @ r4 = fft3outptr[20] and r5 = fft3outptr[21]

    ADD r6, r0, r2              @ X01r = add32(buf1[0], buf1[2])
    ADD r7, r1, r3              @ X01i = add32(buf1[1], buf1[3])

    ADD r8, r2, r4              @ add_r = add32(buf1[2], buf1[4])
    ADD r9, r3, r5              @ add_i = add32(buf1[3], buf1[5])

    SUB r2, r2, r4              @ sub_r = sub32(buf1[2], buf1[4])
    SUB r3, r3, r5              @ sub_i = sub32(buf1[3], buf1[5])

    MOV r8, r8, ASR #1          @ p1 = add_r >> 1
    MOV r9, r9, ASR #1          @ p4 = add_i >> 1

    SMULWB r3, r3, r12          @ p2 = mult32x16in32_shl(sub_i, sinmu)
    SMULWB r2, r2, r12          @ p3 = mult32x16in32_shl(sub_r, sinmu)

    SUB r0, r0, r8              @ temp = sub32(buf1a[0], p1)
    ADD r8, r1, r2, LSL #1      @ temp1 = add32(buf1a[1], p3)
    SUB r2, r1, r2, LSL #1      @ temp2 = sub32(buf1a[1], p3)

    ADD r4, r6, r4              @ add32(X01r, buf1a[4])
    ADD r5, r7, r5              @ add32(X01i, buf1a[5])
    ADD r6, r0, r3, LSL #1      @ add32(temp, p2)
    SUB r7, r2, r9              @ sub32(temp2, p4)
    SUB r9, r8, r9              @ sub32(temp1, p4)
    SUB r8, r0, r3, LSL #1      @ sub32(temp, p2)

    LDRB r0, [r10], #1          @ next three output indices
    LDRB r1, [r10], #1          @
    LDRB r2, [r10], #1          @
    ADD r0, r11, r0, lsl #3     @
    ADD r1, r11, r1, lsl #3     @
    ADD r2, r11, r2, lsl #3     @
    STRD r4, [r0]               @
    STRD r6, [r1]               @
    STRD r8, [r2]               @

    LDMFD r13!, {r4 - r12, r15} @ restore registers and return (pc = saved r14)