///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/

// Target: AArch64 (A64 + AdvSIMD), GNU assembler syntax, `//` comments.

// Save q8-q15 and x21-x24 on the stack (160 bytes, pre-decrement).
// Every adjustment is a multiple of 16, so sp alignment is unchanged.
.macro push_v_regs
        stp     q8, q9, [sp, #-32]!
        stp     q10, q11, [sp, #-32]!
        stp     q12, q13, [sp, #-32]!
        stp     q14, q15, [sp, #-32]!
        stp     x21, x22, [sp, #-16]!
        stp     x23, x24, [sp, #-16]!
.endm

// Exact mirror of push_v_regs: restore in reverse order and release 160 bytes.
.macro pop_v_regs
        ldp     x23, x24, [sp], #16
        ldp     x21, x22, [sp], #16
        ldp     q14, q15, [sp], #32
        ldp     q12, q13, [sp], #32
        ldp     q10, q11, [sp], #32
        ldp     q8, q9, [sp], #32
.endm

// Exchange two general-purpose registers. Clobbers x16.
// Not invoked by the routine below.
.macro swp reg1, reg2
        mov     x16, \reg1
        mov     \reg1, \reg2
        mov     \reg2, x16
.endm

.text
.global ixheaacd_sbr_qmfsyn64_winadd

//------------------------------------------------------------------------------
// ixheaacd_sbr_qmfsyn64_winadd
//
// Produces 64 signed 16-bit outputs. With a = (int16 *)x0, b = (int16 *)x1,
// w = (int16 *)x2 and i = 0..63:
//
//   acc = round
//       + sum(k = 0..4) a[i + 256*k]       * w[i + 128*k]
//       + sum(k = 0..4) b[128 + i + 256*k] * w[64 + i + 128*k]
//   acc = saturating_shift_left(acc, w4)           (32-bit signed, SQSHL)
//   out[i * x5] = (int16)(acc >> 16)               (out = (int16 *)x3)
//
// where round = SSHL(0x8000, -w4), i.e. 0x8000 shifted the opposite way.
//
// In (derived from register usage; the C prototype is not visible here):
//   x0  first 16-bit sample buffer, read as 5 rows 512 bytes apart
//   x1  second 16-bit sample buffer, read from byte offset 256, same row pitch
//   x2  16-bit coefficient table, rows 256 bytes apart; a second stream of the
//       same table starts at byte offset 128
//   x3  output pointer
//   w4  shift count (replicated to every 32-bit lane)
//   x5  distance between consecutive outputs, in 16-bit elements
//       NOTE(review): x5 is consumed as a full 64-bit value. If the caller
//       passes a 32-bit int here, AAPCS64 leaves bits 63:32 unspecified and a
//       sign-extension (sxtw) would be required -- confirm against the C
//       prototype before changing.
// Out:   none (x0 is not set)
// Clobb: x0-x3, x5-x12, v0-v7, v16-v20, v22, v26, v28, v30, NZCV
//        (d8-d15 are used and restored by push_v_regs / pop_v_regs;
//         x21-x24 are saved by the macro but not modified here)
// Stack: 160 bytes
//
// Register roles inside the routine:
//   x0, x1   row walkers for the two sample buffers
//   x2, x12  row walkers for the two coefficient streams
//   x10, x11 hold the next column base while a walker steps through its rows
//   x8 = 512, x9 = 256  row pitches in bytes (samples, coefficients)
//   x6       remaining-groups counter (one group = 4 outputs)
//   v0-v19   operands of the ten multiply-accumulates of one group
//   v20      rounding constant, v22 shift, v26 accumulator
//   v28      four finished 16-bit results in lanes h[0..3]
//
// The code is software pipelined: operands for the next group are loaded
// while the current group is accumulated and the previous group is stored.
// Statement order is therefore significant.
//------------------------------------------------------------------------------
ixheaacd_sbr_qmfsyn64_winadd:

        push_v_regs

        // ---- constants, pointers and first operands of group 0 -------------
        mov     w7, #0x8000
        ld1     {v0.4h}, [x0], #8
        mov     x12, x2                         // x12 = coefficient base
        dup     v30.4s, w7
        ld1     {v1.4h}, [x2], #8
        dup     v22.4s, w4                      // v22 = shift in every lane

        mov     x10, x0                         // next column of buffer x0
        mov     x11, x2                         // next column of coefficients
        add     x0, x0, #504                    // 512 - 8: same column, row 1
        add     x2, x2, #248                    // 256 - 8: same column, row 1

        neg     v28.4s, v22.4s
        sshl    v20.4s, v30.4s, v28.4s          // v20 = 0x8000 shifted by -shift
        add     x12, x12, #128                  // second coefficient stream
        add     x1, x1, #256                    // second buffer starts 256 B in
        mov     x6, #16                         // 16 groups of 4 outputs
        mov     x9, #256                        // coefficient row pitch (bytes)
        mov     x8, #512                        // sample row pitch (bytes)

        lsl     x5, x5, #1                      // output stride: elements -> bytes

        // ---- group 0: load remaining operands and accumulate ---------------
        ld1     {v2.4h}, [x0], x8
        mov     v26.16b, v20.16b                // acc = rounding constant

        smlal   v26.4s, v0.4h, v1.4h
        ld1     {v3.4h}, [x2], x9

        ld1     {v4.4h}, [x0], x8
        smlal   v26.4s, v2.4h, v3.4h

        ld1     {v5.4h}, [x2], x9

        ld1     {v6.4h}, [x0], x8
        smlal   v26.4s, v5.4h, v4.4h

        ld1     {v7.4h}, [x2], x9

        ld1     {v8.4h}, [x0], x8
        smlal   v26.4s, v7.4h, v6.4h

        ld1     {v9.4h}, [x2], x9
        mov     x0, x10                         // rewind to next column

        mov     x2, x11
        ld1     {v10.4h}, [x1], #8
        smlal   v26.4s, v9.4h, v8.4h

        mov     x10, x1                         // next column of buffer x1
        ld1     {v11.4h}, [x12], #8
        add     x1, x1, #504

        mov     x11, x12                        // next column of stream x12
        ld1     {v12.4h}, [x1], x8
        add     x12, x12, #248

        smlal   v26.4s, v10.4h, v11.4h
        ld1     {v13.4h}, [x12], x9

        ld1     {v14.4h}, [x1], x8
        smlal   v26.4s, v12.4h, v13.4h

        ld1     {v15.4h}, [x12], x9

        ld1     {v16.4h}, [x1], x8
        smlal   v26.4s, v15.4h, v14.4h

        ld1     {v17.4h}, [x12], x9

        ld1     {v18.4h}, [x1], x8
        smlal   v26.4s, v17.4h, v16.4h

        ld1     {v19.4h}, [x12], x9

        smlal   v26.4s, v19.4h, v18.4h

        // ---- scale group 0 while loading every operand of group 1 ----------
        ld1     {v0.4h}, [x0], #8
        mov     x12, x11

        mov     x1, x10                         // must precede reuse of x10
        ld1     {v1.4h}, [x2], #8
        mov     x10, x0

        sqshl   v26.4s, v26.4s, v22.4s

        add     x0, x0, #504

        mov     x11, x2
        ld1     {v2.4h}, [x0], x8
        add     x2, x2, #248

        sshr    v28.4s, v26.4s, #16
        ld1     {v3.4h}, [x2], x9

        uzp1    v28.8h, v28.8h, v28.8h          // pack low halves into h[0..3]
        mov     v26.16b, v20.16b                // restart accumulator

        ld1     {v4.4h}, [x0], x8
        ld1     {v5.4h}, [x2], x9

        ld1     {v6.4h}, [x0], x8
        ld1     {v7.4h}, [x2], x9

        ld1     {v8.4h}, [x0], x8
        ld1     {v9.4h}, [x2], x9
        mov     x0, x10

        mov     x2, x11
        ld1     {v10.4h}, [x1], #8

        mov     x10, x1
        ld1     {v11.4h}, [x12], #8
        add     x1, x1, #504

        mov     x11, x12
        ld1     {v12.4h}, [x1], x8
        add     x12, x12, #248

        ld1     {v13.4h}, [x12], x9

        ld1     {v14.4h}, [x1], x8
        ld1     {v15.4h}, [x12], x9

        ld1     {v16.4h}, [x1], x8
        ld1     {v17.4h}, [x12], x9

        ld1     {v18.4h}, [x1], x8
        sub     x6, x6, #2                      // two groups are handled outside the loop
        ld1     {v19.4h}, [x12], x9
        mov     x1, x10

        mov     x12, x11

        // Invariant at loop entry: v28 holds a finished group that is not yet
        // stored, v0-v19 hold the operands of the following group, v26 holds
        // the rounding constant. Each pass stores and computes two groups.
.Lqmfsyn64_winadd_loop:

        // ---- first half: accumulate loaded group, store previous group -----
        smlal   v26.4s, v0.4h, v1.4h
        st1     {v28.h}[0], [x3], x5

        smlal   v26.4s, v2.4h, v3.4h
        ld1     {v0.4h}, [x0], #8
        smlal   v26.4s, v5.4h, v4.4h

        smlal   v26.4s, v7.4h, v6.4h
        st1     {v28.h}[1], [x3], x5

        mov     x10, x0
        ld1     {v1.4h}, [x2], #8
        add     x0, x0, #504

        smlal   v26.4s, v9.4h, v8.4h
        st1     {v28.h}[2], [x3], x5

        smlal   v26.4s, v10.4h, v11.4h
        st1     {v28.h}[3], [x3], x5

        mov     x11, x2
        ld1     {v2.4h}, [x0], x8
        add     x2, x2, #248

        smlal   v26.4s, v12.4h, v13.4h
        ld1     {v3.4h}, [x2], x9
        smlal   v26.4s, v15.4h, v14.4h

        smlal   v26.4s, v17.4h, v16.4h
        ld1     {v4.4h}, [x0], x8
        smlal   v26.4s, v19.4h, v18.4h

        ld1     {v5.4h}, [x2], x9

        ld1     {v6.4h}, [x0], x8
        sqshl   v26.4s, v26.4s, v22.4s

        sshr    v28.4s, v26.4s, #16
        ld1     {v7.4h}, [x2], x9
        mov     v26.16b, v20.16b

        uzp1    v28.8h, v28.8h, v28.8h

        // ---- second half: load and accumulate the next group in place ------
        smlal   v26.4s, v0.4h, v1.4h

        smlal   v26.4s, v2.4h, v3.4h
        ld1     {v8.4h}, [x0], x8
        smlal   v26.4s, v5.4h, v4.4h

        smlal   v26.4s, v7.4h, v6.4h
        ld1     {v9.4h}, [x2], x9

        ld1     {v10.4h}, [x1], #8
        smlal   v26.4s, v9.4h, v8.4h

        mov     x2, x11
        ld1     {v11.4h}, [x12], #8
        mov     x0, x10

        mov     x10, x1

        add     x1, x1, #504

        mov     x11, x12
        ld1     {v12.4h}, [x1], x8
        add     x12, x12, #248

        ld1     {v13.4h}, [x12], x9
        smlal   v26.4s, v10.4h, v11.4h

        ld1     {v14.4h}, [x1], x8
        smlal   v26.4s, v12.4h, v13.4h

        ld1     {v15.4h}, [x12], x9

        ld1     {v16.4h}, [x1], x8
        smlal   v26.4s, v15.4h, v14.4h

        ld1     {v17.4h}, [x12], x9

        ld1     {v18.4h}, [x1], x8
        smlal   v26.4s, v17.4h, v16.4h

        ld1     {v19.4h}, [x12], x9
        mov     x1, x10

        smlal   v26.4s, v19.4h, v18.4h
        st1     {v28.h}[0], [x3], x5

        mov     x12, x11
        ld1     {v0.4h}, [x0], #8

        ld1     {v1.4h}, [x2], #8
        sqshl   v26.4s, v26.4s, v22.4s

        st1     {v28.h}[1], [x3], x5
        mov     x10, x0

        st1     {v28.h}[2], [x3], x5
        add     x0, x0, #504

        st1     {v28.h}[3], [x3], x5           // last read of v28 before it is overwritten
        mov     x11, x2

        sshr    v28.4s, v26.4s, #16
        ld1     {v2.4h}, [x0], x8
        add     x2, x2, #248

        // ---- load the rest of the operands for the following pass ----------
        ld1     {v3.4h}, [x2], x9
        ld1     {v4.4h}, [x0], x8
        ld1     {v5.4h}, [x2], x9
        ld1     {v6.4h}, [x0], x8
        ld1     {v7.4h}, [x2], x9
        ld1     {v8.4h}, [x0], x8
        ld1     {v9.4h}, [x2], x9

        uzp1    v28.8h, v28.8h, v28.8h
        mov     v26.16b, v20.16b

        mov     x0, x10
        ld1     {v10.4h}, [x1], #8
        mov     x2, x11

        mov     x10, x1
        ld1     {v11.4h}, [x12], #8
        add     x1, x1, #504

        mov     x11, x12
        ld1     {v12.4h}, [x1], x8
        add     x12, x12, #248

        ld1     {v13.4h}, [x12], x9

        ld1     {v14.4h}, [x1], x8
        ld1     {v15.4h}, [x12], x9

        ld1     {v16.4h}, [x1], x8
        ld1     {v17.4h}, [x12], x9

        subs    x6, x6, #2                      // flags consumed by bgt below
        ld1     {v18.4h}, [x1], x8

        mov     x1, x10
        ld1     {v19.4h}, [x12], x9

        mov     x12, x11

        bgt     .Lqmfsyn64_winadd_loop

        // ---- drain: accumulate the last loaded group, store the final two --
        smlal   v26.4s, v0.4h, v1.4h
        st1     {v28.h}[0], [x3], x5
        smlal   v26.4s, v2.4h, v3.4h

        smlal   v26.4s, v5.4h, v4.4h
        st1     {v28.h}[1], [x3], x5
        smlal   v26.4s, v7.4h, v6.4h

        smlal   v26.4s, v9.4h, v8.4h
        st1     {v28.h}[2], [x3], x5
        smlal   v26.4s, v10.4h, v11.4h

        smlal   v26.4s, v12.4h, v13.4h
        st1     {v28.h}[3], [x3], x5
        smlal   v26.4s, v15.4h, v14.4h

        smlal   v26.4s, v17.4h, v16.4h

        smlal   v26.4s, v19.4h, v18.4h

        sqshl   v26.4s, v26.4s, v22.4s

        sshr    v28.4s, v26.4s, #16

        uzp1    v28.8h, v28.8h, v28.8h

        st1     {v28.h}[0], [x3], x5
        st1     {v28.h}[1], [x3], x5
        st1     {v28.h}[2], [x3], x5
        st1     {v28.h}[3], [x3], x5

        pop_v_regs
        ret