• Home
  • Raw
  • Download

Lines Matching refs:s

187     smull       v20.4s, v2.4h, v0.h[0]      //// y0 * cos4(part of c0 and c1)
189 smull v18.4s, v3.4h, v1.h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
192 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
194 smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
196 smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
198 smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
200 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
202 smlsl v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
204 smlsl v28.4s, v7.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
206 smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
208 smull v22.4s, v10.4h, v0.h[0] //// y4 * cos4(part of c0 and c1)
210 smull v6.4s, v3.4h, v0.h[2] //// y2 * cos2(part of d0)
234 smlal v24.4s, v14.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
235 smlsl v26.4s, v14.4h, v0.h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
236 smlal v28.4s, v14.4h, v1.h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
237 smlal v30.4s, v14.4h, v0.h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
239 smlsl v18.4s, v11.4h, v0.h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
240 smlal v6.4s, v11.4h, v1.h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
242 add v10.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
243 sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
245 …smlal v24.4s, v15.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(pa…
246 …smlsl v26.4s, v15.4h, v1.h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(pa…
247 …smlal v28.4s, v15.4h, v0.h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(pa…
248 …smlsl v30.4s, v15.4h, v0.h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(pa…
250 add v14.4s, v10.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
251 sub v10.4s, v10.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
252 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
253 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
255 add v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0)
256 sub v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7)
258 add v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2)
259 sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of x5)
261 add v28.4s, v18.4s , v26.4s //// a1 + b1(part of x1)
262 sub v18.4s, v18.4s , v26.4s //// a1 - b1(part of x6)
264 add v26.4s, v10.4s , v30.4s //// a3 + b3(part of x3)
265 sub v30.4s, v10.4s , v30.4s //// a3 - b3(part of x4)
267 sqrshrn v2.4h, v20.4s,#shift_stage1_idct //// x0 = (a0 + b0 + rnd) >> 7(shift_stage1_idct)
268 sqrshrn v15.4h, v6.4s,#shift_stage1_idct //// x7 = (a0 - b0 + rnd) >> 7(shift_stage1_idct)
269 sqrshrn v3.4h, v24.4s,#shift_stage1_idct //// x2 = (a2 + b2 + rnd) >> 7(shift_stage1_idct)
270 sqrshrn v14.4h, v22.4s,#shift_stage1_idct //// x5 = (a2 - b2 + rnd) >> 7(shift_stage1_idct)
271 sqrshrn v6.4h, v28.4s,#shift_stage1_idct //// x1 = (a1 + b1 + rnd) >> 7(shift_stage1_idct)
272 sqrshrn v11.4h, v18.4s,#shift_stage1_idct //// x6 = (a1 - b1 + rnd) >> 7(shift_stage1_idct)
273 sqrshrn v7.4h, v26.4s,#shift_stage1_idct //// x3 = (a3 + b3 + rnd) >> 7(shift_stage1_idct)
274 sqrshrn v10.4h, v30.4s,#shift_stage1_idct //// x4 = (a3 - b3 + rnd) >> 7(shift_stage1_idct)
304 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
305 smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
306 smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
307 smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
309 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
310 smlsl v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
311 smlsl v28.4s, v7.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
312 smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
314 smull v18.4s, v3.4h, v1.h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
315 smull v6.4s, v3.4h, v0.h[2] //// y2 * cos2(part of d0)
317 smull v20.4s, v2.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
320 add v14.4s, v20.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
321 sub v10.4s, v20.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
322 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
323 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
325 add v20.4s, v14.4s , v24.4s //// a0 + b0(part of x0)
326 sub v6.4s, v14.4s , v24.4s //// a0 - b0(part of x7)
328 add v24.4s, v22.4s , v28.4s //// a2 + b2(part of x2)
329 sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of x5)
331 add v28.4s, v18.4s , v26.4s //// a1 + b1(part of x1)
332 sub v18.4s, v18.4s , v26.4s //// a1 - b1(part of x6)
334 add v26.4s, v10.4s , v30.4s //// a3 + b3(part of x3)
335 sub v30.4s, v10.4s , v30.4s //// a3 - b3(part of x4)
337 sqrshrn v2.4h, v20.4s,#shift_stage1_idct //// x0 = (a0 + b0 + rnd) >> 7(shift_stage1_idct)
338 sqrshrn v15.4h, v6.4s,#shift_stage1_idct //// x7 = (a0 - b0 + rnd) >> 7(shift_stage1_idct)
339 sqrshrn v3.4h, v24.4s,#shift_stage1_idct //// x2 = (a2 + b2 + rnd) >> 7(shift_stage1_idct)
340 sqrshrn v14.4h, v22.4s,#shift_stage1_idct //// x5 = (a2 - b2 + rnd) >> 7(shift_stage1_idct)
341 sqrshrn v6.4h, v28.4s,#shift_stage1_idct //// x1 = (a1 + b1 + rnd) >> 7(shift_stage1_idct)
342 sqrshrn v11.4h, v18.4s,#shift_stage1_idct //// x6 = (a1 - b1 + rnd) >> 7(shift_stage1_idct)
343 sqrshrn v7.4h, v26.4s,#shift_stage1_idct //// x3 = (a3 + b3 + rnd) >> 7(shift_stage1_idct)
344 sqrshrn v10.4h, v30.4s,#shift_stage1_idct //// x4 = (a3 - b3 + rnd) >> 7(shift_stage1_idct)
353 smull v24.4s, v8.4h, v0.h[1] //// y1 * cos1(part of b0)
354 smull v26.4s, v8.4h, v0.h[3] //// y1 * cos3(part of b1)
355 smull v28.4s, v8.4h, v1.h[1] //// y1 * sin3(part of b2)
356 smull v30.4s, v8.4h, v1.h[3] //// y1 * sin1(part of b3)
358 smlal v24.4s, v9.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
359 smlsl v26.4s, v9.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
360 smlsl v28.4s, v9.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
361 smlsl v30.4s, v9.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
363 smull v18.4s, v5.4h, v1.h[2] //// y2 * sin2 (q4 is freed by this time)(part of d1)
364 smull v8.4s, v5.4h, v0.h[2] //// y2 * cos2(part of d0)
366 smull v20.4s, v4.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
367 smull v22.4s, v12.4h, v0.h[0] //// y4 * cos4(part of c0 and c1)
369 smlal v24.4s, v16.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
370 smlsl v26.4s, v16.4h, v0.h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
371 smlal v28.4s, v16.4h, v1.h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
372 smlal v30.4s, v16.4h, v0.h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
374 smlsl v18.4s, v13.4h, v0.h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
375 smlal v8.4s, v13.4h, v1.h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
377 add v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
378 sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
380 …smlal v24.4s, v17.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(pa…
381 …smlsl v26.4s, v17.4h, v1.h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(pa…
382 …smlal v28.4s, v17.4h, v0.h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(pa…
383 …smlsl v30.4s, v17.4h, v0.h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(pa…
385 add v16.4s, v12.4s , v8.4s //// a0 = c0 + d0(part of e0,e7)
386 sub v12.4s, v12.4s , v8.4s //// a3 = c0 - d0(part of e3,e4)
387 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of e2,e5)
388 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of e1,e6)
390 add v20.4s, v16.4s , v24.4s //// a0 + b0(part of e0)
391 sub v8.4s, v16.4s , v24.4s //// a0 - b0(part of e7)
393 add v24.4s, v22.4s , v28.4s //// a2 + b2(part of e2)
394 sub v22.4s, v22.4s , v28.4s //// a2 - b2(part of e5)
396 add v28.4s, v18.4s , v26.4s //// a1 + b1(part of e1)
397 sub v18.4s, v18.4s , v26.4s //// a1 - b1(part of e6)
399 add v26.4s, v12.4s , v30.4s //// a3 + b3(part of e3)
400 sub v30.4s, v12.4s , v30.4s //// a3 - b3(part of e4)
402 sqrshrn v4.4h, v20.4s,#shift_stage1_idct //// x0 = (a0 + b0 + rnd) >> 7(shift_stage1_idct)
403 sqrshrn v17.4h, v8.4s,#shift_stage1_idct //// x7 = (a0 - b0 + rnd) >> 7(shift_stage1_idct)
404 sqrshrn v5.4h, v24.4s,#shift_stage1_idct //// x2 = (a2 + b2 + rnd) >> 7(shift_stage1_idct)
405 sqrshrn v16.4h, v22.4s,#shift_stage1_idct //// x5 = (a2 - b2 + rnd) >> 7(shift_stage1_idct)
406 sqrshrn v8.4h, v28.4s,#shift_stage1_idct //// x1 = (a1 + b1 + rnd) >> 7(shift_stage1_idct)
407 sqrshrn v13.4h, v18.4s,#shift_stage1_idct //// x6 = (a1 - b1 + rnd) >> 7(shift_stage1_idct)
408 sqrshrn v9.4h, v26.4s,#shift_stage1_idct //// x3 = (a3 + b3 + rnd) >> 7(shift_stage1_idct)
409 sqrshrn v12.4h, v30.4s,#shift_stage1_idct //// x4 = (a3 - b3 + rnd) >> 7(shift_stage1_idct)
424 trn1 v6.2s, v29.2s, v31.2s
425 trn2 v7.2s, v29.2s, v31.2s ////x0,x1,x2,x3 first quadrant transposing continued.....
426 trn1 v2.2s, v25.2s, v27.2s
427 trn2 v3.2s, v25.2s, v27.2s ////x0,x1,x2,x3 first quadrant transposing continued.....
436 trn1 v10.2s, v25.2s, v27.2s
437 trn2 v11.2s, v25.2s, v27.2s ////x4,x5,x6,x7 third quadrant transposing continued.....
438 trn1 v14.2s, v29.2s, v31.2s
439 trn2 v15.2s, v29.2s, v31.2s ////x4,x5,x6,x7 third quadrant transposing continued.....
443 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
444 smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
445 smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
446 smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
448 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
449 smlsl v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
450 smlsl v28.4s, v7.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
451 smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
453 smull v20.4s, v2.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
456 smull v18.4s, v3.4h, v1.h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
457 smull v6.4s, v3.4h, v0.h[2] //// y2 * cos2(part of d0)
462 sub v22.4s, v20.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
463 add v4.4s, v20.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
466 add v2.4s, v4.4s , v24.4s
468 sub v6.4s, v4.4s , v24.4s
470 add v8.4s, v22.4s , v30.4s
472 sub v24.4s, v22.4s , v30.4s
474 sqrshrn v5.4h, v8.4s,#shift_stage2_idct
475 sqrshrn v2.4h, v2.4s,#shift_stage2_idct
476 sqrshrn v9.4h, v6.4s,#shift_stage2_idct
477 sqrshrn v6.4h, v24.4s,#shift_stage2_idct
479 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
480 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
483 add v30.4s, v22.4s , v28.4s
485 sub v24.4s, v22.4s , v28.4s
487 add v28.4s, v18.4s , v26.4s
489 sub v22.4s, v18.4s , v26.4s
490 sqrshrn v4.4h, v30.4s,#shift_stage2_idct
491 sqrshrn v7.4h, v24.4s,#shift_stage2_idct
492 sqrshrn v3.4h, v28.4s,#shift_stage2_idct
493 sqrshrn v8.4h, v22.4s,#shift_stage2_idct
505 trn1 v2.2s, v27.2s, v25.2s
506 trn2 v4.2s, v27.2s, v25.2s
507 trn1 v3.2s, v29.2s, v31.2s
508 trn2 v5.2s, v29.2s, v31.2s
515 trn1 v6.2s, v27.2s, v25.2s
516 trn2 v8.2s, v27.2s, v25.2s
517 trn1 v7.2s, v29.2s, v31.2s
518 trn2 v9.2s, v29.2s, v31.2s
523 smull v24.4s, v14.4h, v0.h[1] //// y1 * cos1(part of b0)
525 smull v26.4s, v14.4h, v0.h[3] //// y1 * cos3(part of b1)
526 smull v28.4s, v14.4h, v1.h[1] //// y1 * sin3(part of b2)
527 smull v30.4s, v14.4h, v1.h[3] //// y1 * sin1(part of b3)
529 smlal v24.4s, v15.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
530 smlsl v26.4s, v15.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
531 smlsl v28.4s, v15.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
532 smlsl v30.4s, v15.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
533 smull v20.4s, v10.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
534 smull v18.4s, v11.4h, v1.h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
535 smull v14.4s, v11.4h, v0.h[2] //// y2 * cos2(part of d0)
560 sub v22.4s, v20.4s , v14.4s //// a3 = c0 - d0(part of x3,x4)
561 add v12.4s, v20.4s , v14.4s //// a0 = c0 + d0(part of x0,x7)
564 add v0.4s, v12.4s , v24.4s
567 sub v24.4s, v12.4s , v24.4s
570 add v12.4s, v22.4s , v30.4s
573 sub v14.4s, v22.4s , v30.4s
575 sqrshrn v10.4h, v0.4s,#shift_stage2_idct
576 sqrshrn v17.4h, v24.4s,#shift_stage2_idct
577 sqrshrn v13.4h, v12.4s,#shift_stage2_idct
578 sqrshrn v14.4h, v14.4s,#shift_stage2_idct
580 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
581 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
584 add v0.4s, v22.4s , v28.4s
587 sub v24.4s, v22.4s , v28.4s
590 add v28.4s, v18.4s , v26.4s
593 sub v26.4s, v18.4s , v26.4s
596 sqrshrn v12.4h, v0.4s,#shift_stage2_idct
600 sqrshrn v15.4h, v24.4s,#shift_stage2_idct
606 sqrshrn v11.4h, v28.4s,#shift_stage2_idct
612 sqrshrn v16.4h, v26.4s,#shift_stage2_idct
629 trn1 v2.2s, v27.2s, v25.2s
630 trn2 v3.2s, v27.2s, v25.2s ////x0,x1,x2,x3 first quadrant transposing continued.....
631 trn1 v6.2s, v29.2s, v31.2s
632 trn2 v7.2s, v29.2s, v31.2s ////x0,x1,x2,x3 first quadrant transposing continued.....
639 trn1 v4.2s, v27.2s, v25.2s
640 … trn2 v5.2s, v27.2s, v25.2s ////x0,x1,x2,x3 second quadrant transposing continued.....
641 trn1 v8.2s, v29.2s, v31.2s
642 … trn2 v9.2s, v29.2s, v31.2s ////x0,x1,x2,x3 second quadrant transposing continued.....
649 trn1 v10.2s, v27.2s, v25.2s
650 trn2 v11.2s, v27.2s, v25.2s ////x4,x5,x6,x7 third quadrant transposing continued.....
651 trn1 v14.2s, v29.2s, v31.2s
652 trn2 v15.2s, v29.2s, v31.2s ////x4,x5,x6,x7 third quadrant transposing continued.....
659 trn1 v12.2s, v27.2s, v25.2s
660 … trn2 v13.2s, v27.2s, v25.2s ////x4,x5,x6,x7 fourth quadrant transposing continued.....
661 trn1 v16.2s, v29.2s, v31.2s
662 … trn2 v17.2s, v29.2s, v31.2s ////x4,x5,x6,x7 fourth quadrant transposing continued.....
701 smull v24.4s, v6.4h, v0.h[1] //// y1 * cos1(part of b0)
702 smull v26.4s, v6.4h, v0.h[3] //// y1 * cos3(part of b1)
703 smull v28.4s, v6.4h, v1.h[1] //// y1 * sin3(part of b2)
704 smull v30.4s, v6.4h, v1.h[3] //// y1 * sin1(part of b3)
706 smlal v24.4s, v7.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
707 smlsl v26.4s, v7.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
708 smlsl v28.4s, v7.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
709 smlsl v30.4s, v7.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
711 smull v20.4s, v2.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
712 smull v22.4s, v4.4h, v0.h[0] //// y4 * cos4(part of c0 and c1)
714 smull v18.4s, v3.4h, v1.h[2] //// y2 * sin2 (q3 is freed by this time)(part of d1)
715 smull v6.4s, v3.4h, v0.h[2] //// y2 * cos2(part of d0)
718 smlal v24.4s, v8.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
719 smlsl v26.4s, v8.4h, v0.h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
720 smlal v28.4s, v8.4h, v1.h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
721 smlal v30.4s, v8.4h, v0.h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
723 smlsl v18.4s, v5.4h, v0.h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
724 smlal v6.4s, v5.4h, v1.h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
726 add v2.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
727 sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
729 …smlal v24.4s, v9.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(pa…
730 …smlsl v26.4s, v9.4h, v1.h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(pa…
731 …smlal v28.4s, v9.4h, v0.h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(pa…
732 …smlsl v30.4s, v9.4h, v0.h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(pa…
734 sub v22.4s, v2.4s , v6.4s //// a3 = c0 - d0(part of x3,x4)
735 add v4.4s, v2.4s , v6.4s //// a0 = c0 + d0(part of x0,x7)
738 add v2.4s, v4.4s , v24.4s
740 sub v6.4s, v4.4s , v24.4s
742 add v8.4s, v22.4s , v30.4s
744 sub v24.4s, v22.4s , v30.4s
746 sqrshrn v5.4h, v8.4s,#shift_stage2_idct
747 sqrshrn v2.4h, v2.4s,#shift_stage2_idct
748 sqrshrn v9.4h, v6.4s,#shift_stage2_idct
749 sqrshrn v6.4h, v24.4s,#shift_stage2_idct
751 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
752 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
755 add v30.4s, v22.4s , v28.4s
757 sub v24.4s, v22.4s , v28.4s
759 add v28.4s, v18.4s , v26.4s
761 sub v22.4s, v18.4s , v26.4s
762 sqrshrn v4.4h, v30.4s,#shift_stage2_idct
763 sqrshrn v7.4h, v24.4s,#shift_stage2_idct
764 sqrshrn v3.4h, v28.4s,#shift_stage2_idct
765 sqrshrn v8.4h, v22.4s,#shift_stage2_idct
777 trn1 v2.2s, v27.2s, v25.2s
778 trn2 v4.2s, v27.2s, v25.2s
779 trn1 v3.2s, v29.2s, v31.2s
780 trn2 v5.2s, v29.2s, v31.2s
787 trn1 v6.2s, v27.2s, v25.2s
788 trn2 v8.2s, v27.2s, v25.2s
789 trn1 v7.2s, v29.2s, v31.2s
790 trn2 v9.2s, v29.2s, v31.2s
797 smull v24.4s, v14.4h, v0.h[1] //// y1 * cos1(part of b0)
798 smull v26.4s, v14.4h, v0.h[3] //// y1 * cos3(part of b1)
799 smull v28.4s, v14.4h, v1.h[1] //// y1 * sin3(part of b2)
800 smull v30.4s, v14.4h, v1.h[3] //// y1 * sin1(part of b3)
801 smlal v24.4s, v15.4h, v0.h[3] //// y1 * cos1 + y3 * cos3(part of b0)
802 smlsl v26.4s, v15.4h, v1.h[3] //// y1 * cos3 - y3 * sin1(part of b1)
803 smlsl v28.4s, v15.4h, v0.h[1] //// y1 * sin3 - y3 * cos1(part of b2)
804 smlsl v30.4s, v15.4h, v1.h[1] //// y1 * sin1 - y3 * sin3(part of b3)
805 smull v20.4s, v10.4h, v0.h[0] //// y0 * cos4(part of c0 and c1)
806 smull v22.4s, v12.4h, v0.h[0] //// y4 * cos4(part of c0 and c1)
807 smull v18.4s, v11.4h, v1.h[2] //// y2 * sin2 (q7 is freed by this time)(part of d1)
808 smull v14.4s, v11.4h, v0.h[2] //// y2 * cos2(part of d0)
809 smlal v24.4s, v16.4h, v1.h[1] //// y1 * cos1 + y3 * cos3 + y5 * sin3(part of b0)
812 smlsl v26.4s, v16.4h, v0.h[1] //// y1 * cos3 - y3 * sin1 - y5 * cos1(part of b1)
815 smlal v28.4s, v16.4h, v1.h[3] //// y1 * sin3 - y3 * cos1 + y5 * sin1(part of b2)
818 smlal v30.4s, v16.4h, v0.h[3] //// y1 * sin1 - y3 * sin3 + y5 * cos3(part of b3)
821 smlsl v18.4s, v13.4h, v0.h[2] //// d1 = y2 * sin2 - y6 * cos2(part of a0 and a1)
824 smlal v14.4s, v13.4h, v1.h[2] //// d0 = y2 * cos2 + y6 * sin2(part of a0 and a1)
826 add v12.4s, v20.4s , v22.4s //// c0 = y0 * cos4 + y4 * cos4(part of a0 and a1)
827 sub v20.4s, v20.4s , v22.4s //// c1 = y0 * cos4 - y4 * cos4(part of a0 and a1)
829 …smlal v24.4s, v17.4h, v1.h[3] //// b0 = y1 * cos1 + y3 * cos3 + y5 * sin3 + y7 * sin1(pa…
836 …smlsl v26.4s, v17.4h, v1.h[1] //// b1 = y1 * cos3 - y3 * sin1 - y5 * cos1 - y7 * sin3(pa…
842 …smlal v28.4s, v17.4h, v0.h[3] //// b2 = y1 * sin3 - y3 * cos1 + y5 * sin1 + y7 * cos3(pa…
843 …smlsl v30.4s, v17.4h, v0.h[1] //// b3 = y1 * sin1 - y3 * sin3 + y5 * cos3 - y7 * cos1(pa…
845 sub v22.4s, v12.4s , v14.4s //// a3 = c0 - d0(part of x3,x4)
846 add v12.4s, v12.4s , v14.4s //// a0 = c0 + d0(part of x0,x7)
849 add v0.4s, v12.4s , v24.4s
852 sub v24.4s, v12.4s , v24.4s
855 add v12.4s, v22.4s , v30.4s
858 sub v14.4s, v22.4s , v30.4s
860 sqrshrn v10.4h, v0.4s,#shift_stage2_idct
861 sqrshrn v17.4h, v24.4s,#shift_stage2_idct
862 sqrshrn v13.4h, v12.4s,#shift_stage2_idct
863 sqrshrn v14.4h, v14.4s,#shift_stage2_idct
865 sub v22.4s, v20.4s , v18.4s //// a2 = c1 - d1(part of x2,x5)
866 add v18.4s, v20.4s , v18.4s //// a1 = c1 + d1(part of x1,x6)
869 add v0.4s, v22.4s , v28.4s
872 sub v24.4s, v22.4s , v28.4s
875 add v28.4s, v18.4s , v26.4s
878 sub v26.4s, v18.4s , v26.4s
881 sqrshrn v12.4h, v0.4s,#shift_stage2_idct
885 sqrshrn v15.4h, v24.4s,#shift_stage2_idct
891 sqrshrn v11.4h, v28.4s,#shift_stage2_idct
897 sqrshrn v16.4h, v26.4s,#shift_stage2_idct
913 trn1 v10.2s, v27.2s, v25.2s
914 trn2 v12.2s, v27.2s, v25.2s
915 trn1 v11.2s, v29.2s, v31.2s
916 trn2 v13.2s, v29.2s, v31.2s
923 trn1 v14.2s, v27.2s, v25.2s
924 trn2 v16.2s, v27.2s, v25.2s
925 trn1 v15.2s, v29.2s, v31.2s
926 trn2 v17.2s, v29.2s, v31.2s