Lines Matching refs:t32
86 static INLINE void dct_const_round_shift_low_8_dual(const int32x4_t *const t32, in dct_const_round_shift_low_8_dual() argument
89 *d0 = dct_const_round_shift_low_8(t32 + 0); in dct_const_round_shift_low_8_dual()
90 *d1 = dct_const_round_shift_low_8(t32 + 2); in dct_const_round_shift_low_8_dual()
324 int32x4_t t32[2]; in idct8x8_12_pass1_bd8() local
350 t32[1] = vmull_lane_s16(step2[6], cospis0, 2); in idct8x8_12_pass1_bd8()
351 t32[0] = vmlsl_lane_s16(t32[1], step2[5], cospis0, 2); in idct8x8_12_pass1_bd8()
352 t32[1] = vmlal_lane_s16(t32[1], step2[5], cospis0, 2); in idct8x8_12_pass1_bd8()
353 step1[5] = vrshrn_n_s32(t32[0], DCT_CONST_BITS); in idct8x8_12_pass1_bd8()
354 step1[6] = vrshrn_n_s32(t32[1], DCT_CONST_BITS); in idct8x8_12_pass1_bd8()
374 int32x4_t t32[8]; in idct8x8_12_pass2_bd8() local
401 t32[2] = vmull_lane_s16(vget_low_s16(step2[6]), cospis0, 2); in idct8x8_12_pass2_bd8()
402 t32[3] = vmull_lane_s16(vget_high_s16(step2[6]), cospis0, 2); in idct8x8_12_pass2_bd8()
403 t32[0] = vmlsl_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); in idct8x8_12_pass2_bd8()
404 t32[1] = vmlsl_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); in idct8x8_12_pass2_bd8()
405 t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); in idct8x8_12_pass2_bd8()
406 t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); in idct8x8_12_pass2_bd8()
407 dct_const_round_shift_low_8_dual(t32, &step1[5], &step1[6]); in idct8x8_12_pass2_bd8()
427 int32x4_t t32[8]; in idct8x8_64_1d_bd8_kernel() local
447 t32[0] = vmull_lane_s16(input1l, cospis1, 3); in idct8x8_64_1d_bd8_kernel()
448 t32[1] = vmull_lane_s16(input1h, cospis1, 3); in idct8x8_64_1d_bd8_kernel()
449 t32[2] = vmull_lane_s16(input3l, cospis1, 2); in idct8x8_64_1d_bd8_kernel()
450 t32[3] = vmull_lane_s16(input3h, cospis1, 2); in idct8x8_64_1d_bd8_kernel()
451 t32[4] = vmull_lane_s16(input3l, cospis1, 1); in idct8x8_64_1d_bd8_kernel()
452 t32[5] = vmull_lane_s16(input3h, cospis1, 1); in idct8x8_64_1d_bd8_kernel()
453 t32[6] = vmull_lane_s16(input1l, cospis1, 0); in idct8x8_64_1d_bd8_kernel()
454 t32[7] = vmull_lane_s16(input1h, cospis1, 0); in idct8x8_64_1d_bd8_kernel()
455 t32[0] = vmlsl_lane_s16(t32[0], input7l, cospis1, 0); in idct8x8_64_1d_bd8_kernel()
456 t32[1] = vmlsl_lane_s16(t32[1], input7h, cospis1, 0); in idct8x8_64_1d_bd8_kernel()
457 t32[2] = vmlal_lane_s16(t32[2], input5l, cospis1, 1); in idct8x8_64_1d_bd8_kernel()
458 t32[3] = vmlal_lane_s16(t32[3], input5h, cospis1, 1); in idct8x8_64_1d_bd8_kernel()
459 t32[4] = vmlsl_lane_s16(t32[4], input5l, cospis1, 2); in idct8x8_64_1d_bd8_kernel()
460 t32[5] = vmlsl_lane_s16(t32[5], input5h, cospis1, 2); in idct8x8_64_1d_bd8_kernel()
461 t32[6] = vmlal_lane_s16(t32[6], input7l, cospis1, 3); in idct8x8_64_1d_bd8_kernel()
462 t32[7] = vmlal_lane_s16(t32[7], input7h, cospis1, 3); in idct8x8_64_1d_bd8_kernel()
463 dct_const_round_shift_low_8_dual(&t32[0], &step1[4], &step1[5]); in idct8x8_64_1d_bd8_kernel()
464 dct_const_round_shift_low_8_dual(&t32[4], &step1[6], &step1[7]); in idct8x8_64_1d_bd8_kernel()
467 t32[2] = vmull_lane_s16(step1l[0], cospis0, 2); in idct8x8_64_1d_bd8_kernel()
468 t32[3] = vmull_lane_s16(step1h[0], cospis0, 2); in idct8x8_64_1d_bd8_kernel()
469 t32[4] = vmull_lane_s16(step1l[1], cospis0, 3); in idct8x8_64_1d_bd8_kernel()
470 t32[5] = vmull_lane_s16(step1h[1], cospis0, 3); in idct8x8_64_1d_bd8_kernel()
471 t32[6] = vmull_lane_s16(step1l[1], cospis0, 1); in idct8x8_64_1d_bd8_kernel()
472 t32[7] = vmull_lane_s16(step1h[1], cospis0, 1); in idct8x8_64_1d_bd8_kernel()
473 t32[0] = vmlal_lane_s16(t32[2], step1l[2], cospis0, 2); in idct8x8_64_1d_bd8_kernel()
474 t32[1] = vmlal_lane_s16(t32[3], step1h[2], cospis0, 2); in idct8x8_64_1d_bd8_kernel()
475 t32[2] = vmlsl_lane_s16(t32[2], step1l[2], cospis0, 2); in idct8x8_64_1d_bd8_kernel()
476 t32[3] = vmlsl_lane_s16(t32[3], step1h[2], cospis0, 2); in idct8x8_64_1d_bd8_kernel()
477 t32[4] = vmlsl_lane_s16(t32[4], step1l[3], cospis0, 1); in idct8x8_64_1d_bd8_kernel()
478 t32[5] = vmlsl_lane_s16(t32[5], step1h[3], cospis0, 1); in idct8x8_64_1d_bd8_kernel()
479 t32[6] = vmlal_lane_s16(t32[6], step1l[3], cospis0, 3); in idct8x8_64_1d_bd8_kernel()
480 t32[7] = vmlal_lane_s16(t32[7], step1h[3], cospis0, 3); in idct8x8_64_1d_bd8_kernel()
481 dct_const_round_shift_low_8_dual(&t32[0], &step2[0], &step2[1]); in idct8x8_64_1d_bd8_kernel()
482 dct_const_round_shift_low_8_dual(&t32[4], &step2[2], &step2[3]); in idct8x8_64_1d_bd8_kernel()
495 t32[2] = vmull_lane_s16(vget_low_s16(step2[6]), cospis0, 2); in idct8x8_64_1d_bd8_kernel()
496 t32[3] = vmull_lane_s16(vget_high_s16(step2[6]), cospis0, 2); in idct8x8_64_1d_bd8_kernel()
497 t32[0] = vmlsl_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); in idct8x8_64_1d_bd8_kernel()
498 t32[1] = vmlsl_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); in idct8x8_64_1d_bd8_kernel()
499 t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); in idct8x8_64_1d_bd8_kernel()
500 t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); in idct8x8_64_1d_bd8_kernel()
501 dct_const_round_shift_low_8_dual(t32, &step1[5], &step1[6]); in idct8x8_64_1d_bd8_kernel()
525 int32x4_t *const t32) { in idct_cospi_8_24_q_kernel() argument
526 t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_0_8_16_24, 3); in idct_cospi_8_24_q_kernel()
527 t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_0_8_16_24, 3); in idct_cospi_8_24_q_kernel()
528 t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_0_8_16_24, 3); in idct_cospi_8_24_q_kernel()
529 t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_0_8_16_24, 3); in idct_cospi_8_24_q_kernel()
530 t32[0] = vmlsl_lane_s16(t32[0], vget_low_s16(s1), cospi_0_8_16_24, 1); in idct_cospi_8_24_q_kernel()
531 t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_0_8_16_24, 1); in idct_cospi_8_24_q_kernel()
532 t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_0_8_16_24, 1); in idct_cospi_8_24_q_kernel()
533 t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_0_8_16_24, 1); in idct_cospi_8_24_q_kernel()
539 int32x4_t t32[4]; in idct_cospi_8_24_q() local
541 idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t32); in idct_cospi_8_24_q()
542 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_8_24_q()
549 int32x4_t t32[4]; in idct_cospi_8_24_neg_q() local
551 idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t32); in idct_cospi_8_24_neg_q()
552 t32[2] = vnegq_s32(t32[2]); in idct_cospi_8_24_neg_q()
553 t32[3] = vnegq_s32(t32[3]); in idct_cospi_8_24_neg_q()
554 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_8_24_neg_q()
561 int32x4_t t32[6]; in idct_cospi_16_16_q() local
563 t32[4] = vmull_lane_s16(vget_low_s16(s1), cospi_0_8_16_24, 2); in idct_cospi_16_16_q()
564 t32[5] = vmull_lane_s16(vget_high_s16(s1), cospi_0_8_16_24, 2); in idct_cospi_16_16_q()
565 t32[0] = vmlsl_lane_s16(t32[4], vget_low_s16(s0), cospi_0_8_16_24, 2); in idct_cospi_16_16_q()
566 t32[1] = vmlsl_lane_s16(t32[5], vget_high_s16(s0), cospi_0_8_16_24, 2); in idct_cospi_16_16_q()
567 t32[2] = vmlal_lane_s16(t32[4], vget_low_s16(s0), cospi_0_8_16_24, 2); in idct_cospi_16_16_q()
568 t32[3] = vmlal_lane_s16(t32[5], vget_high_s16(s0), cospi_0_8_16_24, 2); in idct_cospi_16_16_q()
569 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_16_16_q()
575 int32x4_t t32[4]; in idct_cospi_2_30() local
577 t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_2_30_10_22, 1); in idct_cospi_2_30()
578 t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_2_30_10_22, 1); in idct_cospi_2_30()
579 t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_2_30_10_22, 1); in idct_cospi_2_30()
580 t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_2_30_10_22, 1); in idct_cospi_2_30()
581 t32[0] = vmlsl_lane_s16(t32[0], vget_low_s16(s1), cospi_2_30_10_22, 0); in idct_cospi_2_30()
582 t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_2_30_10_22, 0); in idct_cospi_2_30()
583 t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_2_30_10_22, 0); in idct_cospi_2_30()
584 t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_2_30_10_22, 0); in idct_cospi_2_30()
585 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_2_30()
591 int32x4_t t32[4]; in idct_cospi_4_28() local
593 t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_4_12_20N_28, 3); in idct_cospi_4_28()
594 t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_4_12_20N_28, 3); in idct_cospi_4_28()
595 t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_4_12_20N_28, 3); in idct_cospi_4_28()
596 t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_4_12_20N_28, 3); in idct_cospi_4_28()
597 t32[0] = vmlsl_lane_s16(t32[0], vget_low_s16(s1), cospi_4_12_20N_28, 0); in idct_cospi_4_28()
598 t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_4_12_20N_28, 0); in idct_cospi_4_28()
599 t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_4_12_20N_28, 0); in idct_cospi_4_28()
600 t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_4_12_20N_28, 0); in idct_cospi_4_28()
601 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_4_28()
607 int32x4_t t32[4]; in idct_cospi_6_26() local
609 t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_6_26N_14_18N, 0); in idct_cospi_6_26()
610 t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_6_26N_14_18N, 0); in idct_cospi_6_26()
611 t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_6_26N_14_18N, 0); in idct_cospi_6_26()
612 t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_6_26N_14_18N, 0); in idct_cospi_6_26()
613 t32[0] = vmlal_lane_s16(t32[0], vget_low_s16(s1), cospi_6_26N_14_18N, 1); in idct_cospi_6_26()
614 t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_6_26N_14_18N, 1); in idct_cospi_6_26()
615 t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_6_26N_14_18N, 1); in idct_cospi_6_26()
616 t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_6_26N_14_18N, 1); in idct_cospi_6_26()
617 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_6_26()
623 int32x4_t t32[4]; in idct_cospi_10_22() local
625 t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_2_30_10_22, 3); in idct_cospi_10_22()
626 t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_2_30_10_22, 3); in idct_cospi_10_22()
627 t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_2_30_10_22, 3); in idct_cospi_10_22()
628 t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_2_30_10_22, 3); in idct_cospi_10_22()
629 t32[0] = vmlsl_lane_s16(t32[0], vget_low_s16(s1), cospi_2_30_10_22, 2); in idct_cospi_10_22()
630 t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_2_30_10_22, 2); in idct_cospi_10_22()
631 t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_2_30_10_22, 2); in idct_cospi_10_22()
632 t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_2_30_10_22, 2); in idct_cospi_10_22()
633 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_10_22()
639 int32x4_t t32[4]; in idct_cospi_12_20() local
641 t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_4_12_20N_28, 1); in idct_cospi_12_20()
642 t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_4_12_20N_28, 1); in idct_cospi_12_20()
643 t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_4_12_20N_28, 1); in idct_cospi_12_20()
644 t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_4_12_20N_28, 1); in idct_cospi_12_20()
645 t32[0] = vmlal_lane_s16(t32[0], vget_low_s16(s1), cospi_4_12_20N_28, 2); in idct_cospi_12_20()
646 t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_4_12_20N_28, 2); in idct_cospi_12_20()
647 t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_4_12_20N_28, 2); in idct_cospi_12_20()
648 t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_4_12_20N_28, 2); in idct_cospi_12_20()
649 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_12_20()
655 int32x4_t t32[4]; in idct_cospi_14_18() local
657 t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_6_26N_14_18N, 2); in idct_cospi_14_18()
658 t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_6_26N_14_18N, 2); in idct_cospi_14_18()
659 t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_6_26N_14_18N, 2); in idct_cospi_14_18()
660 t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_6_26N_14_18N, 2); in idct_cospi_14_18()
661 t32[0] = vmlal_lane_s16(t32[0], vget_low_s16(s1), cospi_6_26N_14_18N, 3); in idct_cospi_14_18()
662 t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_6_26N_14_18N, 3); in idct_cospi_14_18()
663 t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_6_26N_14_18N, 3); in idct_cospi_14_18()
664 t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_6_26N_14_18N, 3); in idct_cospi_14_18()
665 dct_const_round_shift_low_8_dual(t32, d0, d1); in idct_cospi_14_18()