Lines Matching refs:int8x16_t
453 static int8x16_t FlipSign(const uint8x16_t v) { in FlipSign()
458 static uint8x16_t FlipSignBack(const int8x16_t v) { in FlipSignBack()
459 const int8x16_t sign_bit = vdupq_n_s8(0x80); in FlipSignBack()
463 static int8x16_t GetBaseDelta(const int8x16_t p1, const int8x16_t p0, in GetBaseDelta()
464 const int8x16_t q0, const int8x16_t q1) { in GetBaseDelta()
465 const int8x16_t q0_p0 = vqsubq_s8(q0, p0); // (q0-p0) in GetBaseDelta()
466 const int8x16_t p1_q1 = vqsubq_s8(p1, q1); // (p1-q1) in GetBaseDelta()
467 const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0); // (p1-q1) + 1 * (q0 - p0) in GetBaseDelta()
468 const int8x16_t s2 = vqaddq_s8(q0_p0, s1); // (p1-q1) + 2 * (q0 - p0) in GetBaseDelta()
469 const int8x16_t s3 = vqaddq_s8(q0_p0, s2); // (p1-q1) + 3 * (q0 - p0) in GetBaseDelta()
473 static int8x16_t GetBaseDelta0(const int8x16_t p0, const int8x16_t q0) { in GetBaseDelta0()
474 const int8x16_t q0_p0 = vqsubq_s8(q0, p0); // (q0-p0) in GetBaseDelta0()
475 const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0); // 2 * (q0 - p0) in GetBaseDelta0()
476 const int8x16_t s2 = vqaddq_s8(q0_p0, s1); // 3 * (q0 - p0) in GetBaseDelta0()
482 static void ApplyFilter2NoFlip(const int8x16_t p0s, const int8x16_t q0s, in ApplyFilter2NoFlip()
483 const int8x16_t delta, in ApplyFilter2NoFlip()
484 int8x16_t* const op0, int8x16_t* const oq0) { in ApplyFilter2NoFlip()
485 const int8x16_t kCst3 = vdupq_n_s8(0x03); in ApplyFilter2NoFlip()
486 const int8x16_t kCst4 = vdupq_n_s8(0x04); in ApplyFilter2NoFlip()
487 const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3); in ApplyFilter2NoFlip()
488 const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4); in ApplyFilter2NoFlip()
489 const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3); in ApplyFilter2NoFlip()
490 const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3); in ApplyFilter2NoFlip()
497 static void ApplyFilter2(const int8x16_t p0s, const int8x16_t q0s, in ApplyFilter2()
498 const int8x16_t delta, in ApplyFilter2()
500 const int8x16_t kCst3 = vdupq_n_s8(0x03); in ApplyFilter2()
501 const int8x16_t kCst4 = vdupq_n_s8(0x04); in ApplyFilter2()
502 const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3); in ApplyFilter2()
503 const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4); in ApplyFilter2()
504 const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3); in ApplyFilter2()
505 const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3); in ApplyFilter2()
506 const int8x16_t sp0 = vqaddq_s8(p0s, delta3); in ApplyFilter2()
507 const int8x16_t sq0 = vqsubq_s8(q0s, delta4); in ApplyFilter2()
516 const int8x16_t p1s = FlipSign(p1); in DoFilter2()
517 const int8x16_t p0s = FlipSign(p0); in DoFilter2()
518 const int8x16_t q0s = FlipSign(q0); in DoFilter2()
519 const int8x16_t q1s = FlipSign(q1); in DoFilter2()
520 const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s); in DoFilter2()
521 const int8x16_t delta1 = vandq_s8(delta0, vreinterpretq_s8_u8(mask)); in DoFilter2()
700 const int8x16_t p1, const int8x16_t p0, in ApplyFilter4()
701 const int8x16_t q0, const int8x16_t q1, in ApplyFilter4()
702 const int8x16_t delta0, in ApplyFilter4()
705 const int8x16_t kCst3 = vdupq_n_s8(0x03); in ApplyFilter4()
706 const int8x16_t kCst4 = vdupq_n_s8(0x04); in ApplyFilter4()
707 const int8x16_t delta1 = vqaddq_s8(delta0, kCst4); in ApplyFilter4()
708 const int8x16_t delta2 = vqaddq_s8(delta0, kCst3); in ApplyFilter4()
709 const int8x16_t a1 = vshrq_n_s8(delta1, 3); in ApplyFilter4()
710 const int8x16_t a2 = vshrq_n_s8(delta2, 3); in ApplyFilter4()
711 const int8x16_t a3 = vrshrq_n_s8(a1, 1); // a3 = (a1 + 1) >> 1 in ApplyFilter4()
725 const int8x16_t p1s = FlipSign(p1); in DoFilter4()
726 int8x16_t p0s = FlipSign(p0); in DoFilter4()
727 int8x16_t q0s = FlipSign(q0); in DoFilter4()
728 const int8x16_t q1s = FlipSign(q1); in DoFilter4()
733 const int8x16_t delta = GetBaseDelta(p1s, p0s, q0s, q1s); in DoFilter4()
734 const int8x16_t simple_lf_delta = in DoFilter4()
741 const int8x16_t delta0 = GetBaseDelta0(p0s, q0s); in DoFilter4()
744 const int8x16_t complex_lf_delta = in DoFilter4()
753 const int8x16_t p2, const int8x16_t p1, const int8x16_t p0, in ApplyFilter6()
754 const int8x16_t q0, const int8x16_t q1, const int8x16_t q2, in ApplyFilter6()
755 const int8x16_t delta, in ApplyFilter6()
777 const int8x16_t a1 = vcombine_s8(a1_lo, a1_hi); in ApplyFilter6()
778 const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi); in ApplyFilter6()
779 const int8x16_t a3 = vcombine_s8(a3_lo, a3_hi); in ApplyFilter6()
796 const int8x16_t p2s = FlipSign(p2); in DoFilter6()
797 const int8x16_t p1s = FlipSign(p1); in DoFilter6()
798 int8x16_t p0s = FlipSign(p0); in DoFilter6()
799 int8x16_t q0s = FlipSign(q0); in DoFilter6()
800 const int8x16_t q1s = FlipSign(q1); in DoFilter6()
801 const int8x16_t q2s = FlipSign(q2); in DoFilter6()
803 const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s); in DoFilter6()
807 const int8x16_t simple_lf_delta = in DoFilter6()
816 const int8x16_t complex_lf_delta = in DoFilter6()