Lines Matching refs:int8x16_t
440 static int8x16_t FlipSign_NEON(const uint8x16_t v) { in FlipSign_NEON()
445 static uint8x16_t FlipSignBack_NEON(const int8x16_t v) { in FlipSignBack_NEON()
446 const int8x16_t sign_bit = vdupq_n_s8(0x80); in FlipSignBack_NEON()
450 static int8x16_t GetBaseDelta_NEON(const int8x16_t p1, const int8x16_t p0, in GetBaseDelta_NEON()
451 const int8x16_t q0, const int8x16_t q1) { in GetBaseDelta_NEON()
452 const int8x16_t q0_p0 = vqsubq_s8(q0, p0); // (q0-p0) in GetBaseDelta_NEON()
453 const int8x16_t p1_q1 = vqsubq_s8(p1, q1); // (p1-q1) in GetBaseDelta_NEON()
454 const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0); // (p1-q1) + 1 * (q0 - p0) in GetBaseDelta_NEON()
455 const int8x16_t s2 = vqaddq_s8(q0_p0, s1); // (p1-q1) + 2 * (q0 - p0) in GetBaseDelta_NEON()
456 const int8x16_t s3 = vqaddq_s8(q0_p0, s2); // (p1-q1) + 3 * (q0 - p0) in GetBaseDelta_NEON()
460 static int8x16_t GetBaseDelta0_NEON(const int8x16_t p0, const int8x16_t q0) { in GetBaseDelta0_NEON()
461 const int8x16_t q0_p0 = vqsubq_s8(q0, p0); // (q0-p0) in GetBaseDelta0_NEON()
462 const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0); // 2 * (q0 - p0) in GetBaseDelta0_NEON()
463 const int8x16_t s2 = vqaddq_s8(q0_p0, s1); // 3 * (q0 - p0) in GetBaseDelta0_NEON()
469 static void ApplyFilter2NoFlip_NEON(const int8x16_t p0s, const int8x16_t q0s, in ApplyFilter2NoFlip_NEON()
470 const int8x16_t delta, in ApplyFilter2NoFlip_NEON()
471 int8x16_t* const op0, in ApplyFilter2NoFlip_NEON()
472 int8x16_t* const oq0) { in ApplyFilter2NoFlip_NEON()
473 const int8x16_t kCst3 = vdupq_n_s8(0x03); in ApplyFilter2NoFlip_NEON()
474 const int8x16_t kCst4 = vdupq_n_s8(0x04); in ApplyFilter2NoFlip_NEON()
475 const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3); in ApplyFilter2NoFlip_NEON()
476 const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4); in ApplyFilter2NoFlip_NEON()
477 const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3); in ApplyFilter2NoFlip_NEON()
478 const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3); in ApplyFilter2NoFlip_NEON()
485 static void ApplyFilter2_NEON(const int8x16_t p0s, const int8x16_t q0s, in ApplyFilter2_NEON()
486 const int8x16_t delta, in ApplyFilter2_NEON()
488 const int8x16_t kCst3 = vdupq_n_s8(0x03); in ApplyFilter2_NEON()
489 const int8x16_t kCst4 = vdupq_n_s8(0x04); in ApplyFilter2_NEON()
490 const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3); in ApplyFilter2_NEON()
491 const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4); in ApplyFilter2_NEON()
492 const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3); in ApplyFilter2_NEON()
493 const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3); in ApplyFilter2_NEON()
494 const int8x16_t sp0 = vqaddq_s8(p0s, delta3); in ApplyFilter2_NEON()
495 const int8x16_t sq0 = vqsubq_s8(q0s, delta4); in ApplyFilter2_NEON()
504 const int8x16_t p1s = FlipSign_NEON(p1); in DoFilter2_NEON()
505 const int8x16_t p0s = FlipSign_NEON(p0); in DoFilter2_NEON()
506 const int8x16_t q0s = FlipSign_NEON(q0); in DoFilter2_NEON()
507 const int8x16_t q1s = FlipSign_NEON(q1); in DoFilter2_NEON()
508 const int8x16_t delta0 = GetBaseDelta_NEON(p1s, p0s, q0s, q1s); in DoFilter2_NEON()
509 const int8x16_t delta1 = vandq_s8(delta0, vreinterpretq_s8_u8(mask)); in DoFilter2_NEON()
712 const int8x16_t p1, const int8x16_t p0, in ApplyFilter4_NEON()
713 const int8x16_t q0, const int8x16_t q1, in ApplyFilter4_NEON()
714 const int8x16_t delta0, in ApplyFilter4_NEON()
717 const int8x16_t kCst3 = vdupq_n_s8(0x03); in ApplyFilter4_NEON()
718 const int8x16_t kCst4 = vdupq_n_s8(0x04); in ApplyFilter4_NEON()
719 const int8x16_t delta1 = vqaddq_s8(delta0, kCst4); in ApplyFilter4_NEON()
720 const int8x16_t delta2 = vqaddq_s8(delta0, kCst3); in ApplyFilter4_NEON()
721 const int8x16_t a1 = vshrq_n_s8(delta1, 3); in ApplyFilter4_NEON()
722 const int8x16_t a2 = vshrq_n_s8(delta2, 3); in ApplyFilter4_NEON()
723 const int8x16_t a3 = vrshrq_n_s8(a1, 1); // a3 = (a1 + 1) >> 1 in ApplyFilter4_NEON()
737 const int8x16_t p1s = FlipSign_NEON(p1); in DoFilter4_NEON()
738 int8x16_t p0s = FlipSign_NEON(p0); in DoFilter4_NEON()
739 int8x16_t q0s = FlipSign_NEON(q0); in DoFilter4_NEON()
740 const int8x16_t q1s = FlipSign_NEON(q1); in DoFilter4_NEON()
745 const int8x16_t delta = GetBaseDelta_NEON(p1s, p0s, q0s, q1s); in DoFilter4_NEON()
746 const int8x16_t simple_lf_delta = in DoFilter4_NEON()
753 const int8x16_t delta0 = GetBaseDelta0_NEON(p0s, q0s); in DoFilter4_NEON()
756 const int8x16_t complex_lf_delta = in DoFilter4_NEON()
765 const int8x16_t p2, const int8x16_t p1, const int8x16_t p0, in ApplyFilter6_NEON()
766 const int8x16_t q0, const int8x16_t q1, const int8x16_t q2, in ApplyFilter6_NEON()
767 const int8x16_t delta, in ApplyFilter6_NEON()
789 const int8x16_t a1 = vcombine_s8(a1_lo, a1_hi); in ApplyFilter6_NEON()
790 const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi); in ApplyFilter6_NEON()
791 const int8x16_t a3 = vcombine_s8(a3_lo, a3_hi); in ApplyFilter6_NEON()
808 const int8x16_t p2s = FlipSign_NEON(p2); in DoFilter6_NEON()
809 const int8x16_t p1s = FlipSign_NEON(p1); in DoFilter6_NEON()
810 int8x16_t p0s = FlipSign_NEON(p0); in DoFilter6_NEON()
811 int8x16_t q0s = FlipSign_NEON(q0); in DoFilter6_NEON()
812 const int8x16_t q1s = FlipSign_NEON(q1); in DoFilter6_NEON()
813 const int8x16_t q2s = FlipSign_NEON(q2); in DoFilter6_NEON()
815 const int8x16_t delta0 = GetBaseDelta_NEON(p1s, p0s, q0s, q1s); in DoFilter6_NEON()
819 const int8x16_t simple_lf_delta = in DoFilter6_NEON()
828 const int8x16_t complex_lf_delta = in DoFilter6_NEON()