/external/libaom/libaom/aom_dsp/x86/ |
D | loopfilter_sse2.c | 30 __m128i *q1p1, __m128i *q2p2, in transpose_pq_14_sse2() argument 72 *q2p2 = _mm_unpackhi_epi32( in transpose_pq_14_sse2() 395 __m128i *q6p6, __m128i *q5p5, __m128i *q4p4, __m128i *q3p3, __m128i *q2p2, in lpf_internal_14_dual_sse2() argument 434 work = _mm_max_epu8(abs_diff(*q2p2, *q1p1), abs_diff(*q3p3, *q2p2)); in lpf_internal_14_dual_sse2() 451 flat = _mm_max_epu8(abs_diff(*q2p2, *q0p0), abs_diff(*q3p3, *q0p0)); in lpf_internal_14_dual_sse2() 477 p2_16 = _mm_unpacklo_epi8(*q2p2, zero); in lpf_internal_14_dual_sse2() 482 q2_16 = _mm_unpackhi_epi8(*q2p2, zero); in lpf_internal_14_dual_sse2() 571 *q2p2 = _mm_andnot_si128(flat, *q2p2); in lpf_internal_14_dual_sse2() 573 *q2p2 = _mm_or_si128(*q2p2, flat_q2p2); in lpf_internal_14_dual_sse2() 679 *q2p2 = _mm_andnot_si128(flat2, *q2p2); in lpf_internal_14_dual_sse2() [all …]
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | loopfilter_avx2.c | 23 __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; in vpx_lpf_horizontal_16_avx2() local 38 q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * pitch)); in vpx_lpf_horizontal_16_avx2() 39 q2p2 = _mm_castps_si128( in vpx_lpf_horizontal_16_avx2() 40 _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *)(s + 2 * pitch))); in vpx_lpf_horizontal_16_avx2() 75 _mm_or_si128(_mm_subs_epu8(q2p2, q1p1), _mm_subs_epu8(q1p1, q2p2)), in vpx_lpf_horizontal_16_avx2() 76 _mm_or_si128(_mm_subs_epu8(q3p3, q2p2), _mm_subs_epu8(q2p2, q3p3))); in vpx_lpf_horizontal_16_avx2() 131 _mm_or_si128(_mm_subs_epu8(q2p2, q0p0), _mm_subs_epu8(q0p0, q2p2)), in vpx_lpf_horizontal_16_avx2() 182 p2_16 = _mm_unpacklo_epi8(q2p2, zero); in vpx_lpf_horizontal_16_avx2() 187 q2_16 = _mm_unpackhi_epi8(q2p2, zero); in vpx_lpf_horizontal_16_avx2() 309 q2p2 = _mm_andnot_si128(flat, q2p2); in vpx_lpf_horizontal_16_avx2() [all …]
|
D | loopfilter_sse2.c | 241 __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; in vpx_lpf_horizontal_16_sse2() local 250 q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * pitch)); in vpx_lpf_horizontal_16_sse2() 251 q2p2 = _mm_castps_si128( in vpx_lpf_horizontal_16_sse2() 252 _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *)(s + 2 * pitch))); in vpx_lpf_horizontal_16_sse2() 283 work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2)); in vpx_lpf_horizontal_16_sse2() 337 flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0)); in vpx_lpf_horizontal_16_sse2() 380 p2_16 = _mm_unpacklo_epi8(q2p2, zero); in vpx_lpf_horizontal_16_sse2() 385 q2_16 = _mm_unpackhi_epi8(q2p2, zero); in vpx_lpf_horizontal_16_sse2() 507 q2p2 = _mm_andnot_si128(flat, q2p2); in vpx_lpf_horizontal_16_sse2() 509 q2p2 = _mm_or_si128(q2p2, flat_q2p2); in vpx_lpf_horizontal_16_sse2() [all …]
|
/external/libaom/libaom/aom_dsp/arm/ |
D | loopfilter_neon.c | 159 uint8x8_t q0p0, q1p1, q2p2; in lpf_14_neon() local 227 q2p2 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p2q2))); in lpf_14_neon() 244 out_pq0 = vaddw_u8(out_pq0, q2p2); in lpf_14_neon() 286 qp_sum = vaddl_u8(q2p2, q1p1); in lpf_14_neon() 408 uint8x8_t q0p0, q1p1, q2p2; in lpf_8_neon() local 417 q2p2 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p2q2))); in lpf_8_neon() 428 out_pq0 = vaddw_u8(out_pq0, q2p2); in lpf_8_neon()
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | loop_filter_neon.cc | 566 const uint8x8_t q2p2 = Transpose32(p2q2); in Filter8() local 567 sum = vaddq_u16(vaddl_u8(p0q0, q2p2), sum); in Filter8() 796 const uint8x8_t q2p2 = Transpose32(p2q2); in Filter14() local 797 sum = vaddq_u16(vaddl_u8(p2q2, q2p2), sum); in Filter14()
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | loop_filter_sse4.cc | 915 __m128i* q0p0, __m128i* q1p1, __m128i* q2p2, in DualTranspose8x4To4x8() argument 945 *q2p2 = _mm_unpackhi_epi32(_mm_slli_si128(ww1, 4), ww2); in DualTranspose8x4To4x8()
|