/external/libaom/libaom/aom_dsp/x86/ |
D | loopfilter_sse2.c | 29 __m128i *x3, __m128i *q0p0, in transpose_pq_14_sse2() argument 79 *q0p0 = _mm_unpacklo_epi32( in transpose_pq_14_sse2() 247 __m128i q1p1, q0p0, p1p0, q1q0; in lpf_internal_4_sse2() local 253 q0p0 = _mm_unpacklo_epi32(*p0, *q0); in lpf_internal_4_sse2() 255 p1p0 = _mm_unpacklo_epi32(q0p0, q1p1); in lpf_internal_4_sse2() 259 flat = abs_diff(q1p1, q0p0); in lpf_internal_4_sse2() 290 __m128i q1p1, q0p0, p1p0, q1q0; in lpf_internal_4_dual_sse2() local 296 q0p0 = _mm_unpacklo_epi64(*p0, *q0); in lpf_internal_4_dual_sse2() 298 p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); in lpf_internal_4_dual_sse2() 299 q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); in lpf_internal_4_dual_sse2() [all …]
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | loopfilter_avx2.c | 23 __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; in vpx_lpf_horizontal_16_avx2() local 45 q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * pitch)); in vpx_lpf_horizontal_16_avx2() 46 q0p0 = _mm_castps_si128( in vpx_lpf_horizontal_16_avx2() 47 _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *)(s - 0 * pitch))); in vpx_lpf_horizontal_16_avx2() 48 p0q0 = _mm_shuffle_epi32(q0p0, 78); in vpx_lpf_horizontal_16_avx2() 53 _mm_or_si128(_mm_subs_epu8(q1p1, q0p0), _mm_subs_epu8(q0p0, q1p1)); in vpx_lpf_horizontal_16_avx2() 58 _mm_or_si128(_mm_subs_epu8(q0p0, p0q0), _mm_subs_epu8(p0q0, q0p0)); in vpx_lpf_horizontal_16_avx2() 90 __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); in vpx_lpf_horizontal_16_avx2() 131 _mm_or_si128(_mm_subs_epu8(q2p2, q0p0), _mm_subs_epu8(q0p0, q2p2)), in vpx_lpf_horizontal_16_avx2() 132 _mm_or_si128(_mm_subs_epu8(q3p3, q0p0), _mm_subs_epu8(q0p0, q3p3))); in vpx_lpf_horizontal_16_avx2() [all …]
|
D | loopfilter_sse2.c | 26 __m128i flat = abs_diff(q1p1, q0p0); \ 116 __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; in vpx_lpf_horizontal_4_sse2() local 123 q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * pitch)), in vpx_lpf_horizontal_4_sse2() 127 p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); in vpx_lpf_horizontal_4_sse2() 129 q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); in vpx_lpf_horizontal_4_sse2() 151 __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; in vpx_lpf_vertical_4_sse2() local 191 q0p0 = _mm_unpacklo_epi64(p1p0, q1q0); in vpx_lpf_vertical_4_sse2() 193 p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); in vpx_lpf_vertical_4_sse2() 241 __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; in vpx_lpf_horizontal_16_sse2() local 257 q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * pitch)); in vpx_lpf_horizontal_16_sse2() [all …]
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | loop_filter_neon.cc | 312 const uint8x8_t q0p0 = Transpose32(p0q0); in Filter6() local 313 sum = vaddw_u8(sum, q0p0); in Filter6() 322 sum = vaddq_u16(vaddl_u8(q0p0, q1p1), sum); in Filter6() 548 const uint8x8_t q0p0 = Transpose32(p0q0); in Filter8() local 549 sum = vaddw_u8(sum, q0p0); in Filter8() 778 const uint8x8_t q0p0 = Transpose32(p0q0); in Filter14() local 779 sum = vaddw_u8(sum, q0p0); in Filter14() 824 sum = vaddq_u16(vaddl_u8(q0p0, q5p5), sum); in Filter14()
|
/external/libaom/libaom/aom_dsp/arm/ |
D | loopfilter_neon.c | 159 uint8x8_t q0p0, q1p1, q2p2; in lpf_14_neon() local 225 q0p0 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p0q0))); in lpf_14_neon() 235 out = vaddw_u8(out, q0p0); in lpf_14_neon() 301 out_pq0 = vaddw_u8(out_pq0, q0p0); in lpf_14_neon() 408 uint8x8_t q0p0, q1p1, q2p2; in lpf_8_neon() local 415 q0p0 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(*p0q0))); in lpf_8_neon() 419 out = vaddw_u8(out, q0p0); in lpf_8_neon()
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | loop_filter_sse4.cc | 915 __m128i* q0p0, __m128i* q1p1, __m128i* q2p2, in DualTranspose8x4To4x8() argument 949 *q0p0 = _mm_unpacklo_epi32(_mm_srli_si128(ww1, 12), ww2); in DualTranspose8x4To4x8()
|