Home
last modified time | relevance | path

Searched refs:_mm_max_epu8 (Results 1 – 25 of 47) sorted by relevance

12

/external/XNNPACK/src/u8-maxpool/
D9p8x-minmax-sse2-c16.c92 const __m128i vmax018 = _mm_max_epu8(_mm_max_epu8(vi0, vi1), vi8); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
93 const __m128i vmax23 = _mm_max_epu8(vi2, vi3); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
94 const __m128i vmax45 = _mm_max_epu8(vi4, vi5); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
95 const __m128i vmax67 = _mm_max_epu8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
97 const __m128i vmax2345 = _mm_max_epu8(vmax23, vmax45); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
98 const __m128i vmax01678 = _mm_max_epu8(vmax018, vmax67); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
99 const __m128i vmax = _mm_max_epu8(vmax2345, vmax01678); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
100 const __m128i vout = _mm_max_epu8(_mm_min_epu8(vmax, voutput_max), voutput_min); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
115 const __m128i vmax018 = _mm_max_epu8(_mm_max_epu8(vi0, vi1), vi8); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
116 const __m128i vmax23 = _mm_max_epu8(vi2, vi3); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16()
[all …]
/external/XNNPACK/src/u8-rmax/
Dsse2.c28 vmax = _mm_max_epu8(vmax, vx); in xnn_u8_rmax_ukernel__sse2()
35 vmax = _mm_max_epu8(vmax, vx); in xnn_u8_rmax_ukernel__sse2()
37 vmax = _mm_max_epu8(vmax, _mm_unpackhi_epi64(vmax, vmax)); in xnn_u8_rmax_ukernel__sse2()
38 vmax = _mm_max_epu8(vmax, _mm_srli_epi64(vmax, 32)); in xnn_u8_rmax_ukernel__sse2()
39 vmax = _mm_max_epu8(vmax, _mm_srli_epi32(vmax, 16)); in xnn_u8_rmax_ukernel__sse2()
40 vmax = _mm_max_epu8(vmax, _mm_srli_epi16(vmax, 8)); in xnn_u8_rmax_ukernel__sse2()
/external/XNNPACK/src/u8-clamp/
Dsse2-x64.c33 const __m128i vy0 = _mm_min_epu8(_mm_max_epu8(vx0, voutput_min), voutput_max); in xnn_u8_clamp_ukernel__sse2_x64()
34 const __m128i vy1 = _mm_min_epu8(_mm_max_epu8(vx1, voutput_min), voutput_max); in xnn_u8_clamp_ukernel__sse2_x64()
35 const __m128i vy2 = _mm_min_epu8(_mm_max_epu8(vx2, voutput_min), voutput_max); in xnn_u8_clamp_ukernel__sse2_x64()
36 const __m128i vy3 = _mm_min_epu8(_mm_max_epu8(vx3, voutput_min), voutput_max); in xnn_u8_clamp_ukernel__sse2_x64()
48 vout = _mm_max_epu8(vout, voutput_min); in xnn_u8_clamp_ukernel__sse2_x64()
55 vout = _mm_max_epu8(vout, voutput_min); in xnn_u8_clamp_ukernel__sse2_x64()
/external/libvpx/libvpx/vpx_dsp/x86/
Dloopfilter_avx2.c61 flat = _mm_max_epu8(abs_p1p0, abs_q1q0); in vpx_lpf_horizontal_16_avx2()
70 mask = _mm_max_epu8(abs_p1p0, mask); in vpx_lpf_horizontal_16_avx2()
74 work = _mm_max_epu8( in vpx_lpf_horizontal_16_avx2()
77 mask = _mm_max_epu8(work, mask); in vpx_lpf_horizontal_16_avx2()
78 mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); in vpx_lpf_horizontal_16_avx2()
130 flat = _mm_max_epu8( in vpx_lpf_horizontal_16_avx2()
133 flat = _mm_max_epu8(abs_p1p0, flat); in vpx_lpf_horizontal_16_avx2()
134 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); in vpx_lpf_horizontal_16_avx2()
147 flat2 = _mm_max_epu8( in vpx_lpf_horizontal_16_avx2()
155 work = _mm_max_epu8( in vpx_lpf_horizontal_16_avx2()
[all …]
Dloopfilter_sse2.c33 _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \
49 flat = _mm_max_epu8(work, flat); \
52 flat = _mm_max_epu8(work, flat); \
53 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \
270 flat = _mm_max_epu8(abs_p1p0, abs_q1q0); in vpx_lpf_horizontal_16_sse2()
279 mask = _mm_max_epu8(abs_p1p0, mask); in vpx_lpf_horizontal_16_sse2()
283 work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2)); in vpx_lpf_horizontal_16_sse2()
284 mask = _mm_max_epu8(work, mask); in vpx_lpf_horizontal_16_sse2()
285 mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); in vpx_lpf_horizontal_16_sse2()
337 flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0)); in vpx_lpf_horizontal_16_sse2()
[all …]
/external/libaom/libaom/aom_dsp/x86/
Dloopfilter_sse2.c264 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 4)); in lpf_internal_4_sse2()
307 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); in lpf_internal_4_dual_sse2()
419 flat = _mm_max_epu8(abs_p1p0, abs_q1q0); in lpf_internal_14_dual_sse2()
430 mask = _mm_max_epu8(abs_p1p0, mask); in lpf_internal_14_dual_sse2()
434 work = _mm_max_epu8(abs_diff(*q2p2, *q1p1), abs_diff(*q3p3, *q2p2)); in lpf_internal_14_dual_sse2()
435 mask = _mm_max_epu8(work, mask); in lpf_internal_14_dual_sse2()
436 mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); in lpf_internal_14_dual_sse2()
451 flat = _mm_max_epu8(abs_diff(*q2p2, *q0p0), abs_diff(*q3p3, *q0p0)); in lpf_internal_14_dual_sse2()
452 flat = _mm_max_epu8(abs_p1p0, flat); in lpf_internal_14_dual_sse2()
453 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); in lpf_internal_14_dual_sse2()
[all …]
/external/libgav1/libgav1/src/dsp/x86/
Dcdef_sse4.cc580 const __m128i max_p01 = _mm_max_epu8(primary_val[0], primary_val[1]); in CdefFilter_SSE4_1()
581 const __m128i max_p23 = _mm_max_epu8(primary_val[2], primary_val[3]); in CdefFilter_SSE4_1()
582 const __m128i max_p = _mm_max_epu8(max_p01, max_p23); in CdefFilter_SSE4_1()
623 _mm_max_epu8(secondary_val[0], secondary_val[1]); in CdefFilter_SSE4_1()
625 _mm_max_epu8(secondary_val[2], secondary_val[3]); in CdefFilter_SSE4_1()
627 _mm_max_epu8(secondary_val[4], secondary_val[5]); in CdefFilter_SSE4_1()
629 _mm_max_epu8(secondary_val[6], secondary_val[7]); in CdefFilter_SSE4_1()
630 const __m128i max_s = _mm_max_epu8(_mm_max_epu8(max_s01, max_s23), in CdefFilter_SSE4_1()
631 _mm_max_epu8(max_s45, max_s67)); in CdefFilter_SSE4_1()
Dloop_filter_sse4.cc67 _mm_max_epu8(abs_qp1mqp0, _mm_srli_si128(abs_qp1mqp0, 4)); in Hev()
98 _mm_max_epu8(abs_qp1mqp0, _mm_srli_si128(abs_qp1mqp0, 4)), inner_thresh); in NeedsFilter4()
264 const __m128i max_pq = _mm_max_epu8(abs_qp2mqp1, abs_qp1mqp0); in NeedsFilter6()
266 _mm_max_epu8(max_pq, _mm_srli_si128(max_pq, 4)), inner_thresh); in NeedsFilter6()
278 const __m128i max_pq = _mm_max_epu8(abs_pq2mpq0, abs_qp1mqp0); in IsFlat3()
280 _mm_max_epu8(max_pq, _mm_srli_si128(max_pq, 4)), flat_thresh); in IsFlat3()
491 const __m128i max_pq_a = _mm_max_epu8(abs_qp2mqp1, abs_qp1mqp0); in NeedsFilter8()
493 const __m128i max_pq = _mm_max_epu8(max_pq_a, abs_pq3mpq2); in NeedsFilter8()
495 _mm_max_epu8(max_pq, _mm_srli_si128(max_pq, 4)), inner_thresh); in NeedsFilter8()
508 const __m128i max_pq_a = _mm_max_epu8(abs_pq2mpq0, abs_qp1mqp0); in IsFlat4()
[all …]
/external/skia/src/opts/
DSk4px_SSE2.h54 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx in alphas()
55 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000 in alphas()
/external/skqp/src/opts/
DSk4px_SSE2.h54 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx in alphas()
55 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000 in alphas()
/external/XNNPACK/src/qu8-vadd/
Dminmax-sse2.c71 vy = _mm_max_epu8(vy, _mm_load_si128((const __m128i*) params->sse2.y_min)); in xnn_qu8_vadd_minmax_ukernel__sse2()
113 vy = _mm_max_epu8(vy, _mm_load_si128((const __m128i*) params->sse2.y_min)); in xnn_qu8_vadd_minmax_ukernel__sse2()
/external/tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/
DPacketMathAVX512.h474 _mm_max_epu8(_mm_max_epu8(lane0, lane1), _mm_max_epu8(lane2, lane3));
475 res = _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2)));
476 res = _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1)));
/external/XNNPACK/src/qu8-requantization/
Dfp32-sse2.c77 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_fp32__sse2()
Dprecise-sse4.c96 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__sse4()
Dprecise-ssse3.c104 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__ssse3()
Dq31-sse4.c112 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_q31__sse4()
Dprecise-sse2.c109 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__sse2()
Dq31-ssse3.c161 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_q31__ssse3()
/external/XNNPACK/src/qu8-gavgpool/
D7x-minmax-sse2-c8.c124 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
193 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
D7p7x-minmax-sse2-c8.c218 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
289 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
/external/libmpeg2/common/x86/
Dideint_cac_ssse3.c182 max = _mm_max_epu8(top_avg, bot_avg); in ideint_cac_8x8_ssse3()
/external/XNNPACK/src/qu8-avgpool/
D9x-minmax-sse2-c8.c180 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) &params->sse2.output_min)); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8()
255 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) &params->sse2.output_min)); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8()
/external/webp/src/dsp/
Ddec_sse2.c283 const __m128i t_max = _mm_max_epu8(t_1, t_2); in GetNotHEV_SSE2()
638 (m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
639 (m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
643 (m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \
644 (m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \
645 (m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
/external/libhevc/common/x86/
Dihevc_deblk_ssse3_intr.c376 src_row0_8x16b = _mm_max_epu8(src_row0_8x16b, temp_min0_16x8b); in ihevc_deblk_luma_vert_ssse3()
377 src_row1_8x16b = _mm_max_epu8(src_row1_8x16b, temp_min1_16x8b); in ihevc_deblk_luma_vert_ssse3()
803 temp_pq0_str0_16x8b = _mm_max_epu8(temp_pq0_str0_16x8b, src_p1_8x16b); in ihevc_deblk_luma_horz_ssse3()
804 src_p2_8x16b = _mm_max_epu8(src_p2_8x16b, tmp_pq_str1_8x16b); in ihevc_deblk_luma_horz_ssse3()
/external/libaom/libaom/av1/encoder/x86/
Dtemporal_filter_sse2.c50 __m128i vmax = _mm_max_epu8(vsrc1, vsrc2); in get_squared_error()

12