/external/XNNPACK/src/u8-maxpool/ |
D | 9p8x-minmax-sse2-c16.c | 92 const __m128i vmax018 = _mm_max_epu8(_mm_max_epu8(vi0, vi1), vi8); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 93 const __m128i vmax23 = _mm_max_epu8(vi2, vi3); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 94 const __m128i vmax45 = _mm_max_epu8(vi4, vi5); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 95 const __m128i vmax67 = _mm_max_epu8(vi6, vi7); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 97 const __m128i vmax2345 = _mm_max_epu8(vmax23, vmax45); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 98 const __m128i vmax01678 = _mm_max_epu8(vmax018, vmax67); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 99 const __m128i vmax = _mm_max_epu8(vmax2345, vmax01678); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 100 const __m128i vout = _mm_max_epu8(_mm_min_epu8(vmax, voutput_max), voutput_min); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 115 const __m128i vmax018 = _mm_max_epu8(_mm_max_epu8(vi0, vi1), vi8); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() 116 const __m128i vmax23 = _mm_max_epu8(vi2, vi3); in xnn_u8_maxpool_minmax_ukernel_9p8x__sse2_c16() [all …]
|
/external/XNNPACK/src/u8-rmax/ |
D | sse2.c | 28 vmax = _mm_max_epu8(vmax, vx); in xnn_u8_rmax_ukernel__sse2() 35 vmax = _mm_max_epu8(vmax, vx); in xnn_u8_rmax_ukernel__sse2() 37 vmax = _mm_max_epu8(vmax, _mm_unpackhi_epi64(vmax, vmax)); in xnn_u8_rmax_ukernel__sse2() 38 vmax = _mm_max_epu8(vmax, _mm_srli_epi64(vmax, 32)); in xnn_u8_rmax_ukernel__sse2() 39 vmax = _mm_max_epu8(vmax, _mm_srli_epi32(vmax, 16)); in xnn_u8_rmax_ukernel__sse2() 40 vmax = _mm_max_epu8(vmax, _mm_srli_epi16(vmax, 8)); in xnn_u8_rmax_ukernel__sse2()
|
/external/XNNPACK/src/u8-clamp/ |
D | sse2-x64.c | 33 const __m128i vy0 = _mm_min_epu8(_mm_max_epu8(vx0, voutput_min), voutput_max); in xnn_u8_clamp_ukernel__sse2_x64() 34 const __m128i vy1 = _mm_min_epu8(_mm_max_epu8(vx1, voutput_min), voutput_max); in xnn_u8_clamp_ukernel__sse2_x64() 35 const __m128i vy2 = _mm_min_epu8(_mm_max_epu8(vx2, voutput_min), voutput_max); in xnn_u8_clamp_ukernel__sse2_x64() 36 const __m128i vy3 = _mm_min_epu8(_mm_max_epu8(vx3, voutput_min), voutput_max); in xnn_u8_clamp_ukernel__sse2_x64() 48 vout = _mm_max_epu8(vout, voutput_min); in xnn_u8_clamp_ukernel__sse2_x64() 55 vout = _mm_max_epu8(vout, voutput_min); in xnn_u8_clamp_ukernel__sse2_x64()
|
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | loopfilter_avx2.c | 61 flat = _mm_max_epu8(abs_p1p0, abs_q1q0); in vpx_lpf_horizontal_16_avx2() 70 mask = _mm_max_epu8(abs_p1p0, mask); in vpx_lpf_horizontal_16_avx2() 74 work = _mm_max_epu8( in vpx_lpf_horizontal_16_avx2() 77 mask = _mm_max_epu8(work, mask); in vpx_lpf_horizontal_16_avx2() 78 mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); in vpx_lpf_horizontal_16_avx2() 130 flat = _mm_max_epu8( in vpx_lpf_horizontal_16_avx2() 133 flat = _mm_max_epu8(abs_p1p0, flat); in vpx_lpf_horizontal_16_avx2() 134 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); in vpx_lpf_horizontal_16_avx2() 147 flat2 = _mm_max_epu8( in vpx_lpf_horizontal_16_avx2() 155 work = _mm_max_epu8( in vpx_lpf_horizontal_16_avx2() [all …]
|
D | loopfilter_sse2.c | 33 _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \ 49 flat = _mm_max_epu8(work, flat); \ 52 flat = _mm_max_epu8(work, flat); \ 53 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \ 270 flat = _mm_max_epu8(abs_p1p0, abs_q1q0); in vpx_lpf_horizontal_16_sse2() 279 mask = _mm_max_epu8(abs_p1p0, mask); in vpx_lpf_horizontal_16_sse2() 283 work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2)); in vpx_lpf_horizontal_16_sse2() 284 mask = _mm_max_epu8(work, mask); in vpx_lpf_horizontal_16_sse2() 285 mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); in vpx_lpf_horizontal_16_sse2() 337 flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0)); in vpx_lpf_horizontal_16_sse2() [all …]
|
/external/libaom/libaom/aom_dsp/x86/ |
D | loopfilter_sse2.c | 264 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 4)); in lpf_internal_4_sse2() 307 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); in lpf_internal_4_dual_sse2() 419 flat = _mm_max_epu8(abs_p1p0, abs_q1q0); in lpf_internal_14_dual_sse2() 430 mask = _mm_max_epu8(abs_p1p0, mask); in lpf_internal_14_dual_sse2() 434 work = _mm_max_epu8(abs_diff(*q2p2, *q1p1), abs_diff(*q3p3, *q2p2)); in lpf_internal_14_dual_sse2() 435 mask = _mm_max_epu8(work, mask); in lpf_internal_14_dual_sse2() 436 mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); in lpf_internal_14_dual_sse2() 451 flat = _mm_max_epu8(abs_diff(*q2p2, *q0p0), abs_diff(*q3p3, *q0p0)); in lpf_internal_14_dual_sse2() 452 flat = _mm_max_epu8(abs_p1p0, flat); in lpf_internal_14_dual_sse2() 453 flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); in lpf_internal_14_dual_sse2() [all …]
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | cdef_sse4.cc | 580 const __m128i max_p01 = _mm_max_epu8(primary_val[0], primary_val[1]); in CdefFilter_SSE4_1() 581 const __m128i max_p23 = _mm_max_epu8(primary_val[2], primary_val[3]); in CdefFilter_SSE4_1() 582 const __m128i max_p = _mm_max_epu8(max_p01, max_p23); in CdefFilter_SSE4_1() 623 _mm_max_epu8(secondary_val[0], secondary_val[1]); in CdefFilter_SSE4_1() 625 _mm_max_epu8(secondary_val[2], secondary_val[3]); in CdefFilter_SSE4_1() 627 _mm_max_epu8(secondary_val[4], secondary_val[5]); in CdefFilter_SSE4_1() 629 _mm_max_epu8(secondary_val[6], secondary_val[7]); in CdefFilter_SSE4_1() 630 const __m128i max_s = _mm_max_epu8(_mm_max_epu8(max_s01, max_s23), in CdefFilter_SSE4_1() 631 _mm_max_epu8(max_s45, max_s67)); in CdefFilter_SSE4_1()
|
D | loop_filter_sse4.cc | 67 _mm_max_epu8(abs_qp1mqp0, _mm_srli_si128(abs_qp1mqp0, 4)); in Hev() 98 _mm_max_epu8(abs_qp1mqp0, _mm_srli_si128(abs_qp1mqp0, 4)), inner_thresh); in NeedsFilter4() 264 const __m128i max_pq = _mm_max_epu8(abs_qp2mqp1, abs_qp1mqp0); in NeedsFilter6() 266 _mm_max_epu8(max_pq, _mm_srli_si128(max_pq, 4)), inner_thresh); in NeedsFilter6() 278 const __m128i max_pq = _mm_max_epu8(abs_pq2mpq0, abs_qp1mqp0); in IsFlat3() 280 _mm_max_epu8(max_pq, _mm_srli_si128(max_pq, 4)), flat_thresh); in IsFlat3() 491 const __m128i max_pq_a = _mm_max_epu8(abs_qp2mqp1, abs_qp1mqp0); in NeedsFilter8() 493 const __m128i max_pq = _mm_max_epu8(max_pq_a, abs_pq3mpq2); in NeedsFilter8() 495 _mm_max_epu8(max_pq, _mm_srli_si128(max_pq, 4)), inner_thresh); in NeedsFilter8() 508 const __m128i max_pq_a = _mm_max_epu8(abs_pq2mpq0, abs_qp1mqp0); in IsFlat4() [all …]
|
/external/skia/src/opts/ |
D | Sk4px_SSE2.h | 54 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx in alphas() 55 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000 in alphas()
|
/external/skqp/src/opts/ |
D | Sk4px_SSE2.h | 54 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx in alphas() 55 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000 in alphas()
|
/external/XNNPACK/src/qu8-vadd/ |
D | minmax-sse2.c | 71 vy = _mm_max_epu8(vy, _mm_load_si128((const __m128i*) params->sse2.y_min)); in xnn_qu8_vadd_minmax_ukernel__sse2() 113 vy = _mm_max_epu8(vy, _mm_load_si128((const __m128i*) params->sse2.y_min)); in xnn_qu8_vadd_minmax_ukernel__sse2()
|
/external/tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/ |
D | PacketMathAVX512.h | 474 _mm_max_epu8(_mm_max_epu8(lane0, lane1), _mm_max_epu8(lane2, lane3)); 475 res = _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 3, 2))); 476 res = _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1)));
|
/external/XNNPACK/src/qu8-requantization/ |
D | fp32-sse2.c | 77 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_fp32__sse2()
|
D | precise-sse4.c | 96 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__sse4()
|
D | precise-ssse3.c | 104 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__ssse3()
|
D | q31-sse4.c | 112 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_q31__sse4()
|
D | precise-sse2.c | 109 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_precise__sse2()
|
D | q31-ssse3.c | 161 const __m128i xyzw_clamped = _mm_max_epu8(_mm_min_epu8(xyzw_packed, vqmax), vqmin); in xnn_qu8_requantize_q31__ssse3()
|
/external/XNNPACK/src/qu8-gavgpool/ |
D | 7x-minmax-sse2-c8.c | 124 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8() 193 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8()
|
D | 7p7x-minmax-sse2-c8.c | 218 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8() 289 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qu8_gavgpool_minmax_ukernel_7p7x__sse2_c8()
|
/external/libmpeg2/common/x86/ |
D | ideint_cac_ssse3.c | 182 max = _mm_max_epu8(top_avg, bot_avg); in ideint_cac_8x8_ssse3()
|
/external/XNNPACK/src/qu8-avgpool/ |
D | 9x-minmax-sse2-c8.c | 180 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) &params->sse2.output_min)); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8() 255 vout = _mm_max_epu8(vout, _mm_load_si128((const __m128i*) &params->sse2.output_min)); in xnn_qu8_avgpool_minmax_ukernel_9x__sse2_c8()
|
/external/webp/src/dsp/ |
D | dec_sse2.c | 283 const __m128i t_max = _mm_max_epu8(t_1, t_2); in GetNotHEV_SSE2() 638 (m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \ 639 (m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \ 643 (m) = _mm_max_epu8(m, MM_ABS(p1, p0)); \ 644 (m) = _mm_max_epu8(m, MM_ABS(p3, p2)); \ 645 (m) = _mm_max_epu8(m, MM_ABS(p2, p1)); \
|
/external/libhevc/common/x86/ |
D | ihevc_deblk_ssse3_intr.c | 376 src_row0_8x16b = _mm_max_epu8(src_row0_8x16b, temp_min0_16x8b); in ihevc_deblk_luma_vert_ssse3() 377 src_row1_8x16b = _mm_max_epu8(src_row1_8x16b, temp_min1_16x8b); in ihevc_deblk_luma_vert_ssse3() 803 temp_pq0_str0_16x8b = _mm_max_epu8(temp_pq0_str0_16x8b, src_p1_8x16b); in ihevc_deblk_luma_horz_ssse3() 804 src_p2_8x16b = _mm_max_epu8(src_p2_8x16b, tmp_pq_str1_8x16b); in ihevc_deblk_luma_horz_ssse3()
|
/external/libaom/libaom/av1/encoder/x86/ |
D | temporal_filter_sse2.c | 50 __m128i vmax = _mm_max_epu8(vsrc1, vsrc2); in get_squared_error()
|