
Searched refs:_mm_load_si128 (Results 1 – 25 of 382) sorted by relevance
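For reference, _mm_load_si128 is the SSE2 intrinsic (declared in emmintrin.h) for an aligned 16-byte load from memory into an __m128i register; the pointer must be 16-byte aligned, and _mm_loadu_si128 is the unaligned alternative. Below is a minimal, self-contained sketch of the load pattern that recurs throughout these results; the function and buffer names are illustrative only and are not taken from any of the listed files.

#include <emmintrin.h>  /* SSE2 intrinsics, including _mm_load_si128 */
#include <stdint.h>

/* Sum eight int16 values using one aligned 128-bit load.
 * The caller must guarantee that coeffs is 16-byte aligned, which is
 * what _mm_load_si128 (as opposed to _mm_loadu_si128) requires. */
static int32_t sum8_epi16(const int16_t coeffs[8]) {
  const __m128i v = _mm_load_si128((const __m128i *)coeffs);
  const __m128i ones = _mm_set1_epi16(1);
  __m128i s = _mm_madd_epi16(v, ones);          /* four int32 partial sums */
  s = _mm_add_epi32(s, _mm_srli_si128(s, 8));   /* fold the upper 64 bits down */
  s = _mm_add_epi32(s, _mm_srli_si128(s, 4));   /* fold the remaining pair */
  return _mm_cvtsi128_si32(s);                  /* total in the low lane */
}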


/external/libvpx/libvpx/vpx_dsp/x86/
fwd_txfm_sse2.c
48 __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); in vpx_fdct8x8_1_sse2()
49 __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); in vpx_fdct8x8_1_sse2()
50 __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); in vpx_fdct8x8_1_sse2()
51 __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); in vpx_fdct8x8_1_sse2()
57 in0 = _mm_load_si128((const __m128i *)(input + 4 * stride)); in vpx_fdct8x8_1_sse2()
58 in1 = _mm_load_si128((const __m128i *)(input + 5 * stride)); in vpx_fdct8x8_1_sse2()
59 in2 = _mm_load_si128((const __m128i *)(input + 6 * stride)); in vpx_fdct8x8_1_sse2()
60 in3 = _mm_load_si128((const __m128i *)(input + 7 * stride)); in vpx_fdct8x8_1_sse2()
95 in0 = _mm_load_si128((const __m128i *)(input + 0 * stride + 0)); in vpx_fdct16x16_1_sse2()
96 in1 = _mm_load_si128((const __m128i *)(input + 0 * stride + 8)); in vpx_fdct16x16_1_sse2()
[all …]
highbd_idct8x8_add_sse4.c
100 io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse4_1()
101 io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse4_1()
102 io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse4_1()
103 io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse4_1()
104 io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse4_1()
105 io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse4_1()
106 io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse4_1()
107 io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse4_1()
116 io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse4_1()
117 io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse4_1()
[all …]
highbd_idct8x8_add_sse2.c
98 io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse2()
99 io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse2()
100 io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse2()
101 io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse2()
102 io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse2()
103 io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse2()
104 io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse2()
105 io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse2()
114 io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); in vpx_highbd_idct8x8_64_add_sse2()
115 io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); in vpx_highbd_idct8x8_64_add_sse2()
[all …]
/external/libhevc/common/x86/
ihevc_itrans_recon_32x32_ssse3_intr.c
260 m_temp_reg_70 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
262 m_temp_reg_71 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
264 m_temp_reg_72 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
266 m_temp_reg_73 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
268 m_temp_reg_74 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
270 m_temp_reg_75 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
272 m_temp_reg_76 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
274 m_temp_reg_77 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
277 m_temp_reg_80 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
279 m_temp_reg_81 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_32x32_ssse3()
[all …]
ihevc_itrans_recon_16x16_ssse3_intr.c
210 m_temp_reg_70 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_ssse3()
212 m_temp_reg_71 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_ssse3()
214 m_temp_reg_72 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_ssse3()
216 m_temp_reg_73 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_ssse3()
218 m_temp_reg_74 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_ssse3()
220 m_temp_reg_75 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_ssse3()
222 m_temp_reg_76 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_ssse3()
224 m_temp_reg_77 = _mm_load_si128((__m128i *)pi2_tmp_src); in ihevc_itrans_recon_16x16_ssse3()
249 m_coeff1 = _mm_load_si128((__m128i *)&g_ai2_ihevc_trans_16_even[2][0]); //89 75 in ihevc_itrans_recon_16x16_ssse3()
295 m_coeff3 = _mm_load_si128((__m128i *)&g_ai2_ihevc_trans_16_even[3][0]); //75 -18 in ihevc_itrans_recon_16x16_ssse3()
[all …]
/external/libvpx/libvpx/vp8/encoder/x86/
vp8_quantize_sse2.c
41 __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); in vp8_regular_quantize_b_sse2()
42 __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); in vp8_regular_quantize_b_sse2()
43 __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); in vp8_regular_quantize_b_sse2()
44 __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); in vp8_regular_quantize_b_sse2()
46 __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); in vp8_regular_quantize_b_sse2()
47 __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); in vp8_regular_quantize_b_sse2()
48 __m128i round0 = _mm_load_si128((__m128i *)(b->round)); in vp8_regular_quantize_b_sse2()
49 __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); in vp8_regular_quantize_b_sse2()
50 __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); in vp8_regular_quantize_b_sse2()
51 __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); in vp8_regular_quantize_b_sse2()
[all …]
quantize_sse4.c
37 __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); in vp8_regular_quantize_b_sse4_1()
38 __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); in vp8_regular_quantize_b_sse4_1()
39 __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); in vp8_regular_quantize_b_sse4_1()
40 __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); in vp8_regular_quantize_b_sse4_1()
42 __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); in vp8_regular_quantize_b_sse4_1()
43 __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); in vp8_regular_quantize_b_sse4_1()
44 __m128i round0 = _mm_load_si128((__m128i *)(b->round)); in vp8_regular_quantize_b_sse4_1()
45 __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); in vp8_regular_quantize_b_sse4_1()
46 __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); in vp8_regular_quantize_b_sse4_1()
47 __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); in vp8_regular_quantize_b_sse4_1()
[all …]
vp8_quantize_ssse3.c
44 __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); in vp8_fast_quantize_b_ssse3()
45 __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); in vp8_fast_quantize_b_ssse3()
46 __m128i round0 = _mm_load_si128((__m128i *)(b->round)); in vp8_fast_quantize_b_ssse3()
47 __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); in vp8_fast_quantize_b_ssse3()
48 __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast)); in vp8_fast_quantize_b_ssse3()
49 __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8)); in vp8_fast_quantize_b_ssse3()
50 __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); in vp8_fast_quantize_b_ssse3()
51 __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); in vp8_fast_quantize_b_ssse3()
58 __m128i zig_zag = _mm_load_si128((const __m128i *)pshufb_zig_zag_mask); in vp8_fast_quantize_b_ssse3()
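The vp8 quantizer snippets above all read their per-block tables (coeff, zbin, round, quant, dequant) the same way: each table holds 16 int16 entries, so two aligned 128-bit loads at offsets 0 and +8 cover it. A minimal sketch of that two-load pattern, assuming a 16-byte-aligned table; the struct and field names below are illustrative stand-ins, not the actual vp8 BLOCK layout.

#include <emmintrin.h>
#include <stdint.h>

/* Illustrative stand-in for a per-block table of 16 int16 entries.
 * _Alignas(16) (C11) makes the aligned loads below legal. */
typedef struct {
  _Alignas(16) int16_t round[16];
} example_block;

static void load_round(const example_block *b, __m128i *lo, __m128i *hi) {
  *lo = _mm_load_si128((const __m128i *)(b->round));      /* entries 0..7  */
  *hi = _mm_load_si128((const __m128i *)(b->round + 8));  /* entries 8..15 */
}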
/external/XNNPACK/src/qs8-gemm/gen/
1x4c2-xw-minmax-xop.c
58 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
62 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
66 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
70 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
83 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
90 const __m128i vxb1 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
97 const __m128i vxb2 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
106 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
107 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
120 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop()
[all …]
1x4c2-xw-minmax-sse41.c
53 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
57 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
61 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
65 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
78 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
85 const __m128i vxb1 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
92 const __m128i vxb2 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
101 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
102 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
115 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41()
[all …]
1x4c2-xw-minmax-sse2.c
53 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
57 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
61 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
65 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
78 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
85 const __m128i vxb1 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
92 const __m128i vxb2 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
101 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
102 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
131 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2()
[all …]
1x4c2-xw-minmax-ssse3.c
53 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
57 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
61 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
65 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
78 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
85 const __m128i vxb1 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
92 const __m128i vxb2 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
101 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
102 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
131 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3()
[all …]
1x4c8-xw-minmax-xop.c
61 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
64 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
67 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
70 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
83 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
84 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
97 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
101 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
102 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
106 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop()
[all …]
1x4c8-xw-minmax-sse41.c
56 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
59 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
62 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
65 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
78 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
79 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
92 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
96 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
97 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
101 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41()
[all …]
1x4c8-xw-minmax-ssse3.c
56 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
59 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
62 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
65 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
78 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
79 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
108 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
112 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
113 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
117 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3()
[all …]
1x4c8-xw-minmax-sse2.c
56 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
59 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
62 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
65 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
78 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
79 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
108 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
112 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
113 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
117 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2()
[all …]
1x8c8-minmax-avx2.c
65 const __m128i vb01 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
69 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
73 const __m128i vb45 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
77 const __m128i vb67 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 48 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
94 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
95 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
108 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
112 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
113 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
117 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
[all …]
1x4c8-minmax-xop-ld128.c
61 const __m128i vb01 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
68 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
85 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
86 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
99 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
103 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
104 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
108 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
111 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
112 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128()
1x4c8-minmax-sse41-ld128.c
56 const __m128i vb01 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
63 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
80 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
81 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
94 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
98 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
99 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
103 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
106 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
107 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128()
2x4c8-xw-minmax-xop.c
74 const __m128i vxb0 = _mm_load_si128((const __m128i*) w); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
78 const __m128i vxb1 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 8 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
82 const __m128i vxb2 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
86 const __m128i vxb3 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 24 * sizeof(int16_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
103 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
104 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
123 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
129 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
130 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
136 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
[all …]
/external/XNNPACK/src/qu8-vadd/
minmax-sse2.c
23 …const __m128i vzero_point_product = _mm_load_si128((const __m128i*) &params->sse2.zero_point_produ… in xnn_qu8_vadd_minmax_ukernel__sse2()
24 const __m128i va_multiplier_lo = _mm_load_si128((const __m128i*) &params->sse2.a_multiplier_lo); in xnn_qu8_vadd_minmax_ukernel__sse2()
25 const __m128i va_multiplier_hi = _mm_load_si128((const __m128i*) &params->sse2.a_multiplier_hi); in xnn_qu8_vadd_minmax_ukernel__sse2()
26 const __m128i vb_multiplier_lo = _mm_load_si128((const __m128i*) &params->sse2.b_multiplier_lo); in xnn_qu8_vadd_minmax_ukernel__sse2()
27 const __m128i vb_multiplier_hi = _mm_load_si128((const __m128i*) &params->sse2.b_multiplier_hi); in xnn_qu8_vadd_minmax_ukernel__sse2()
28 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qu8_vadd_minmax_ukernel__sse2()
29 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qu8_vadd_minmax_ukernel__sse2()
68 const __m128i vy_zero_point = _mm_load_si128((const __m128i*) params->sse2.y_zero_point); in xnn_qu8_vadd_minmax_ukernel__sse2()
71 vy = _mm_max_epu8(vy, _mm_load_si128((const __m128i*) params->sse2.y_min)); in xnn_qu8_vadd_minmax_ukernel__sse2()
72 vy = _mm_min_epu8(vy, _mm_load_si128((const __m128i*) params->sse2.y_max)); in xnn_qu8_vadd_minmax_ukernel__sse2()
[all …]
/external/XNNPACK/src/qs8-dwconv/gen/
up32x9-minmax-avx512skx-mul32.c
34 …const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.mu… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
35 …const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.roun… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
36 …const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
37 …const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
38 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
39 …const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
40 …const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.ou… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
41 …const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.ou… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
100 …const __m512i vk0x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
102 …const __m512i vk0xGHIJKLMNOPQRSTUV = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
[all …]
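The AVX2 and AVX-512 kernels above reuse the same aligned 128-bit loads for their per-operator parameters and then broadcast the result across the wider register (_mm256_broadcastsi128_si256 and _mm512_broadcast_i32x4 in the snippets). A minimal AVX2 sketch of that load-then-broadcast step, assuming a 16-byte-aligned four-entry parameter array; the name params is illustrative.

#include <immintrin.h>  /* AVX2: _mm256_broadcastsi128_si256 */
#include <stdint.h>

/* Load four aligned int32 parameters once and duplicate them
 * into both 128-bit lanes of a 256-bit register. */
static __m256i broadcast_params(const int32_t params[4]) {
  const __m128i p = _mm_load_si128((const __m128i *)params);
  return _mm256_broadcastsi128_si256(p);
}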
/external/XNNPACK/src/qs8-igemm/gen/
1x8c8-minmax-avx2.c
76 const __m128i vb01 = _mm_load_si128((const __m128i*) w); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
80 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
84 const __m128i vb45 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int8_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
88 const __m128i vb67 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 48 * sizeof(int8_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
107 …const __m256i vmultiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->ss… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
108 …const __m256i vrounding = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
121 …const __m256i vremainder_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
125 …const __m256i vremainder_threshold = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) p… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
126 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
130 …const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) par… in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
[all …]
1x4c8-minmax-xop-ld128.c
72 const __m128i vb01 = _mm_load_si128((const __m128i*) w); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
79 const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16)); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
98 const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
99 const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
112 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
116 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
117 const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
121 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
124 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
125 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld128()
/external/libvpx/libvpx/vp9/common/x86/
vp9_highbd_iht8x8_add_sse4.c
177 io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); in vp9_highbd_iht8x8_64_add_sse4_1()
178 io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4)); in vp9_highbd_iht8x8_64_add_sse4_1()
179 io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); in vp9_highbd_iht8x8_64_add_sse4_1()
180 io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4)); in vp9_highbd_iht8x8_64_add_sse4_1()
181 io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); in vp9_highbd_iht8x8_64_add_sse4_1()
182 io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4)); in vp9_highbd_iht8x8_64_add_sse4_1()
183 io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); in vp9_highbd_iht8x8_64_add_sse4_1()
184 io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4)); in vp9_highbd_iht8x8_64_add_sse4_1()
185 io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); in vp9_highbd_iht8x8_64_add_sse4_1()
186 io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); in vp9_highbd_iht8x8_64_add_sse4_1()
[all …]
