Home
last modified time | relevance | path

Searched refs:_mm_hadd_epi32 (Results 1 – 25 of 75) sorted by relevance

123

/external/XNNPACK/src/qs8-gemm/gen/
D3x4c8-minmax-xop-ld128.c114 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
115 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
116 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
117 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
118 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
119 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
121 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
122 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
123 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
D3x4c8-minmax-sse41-ld128.c109 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
110 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
111 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
112 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
113 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
114 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
116 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
117 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
118 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
D3x4c8-minmax-xop-ld64.c116 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
117 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
118 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
119 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
120 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
121 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
123 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
124 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
125 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
D3x4c8-xw-minmax-xop.c112 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
113 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
114 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
115 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
116 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
117 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
119 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
120 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
121 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
D3x4c8-minmax-sse41-ld64.c111 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
112 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
113 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
114 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
115 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
116 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
118 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
119 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
120 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
D3x4c8-xw-minmax-sse41.c107 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
108 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
109 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
110 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
111 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
112 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
114 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
115 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
116 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
D3x4c8-xw-minmax-ssse3.c107 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
108 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
109 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
110 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
111 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
112 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
114 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
115 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
116 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
D2x4c8-xw-minmax-xop.c95 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
96 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
97 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
98 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
100 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
101 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
D3x4c8-minmax-ssse3-ld128.c109 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
110 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
111 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
112 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
113 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
114 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
116 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
117 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
118 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
D3x4c8-minmax-ssse3-ld64.c111 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
112 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
113 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
114 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
115 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
116 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
118 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
119 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
120 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
D2x4c8-minmax-sse41-ld64.c94 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
95 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
96 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
97 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
99 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
100 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
D2x4c8-xw-minmax-sse41.c90 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
91 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
92 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
93 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
95 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
96 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
D2x4c8-minmax-xop-ld128.c97 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
98 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
99 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
100 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
102 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
103 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
D2x4c8-minmax-sse41-ld128.c92 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()
93 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()
94 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()
95 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()
97 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()
98 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()
/external/XNNPACK/src/qs8-igemm/gen/
D3x4c8-minmax-xop-ld64.c133 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
134 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
135 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
136 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
137 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
138 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
140 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
141 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
142 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
D3x4c8-minmax-xop-ld128.c131 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
132 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
133 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
134 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
135 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
136 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
138 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
139 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
140 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
D3x4c8-minmax-sse41-ld64.c128 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
129 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
130 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
131 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
132 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
133 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
135 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
136 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
137 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
D3x4c8-minmax-sse41-ld128.c126 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
127 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
128 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
129 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
130 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
131 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
133 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
134 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
135 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
D3x4c8-minmax-ssse3-ld64.c128 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
129 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
130 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
131 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
132 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
133 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
135 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
136 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
137 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
D3x4c8-minmax-ssse3-ld128.c126 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
127 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
128 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
129 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
130 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
131 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
133 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
134 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
135 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
D2x4c8-minmax-xop-ld64.c114 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
115 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
116 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
117 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
119 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
120 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
D2x4c8-minmax-xop-ld128.c112 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()
113 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()
114 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()
115 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()
117 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()
118 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()
/external/libgav1/libgav1/src/dsp/x86/
Dsuper_res_sse4.cc259 a[0] = _mm_hadd_epi32(weighted_src[0], weighted_src[1]); in SuperRes_SSE4_1()
260 a[1] = _mm_hadd_epi32(weighted_src[2], weighted_src[3]); in SuperRes_SSE4_1()
261 a[2] = _mm_hadd_epi32(weighted_src[4], weighted_src[5]); in SuperRes_SSE4_1()
262 a[3] = _mm_hadd_epi32(weighted_src[6], weighted_src[7]); in SuperRes_SSE4_1()
264 a[0] = _mm_hadd_epi32(a[0], a[1]); in SuperRes_SSE4_1()
265 a[1] = _mm_hadd_epi32(a[2], a[3]); in SuperRes_SSE4_1()
/external/libaom/libaom/av1/common/x86/
Dav1_convolve_horiz_rs_sse4.c105 const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32); in av1_convolve_horiz_rs_sse4_1()
106 const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32); in av1_convolve_horiz_rs_sse4_1()
108 const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32); in av1_convolve_horiz_rs_sse4_1()
208 const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32); in av1_highbd_convolve_horiz_rs_sse4_1()
209 const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32); in av1_highbd_convolve_horiz_rs_sse4_1()
211 const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32); in av1_highbd_convolve_horiz_rs_sse4_1()
/external/libaom/libaom/aom_dsp/x86/
Dhighbd_variance_sse4.c54 s2 = _mm_hadd_epi32(t0, t1); in variance4x4_64_sse4_1()
55 s3 = _mm_hadd_epi32(s2, s2); in variance4x4_64_sse4_1()
56 y0 = _mm_hadd_epi32(s3, s3); in variance4x4_64_sse4_1()
61 s2 = _mm_hadd_epi32(t0, t1); in variance4x4_64_sse4_1()
62 s3 = _mm_hadd_epi32(s2, s2); in variance4x4_64_sse4_1()
63 x0 = _mm_hadd_epi32(s3, s3); in variance4x4_64_sse4_1()

123