Lines Matching refs:_mm_shuffle_epi32
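
These are the _mm_shuffle_epi32 call sites in XNNPACK's 4x4c2 SSE2 QU8 GEMM microkernel. They fall into three groups: the unrolled main k loop (source lines 89-132), the k-remainder path (155-189), and the Q31 requantization of the accumulators (207-264).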
89 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
91 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
93 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
95 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
101 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
103 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
105 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
107 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
113 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
115 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
117 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
119 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
126 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
128 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
130 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
132 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(3, 3, 3, 3)), vxb3)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
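
The matches at 89-132 are the main-loop multiply-accumulate. In the 4x4c2 layout, each 32-bit lane of a vxa register holds two adjacent 16-bit activation values, so _mm_shuffle_epi32 with _MM_SHUFFLE(k, k, k, k) broadcasts the k-th pair to all four lanes, and _mm_madd_epi16 then dot-products that pair against four packed weight columns in vxb. A minimal sketch of one such step; the helper name is mine, and the loads and zero-point handling are assumed to happen elsewhere:

    #include <emmintrin.h>  /* SSE2 */

    /* One 4x4c2 accumulation step for row 0, k-pair 0 (hypothetical helper).
     * vxa0: 8 int16 activations; 32-bit lane k = a[2k], a[2k+1].
     * vxb0: weights for this k-pair, packed as int16 pairs per output column. */
    static inline __m128i acc_step_k0(__m128i vacc0x0123, __m128i vxa0, __m128i vxb0) {
      /* Broadcast the 32-bit lane holding a[0], a[1] to all four lanes. */
      const __m128i va_bcast = _mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0));
      /* Per output lane n: a[0]*b[0][n] + a[1]*b[1][n], added into the accumulator. */
      return _mm_add_epi32(vacc0x0123, _mm_madd_epi16(va_bcast, vxb0));
    }

The same shuffle repeats with _MM_SHUFFLE(1, 1, 1, 1) through _MM_SHUFFLE(3, 3, 3, 3) against vxb1..vxb3, consuming eight k elements per iteration across the four rows vxa0..vxa3.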
155 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
157 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
159 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
161 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(0, 0, 0, 0)), vxb0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
169 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
171 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
173 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
175 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(1, 1, 1, 1)), vxb1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
183 _mm_madd_epi16(_mm_shuffle_epi32(vxa0, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
185 _mm_madd_epi16(_mm_shuffle_epi32(vxa1, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
187 _mm_madd_epi16(_mm_shuffle_epi32(vxa2, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
189 _mm_madd_epi16(_mm_shuffle_epi32(vxa3, _MM_SHUFFLE(2, 2, 2, 2)), vxb2)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
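
The matches at 155-189 are the k-remainder path, repeating the same broadcast-and-madd pattern for a trailing block shorter than the eight-element main loop. Only the vxb0, vxb1, and vxb2 steps appear here, which suggests the remainder covers at most three two-element groups, consistent with k being consumed two elements at a time.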
207 const __m128i vabsacc0x1032 = _mm_shuffle_epi32(vabsacc0x0123, _MM_SHUFFLE(2, 3, 0, 1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
208 const __m128i vabsacc1x1032 = _mm_shuffle_epi32(vabsacc1x0123, _MM_SHUFFLE(2, 3, 0, 1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
209 const __m128i vabsacc2x1032 = _mm_shuffle_epi32(vabsacc2x0123, _MM_SHUFFLE(2, 3, 0, 1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
210 const __m128i vabsacc3x1032 = _mm_shuffle_epi32(vabsacc3x0123, _MM_SHUFFLE(2, 3, 0, 1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
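
The matches at 207-210 start the SSE2 Q31 requantization. By their names, the vabsacc registers hold the accumulators' absolute values; since _mm_mul_epu32 multiplies only the even (0 and 2) 32-bit lanes, the _MM_SHUFFLE(2, 3, 0, 1) swap exposes lanes 1 and 3 in even positions for a second multiply. A sketch of the even/odd split, with the helper name and vmultiplier (a broadcast requantization multiplier) assumed from context:

    #include <emmintrin.h>

    /* Split one row's |accumulators| into two 32x32->64-bit multiplies (sketch). */
    static inline void mul_even_odd(const __m128i vabsacc0x0123, const __m128i vmultiplier,
                                    __m128i* vabsprod0x02, __m128i* vabsprod0x13) {
      /* Swap adjacent 32-bit lanes: 0,1,2,3 -> 1,0,3,2. */
      const __m128i vabsacc0x1032 = _mm_shuffle_epi32(vabsacc0x0123, _MM_SHUFFLE(2, 3, 0, 1));
      /* _mm_mul_epu32 reads lanes 0 and 2 only, yielding two 64-bit products each. */
      *vabsprod0x02 = _mm_mul_epu32(vabsacc0x0123, vmultiplier);  /* lanes 0, 2 */
      *vabsprod0x13 = _mm_mul_epu32(vabsacc0x1032, vmultiplier);  /* lanes 1, 3 */
    }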
217 const __m128i vnmask0x02 = _mm_shuffle_epi32(vnmask0x0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
218 const __m128i vnmask1x02 = _mm_shuffle_epi32(vnmask1x0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
219 const __m128i vnmask2x02 = _mm_shuffle_epi32(vnmask2x0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
220 const __m128i vnmask3x02 = _mm_shuffle_epi32(vnmask3x0123, _MM_SHUFFLE(2, 2, 0, 0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
237 const __m128i vnmask0x13 = _mm_shuffle_epi32(vnmask0x0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
238 const __m128i vnmask1x13 = _mm_shuffle_epi32(vnmask1x0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
239 const __m128i vnmask2x13 = _mm_shuffle_epi32(vnmask2x0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
240 const __m128i vnmask3x13 = _mm_shuffle_epi32(vnmask3x0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
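
The matches at 217-240 widen the per-lane sign masks for those products. vnmask0x0123 plausibly holds an all-ones 32-bit mask wherever the accumulator was negative; _MM_SHUFFLE(2, 2, 0, 0) and _MM_SHUFFLE(3, 3, 1, 1) duplicate the relevant 32-bit mask across each full 64-bit lane so the unsigned products can be conditionally negated. A sketch under that reading (the helper name is mine; the 13 variant is symmetric):

    #include <emmintrin.h>

    /* Reapply signs to the 64-bit products of lanes 0 and 2 (sketch).
     * vnmask0x0123: 0 or 0xFFFFFFFF per 32-bit lane, set where acc < 0. */
    static inline __m128i restore_sign_02(const __m128i vabsprod0x02, const __m128i vnmask0x0123) {
      /* Duplicate the lane-0 and lane-2 masks across whole 64-bit lanes. */
      const __m128i vnmask0x02 = _mm_shuffle_epi32(vnmask0x0123, _MM_SHUFFLE(2, 2, 0, 0));
      /* Two's-complement negate where the mask is all ones: (x ^ m) - m. */
      return _mm_sub_epi64(_mm_xor_si128(vabsprod0x02, vnmask0x02), vnmask0x02);
    }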
261 const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
262 const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
263 const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
264 const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0)); in xnn_qu8_gemm_minmax_ukernel_4x4c2__sse2()
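
The matches at 261-264 finish the Q31 multiply. The even-lane (0, 2) and odd-lane (1, 3) results arrive interleaved as 0, 2, 1, 3 once their low 32-bit halves are packed together, and _MM_SHUFFLE(3, 1, 2, 0) permutes them back into natural order. A sketch, where the _mm_shuffle_ps packing step is my assumption about the surrounding code:

    #include <emmintrin.h>

    /* Merge the lane-{0,2} and lane-{1,3} Q31 results back into 0,1,2,3 order. */
    static inline __m128i merge_q31(const __m128i vq31prod0x02, const __m128i vq31prod0x13) {
      /* Keep the low 32 bits of each 64-bit result; lanes come out as 0,2,1,3. */
      const __m128i vq31prod0x0213 = _mm_castps_si128(_mm_shuffle_ps(
          _mm_castsi128_ps(vq31prod0x02), _mm_castsi128_ps(vq31prod0x13),
          _MM_SHUFFLE(2, 0, 2, 0)));
      /* Permute 0,2,1,3 -> 0,1,2,3. */
      return _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
    }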