Lines Matching refs:vidx
76 __m128i vidx = _mm_setzero_si128(); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
80 vidx = _mm_or_si128(_mm_andnot_si128(vm1, vidx), _mm_and_si128(vm1, _mm_set1_epi32(1))); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
84 vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, _mm_set1_epi32(2))); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
88 vidx = _mm_or_si128(_mm_andnot_si128(vm3, vidx), _mm_and_si128(vm3, _mm_set1_epi32(3))); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
92 vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, _mm_set1_epi32(4))); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
96 vidx = _mm_or_si128(_mm_andnot_si128(vm5, vidx), _mm_and_si128(vm5, _mm_set1_epi32(5))); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
100 vidx = _mm_or_si128(_mm_andnot_si128(vm6, vidx), _mm_and_si128(vm6, _mm_set1_epi32(6))); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
104 vidx = _mm_or_si128(_mm_andnot_si128(vm7, vidx), _mm_and_si128(vm7, _mm_set1_epi32(7))); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
108 vidx = _mm_or_si128(_mm_andnot_si128(vm8, vidx), _mm_and_si128(vm8, _mm_set1_epi32(8))); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
112 _mm_store_si128((__m128i*) ib, vidx); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
161 __m128i vidx = _mm_load_si128((const __m128i*) ib); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
165 vidx = _mm_or_si128(_mm_andnot_si128(vm0, vidx), _mm_and_si128(vm0, vidx0)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
170 vidx = _mm_or_si128(_mm_andnot_si128(vm1, vidx), _mm_and_si128(vm1, vidx1)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
175 vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, vidx2)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
180 vidx = _mm_or_si128(_mm_andnot_si128(vm3, vidx), _mm_and_si128(vm3, vidx3)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
185 vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, vidx4)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
190 vidx = _mm_or_si128(_mm_andnot_si128(vm5, vidx), _mm_and_si128(vm5, vidx5)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
195 vidx = _mm_or_si128(_mm_andnot_si128(vm6, vidx), _mm_and_si128(vm6, vidx6)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
200 vidx = _mm_or_si128(_mm_andnot_si128(vm7, vidx), _mm_and_si128(vm7, vidx7)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
204 _mm_store_si128((__m128i*) ib, vidx); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
275 __m128i vidx = _mm_load_si128((const __m128i*) ib); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
280 vidx = _mm_or_si128(_mm_andnot_si128(vm0, vidx), _mm_and_si128(vm0, vidx0)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
285 vidx = _mm_or_si128(_mm_andnot_si128(vm1, vidx), _mm_and_si128(vm1, vidx1)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
290 vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, vidx2)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
295 vidx = _mm_or_si128(_mm_andnot_si128(vm3, vidx), _mm_and_si128(vm3, vidx3)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
300 vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, vidx4)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
305 vidx = _mm_or_si128(_mm_andnot_si128(vm5, vidx), _mm_and_si128(vm5, vidx5)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
310 vidx = _mm_or_si128(_mm_andnot_si128(vm6, vidx), _mm_and_si128(vm6, vidx6)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
315 vidx = _mm_or_si128(_mm_andnot_si128(vm7, vidx), _mm_and_si128(vm7, vidx7)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
319 _mm_storeu_si128((__m128i*) i, vidx); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
333 __m128i vidx = _mm_load_si128((const __m128i*) ib); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
337 vidx = _mm_or_si128(_mm_andnot_si128(vm0, vidx), _mm_and_si128(vm0, vidx0)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
342 vidx = _mm_or_si128(_mm_andnot_si128(vm1, vidx), _mm_and_si128(vm1, vidx1)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
347 vidx = _mm_or_si128(_mm_andnot_si128(vm2, vidx), _mm_and_si128(vm2, vidx2)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
352 vidx = _mm_or_si128(_mm_andnot_si128(vm3, vidx), _mm_and_si128(vm3, vidx3)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
357 vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, vidx4)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
362 vidx = _mm_or_si128(_mm_andnot_si128(vm5, vidx), _mm_and_si128(vm5, vidx5)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
367 vidx = _mm_or_si128(_mm_andnot_si128(vm6, vidx), _mm_and_si128(vm6, vidx6)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
372 vidx = _mm_or_si128(_mm_andnot_si128(vm7, vidx), _mm_and_si128(vm7, vidx7)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
376 _mm_storel_epi64((__m128i*) i, vidx); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
378 vidx = _mm_unpackhi_epi64(vidx, vidx); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
384 *i = (uint32_t) _mm_cvtsi128_si32(vidx); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()