Home
last modified time | relevance | path

Searched refs:vidx4 (Results 1 – 25 of 37) sorted by relevance

12

/external/XNNPACK/src/f32-argmaxpool/
D9p8x-wasmsimd-c4.c183 const v128_t vidx4 = wasm_i32x4_add(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local
185 vidx = wasm_v128_bitselect(vidx4, vidx, vm4); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
188 const v128_t vidx5 = wasm_i32x4_add(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
298 const v128_t vidx4 = wasm_i32x4_add(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local
300 vidx = wasm_v128_bitselect(vidx4, vidx, vm4); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
303 const v128_t vidx5 = wasm_i32x4_add(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
355 const v128_t vidx4 = wasm_i32x4_add(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4() local
357 vidx = wasm_v128_bitselect(vidx4, vidx, vm4); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
360 const v128_t vidx5 = wasm_i32x4_add(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__wasmsimd_c4()
D9p8x-neon-c4.c164 const uint32x4_t vidx4 = vaddq_u32(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local
166 vidx = vbslq_u32(vm4, vidx4, vidx); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
169 const uint32x4_t vidx5 = vaddq_u32(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
267 const uint32x4_t vidx4 = vaddq_u32(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local
269 vidx = vbslq_u32(vm4, vidx4, vidx); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
272 const uint32x4_t vidx5 = vaddq_u32(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
322 const uint32x4_t vidx4 = vaddq_u32(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4() local
324 vidx = vbslq_u32(vm4, vidx4, vidx); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
327 const uint32x4_t vidx5 = vaddq_u32(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__neon_c4()
D9p8x-sse2-c4.c183 const __m128i vidx4 = _mm_add_epi32(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
185 vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, vidx4)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
188 const __m128i vidx5 = _mm_add_epi32(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
298 const __m128i vidx4 = _mm_add_epi32(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
300 vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, vidx4)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
303 const __m128i vidx5 = _mm_add_epi32(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
355 const __m128i vidx4 = _mm_add_epi32(vidx3, v1); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() local
357 vidx = _mm_or_si128(_mm_andnot_si128(vm4, vidx), _mm_and_si128(vm4, vidx4)); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
360 const __m128i vidx5 = _mm_add_epi32(vidx4, v1); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-wasm-rr2-lut16-p3-x5.c74 const uint32_t vidx4 = float_as_uint32(vn4) & vindex_mask; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local
86 float vs4 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx4] + ven4); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
Dvelu-scalar-rr2-lut16-p3-x5.c74 const uint32_t vidx4 = float_as_uint32(vn4) & vindex_mask; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local
86 float vs4 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx4] + ven4); in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
Dvelu-wasm-rr2-lut16-p3-x6.c77 const uint32_t vidx4 = float_as_uint32(vn4) & vindex_mask; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local
92 float vs4 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx4] + ven4); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
Dvelu-scalar-rr2-lut16-p3-x6.c77 const uint32_t vidx4 = float_as_uint32(vn4) & vindex_mask; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
92 float vs4 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx4] + ven4); in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
Dvelu-avx2-rr1-lut16-p3-gather-x40.c67 const __m256i vidx4 = _mm256_and_si256(_mm256_castps_si256(vn4), vindex_mask); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40() local
68 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
Dvelu-avx2-rr1-lut16-p3-gather-x48.c70 const __m256i vidx4 = _mm256_and_si256(_mm256_castps_si256(vn4), vindex_mask); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48() local
71 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
Dvelu-sse41-rr2-lut16-p3-x8.c85 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local
89 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + vidx4))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
Dvelu-avx2-rr1-lut16-p3-gather-x56.c73 const __m256i vidx4 = _mm256_and_si256(_mm256_castps_si256(vn4), vindex_mask); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56() local
74 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
Dvelu-sse2-rr2-lut16-p3-x8.c91 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local
95 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + vidx4))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
Dvelu-avx2-rr1-lut16-p3-gather-x64.c76 const __m256i vidx4 = _mm256_and_si256(_mm256_castps_si256(vn4), vindex_mask); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64() local
77 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
Dvelu-sse41-rr2-lut16-p3-x12.c97 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local
101 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + vidx4))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
Dvelu-avx2-rr1-lut16-p3-gather-x72.c79 const __m256i vidx4 = _mm256_and_si256(_mm256_castps_si256(vn4), vindex_mask); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72() local
80 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
Dvelu-sse2-rr2-lut16-p3-x12.c105 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local
109 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + vidx4))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
Dvelu-sse41-rr2-lut16-p3-x16.c109 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local
113 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + vidx4))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
Dvelu-avx2-rr1-lut16-p3-gather-x80.c82 const __m256i vidx4 = _mm256_and_si256(_mm256_castps_si256(vn4), vindex_mask); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80() local
83 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
Dvelu-avx-rr2-lut16-p3-x40.c178 const __m256 vidx4 = _mm256_and_ps(vn4, vindex_mask); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40() local
180 const __m128i vidx4_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx4)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
181 const __m128i vidx4_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx4, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
/external/XNNPACK/src/f32-vsigmoid/gen/
Dvsigmoid-sse41-rr2-lut64-p2-div-x8.c80 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() local
84 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx4))); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
Dvsigmoid-sse2-rr2-lut64-p2-div-x8.c86 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8() local
90 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx4))); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8()
Dvsigmoid-sse41-rr2-lut64-p2-div-x12.c92 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12() local
96 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx4))); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12()
Dvsigmoid-sse2-rr2-lut64-p2-div-x12.c100 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12() local
104 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx4))); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12()
Dvsigmoid-sse41-rr2-lut64-p2-div-x16.c104 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16() local
108 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx4))); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16()
Dvsigmoid-sse2-rr2-lut64-p2-div-x16.c114 const uint32_t vidx4 = (uint32_t) _mm_cvtsi128_si32(vidx4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16() local
118 const __m128i vl4 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + vidx4))); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16()

12