Searched refs:vidx_lo (Results 1 – 25 of 187) sorted by relevance

/external/XNNPACK/src/f32-velu/gen/
velu-avx-rr2-lut16-p3-x8.c
52 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
55 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
56 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
68 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_lo)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
69 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 2)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
72 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 1))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
73 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 3))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
118 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
121 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
122 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
[all …]
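
All of the AVX lut16 hits above share one gather pattern: vidx_lo packs four 32-bit element indices, the left shift by 2 turns them into byte offsets into xnn_table_exp2minus_k_over_16, and the four entries are fetched with scalar loads and reassembled into a vector (AVX1 has no gather instruction). A minimal sketch of that pattern follows; table16 and lookup4 are hypothetical stand-ins, and the real kernels additionally split vidx_lo into the 64-bit halves vidx_ll/vidx_lh before extracting lanes.

    #include <immintrin.h>
    #include <stdint.h>

    /* Hypothetical stand-in for xnn_table_exp2minus_k_over_16 (values omitted). */
    static const float table16[16];

    /* vidx holds four table indices in [0, 16); return the four looked-up entries. */
    static __m128i lookup4(__m128i vidx) {
      const __m128i vidx_lo = _mm_slli_epi32(vidx, 2);  /* index * sizeof(float) -> byte offset */
      __m128i vl = _mm_cvtsi32_si128(
          *((const int*) ((uintptr_t) table16 + (uint32_t) _mm_cvtsi128_si32(vidx_lo))));
      vl = _mm_insert_epi32(vl,
          *((const int*) ((uintptr_t) table16 + (uint32_t) _mm_extract_epi32(vidx_lo, 1))), 1);
      vl = _mm_insert_epi32(vl,
          *((const int*) ((uintptr_t) table16 + (uint32_t) _mm_extract_epi32(vidx_lo, 2))), 2);
      vl = _mm_insert_epi32(vl,
          *((const int*) ((uintptr_t) table16 + (uint32_t) _mm_extract_epi32(vidx_lo, 3))), 3);
      return vl;
    }
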
velu-avx-rr2-lut16-p3-x16.c
167 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
170 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
171 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
183 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_lo)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
184 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 2)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
187 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 1))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
188 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 3))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
233 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
236 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
237 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
[all …]
velu-wasmsimd-x86-rr2-lut16-p3-x4.c
54 const uint64_t vidx_lo = wasm_i64x2_extract_lane(vidx, 0); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
56 …nst float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo)); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
57 …t vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32))); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
95 const uint64_t vidx_lo = wasm_i64x2_extract_lane(vidx, 0); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
97 …nst float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo)); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
98 …t vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32))); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
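
The WAsm SIMD variants do the same lookup with two scalar loads per 64-bit lane: vidx_lo is the low i64x2 lane of vidx and packs two 32-bit byte offsets into the table. A minimal sketch under those assumptions (table16 and lookup2 are hypothetical names):

    #include <wasm_simd128.h>
    #include <stdint.h>

    /* Hypothetical stand-in for xnn_table_exp2minus_k_over_16 (values omitted). */
    static const float table16[16];

    /* The low 64-bit lane of vidx packs two 32-bit byte offsets into table16. */
    static void lookup2(v128_t vidx, float* vl0, float* vl1) {
      const uint64_t vidx_lo = (uint64_t) wasm_i64x2_extract_lane(vidx, 0);
      *vl0 = *((const float*) ((uintptr_t) table16 + (uint32_t) vidx_lo));
      *vl1 = *((const float*) ((uintptr_t) table16 + (uint32_t) (vidx_lo >> 32)));
    }
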
velu-neon-rr2-lut16-p3-x4.c
53 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
55 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
57 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32)), vl_lo, 1… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
90 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
92 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
94 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32)), vl_lo, 1… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
velu-neonfma-rr1-lut16-p3-x4.c
52 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
54 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
56 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32)), vl_lo, 1… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
88 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
90 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
92 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32)), vl_lo, 1… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
velu-wasmsimd-arm-rr2-lut16-p3-x4.c
54 const uint64_t vidx_lo = wasm_i64x2_extract_lane(vidx, 0); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
56 …nst float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo)); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
57 …t vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32))); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
92 const uint64_t vidx_lo = wasm_i64x2_extract_lane(vidx, 0); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
94 …nst float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo)); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
95 …t vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32))); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
velu-sse41-rr2-lut16-p3-x4.c
55 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
57 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
59 …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32))), 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
98 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
100 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_lo))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
102 …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_lo >> 32))), 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
velu-avx-rr2-lut16-p3-x24.c
216 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
219 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
220 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
232 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_lo)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
233 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 2)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
236 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 1))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
237 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 3))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
282 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
285 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
286 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
[all …]
/external/XNNPACK/src/math/
expm1minus-f32-avx-rr2-lut16-p3.c
73 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local
76 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
77 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
89 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_lo)))); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
90 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 2)))); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
93 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 1))), 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
94 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 3))), 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
sigmoid-f32-avx-rr2-lut64-p2-div.c
77 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local
80 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
81 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
93 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_cvtsi128_si32(vidx_lo)))); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
94 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_lo, 2)))); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
97 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_lo, 1))), 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
98 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_lo, 3))), 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
/external/XNNPACK/src/f32-vsigmoid/gen/
vsigmoid-neonfma-rr1-lut2048-p1-div-x4.c
45 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
49 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
78 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
80 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
82 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
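
The NEON vsigmoid hits above use vidx_lo the same way but index the table directly, building a float32x2_t with a duplicate load followed by a lane load. A minimal sketch for the low half only, assuming a hypothetical table2048 in place of xnn_table_exp2minus_k_over_2048:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Hypothetical stand-in for xnn_table_exp2minus_k_over_2048 (values omitted). */
    static const float table2048[2048];

    /* The low 64-bit lane of vidx packs two 32-bit table indices; gather them into two lanes. */
    static float32x2_t lookup2_lo(uint64x2_t vidx) {
      const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0);
      float32x2_t vl_lo = vld1_dup_f32(&table2048[(uint32_t) vidx_lo]);         /* entry 0 in both lanes */
      vl_lo = vld1_lane_f32(&table2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);  /* overwrite lane 1 */
      return vl_lo;
    }
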
vsigmoid-wasmsimd-rr2-lut64-p2-div-x4.c
47 const uint64_t vidx_lo = wasm_i64x2_extract_lane(vidx, 0); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4() local
49 …nst float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4()
50 …t vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_lo >> 32))); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4()
83 const uint64_t vidx_lo = wasm_i64x2_extract_lane(vidx, 0); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4() local
85 …nst float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_lo)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4()
86 …t vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_lo >> 32))); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4()
vsigmoid-neonfma-rr1-lut64-p2-div-x4.c
45 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
49 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
79 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
81 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
83 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-x4.c
45 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
49 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
83 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
85 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
87 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-x4.c
45 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
49 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
82 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
84 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
86 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
vsigmoid-neonfma-rr1-lut64-p2-nr2fma-x4.c
45 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
49 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
83 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
85 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
87 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
vsigmoid-neon-rr2-lut2048-p1-nr2recps-x4.c
46 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
48 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
50 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
84 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
86 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
88 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c
45 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
49 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
82 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
84 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
86 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
vsigmoid-neon-rr2-lut64-p2-nr2recps-x4.c
46 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
48 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
50 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
85 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
87 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
89 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-x4.c
45 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
49 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
82 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
84 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
86 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
vsigmoid-neonfma-rr1-lut64-p2-nr2recps-x4.c
45 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
47 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
49 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
83 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
85 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
87 vl_lo = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
vsigmoid-sse41-rr2-lut64-p2-div-x4.c
49 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4() local
51 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_lo))); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4()
53 …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_lo >> 32))), 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4()
91 const uint64_t vidx_lo = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4() local
93 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_lo))); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4()
95 …l_ll, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_lo >> 32))), 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
neonfma-rr1-lut64-p2-x4.c
49 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4() local
51 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4()
53 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4()
90 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4() local
92 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4()
94 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4()
neon-rr2-lut64-p2-x4.c
50 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4() local
52 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4()
54 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4()
92 const uint64_t vidx_lo = vgetq_lane_u64(vidx, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4() local
94 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4()
96 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4()
/external/XNNPACK/src/f32-velu/
avx-rr2-lut16-p3.c.in
134 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2);
137 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
138 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);
150 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_lo))));
151 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 2))));
154 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 1))), 1);
155 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_lo, 3))), 1);
200 const __m128i vidx_lo = _mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(vidx)), 2);
203 const uint64_t vidx_ll = (uint64_t) _mm_cvtsi128_si64(vidx_lo);
204 const uint64_t vidx_lh = (uint64_t) _mm_extract_epi64(vidx_lo, 1);
[all …]
