Home
last modified time | relevance | path

Searched refs:vidx_hi (Results 1 – 25 of 182) sorted by relevance

12345678

/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx-rr2-lut16-p3-x8.c56 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
60 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
61 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
73 …(const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_hi))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
74 …nst void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 2))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
77 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
78 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
122 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
126 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
127 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
[all …]
Dvelu-neonfma-rr1-lut16-p3-x4.c54 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
56 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
58 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)), vl_hi, 1… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
90 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
92 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
94 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)), vl_hi, 1… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
Dvelu-neon-rr2-lut16-p3-x4.c55 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
57 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
59 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)), vl_hi, 1… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
92 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
94 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
96 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)), vl_hi, 1… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
Dvelu-wasmsimd-arm-rr2-lut16-p3-x4.c56 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
59 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
60 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
94 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
97 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
98 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
Dvelu-wasmsimd-x86-rr2-lut16-p3-x4.c56 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
59 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
60 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
97 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
100 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
101 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
Dvelu-avx-rr2-lut16-p3-x16.c171 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
175 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
176 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
188 …(const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_hi))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
189 …nst void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 2))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
192 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
193 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
237 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
241 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
242 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
[all …]
Dvelu-sse41-rr2-lut16-p3-x4.c57 const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
59 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
61 …l_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))), 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
100 const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
102 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
104 …l_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))), 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
Dvelu-sse2-rr2-lut16-p3-x4.c57 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local
59 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
62 …32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
105 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local
107 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
110 …32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)))); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4()
/external/XNNPACK/src/math/
Dexpm1minus-avx-rr2-lut16-p3.c74 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local
78 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
79 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
91 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_hi)))); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
92 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 2)))); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
95 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
96 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
Dsigmoid-avx-rr2-lut64-p2-div.c78 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local
82 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
83 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
95 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_cvtsi128_si32(vidx_hi)))); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
96 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_hi, 2)))); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
99 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
100 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
/external/XNNPACK/src/f32-sigmoid/gen/
Dneonfma-rr1-lut64-p2-div-x4.c46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
80 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
82 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
84 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
Dwasmsimd-lut64-p2-div-x4.c48 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4() local
51 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
52 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
84 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4() local
87 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
88 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4()
Dneonfma-rr1-lut2048-p1-div-x4.c46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
79 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
81 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
83 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
Dneonfma-rr1-lut2048-p1-nr1recps1fma-x4.c46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
83 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
87 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
Dneonfma-rr1-lut2048-p1-nr2recps-x4.c46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
83 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
87 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
Dneonfma-rr1-lut2048-p1-nr2fma-x4.c46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
83 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
87 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
Dneonfma-rr1-lut64-p2-nr2recps-x4.c46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
84 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
88 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
Dneon-rr2-lut2048-p1-nr2recps-x4.c47 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
49 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
51 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
85 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
87 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
89 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
Dneonfma-rr1-lut64-p2-nr2fma-x4.c46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
84 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
88 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
Dneon-rr2-lut64-p2-nr2recps-x4.c47 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
49 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
51 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
86 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
88 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
90 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
Dneonfma-rr1-lut64-p2-nr1recps1fma-x4.c46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
84 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
88 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
Dsse41-lut64-p2-div-x4.c50 const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local
52 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi))); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
54 …l_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32))), 1); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
92 const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4() local
94 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi))); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
96 …l_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32))), 1); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4()
Dsse2-lut64-p2-div-x4.c50 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local
52 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
55 …32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32)))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
97 const uint64_t vidx_hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx)); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4() local
99 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
102 …32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32)))); in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneonfma-lut64-p2-x4.c74 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local
76 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
78 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
150 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() local
152 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
154 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
Dneon-lut64-p2-x4.c75 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local
77 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
79 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
151 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() local
153 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
155 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()

12345678