
Searched refs:vidx_hi (Results 1 – 25 of 184) sorted by relevance


/external/XNNPACK/src/f32-velu/gen/
velu-avx-rr2-lut16-p3-x8.c
53 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
57 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
58 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
70 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_hi)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
71 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 2)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
74 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
75 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
119 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local
123 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
124 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
[all …]
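The AVX hits above share one pattern: the upper 128-bit half of the 8-wide index vector is extracted, each 32-bit index is shifted left by 2 to become a byte offset into the lookup table, and the four offsets are then consumed either as two 64-bit scalars (vidx_hl/vidx_hh, the x86-64 path) or lane by lane via _mm_extract_epi32. Below is a minimal stand-alone sketch of the 64-bit path, not the XNNPACK kernel itself: the 16-entry table and hard-coded indices are hypothetical stand-ins for xnn_table_exp2minus_k_over_16 and the real exponent bits.

/*
 * Sketch only: the AVX/x86-64 two-scalar lookup pattern.
 * Requires x86-64 and SSE4.1 (compile with e.g. -mavx).
 */
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

static const float table[16] = {
  1.000f, 1.044f, 1.091f, 1.139f, 1.189f, 1.242f, 1.297f, 1.354f,
  1.414f, 1.477f, 1.542f, 1.610f, 1.682f, 1.756f, 1.834f, 1.915f,
};

int main(void) {
  /* Four 32-bit indices; the shift by 2 turns element indices into byte offsets. */
  const __m128i vidx_hi = _mm_slli_epi32(_mm_setr_epi32(3, 7, 11, 15), 2);

  /* Split the four offsets into two 64-bit scalars, as the x8 kernel does. */
  const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
  const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1);

  /* Each scalar carries two byte offsets: low 32 bits and high 32 bits. */
  const float vl0 = *(const float*) ((uintptr_t) table + (uint32_t) vidx_hl);
  const float vl1 = *(const float*) ((uintptr_t) table + (uint32_t) (vidx_hl >> 32));
  const float vl2 = *(const float*) ((uintptr_t) table + (uint32_t) vidx_hh);
  const float vl3 = *(const float*) ((uintptr_t) table + (uint32_t) (vidx_hh >> 32));

  printf("%.3f %.3f %.3f %.3f\n", vl0, vl1, vl2, vl3);  /* table[3,7,11,15] */
  return 0;
}

Packing two offsets per uint64_t halves the number of vector-to-scalar transfers compared with four _mm_extract_epi32 calls, which is presumably why the kernels prefer it where 64-bit extraction is available.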
velu-avx-rr2-lut16-p3-x16.c
168 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
172 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
173 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
185 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_hi)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
186 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 2)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
189 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
190 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
234 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16() local
238 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
239 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
[all …]
velu-wasmsimd-x86-rr2-lut16-p3-x4.c
55 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
58 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
59 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
96 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
99 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
100 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4()
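The WAsm SIMD variants reach the same pair of scalar loads through wasm_i64x2_extract_lane, which pulls both upper byte offsets out in one uint64_t. A hedged sketch under the same stand-in assumptions (hypothetical table and offsets; build with clang targeting wasm32 and -msimd128):

/* Sketch only: the wasmsimd lane-extract lookup pattern. */
#include <wasm_simd128.h>
#include <stdint.h>
#include <stdio.h>

static const float table[4] = {0.5f, 1.5f, 2.5f, 3.5f};

int main(void) {
  /* Four byte offsets {0, 4, 8, 12}, packed two per 64-bit lane. */
  const v128_t vidx = wasm_i64x2_make(
      (int64_t) ((4ull << 32) | 0), (int64_t) ((12ull << 32) | 8));

  const uint64_t vidx_hi = (uint64_t) wasm_i64x2_extract_lane(vidx, 1);

  /* Two scalar loads per 64-bit lane, exactly as in the kernels above. */
  const float vl2 = *(const float*) ((uintptr_t) table + (uint32_t) vidx_hi);
  const float vl3 = *(const float*) ((uintptr_t) table + (uint32_t) (vidx_hi >> 32));

  printf("%f %f\n", vl2, vl3);  /* table[2], table[3] */
  return 0;
}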
velu-neon-rr2-lut16-p3-x4.c
54 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
56 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
58 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)), vl_hi, 1… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
91 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
93 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
95 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)), vl_hi, 1… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4()
velu-neonfma-rr1-lut16-p3-x4.c
53 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
55 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
57 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)), vl_hi, 1… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
89 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
91 … = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
93 …const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32)), vl_hi, 1… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4()
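On NEON the two upper offsets come from vgetq_lane_u64, and the table bits land in a 64-bit d-register via a duplicate load followed by a lane load. Note the velu kernels load the table as int32 bit patterns at byte offsets, while the vsigmoid and raddstoreexpminusmax kernels further down index a float table directly. A sketch of the velu flavor, again with a hypothetical 4-entry table standing in for the real one:

/* Sketch only: the NEON dup-load + lane-load lookup pattern. */
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

static const float table[4] = {0.5f, 1.5f, 2.5f, 3.5f};

int main(void) {
  /* Four byte offsets {0, 4, 8, 12}, packed two per 64-bit lane. */
  const uint64x2_t vidx = vcombine_u64(
      vcreate_u64((4ull << 32) | 0), vcreate_u64((12ull << 32) | 8));

  const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1);

  /* Duplicate-load fills both lanes; the lane load then overwrites lane 1.
   * Like the kernels, this reads float table entries through an int32 view. */
  int32x2_t vl_hi = vld1_dup_s32(
      (const int32_t*) ((uintptr_t) table + (uint32_t) vidx_hi));
  vl_hi = vld1_lane_s32(
      (const int32_t*) ((uintptr_t) table + (uint32_t) (vidx_hi >> 32)), vl_hi, 1);

  const float32x2_t vf = vreinterpret_f32_s32(vl_hi);
  printf("%f %f\n", vget_lane_f32(vf, 0), vget_lane_f32(vf, 1));  /* table[2], table[3] */
  return 0;
}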
velu-wasmsimd-arm-rr2-lut16-p3-x4.c
55 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
58 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
59 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
93 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
96 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi)); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
97 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4()
velu-sse41-rr2-lut16-p3-x4.c
56 const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
58 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
60 …l_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))), 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
99 const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
101 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx_hi))); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
103 …l_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx_hi >> 32))), 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4()
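The SSE4.1 variants skip the AVX half-extraction entirely: vidx is already a 128-bit integer vector, so _mm_extract_epi64 yields the upper pair of byte offsets directly, and _mm_cvtsi32_si128 plus _mm_insert_epi32 rebuild a vector of table bits lane by lane. A sketch under the same stand-in assumptions (hypothetical table and offsets; needs -msse4.1):

/* Sketch only: the SSE4.1 scalar-move + lane-insert lookup pattern. */
#include <smmintrin.h>
#include <stdint.h>
#include <stdio.h>

static const float table[4] = {0.5f, 1.5f, 2.5f, 3.5f};

int main(void) {
  /* Two byte offsets (8 and 12) packed into one 64-bit scalar. */
  const uint64_t vidx_hi = (12ull << 32) | 8;

  /* Move the first table entry (as int32 bits) into lane 0, insert the second
   * into lane 1, mirroring the vl_hl/vl_hh construction in the kernels. */
  __m128i vl_hi = _mm_cvtsi32_si128(
      *(const int*) ((uintptr_t) table + (uint32_t) vidx_hi));
  vl_hi = _mm_insert_epi32(
      vl_hi, *(const int*) ((uintptr_t) table + (uint32_t) (vidx_hi >> 32)), 1);

  /* Reinterpret the two lanes back as floats. */
  float out[4];
  _mm_storeu_ps(out, _mm_castsi128_ps(vl_hi));
  printf("%f %f\n", out[0], out[1]);  /* table[2], table[3] */
  return 0;
}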
velu-avx-rr2-lut16-p3-x24.c
217 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
221 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
222 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
234 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_hi)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
235 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 2)))); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
238 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
239 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
283 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24() local
287 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
288 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
[all …]
/external/XNNPACK/src/math/
expm1minus-f32-avx-rr2-lut16-p3.c
74 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local
78 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
79 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
91 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_hi)))); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
92 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 2)))); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
95 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
96 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
sigmoid-f32-avx-rr2-lut64-p2-div.c
78 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local
82 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
83 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
95 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_cvtsi128_si32(vidx_hi)))); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
96 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_hi, 2)))); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
99 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
100 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
/external/XNNPACK/src/f32-vsigmoid/gen/
vsigmoid-neonfma-rr1-lut2048-p1-div-x4.c
46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
79 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4() local
81 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
83 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x4()
vsigmoid-wasmsimd-rr2-lut64-p2-div-x4.c
48 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4() local
51 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4()
52 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4()
84 const uint64_t vidx_hi = wasm_i64x2_extract_lane(vidx, 1); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4() local
87 …nst float vl2 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4()
88 …t vl3 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32))); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x4()
vsigmoid-neonfma-rr1-lut64-p2-div-x4.c
46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
80 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4() local
82 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
84 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x4()
vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-x4.c
46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
84 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4() local
86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
88 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x4()
vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-x4.c
46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
83 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4() local
85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
87 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x4()
vsigmoid-neonfma-rr1-lut64-p2-nr2fma-x4.c
46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
84 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4() local
86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
88 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x4()
vsigmoid-neon-rr2-lut2048-p1-nr2recps-x4.c
47 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
49 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
51 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
85 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4() local
87 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
89 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x4()
vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-x4.c
46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
83 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4() local
85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
87 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x4()
vsigmoid-neon-rr2-lut64-p2-nr2recps-x4.c
47 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
49 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
51 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
86 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4() local
88 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
90 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x4()
vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-x4.c
46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
83 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4() local
85 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
87 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x4()
vsigmoid-neonfma-rr1-lut64-p2-nr2recps-x4.c
46 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
48 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
50 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
84 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4() local
86 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
88 vl_hi = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x4()
vsigmoid-sse41-rr2-lut64-p2-div-x4.c
50 const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4() local
52 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi))); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4()
54 …l_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32))), 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4()
92 const uint64_t vidx_hi = (uint64_t) _mm_extract_epi64(vidx, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4() local
94 …mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx_hi))); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4()
96 …l_hl, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx_hi >> 32))), 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x4()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
neonfma-rr1-lut64-p2-x4.c
50 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4() local
52 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4()
54 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4()
91 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4() local
93 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4()
95 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x4()
neon-rr2-lut64-p2-x4.c
51 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4() local
53 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4()
55 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4()
93 const uint64_t vidx_hi = vgetq_lane_u64(vidx, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4() local
95 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4()
97 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x4()
/external/XNNPACK/src/f32-velu/
avx-rr2-lut16-p3.c.in
135 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); variable
139 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
140 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1);
152 …(const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_cvtsi128_si32(vidx_hi))));
153 …nst int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 2))));
156 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 1))), 1);
157 … int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) _mm_extract_epi32(vidx_hi, 3))), 1);
201 const __m128i vidx_hi = _mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vidx, 1)), 2); variable
205 const uint64_t vidx_hl = (uint64_t) _mm_cvtsi128_si64(vidx_hi);
206 const uint64_t vidx_hh = (uint64_t) _mm_extract_epi64(vidx_hi, 1);
[all …]
