/external/XNNPACK/src/f32-vsigmoid/gen/
D | vsigmoid-neonfma-rr1-lut2048-p1-div-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24():
     92  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     94  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);
    112  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
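The NEON vsigmoid kernels in this listing all share the same gather idiom for the exp2 lookup table: one 64-bit lane of vidxKLMN packs two 32-bit element indices, the low index seeds both lanes of a float32x2_t via vld1_dup_f32, and the high index overwrites lane 1 via vld1_lane_f32. A minimal self-contained sketch of the pattern (gather2_f32 and table are illustrative names, not XNNPACK APIs):

  #include <arm_neon.h>

  // Load two table entries whose 32-bit indices are packed into the low
  // 64-bit lane of vidx, mirroring the vidxKL/vlKL lines in the entries here.
  static inline float32x2_t gather2_f32(const float* table, uint64x2_t vidx) {
    const uint64_t vidxKL = vgetq_lane_u64(vidx, 0);               // two packed indices
    float32x2_t vl = vld1_dup_f32(&table[(uint32_t) vidxKL]);      // low index fills both lanes
    vl = vld1_lane_f32(&table[(uint32_t) (vidxKL >> 32)], vl, 1);  // high index replaces lane 1
    return vl;
  }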
D | vsigmoid-neonfma-rr1-lut64-p2-div-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24():
     93  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x24.c | in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x24():
    109  const uint64_t vidxKL = wasm_i64x2_extract_lane(vidxKLMN, 0);  (local)
    111  const float vlK = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidxKL));
    112  const float vlL = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidxKL >> 32)));
|
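WAsm SIMD has no lane-indexed load, so the wasmsimd kernel above extracts the packed pair with wasm_i64x2_extract_lane and does two scalar loads; note the packed values here are byte offsets into the table (hence the uintptr_t arithmetic) rather than element indices. A hedged sketch that reassembles the pair with wasm_f32x4_make (names are illustrative; the real kernel stitches several such pairs together rather than returning one half-filled vector):

  #include <stdint.h>
  #include <wasm_simd128.h>

  // Fetch two table entries addressed by byte offsets packed into the low
  // 64-bit lane of vidx; lanes 2 and 3 of the result are left as zero.
  static inline v128_t gather2_f32_wasm(const float* table, v128_t vidx) {
    const uint64_t vidxKL = (uint64_t) wasm_i64x2_extract_lane(vidx, 0);
    const float vlK = *((const float*) ((uintptr_t) table + (uint32_t) vidxKL));
    const float vlL = *((const float*) ((uintptr_t) table + (uint32_t) (vidxKL >> 32)));
    return wasm_f32x4_make(vlK, vlL, 0.0f, 0.0f);
  }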
D | vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24():
     92  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     94  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);
    112  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-neonfma-rr1-lut64-p2-nr2fma-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24():
     93  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-neonfma-rr1-lut64-p2-nr2recps-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24():
     93  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24():
     92  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     94  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);
    112  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24():
     93  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-neon-rr2-lut64-p2-nr2recps-x24.c | in xnn_f32_vsigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24():
     94  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     96  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);
    114  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-neon-rr2-lut2048-p1-nr2recps-x24.c | in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24():
     93  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24():
     92  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
     94  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);
    112  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);
|
D | vsigmoid-sse41-rr2-lut64-p2-div-x24.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24():
    111  const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidxKLMN);  (local)
    113  const __m128i vlK = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidxKL)));
    115  const __m128i vlKL = _mm_insert_epi32(vlK, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidxKL >> 32))), 1);
|
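On SSE4.1 the same pair-gather uses _mm_cvtsi128_si64 to pull both packed byte offsets out of the low 64-bit lane, _mm_cvtsi32_si128 to seed a vector with the first table entry, and _mm_insert_epi32 (SSE4.1-only) to drop the second into lane 1. A sketch under the assumption of an x86-64 target, which _mm_cvtsi128_si64 requires; names are illustrative:

  #include <stdint.h>
  #include <smmintrin.h>

  // Gather two 32-bit table entries addressed by byte offsets packed into
  // the low 64-bit lane of vidx (x86-64 only).
  static inline __m128i gather2_epi32_sse41(const void* table, __m128i vidx) {
    const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidx);
    const __m128i vlK = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) table + (uint32_t) vidxKL)));
    return _mm_insert_epi32(vlK, *((const int*) ((uintptr_t) table + (uint32_t) (vidxKL >> 32))), 1);
  }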
D | vsigmoid-sse2-rr2-lut64-p2-div-x24.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24():
    121  const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidxKLMN);  (local)
    123  const __m128i vlK = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidxKL)));
    125  const __m128i vlL = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidxKL >> 32))));
|
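SSE2 lacks _mm_insert_epi32, so the sse2 kernel above loads each table entry into its own vector with _mm_cvtsi32_si128. The listing truncates before showing how vlK and vlL are merged; the conventional way to zip them on SSE2 is _mm_unpacklo_epi32, which is an assumption in this sketch, not a quote from the source:

  #include <stdint.h>
  #include <emmintrin.h>

  // SSE2 fallback: two scalar-seeded vectors zipped together. The unpack
  // step is assumed; the entry above does not show the combining code.
  static inline __m128i gather2_epi32_sse2(const void* table, __m128i vidx) {
    const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidx);
    const __m128i vlK = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) table + (uint32_t) vidxKL)));
    const __m128i vlL = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) table + (uint32_t) (vidxKL >> 32))));
    return _mm_unpacklo_epi32(vlK, vlL);
  }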
/external/XNNPACK/src/f32-velu/gen/
D | velu-neon-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24():
    114  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
    116  int32x2_t vlKL = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidxKL));
    118  vlKL = vld1_lane_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidxKL >> 32)), vlKL, 1);
|
D | velu-neonfma-rr1-lut16-p3-x24.c | in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24():
    113  const uint64_t vidxKL = vgetq_lane_u64(vidxKLMN, 0);  (local)
    115  int32x2_t vlKL = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidxKL));
    117  vlKL = vld1_lane_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidxKL >> 32)), vlKL, 1);
|
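The two velu NEON entries above differ from their vsigmoid counterparts in one detail: the packed values are byte offsets into xnn_table_exp2minus_k_over_16 (presumably pre-scaled when the index vector was built), so the loads go through uintptr_t arithmetic instead of array indexing, and the table entries are handled as int32 bit patterns. A sketch with illustrative names:

  #include <stdint.h>
  #include <arm_neon.h>

  // Same dup/lane gather as the vsigmoid kernels, but addressed by byte
  // offsets and loading int32 bit patterns rather than floats.
  static inline int32x2_t gather2_s32_offsets(const int32_t* table, uint64x2_t vidx) {
    const uint64_t vidxKL = vgetq_lane_u64(vidx, 0);
    int32x2_t vl = vld1_dup_s32((const int32_t*) ((uintptr_t) table + (uint32_t) vidxKL));
    vl = vld1_lane_s32((const int32_t*) ((uintptr_t) table + (uint32_t) (vidxKL >> 32)), vl, 1);
    return vl;
  }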
D | velu-wasmsimd-x86-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24():
    115  const uint64_t vidxKL = wasm_i64x2_extract_lane(vidxKLMN, 0);  (local)
    117  const float vlK = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidxKL));
    118  const float vlL = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidxKL >> 32)));
|
D | velu-wasmsimd-arm-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24():
    115  const uint64_t vidxKL = wasm_i64x2_extract_lane(vidxKLMN, 0);  (local)
    117  const float vlK = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidxKL));
    118  const float vlL = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidxKL >> 32)));
|
D | velu-sse41-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24():
    116  const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidxKLMN);  (local)
    118  const __m128i vlK = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidxKL)));
    120  const __m128i vlKL = _mm_insert_epi32(vlK, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidxKL >> 32))), 1);
|
D | velu-sse2-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24():
    126  const uint64_t vidxKL = (uint64_t) _mm_cvtsi128_si64(vidxKLMN);  (local)
    128  const __m128i vlK = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidxKL)));
    130  const __m128i vlL = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidxKL >> 32))));
|