/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-lut64-p2-div-x24.c |
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() local
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24()
    115  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24()
|
D | neonfma-rr1-lut2048-p1-div-x24.c |
     94  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() local
    112  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
    114  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
|
D | neonfma-rr1-lut64-p2-nr2fma-x24.c |
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() local
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24()
    115  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24()
|
D | neonfma-rr1-lut64-p2-nr2recps-x24.c |
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() local
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24()
    115  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c |
     94  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() local
    112  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
    114  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x24.c |
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() local
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24()
    115  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24()
|
D | neon-rr2-lut2048-p1-nr2recps-x24.c |
     95  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() local
    113  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
    115  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
|
D | neon-rr2-lut64-p2-nr2recps-x24.c |
     96  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() local
    114  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24()
    116  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x24.c |
     94  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() local
    112  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
    114  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x24.c |
     94  float32x2_t vlKL = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxKL]);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() local
    112  vlKL = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidxKL >> 32)], vlKL, 1);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
    114  const float32x4_t vlKLMN = vcombine_f32(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
|
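All of the NEON sigmoid kernels listed above share one table-gather idiom for lanes K..N: two table indices are packed into each 64-bit pair, lane 0 is filled with vld1_dup_f32, lane 1 is inserted with vld1_lane_f32, and vcombine_f32 joins the two halves. A minimal sketch of that idiom, with `table`, `vidxKL`, and `vidxMN` as hypothetical stand-ins for the kernels' xnn_table_exp2minus_k_over_* tables and extracted indices:

#include <arm_neon.h>
#include <stdint.h>

/* Sketch of the two-lane table gather the NEON kernels above use.
 * Each uint64_t packs two 32-bit table indices (low word, high word). */
static float32x4_t gather4_f32(const float* table, uint64_t vidxKL, uint64_t vidxMN) {
  /* Lane 0: low 32 bits of each packed index pair. */
  float32x2_t vlKL = vld1_dup_f32(&table[(uint32_t) vidxKL]);
  float32x2_t vlMN = vld1_dup_f32(&table[(uint32_t) vidxMN]);
  /* Lane 1: high 32 bits, inserted without disturbing lane 0. */
  vlKL = vld1_lane_f32(&table[(uint32_t) (vidxKL >> 32)], vlKL, 1);
  vlMN = vld1_lane_f32(&table[(uint32_t) (vidxMN >> 32)], vlMN, 1);
  /* Two 64-bit halves -> one 128-bit vector of four looked-up values. */
  return vcombine_f32(vlKL, vlMN);
}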
D | sse41-lut64-p2-div-x24.c |
    115  …const __m128i vlKL = _mm_insert_epi32(vlK, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_…  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() local
    117  const __m128i vlKLMN = _mm_unpacklo_epi64(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
    170  …const __m128i vlKL = _mm_insert_epi32(vlK, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_…  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() local
    172  const __m128i vlKLMN = _mm_unpacklo_epi64(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
|
D | sse2-lut64-p2-div-x24.c |
    126  const __m128i vlKL = _mm_unpacklo_epi32(vlK, vlL);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() local
    129  const __m128i vlKLMN = _mm_unpacklo_epi64(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
    193  const __m128i vlKL = _mm_unpacklo_epi32(vlK, vlL);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24() local
    196  const __m128i vlKLMN = _mm_unpacklo_epi64(vlKL, vlMN);  in xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24()
|
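The two x86 sigmoid variants differ only in how the four scalar lookups are merged: SSE4.1 can insert directly into lane 1 with _mm_insert_epi32, while SSE2 must combine lane-0 values with _mm_unpacklo_epi32. A hedged sketch of both paths; `table` and the `idx*` arguments are hypothetical stand-ins for the table bases and index arithmetic the real kernels perform (the truncated lines above elide that arithmetic):

#include <smmintrin.h>  /* SSE4.1; <emmintrin.h> suffices for the SSE2 path */
#include <stdint.h>

/* SSE4.1 flavor: scalar load into lane 0, insert into lane 1, merge halves. */
static __m128i gather4_sse41(const int* table, uint32_t idxK, uint32_t idxL,
                             uint32_t idxM, uint32_t idxN) {
  const __m128i vlK = _mm_cvtsi32_si128(table[idxK]);
  const __m128i vlKL = _mm_insert_epi32(vlK, table[idxL], 1);
  const __m128i vlM = _mm_cvtsi32_si128(table[idxM]);
  const __m128i vlMN = _mm_insert_epi32(vlM, table[idxN], 1);
  return _mm_unpacklo_epi64(vlKL, vlMN);
}

/* SSE2 flavor: no lane insert, so every scalar load lands in lane 0 and
 * the lanes are interleaved with unpack instructions instead. */
static __m128i gather4_sse2(const int* table, uint32_t idxK, uint32_t idxL,
                            uint32_t idxM, uint32_t idxN) {
  const __m128i vlK = _mm_cvtsi32_si128(table[idxK]);
  const __m128i vlL = _mm_cvtsi32_si128(table[idxL]);
  const __m128i vlM = _mm_cvtsi32_si128(table[idxM]);
  const __m128i vlN = _mm_cvtsi32_si128(table[idxN]);
  const __m128i vlKL = _mm_unpacklo_epi32(vlK, vlL);
  const __m128i vlMN = _mm_unpacklo_epi32(vlM, vlN);
  return _mm_unpacklo_epi64(vlKL, vlMN);
}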
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-neonfma-rr1-lut16-p3-x24.c |
    116  …int32x2_t vlKL = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3…  in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() local
    118  …vlKL = vld1_lane_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vid…  in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24()
    120  const int32x4_t vlKLMN = vcombine_s32(vlKL, vlMN);  in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24()
|
D | velu-neon-rr2-lut16-p3-x24.c |
    117  …int32x2_t vlKL = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint3…  in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() local
    119  …vlKL = vld1_lane_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vid…  in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24()
    121  const int32x4_t vlKLMN = vcombine_s32(vlKL, vlMN);  in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24()
|
D | velu-sse41-rr2-lut16-p3-x24.c |
    121  …const __m128i vlKL = _mm_insert_epi32(vlK, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_…  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() local
    123  const __m128i vlKLMN = _mm_unpacklo_epi64(vlKL, vlMN);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
    176  …const __m128i vlKL = _mm_insert_epi32(vlK, *((const int*) ((uintptr_t) xnn_table_exp2minus_k_over_…  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() local
    178  const __m128i vlKLMN = _mm_unpacklo_epi64(vlKL, vlMN);  in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
|
D | velu-sse2-rr2-lut16-p3-x24.c |
    132  const __m128i vlKL = _mm_unpacklo_epi32(vlK, vlL);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() local
    135  const __m128i vlKLMN = _mm_unpacklo_epi64(vlKL, vlMN);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
    199  const __m128i vlKL = _mm_unpacklo_epi32(vlK, vlL);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() local
    202  const __m128i vlKLMN = _mm_unpacklo_epi64(vlKL, vlMN);  in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
|
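The velu NEON kernels above apply the same dup-then-lane gather to int32 table entries, and the visible `(uintptr_t) xnn_table_exp2minus_k_over_16 + …` fragments suggest the lut16 table is addressed by byte offset rather than by element index (the truncated lines elide that arithmetic). A sketch under that assumption, with `table` and `vidxKL` as hypothetical stand-ins for the table and the packed offsets:

#include <arm_neon.h>
#include <stdint.h>

/* Sketch of the int32 gather flavor in the velu lut16 kernels above:
 * same dup-then-lane idiom, but each packed 32-bit value is treated as
 * a byte offset added to the table base. */
static int32x2_t gather2_s32(const int32_t* table, uint64_t vidxKL) {
  /* Lane 0: low 32 bits of the packed offset pair. */
  int32x2_t vlKL = vld1_dup_s32(
      (const int32_t*) ((uintptr_t) table + (uint32_t) vidxKL));
  /* Lane 1: high 32 bits, inserted into the existing vector. */
  vlKL = vld1_lane_s32(
      (const int32_t*) ((uintptr_t) table + (uint32_t) (vidxKL >> 32)), vlKL, 1);
  return vlKL;  /* two halves would then be joined with vcombine_s32 */
}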