/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-div-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 170 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 178 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24()
|
D | neonfma-rr1-p5-nr1recps1fma-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() 194 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24() 202 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24()
|
D | neonfma-rr1-p5-nr2recps-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 194 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24() 202 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24()
|
D | neonfma-rr1-p5-nr2fma-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() 194 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24() 202 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24()
|
D | neon-rr2-p5-nr2recps-x24.c | 48 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() local 62 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 204 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24() 212 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x24()
|
D | neonfma-rr1-lut64-p2-div-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 220 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24() 228 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x24()
|
D | neonfma-rr1-lut2048-p1-div-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 212 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24() 220 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
|
D | psimd-p5-div-x24.c | 48 const psimd_f32 vxKLMN = psimd_load_f32(x + 20); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24() local 63 const psimd_f32 vzKLMN = psimd_abs_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24() 182 vfKLMN = psimd_signblend_f32(vxKLMN, vfKLMN, psimd_sub_f32(vone, vfKLMN)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 244 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24() 252 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x24()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 236 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24() 244 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
|
D | neonfma-rr1-lut64-p2-nr2fma-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 244 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24() 252 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x24()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 236 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24() 244 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
|
D | neonfma-rr1-lut64-p2-nr2recps-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 244 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24() 252 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x24()
|
D | neon-rr2-lut64-p2-nr2recps-x24.c | 48 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() local 62 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 254 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24() 262 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x24()
|
D | neon-rr2-lut2048-p1-nr2recps-x24.c | 48 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() local 62 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 246 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24() 254 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c | 46 const float32x4_t vxKLMN = vld1q_f32(x); x += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() local 60 const float32x4_t vzKLMN = vabsq_f32(vxKLMN); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 236 …vfKLMN = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vfKLMN), vcagtq_f32(vxKLMN, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24() 244 const uint32x4_t vmKLMN = vcltq_f32(vxKLMN, vmovq_n_f32(0.0f)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
|
D | sse41-p5-div-x24.c | 49 const __m128 vxKLMN = _mm_loadu_ps(x + 20); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() local 63 const __m128 vzKLMN = _mm_or_ps(vxKLMN, vsign_mask); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 189 vfKLMN = _mm_blendv_ps(_mm_sub_ps(vone, vfKLMN), vfKLMN, vxKLMN); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
|
D | sse2-p5-div-x24.c | 49 const __m128 vxKLMN = _mm_loadu_ps(x + 20); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() local 63 const __m128 vzKLMN = _mm_or_ps(vxKLMN, vsign_mask); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 189 … __m128 vmKLMN = _mm_castsi128_ps(_mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vxKLMN))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
|