/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x8.c | 109 psimd_f32 vf4567 = psimd_div_f32(ve4567, psimd_add_f32(ve4567, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() local 114 vf4567 = psimd_andnotmask_f32(vz4567 > vdenorm_cutoff, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 118 vf4567 = psimd_signblend_f32(vx4567, vf4567, psimd_sub_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 121 psimd_store_f32(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
|
D | neonfma-rr1-p5-div-x8.c | 105 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local 110 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 117 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 120 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
|
D | sse41-p5-div-x8.c | 112 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() local 117 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 121 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 124 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
|
D | psimd-p5-div-x12.c | 122 psimd_f32 vf4567 = psimd_div_f32(ve4567, psimd_add_f32(ve4567, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local 128 vf4567 = psimd_andnotmask_f32(vz4567 > vdenorm_cutoff, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 133 vf4567 = psimd_signblend_f32(vx4567, vf4567, psimd_sub_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 137 psimd_store_f32(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
|
D | neonfma-rr1-p5-nr2recps-x8.c | 117 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local 122 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 129 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 132 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | sse2-p5-div-x8.c | 112 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() local 117 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 124 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 127 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x8.c | 117 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local 122 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 129 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 132 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
|
D | neonfma-rr1-p5-div-x12.c | 118 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 124 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 133 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 137 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | neonfma-rr1-p5-nr2fma-x8.c | 117 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local 122 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 129 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 132 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
|
D | neon-rr2-p5-nr2recps-x8.c | 123 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() local 128 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 135 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 138 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
|
D | neonfma-rr1-lut64-p2-div-x8.c | 131 float32x4_t vf4567 = vdivq_f32(vy4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local 136 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 143 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 146 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
|
D | neonfma-rr1-lut2048-p1-div-x8.c | 127 float32x4_t vf4567 = vdivq_f32(vy4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local 132 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 139 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 142 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
|
D | psimd-p5-div-x16.c | 135 psimd_f32 vf4567 = psimd_div_f32(ve4567, psimd_add_f32(ve4567, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() local 142 vf4567 = psimd_andnotmask_f32(vz4567 > vdenorm_cutoff, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 148 vf4567 = psimd_signblend_f32(vx4567, vf4567, psimd_sub_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 153 psimd_store_f32(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
|
D | sse41-p5-div-x12.c | 126 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 132 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 137 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 141 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c | 139 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local 144 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 151 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 154 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x8.c | 139 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local 144 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 151 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 154 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-p5-div-x16.c | 131 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 138 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 149 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 154 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x8.c | 143 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local 148 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 155 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 158 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 133 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 139 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 148 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 152 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 133 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 139 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 148 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 152 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 140 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 146 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 155 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 159 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-lut64-p2-nr2recps-x8.c | 143 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local 148 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 155 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 158 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x8.c | 139 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local 144 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 151 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 154 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
|
D | sse2-p5-div-x12.c | 126 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() local 132 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 141 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 145 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
|
D | neon-rr2-lut2048-p1-nr2recps-x8.c | 145 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local 150 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 157 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 160 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
|