Home
last modified time | relevance | path

Searched refs:vf4567 (Results 1 – 25 of 156) sorted by relevance

1234567

/external/XNNPACK/src/f32-sigmoid/gen/
Dpsimd-p5-div-x8.c109 psimd_f32 vf4567 = psimd_div_f32(ve4567, psimd_add_f32(ve4567, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() local
114 vf4567 = psimd_andnotmask_f32(vz4567 > vdenorm_cutoff, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
118 vf4567 = psimd_signblend_f32(vx4567, vf4567, psimd_sub_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
121 psimd_store_f32(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
Dneonfma-rr1-p5-div-x8.c105 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local
110vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
117 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
120 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
Dsse41-p5-div-x8.c112 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() local
117 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
121 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
124 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
Dpsimd-p5-div-x12.c122 psimd_f32 vf4567 = psimd_div_f32(ve4567, psimd_add_f32(ve4567, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local
128 vf4567 = psimd_andnotmask_f32(vz4567 > vdenorm_cutoff, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
133 vf4567 = psimd_signblend_f32(vx4567, vf4567, psimd_sub_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
137 psimd_store_f32(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
Dneonfma-rr1-p5-nr2recps-x8.c117 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local
122vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
129 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
132 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
Dsse2-p5-div-x8.c112 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() local
117 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
124 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
127 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
Dneonfma-rr1-p5-nr1recps1fma-x8.c117 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local
122vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
129 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
132 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
Dneonfma-rr1-p5-div-x12.c118 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local
124vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
133 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
137 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
Dneonfma-rr1-p5-nr2fma-x8.c117 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local
122vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
129 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
132 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
Dneon-rr2-p5-nr2recps-x8.c123 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() local
128vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
135 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
138 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
Dneonfma-rr1-lut64-p2-div-x8.c131 float32x4_t vf4567 = vdivq_f32(vy4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local
136vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
143 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
146 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
Dneonfma-rr1-lut2048-p1-div-x8.c127 float32x4_t vf4567 = vdivq_f32(vy4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local
132vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
139 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
142 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
Dpsimd-p5-div-x16.c135 psimd_f32 vf4567 = psimd_div_f32(ve4567, psimd_add_f32(ve4567, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() local
142 vf4567 = psimd_andnotmask_f32(vz4567 > vdenorm_cutoff, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
148 vf4567 = psimd_signblend_f32(vx4567, vf4567, psimd_sub_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
153 psimd_store_f32(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
Dsse41-p5-div-x12.c126 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local
132 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
137 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
141 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
Dneonfma-rr1-lut2048-p1-nr1recps1fma-x8.c139 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local
144vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
151 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
154 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
Dneonfma-rr1-lut2048-p1-nr2recps-x8.c139 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local
144vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
151 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
154 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
Dneonfma-rr1-p5-div-x16.c131 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local
138vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
149 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
154 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
Dneonfma-rr1-lut64-p2-nr1recps1fma-x8.c143 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local
148vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
155 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
158 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
Dneonfma-rr1-p5-nr2fma-x12.c133 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local
139vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
148 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
152 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
Dneonfma-rr1-p5-nr2recps-x12.c133 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local
139vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
148 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
152 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
Dneon-rr2-p5-nr2recps-x12.c140 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local
146vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
155 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
159 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
Dneonfma-rr1-lut64-p2-nr2recps-x8.c143 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local
148vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
155 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
158 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
Dneonfma-rr1-lut2048-p1-nr2fma-x8.c139 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local
144vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
151 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
154 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
Dsse2-p5-div-x12.c126 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() local
132 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
141 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
145 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
Dneon-rr2-lut2048-p1-nr2recps-x8.c145 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local
150vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
157 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
160 vst1q_f32(y, vf4567); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()

1234567