Home
last modified time | relevance | path

Searched refs:vf89AB (Results 1 – 25 of 126) sorted by relevance

123456

/external/XNNPACK/src/f32-sigmoid/gen/
Dpsimd-p5-div-x12.c123 psimd_f32 vf89AB = psimd_div_f32(ve89AB, psimd_add_f32(ve89AB, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local
129 vf89AB = psimd_andnotmask_f32(vz89AB > vdenorm_cutoff, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
134 vf89AB = psimd_signblend_f32(vx89AB, vf89AB, psimd_sub_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
138 psimd_store_f32(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
Dneonfma-rr1-p5-div-x12.c119 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local
125vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
134 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
138 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
Dpsimd-p5-div-x16.c136 psimd_f32 vf89AB = psimd_div_f32(ve89AB, psimd_add_f32(ve89AB, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() local
143 vf89AB = psimd_andnotmask_f32(vz89AB > vdenorm_cutoff, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
149 vf89AB = psimd_signblend_f32(vx89AB, vf89AB, psimd_sub_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
154 psimd_store_f32(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
Dsse41-p5-div-x12.c127 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local
133 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
138 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
142 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
Dneonfma-rr1-p5-div-x16.c132 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local
139vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
150 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
155 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
Dneonfma-rr1-p5-nr2fma-x12.c134 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local
140vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
149 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
153 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
Dneonfma-rr1-p5-nr2recps-x12.c134 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local
140vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
149 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
153 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
Dneon-rr2-p5-nr2recps-x12.c141 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local
147vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
156 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
160 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
Dsse2-p5-div-x12.c127 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() local
133 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
142 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
146 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
Dneonfma-rr1-p5-nr1recps1fma-x12.c134 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local
140vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
149 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
153 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
Dpsimd-p5-div-x20.c149 psimd_f32 vf89AB = psimd_div_f32(ve89AB, psimd_add_f32(ve89AB, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20() local
157 vf89AB = psimd_andnotmask_f32(vz89AB > vdenorm_cutoff, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20()
164 vf89AB = psimd_signblend_f32(vx89AB, vf89AB, psimd_sub_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20()
170 psimd_store_f32(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20()
Dneonfma-rr1-p5-div-x20.c145 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local
153vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
166 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
172 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
Dneonfma-rr1-lut2048-p1-div-x12.c146 float32x4_t vf89AB = vdivq_f32(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() local
152vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
161 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
165 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
Dsse41-p5-div-x16.c141 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() local
148 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
154 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
159 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
Dneonfma-rr1-p5-nr2recps-x16.c150 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local
157vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
168 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
173 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
Dneonfma-rr1-lut64-p2-div-x12.c151 float32x4_t vf89AB = vdivq_f32(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() local
157vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
166 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
170 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
Dneon-rr2-p5-nr2recps-x16.c158 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() local
165vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
176 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
181 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
Dneonfma-rr1-lut2048-p1-nr1recps1fma-x12.c161 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() local
167vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
176 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
180 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
Dpsimd-p5-div-x24.c162 psimd_f32 vf89AB = psimd_div_f32(ve89AB, psimd_add_f32(ve89AB, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24() local
171 vf89AB = psimd_andnotmask_f32(vz89AB > vdenorm_cutoff, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24()
179 vf89AB = psimd_signblend_f32(vx89AB, vf89AB, psimd_sub_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24()
186 psimd_store_f32(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24()
Dneonfma-rr1-p5-nr1recps1fma-x16.c150 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() local
157vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16()
168 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16()
173 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16()
Dsse2-p5-div-x16.c141 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() local
148 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
159 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
164 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
Dneonfma-rr1-lut2048-p1-nr2recps-x12.c161 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() local
167vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
176 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
180 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
Dsse41-p5-div-x20.c155 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() local
163 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
170 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
176 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
Dneonfma-rr1-p5-nr2fma-x16.c150 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local
157vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
168 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
173 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dpsimd-p5-x12.c109 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local
115 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
120 psimd_store_f32(output + 8, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
126 vacc0 = psimd_add_f32(vacc0, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()

123456