/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x12.c | 123 psimd_f32 vf89AB = psimd_div_f32(ve89AB, psimd_add_f32(ve89AB, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local 129 vf89AB = psimd_andnotmask_f32(vz89AB > vdenorm_cutoff, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 134 vf89AB = psimd_signblend_f32(vx89AB, vf89AB, psimd_sub_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 138 psimd_store_f32(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
|
D | neonfma-rr1-p5-div-x12.c | 119 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 125 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 134 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 138 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | psimd-p5-div-x16.c | 136 psimd_f32 vf89AB = psimd_div_f32(ve89AB, psimd_add_f32(ve89AB, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() local 143 vf89AB = psimd_andnotmask_f32(vz89AB > vdenorm_cutoff, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 149 vf89AB = psimd_signblend_f32(vx89AB, vf89AB, psimd_sub_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 154 psimd_store_f32(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
|
D | sse41-p5-div-x12.c | 127 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 133 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 138 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 142 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | neonfma-rr1-p5-div-x16.c | 132 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 139 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 150 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 155 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 134 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 140 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 149 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 153 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 134 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 140 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 149 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 153 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 141 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 147 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 156 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 160 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | sse2-p5-div-x12.c | 127 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() local 133 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 142 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 146 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
|
D | neonfma-rr1-p5-nr1recps1fma-x12.c | 134 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local 140 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 149 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 153 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
|
D | psimd-p5-div-x20.c | 149 psimd_f32 vf89AB = psimd_div_f32(ve89AB, psimd_add_f32(ve89AB, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20() local 157 vf89AB = psimd_andnotmask_f32(vz89AB > vdenorm_cutoff, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20() 164 vf89AB = psimd_signblend_f32(vx89AB, vf89AB, psimd_sub_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20() 170 psimd_store_f32(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x20()
|
D | neonfma-rr1-p5-div-x20.c | 145 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local 153 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 166 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 172 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
|
D | neonfma-rr1-lut2048-p1-div-x12.c | 146 float32x4_t vf89AB = vdivq_f32(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() local 152 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() 161 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() 165 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
|
D | sse41-p5-div-x16.c | 141 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() local 148 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 154 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 159 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
|
D | neonfma-rr1-p5-nr2recps-x16.c | 150 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local 157 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 168 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 173 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
|
D | neonfma-rr1-lut64-p2-div-x12.c | 151 float32x4_t vf89AB = vdivq_f32(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() local 157 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 166 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 170 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
|
D | neon-rr2-p5-nr2recps-x16.c | 158 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() local 165 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 176 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 181 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c | 161 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() local 167 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 176 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 180 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
|
D | psimd-p5-div-x24.c | 162 psimd_f32 vf89AB = psimd_div_f32(ve89AB, psimd_add_f32(ve89AB, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24() local 171 vf89AB = psimd_andnotmask_f32(vz89AB > vdenorm_cutoff, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24() 179 vf89AB = psimd_signblend_f32(vx89AB, vf89AB, psimd_sub_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24() 186 psimd_store_f32(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x24()
|
D | neonfma-rr1-p5-nr1recps1fma-x16.c | 150 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() local 157 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() 168 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16() 173 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16()
|
D | sse2-p5-div-x16.c | 141 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() local 148 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 159 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 164 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x12.c | 161 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() local 167 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 176 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 180 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
|
D | sse41-p5-div-x20.c | 155 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() local 163 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 170 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 176 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
|
D | neonfma-rr1-p5-nr2fma-x16.c | 150 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local 157 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 168 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 173 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x12.c | 109 psimd_f32 vf89AB = psimd_qfma_f32(vs89AB, vt89AB, vp89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() local 115 vf89AB = psimd_andnotmask_f32(vx89AB < vdenorm_cutoff, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 120 psimd_store_f32(output + 8, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12() 126 vacc0 = psimd_add_f32(vacc0, vf89AB); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x12()
|