/external/XNNPACK/src/f32-sigmoid/gen/ |
D | psimd-p5-div-x8.c | 108 psimd_f32 vf0123 = psimd_div_f32(ve0123, psimd_add_f32(ve0123, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() local 113 vf0123 = psimd_andnotmask_f32(vz0123 > vdenorm_cutoff, vf0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 117 vf0123 = psimd_signblend_f32(vx0123, vf0123, psimd_sub_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8() 120 psimd_store_f32(y, vf0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x8()
|
D | neonfma-rr1-p5-div-x8.c | 104 float32x4_t vf0123 = vdivq_f32(ve0123, vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() local 109 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 116 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8() 119 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x8()
|
D | sse41-p5-div-x8.c | 111 __m128 vf0123 = _mm_div_ps(ve0123, vd0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() local 116 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 120 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 123 _mm_storeu_ps(y, vf0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
|
D | psimd-p5-div-x12.c | 121 psimd_f32 vf0123 = psimd_div_f32(ve0123, psimd_add_f32(ve0123, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() local 127 vf0123 = psimd_andnotmask_f32(vz0123 > vdenorm_cutoff, vf0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 132 vf0123 = psimd_signblend_f32(vx0123, vf0123, psimd_sub_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12() 136 psimd_store_f32(y, vf0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x12()
|
D | neonfma-rr1-p5-nr2recps-x8.c | 116 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local 121 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 128 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 131 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | sse2-p5-div-x8.c | 111 __m128 vf0123 = _mm_div_ps(ve0123, vd0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() local 116 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 123 vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 126 _mm_storeu_ps(y, vf0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
|
D | neonfma-rr1-p5-nr1recps1fma-x8.c | 116 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local 121 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 128 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 131 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
|
D | neonfma-rr1-p5-div-x12.c | 117 float32x4_t vf0123 = vdivq_f32(ve0123, vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 123 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 132 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 136 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | neonfma-rr1-p5-nr2fma-x8.c | 116 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local 121 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 128 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 131 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
|
D | neon-rr2-p5-nr2recps-x8.c | 122 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() local 127 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 134 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 137 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
|
D | neonfma-rr1-lut64-p2-div-x8.c | 130 float32x4_t vf0123 = vdivq_f32(vy0123, vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local 135 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 142 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 145 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
|
D | neonfma-rr1-lut2048-p1-div-x8.c | 126 float32x4_t vf0123 = vdivq_f32(vy0123, vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local 131 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 138 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 141 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
|
D | psimd-p5-div-x16.c | 134 psimd_f32 vf0123 = psimd_div_f32(ve0123, psimd_add_f32(ve0123, vone)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() local 141 vf0123 = psimd_andnotmask_f32(vz0123 > vdenorm_cutoff, vf0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 147 vf0123 = psimd_signblend_f32(vx0123, vf0123, psimd_sub_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16() 152 psimd_store_f32(y, vf0123); in xnn_f32_sigmoid_ukernel__psimd_p5_div_x16()
|
D | sse41-p5-div-x12.c | 125 __m128 vf0123 = _mm_div_ps(ve0123, vd0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 131 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vz0123, vdenorm_cutoff), vf0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 136 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 140 _mm_storeu_ps(y, vf0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c | 138 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local 143 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 150 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 153 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x8.c | 138 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local 143 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 150 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 153 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-p5-div-x16.c | 130 float32x4_t vf0123 = vdivq_f32(ve0123, vd0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 137 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 148 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 153 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x8.c | 142 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local 147 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 154 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 157 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 132 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 138 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 147 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 151 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 132 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 138 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 147 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 151 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 139 float32x4_t vf0123 = vmulq_f32(ve0123, vr0123); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 145 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 154 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 158 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-lut64-p2-nr2recps-x8.c | 142 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local 147 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 154 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 157 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x8.c | 138 float32x4_t vf0123 = vmulq_f32(vy0123, vr0123); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local 143 …vf0123 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf0123), vcagtq_f32(vx0123, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 150 vf0123 = vbslq_f32(vm0123, vf0123, vsubq_f32(vone, vf0123)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 153 vst1q_f32(y, vf0123); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | psimd-p5-x4.c | 83 psimd_f32 vf0123 = psimd_qfma_f32(vs0123, vt0123, vp0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() local 87 vf0123 = psimd_andnotmask_f32(vx0123 < vdenorm_cutoff, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() 90 psimd_store_f32(output, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4() 94 vacc0 = psimd_add_f32(vacc0, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__psimd_p5_x4()
|
D | sse2-p5-x4.c | 83 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() local 87 vf0123 = _mm_andnot_ps(_mm_cmplt_ps(vx0123, vdenorm_cutoff), vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 90 _mm_storeu_ps(output, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 94 vacc0 = _mm_add_ps(vacc0, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
|