/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-div-x12.c | 92 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local 96 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 104 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() 108 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | sse41-p5-div-x12.c | 98 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local 102 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 106 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 110 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | wasmsimd-p5-div-x12.c | 98 v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() local 102 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 106 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() 110 wasm_v128_store(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12()
|
D | neon-rr2-p5-nr2recps-x12.c | 109 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local 113 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 121 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() 125 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
|
D | neonfma-rr1-p5-nr2recps-x12.c | 104 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 108 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 116 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 120 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | neonfma-rr1-p5-div-x16.c | 105 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local 110 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 120 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 125 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | neonfma-rr1-p5-nr2fma-x12.c | 104 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local 108 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 116 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() 120 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
|
D | wasmsimd-p5-div-x16.c | 112 v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() local 117 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 122 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() 127 wasm_v128_store(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
|
D | neonfma-rr1-p5-nr1recps1fma-x12.c | 104 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local 108 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 116 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 120 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
|
D | sse2-p5-div-x12.c | 98 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() local 102 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 110 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 114 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
|
D | wasmsimd-lut64-p2-div-x12.c | 115 v128_t vf89AB = wasm_f32x4_div(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() local 119 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 123 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 127 wasm_v128_store(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12()
|
D | neonfma-rr1-lut2048-p1-div-x12.c | 106 float32x4_t vf89AB = vdivq_f32(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() local 110 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() 118 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() 122 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
|
D | neonfma-rr1-lut64-p2-div-x12.c | 111 float32x4_t vf89AB = vdivq_f32(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() local 115 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 123 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() 127 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
|
D | sse41-p5-div-x16.c | 112 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() local 117 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 122 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 127 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x12.c | 118 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() local 122 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 130 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() 134 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
|
D | neon-rr2-p5-nr2recps-x16.c | 126 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() local 131 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 141 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() 146 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x12.c | 118 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() local 122 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() 130 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() 134 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c | 118 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() local 122 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 130 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() 134 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
|
D | neonfma-rr1-lut64-p2-nr2recps-x12.c | 123 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() local 127 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 135 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() 139 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12()
|
D | wasmsimd-p5-div-x20.c | 126 v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() local 132 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 138 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() 144 wasm_v128_store(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
|
D | neonfma-rr1-p5-div-x20.c | 118 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local 124 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 136 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 142 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
|
D | neonfma-rr1-p5-nr2fma-x16.c | 120 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local 125 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 135 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() 140 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
|
D | sse2-p5-div-x16.c | 112 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() local 117 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 127 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 132 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
|
D | neon-rr2-lut2048-p1-nr2recps-x12.c | 123 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() local 127 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 135 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() 139 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
|
D | neonfma-rr1-p5-nr2recps-x16.c | 120 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local 125 …vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 135 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() 140 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
|