/external/XNNPACK/src/f32-vsigmoid/gen/ |
D | vsigmoid-neonfma-rr1-p5-div-x8.c | 78 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8() local 81 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8() 87 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8() 90 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x8()
|
D | vsigmoid-wasmsimd-rr2-p5-div-x8.c | 83 v128_t vf4567 = wasm_f32x4_div(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8() local 86 vf4567 = wasm_v128_andnot(vf4567, wasm_f32x4_gt(vz4567, vdenorm_cutoff)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8() 89 vf4567 = wasm_v128_bitselect(vf4567, wasm_f32x4_sub(vone, vf4567), wasm_i32x4_shr(vx4567, 31)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8() 92 wasm_v128_store(y + 4, vf4567); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x8()
|
D | vsigmoid-sse41-rr2-p5-div-x8.c | 83 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8() local 86 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8() 89 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8() 92 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x8()
|
D | vsigmoid-neonfma-rr1-p5-nr2recps-x8.c | 87 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() local 90 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 96 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8() 99 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x8()
|
D | vsigmoid-neonfma-rr1-lut64-p2-div-x8.c | 91 float32x4_t vf4567 = vdivq_f32(vy4567, vd4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local 94 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 100 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 103 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
|
D | vsigmoid-neonfma-rr1-lut2048-p1-div-x8.c | 87 float32x4_t vf4567 = vdivq_f32(vy4567, vd4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local 90 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 96 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 99 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
|
D | vsigmoid-neon-rr2-p5-nr2recps-x8.c | 91 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8() local 94 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 100 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8() 103 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x8()
|
D | vsigmoid-neonfma-rr1-p5-nr1recps1fma-x8.c | 87 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() local 90 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 96 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8() 99 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x8()
|
D | vsigmoid-sse2-rr2-p5-div-x8.c | 83 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8() local 86 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8() 92 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8() 95 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x8()
|
D | vsigmoid-neonfma-rr1-p5-div-x12.c | 91 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12() local 95 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12() 103 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12() 107 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x12()
|
D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x8.c | 94 v128_t vf4567 = wasm_f32x4_div(vy4567, vd4567); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() local 97 vf4567 = wasm_v128_andnot(vf4567, wasm_f32x4_gt(vz4567, vdenorm_cutoff)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() 100 vf4567 = wasm_v128_bitselect(vf4567, wasm_f32x4_sub(vone, vf4567), wasm_i32x4_shr(vx4567, 31)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() 103 wasm_v128_store(y + 4, vf4567); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8()
|
D | vsigmoid-wasmsimd-rr2-p5-div-x12.c | 97 v128_t vf4567 = wasm_f32x4_div(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12() local 101 vf4567 = wasm_v128_andnot(vf4567, wasm_f32x4_gt(vz4567, vdenorm_cutoff)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12() 105 vf4567 = wasm_v128_bitselect(vf4567, wasm_f32x4_sub(vone, vf4567), wasm_i32x4_shr(vx4567, 31)); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12() 109 wasm_v128_store(y + 4, vf4567); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x12()
|
D | vsigmoid-neonfma-rr1-p5-nr2fma-x8.c | 87 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() local 90 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 96 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8() 99 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x8()
|
D | vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-x8.c | 96 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local 99 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 105 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 108 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
|
D | vsigmoid-neonfma-rr1-lut64-p2-nr2recps-x8.c | 100 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local 103 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 109 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 112 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
|
D | vsigmoid-neonfma-rr1-p5-nr1recps1fma-x12.c | 103 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local 107 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 115 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() 119 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
|
D | vsigmoid-neonfma-rr1-lut64-p2-nr1recps1fma-x8.c | 100 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local 103 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 109 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 112 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
|
D | vsigmoid-neonfma-rr1-p5-nr2recps-x12.c | 103 float32x4_t vf4567 = vmulq_f32(ve4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local 107 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 115 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() 119 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
|
D | vsigmoid-neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c | 96 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local 99 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 105 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 108 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
|
D | vsigmoid-sse2-rr2-p5-div-x12.c | 97 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12() local 101 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12() 109 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12() 113 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x12()
|
D | vsigmoid-neon-rr2-lut2048-p1-nr2recps-x8.c | 100 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local 103 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 109 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 112 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
|
D | vsigmoid-sse41-rr2-p5-div-x12.c | 97 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12() local 101 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12() 105 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12() 109 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x12()
|
D | vsigmoid-neonfma-rr1-p5-div-x16.c | 104 float32x4_t vf4567 = vdivq_f32(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16() local 109 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16() 119 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16() 124 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x16()
|
D | vsigmoid-neonfma-rr1-lut2048-p1-nr2fma-x8.c | 96 float32x4_t vf4567 = vmulq_f32(vy4567, vr4567); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local 99 …vf4567 = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf4567), vcagtq_f32(vx4567, vdenorm… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 105 vf4567 = vbslq_f32(vm4567, vf4567, vsubq_f32(vone, vf4567)); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 108 vst1q_f32(y, vf4567); y += 4; in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
|
D | vsigmoid-sse41-rr2-p5-div-x16.c | 111 __m128 vf4567 = _mm_div_ps(ve4567, vd4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16() local 116 vf4567 = _mm_andnot_ps(_mm_cmplt_ps(vz4567, vdenorm_cutoff), vf4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16() 121 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16() 126 _mm_storeu_ps(y + 4, vf4567); in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x16()
|