Home
last modified time | relevance | path

Searched refs:vf89AB (Results 1 – 25 of 138) sorted by relevance

123456

/external/XNNPACK/src/f32-sigmoid/gen/
Dneonfma-rr1-p5-div-x12.c92 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12() local
96vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
104 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
108 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12()
Dsse41-p5-div-x12.c98 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() local
102 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
106 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
110 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
Dwasmsimd-p5-div-x12.c98 v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12() local
102 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12()
106 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12()
110 wasm_v128_store(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12()
Dneon-rr2-p5-nr2recps-x12.c109 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12() local
113vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
121 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
125 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12()
Dneonfma-rr1-p5-nr2recps-x12.c104 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12() local
108vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
116 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
120 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12()
Dneonfma-rr1-p5-div-x16.c105 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() local
110vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
120 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
125 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
Dneonfma-rr1-p5-nr2fma-x12.c104 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12() local
108vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
116 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
120 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12()
Dwasmsimd-p5-div-x16.c112 v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16() local
117 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
122 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
127 wasm_v128_store(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16()
Dneonfma-rr1-p5-nr1recps1fma-x12.c104 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12() local
108vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
116 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
120 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12()
Dsse2-p5-div-x12.c98 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() local
102 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
110 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
114 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
Dwasmsimd-lut64-p2-div-x12.c115 v128_t vf89AB = wasm_f32x4_div(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() local
119 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12()
123 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12()
127 wasm_v128_store(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12()
Dneonfma-rr1-lut2048-p1-div-x12.c106 float32x4_t vf89AB = vdivq_f32(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() local
110vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
118 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
122 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
Dneonfma-rr1-lut64-p2-div-x12.c111 float32x4_t vf89AB = vdivq_f32(vy89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12() local
115vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
123 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
127 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12()
Dsse41-p5-div-x16.c112 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() local
117 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
122 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
127 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
Dneonfma-rr1-lut2048-p1-nr2recps-x12.c118 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12() local
122vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
130 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
134 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
Dneon-rr2-p5-nr2recps-x16.c126 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16() local
131vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
141 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
146 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16()
Dneonfma-rr1-lut2048-p1-nr2fma-x12.c118 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12() local
122vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
130 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
134 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
Dneonfma-rr1-lut2048-p1-nr1recps1fma-x12.c118 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12() local
122vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
130 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
134 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
Dneonfma-rr1-lut64-p2-nr2recps-x12.c123 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12() local
127vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12()
135 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12()
139 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x12()
Dwasmsimd-p5-div-x20.c126 v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20() local
132 vf89AB = wasm_v128_andnot(vf89AB, wasm_f32x4_gt(vz89AB, vdenorm_cutoff)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
138 vf89AB = wasm_v128_bitselect(vf89AB, wasm_f32x4_sub(vone, vf89AB), wasm_i32x4_shr(vx89AB, 31)); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
144 wasm_v128_store(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20()
Dneonfma-rr1-p5-div-x20.c118 float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() local
124vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
136 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
142 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
Dneonfma-rr1-p5-nr2fma-x16.c120 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16() local
125vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
135 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
140 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16()
Dsse2-p5-div-x16.c112 __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() local
117 vf89AB = _mm_andnot_ps(_mm_cmplt_ps(vz89AB, vdenorm_cutoff), vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
127 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
132 _mm_storeu_ps(y + 8, vf89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
Dneon-rr2-lut2048-p1-nr2recps-x12.c123 float32x4_t vf89AB = vmulq_f32(vy89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12() local
127vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
135 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
139 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
Dneonfma-rr1-p5-nr2recps-x16.c120 float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16() local
125vf89AB = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf89AB), vcagtq_f32(vx89AB, vdenorm… in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
135 vf89AB = vbslq_f32(vm89AB, vf89AB, vsubq_f32(vone, vf89AB)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()
140 vst1q_f32(y, vf89AB); y += 4; in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16()

123456