/external/XNNPACK/src/f32-sigmoid/gen/
D | neonfma-rr1-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x12():
     84  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
     88  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
     92  float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB);

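All entries in this listing are machine-generated variants of one recipe: ve89AB appears to reconstruct the exponential e for batch lanes 8..11 (hex lane indices, hence the 89AB suffix) from the scale factor vs89AB, the polynomial value vp89AB, and the reduced argument vt89AB, after which the sigmoid is the usual e / (e + 1), computed either by direct division (the *-div-* kernels) or via a Newton-Raphson reciprocal (the *-nr* kernels). The -xNN suffix is the number of f32 elements per main-loop iteration, so the x12/x16/x20/x24 files repeat the same three statements for each four-lane group. A minimal sketch of the NEON FMA division tail, assuming vs/vp/vt are already produced by the kernel's range reduction and polynomial evaluation; the helper function and its name are ours, not XNNPACK's:

    #include <arm_neon.h>

    /* Hypothetical helper distilling the matched lines of the neonfma
     * *-div-* kernels for one group of four lanes.  Range reduction and
     * polynomial evaluation (producing vs/vp/vt) are omitted. */
    static inline float32x4_t sigmoid_tail_neonfma_div(
        float32x4_t vs, float32x4_t vp, float32x4_t vt)
    {
      const float32x4_t vone = vdupq_n_f32(1.0f);
      const float32x4_t ve = vfmaq_f32(vs, vp, vt);  /* e = s + p*t (fused) */
      const float32x4_t vd = vaddq_f32(ve, vone);    /* d = e + 1           */
      return vdivq_f32(ve, vd);                      /* sigmoid = e / d     */
    }
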
D | sse41-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12():
     90  __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);  (local)
     94  __m128 vd89AB = _mm_add_ps(ve89AB, vone);
     98  __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB);

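The SSE kernels compute the same tail without fused multiply-add. A sketch under the same assumptions (hypothetical helper, vs/vp/vt precomputed):

    #include <xmmintrin.h>

    /* Same division tail as above for the sse2/sse41 *-div-* kernels:
     * e is an explicit multiply followed by an add. */
    static inline __m128 sigmoid_tail_sse_div(__m128 vs, __m128 vp, __m128 vt)
    {
      const __m128 vone = _mm_set1_ps(1.0f);
      const __m128 ve = _mm_add_ps(_mm_mul_ps(vt, vp), vs);  /* e = t*p + s */
      const __m128 vd = _mm_add_ps(ve, vone);                /* d = e + 1   */
      return _mm_div_ps(ve, vd);                             /* e / d       */
    }
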
D | wasmsimd-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12():
     90  const v128_t ve89AB = wasm_f32x4_add(vs89AB, wasm_f32x4_mul(vt89AB, vp89AB));  (local)
     94  const v128_t vd89AB = wasm_f32x4_add(ve89AB, vone);
     98  v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB);

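The WebAssembly SIMD kernels are a direct transliteration of the same three steps; sketch under the same assumptions:

    #include <wasm_simd128.h>

    /* WAsm SIMD version of the division tail. */
    static inline v128_t sigmoid_tail_wasmsimd_div(v128_t vs, v128_t vp,
                                                   v128_t vt)
    {
      const v128_t vone = wasm_f32x4_splat(1.0f);
      const v128_t ve = wasm_f32x4_add(vs, wasm_f32x4_mul(vt, vp));  /* e = s + t*p */
      const v128_t vd = wasm_f32x4_add(ve, vone);                    /* d = e + 1   */
      return wasm_f32x4_div(ve, vd);                                 /* e / d       */
    }
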
D | neon-rr2-p5-nr2recps-x12.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x12():
     89  const float32x4_t ve89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB);  (local)
     93  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    109  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

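Two things differ in the nr2recps entries. First, the plain-NEON kernel builds e with vmlaq_f32 (separate multiply and add) where the neonfma kernels use vfmaq_f32. Second, the final vf is a multiply by vr89AB rather than a division: the gap between the vd and vf matches (lines 93 to 109 here) is consistent with a reciprocal estimate of vd being refined in between with two Newton-Raphson steps, which is what the nr2recps name suggests. A sketch of that sequence, again as a hypothetical helper:

    #include <arm_neon.h>

    /* Division-free tail of the *-nr2recps-* kernels.  vrecpsq_f32(r, d)
     * returns 2 - r*d, so r * vrecpsq_f32(r, d) is one Newton-Raphson
     * refinement of the reciprocal estimate r ~= 1/d. */
    static inline float32x4_t sigmoid_tail_neon_nr2recps(float32x4_t ve,
                                                         float32x4_t vd)
    {
      float32x4_t vr = vrecpeq_f32(vd);          /* coarse estimate of 1/d */
      vr = vmulq_f32(vr, vrecpsq_f32(vr, vd));   /* first NR step          */
      vr = vmulq_f32(vr, vrecpsq_f32(vr, vd));   /* second NR step         */
      return vmulq_f32(ve, vr);                  /* sigmoid = e * (1/d)    */
    }
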
D | neonfma-rr1-p5-nr2recps-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x12():
     84  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
     88  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    104  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

D | neonfma-rr1-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16():
     95  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    100  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    105  float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB);

D | neonfma-rr1-p5-nr2fma-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x12():
     84  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
     88  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    104  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

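The *-nr2fma-* kernels express each Newton-Raphson step with fused operations instead of vrecps: mathematically the same update, r' = r * (2 - d*r), written as r += r * (1 - d*r). A sketch under the same assumptions:

    #include <arm_neon.h>

    /* FMA-based reciprocal refinement, as suggested by the nr2fma name.
     * vfmsq_f32(a, b, c) returns a - b*c. */
    static inline float32x4_t sigmoid_tail_neonfma_nr2fma(float32x4_t ve,
                                                          float32x4_t vd)
    {
      const float32x4_t vone = vdupq_n_f32(1.0f);
      float32x4_t vr = vrecpeq_f32(vd);                 /* estimate of 1/d   */
      vr = vfmaq_f32(vr, vr, vfmsq_f32(vone, vr, vd));  /* r += r*(1 - r*d)  */
      vr = vfmaq_f32(vr, vr, vfmsq_f32(vone, vr, vd));  /* second refinement */
      return vmulq_f32(ve, vr);                         /* e * (1/d)         */
    }
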
D | wasmsimd-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16():
    102  const v128_t ve89AB = wasm_f32x4_add(vs89AB, wasm_f32x4_mul(vt89AB, vp89AB));  (local)
    107  const v128_t vd89AB = wasm_f32x4_add(ve89AB, vone);
    112  v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB);

D | neonfma-rr1-p5-nr1recps1fma-x12.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x12():
     84  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
     88  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    104  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

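Going by the name, the *-nr1recps1fma-* kernels mix the two refinement styles: one vrecps step and one FMA step. A sketch; the ordering of the two steps is our assumption from the kernel name:

    #include <arm_neon.h>

    /* Hybrid reciprocal refinement: one vrecps-based Newton-Raphson step
     * followed by one FMA-based step. */
    static inline float32x4_t sigmoid_tail_neonfma_nr1recps1fma(
        float32x4_t ve, float32x4_t vd)
    {
      const float32x4_t vone = vdupq_n_f32(1.0f);
      float32x4_t vr = vrecpeq_f32(vd);                 /* estimate of 1/d  */
      vr = vmulq_f32(vr, vrecpsq_f32(vr, vd));          /* NR step, vrecps  */
      vr = vfmaq_f32(vr, vr, vfmsq_f32(vone, vr, vd));  /* NR step, FMA     */
      return vmulq_f32(ve, vr);                         /* e * (1/d)        */
    }
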
D | sse2-p5-div-x12.c | in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12():
     90  __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);  (local)
     94  __m128 vd89AB = _mm_add_ps(ve89AB, vone);
     98  __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB);

D | sse41-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16():
    102  __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);  (local)
    107  __m128 vd89AB = _mm_add_ps(ve89AB, vone);
    112  __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB);

D | neon-rr2-p5-nr2recps-x16.c | in xnn_f32_sigmoid_ukernel__neon_rr2_p5_nr2recps_x16():
    101  const float32x4_t ve89AB = vmlaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    106  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    126  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

D | wasmsimd-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20():
    114  const v128_t ve89AB = wasm_f32x4_add(vs89AB, wasm_f32x4_mul(vt89AB, vp89AB));  (local)
    120  const v128_t vd89AB = wasm_f32x4_add(ve89AB, vone);
    126  v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB);

D | neonfma-rr1-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20():
    106  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    112  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    118  float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB);

D | neonfma-rr1-p5-nr2fma-x16.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2fma_x16():
     95  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    100  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    120  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

D | sse2-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16():
    102  __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);  (local)
    107  __m128 vd89AB = _mm_add_ps(ve89AB, vone);
    112  __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB);

D | neonfma-rr1-p5-nr2recps-x16.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x16():
     95  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    100  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    120  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

D | sse41-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20():
    114  __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);  (local)
    120  __m128 vd89AB = _mm_add_ps(ve89AB, vone);
    126  __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB);

D | neonfma-rr1-p5-nr1recps1fma-x16.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x16():
     95  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    100  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    120  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

D | sse2-p5-div-x20.c | in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20():
    114  __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);  (local)
    120  __m128 vd89AB = _mm_add_ps(ve89AB, vone);
    126  __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB);

D | neonfma-rr1-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24():
    117  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    124  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    131  float32x4_t vf89AB = vdivq_f32(ve89AB, vd89AB);

D | wasmsimd-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24():
    126  const v128_t ve89AB = wasm_f32x4_add(vs89AB, wasm_f32x4_mul(vt89AB, vp89AB));  (local)
    133  const v128_t vd89AB = wasm_f32x4_add(ve89AB, vone);
    140  v128_t vf89AB = wasm_f32x4_div(ve89AB, vd89AB);

D | neonfma-rr1-p5-nr1recps1fma-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20():
    106  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    112  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    136  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

D | neonfma-rr1-p5-nr2recps-x20.c | in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20():
    106  const float32x4_t ve89AB = vfmaq_f32(vs89AB, vp89AB, vt89AB);  (local)
    112  const float32x4_t vd89AB = vaddq_f32(ve89AB, vone);
    136  float32x4_t vf89AB = vmulq_f32(ve89AB, vr89AB);

D | sse41-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24():
    126  __m128 ve89AB = _mm_add_ps(_mm_mul_ps(vt89AB, vp89AB), vs89AB);  (local)
    133  __m128 vd89AB = _mm_add_ps(ve89AB, vone);
    140  __m128 vf89AB = _mm_div_ps(ve89AB, vd89AB);