/external/XNNPACK/src/f32-vsigmoid/gen/

D | vsigmoid-wasmsimd-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x20():
    116  const v128_t veGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ));  (local)
    122  const v128_t vdGHIJ = wasm_f32x4_add(veGHIJ, vone);
    128  v128_t vfGHIJ = wasm_f32x4_div(veGHIJ, vdGHIJ);

D | vsigmoid-neonfma-rr1-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x20():
    108  const float32x4_t veGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    114  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    120  float32x4_t vfGHIJ = vdivq_f32(veGHIJ, vdGHIJ);

D | vsigmoid-sse41-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x20():
    116  __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);  (local)
    122  __m128 vdGHIJ = _mm_add_ps(veGHIJ, vone);
    128  __m128 vfGHIJ = _mm_div_ps(veGHIJ, vdGHIJ);

D | vsigmoid-neonfma-rr1-p5-div-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_div_x24():
    119  const float32x4_t veGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    126  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    133  float32x4_t vfGHIJ = vdivq_f32(veGHIJ, vdGHIJ);

D | vsigmoid-neonfma-rr1-p5-nr2fma-x20.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x20():
    108  const float32x4_t veGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    114  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    138  float32x4_t vfGHIJ = vmulq_f32(veGHIJ, vrGHIJ);

D | vsigmoid-sse2-rr2-p5-div-x20.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x20():
    116  __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);  (local)
    122  __m128 vdGHIJ = _mm_add_ps(veGHIJ, vone);
    128  __m128 vfGHIJ = _mm_div_ps(veGHIJ, vdGHIJ);

D | vsigmoid-sse41-rr2-p5-div-x24.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_p5_div_x24():
    128  __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);  (local)
    135  __m128 vdGHIJ = _mm_add_ps(veGHIJ, vone);
    142  __m128 vfGHIJ = _mm_div_ps(veGHIJ, vdGHIJ);

D | vsigmoid-neonfma-rr1-p5-nr1recps1fma-x20.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x20():
    108  const float32x4_t veGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    114  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    138  float32x4_t vfGHIJ = vmulq_f32(veGHIJ, vrGHIJ);

D | vsigmoid-neon-rr2-p5-nr2recps-x20.c | in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x20():
    115  const float32x4_t veGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    121  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    145  float32x4_t vfGHIJ = vmulq_f32(veGHIJ, vrGHIJ);

D | vsigmoid-neonfma-rr1-p5-nr2recps-x20.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x20():
    108  const float32x4_t veGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    114  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    138  float32x4_t vfGHIJ = vmulq_f32(veGHIJ, vrGHIJ);

D | vsigmoid-wasmsimd-rr2-p5-div-x24.c | in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_p5_div_x24():
    128  const v128_t veGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ));  (local)
    135  const v128_t vdGHIJ = wasm_f32x4_add(veGHIJ, vone);
    142  v128_t vfGHIJ = wasm_f32x4_div(veGHIJ, vdGHIJ);

D | vsigmoid-neonfma-rr1-p5-nr2recps-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2recps_x24():
    119  const float32x4_t veGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    126  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    154  float32x4_t vfGHIJ = vmulq_f32(veGHIJ, vrGHIJ);

D | vsigmoid-neon-rr2-p5-nr2recps-x24.c | in xnn_f32_vsigmoid_ukernel__neon_rr2_p5_nr2recps_x24():
    127  const float32x4_t veGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    134  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    162  float32x4_t vfGHIJ = vmulq_f32(veGHIJ, vrGHIJ);

D | vsigmoid-neonfma-rr1-p5-nr1recps1fma-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr1recps1fma_x24():
    119  const float32x4_t veGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    126  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    154  float32x4_t vfGHIJ = vmulq_f32(veGHIJ, vrGHIJ);

D | vsigmoid-sse2-rr2-p5-div-x24.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_p5_div_x24():
    128  __m128 veGHIJ = _mm_add_ps(_mm_mul_ps(vtGHIJ, vpGHIJ), vsGHIJ);  (local)
    135  __m128 vdGHIJ = _mm_add_ps(veGHIJ, vone);
    142  __m128 vfGHIJ = _mm_div_ps(veGHIJ, vdGHIJ);

D | vsigmoid-neonfma-rr1-p5-nr2fma-x24.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_p5_nr2fma_x24():
    119  const float32x4_t veGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);  (local)
    126  const float32x4_t vdGHIJ = vaddq_f32(veGHIJ, vone);
    154  float32x4_t vfGHIJ = vmulq_f32(veGHIJ, vrGHIJ);

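All of the vsigmoid hits above are the same three-step tail of the p5 kernels: veGHIJ reconstructs e ~= exp(-|x|) from the scale vs, the reduced argument vt, and the polynomial vp (e = s + t*p); vdGHIJ forms the denominator d = e + 1; and vfGHIJ produces sigmoid(x) either as e/d (the -div- variants) or as e times an approximate reciprocal of d (the -nr*- variants). A minimal scalar sketch of the -div- path, with libm's expf() standing in for the rr2/p5 exp reconstruction (sigmoid_div_sketch is a hypothetical name, not an XNNPACK function):

    #include <math.h>
    #include <stdio.h>

    /* Scalar model of the -div- kernels above: once e ~= exp(-|x|) has been
       reconstructed (veGHIJ = vs + vt*vp in the SIMD code), sigmoid(x) is
       e / (e + 1), reflected for positive inputs. */
    static float sigmoid_div_sketch(float x) {
      const float z = fabsf(x);        /* work on the non-positive half-line */
      const float e = expf(-z);        /* stands in for veGHIJ = vs + vt*vp */
      const float d = e + 1.0f;        /* vdGHIJ = veGHIJ + vone */
      const float f = e / d;           /* vfGHIJ = veGHIJ / vdGHIJ */
      return x > 0.0f ? 1.0f - f : f;  /* sigmoid(x) = 1 - sigmoid(-x) */
    }

    int main(void) {
      for (int i = -2; i <= 2; i++) {
        printf("sigmoid(%+d) ~= %.6f\n", i, sigmoid_div_sketch((float) i));
      }
      return 0;
    }
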
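The -nr2recps-, -nr2fma-, and -nr1recps1fma- variants replace the divide with a Newton-Raphson reciprocal: a rough seed r ~= 1/d from vrecpeq_f32 is refined twice before the final veGHIJ * vrGHIJ multiply shown above. A sketch of the refinement step, assuming the standard identity r' = r * (2 - d*r); on NEON, vrecpsq_f32(d, r) computes exactly 2 - d*r, and the fma variants fuse the same update (nr_step is a hypothetical name):

    #include <stdio.h>

    /* One Newton-Raphson step for 1/d: r' = r * (2 - d*r).  The error is
       squared each step, so two steps from a rough seed are enough for
       float precision. */
    static float nr_step(float d, float r) {
      return r * (2.0f - d * r);
    }

    int main(void) {
      const float d = 1.7f;  /* vdGHIJ = veGHIJ + vone always lies in (1, 2] */
      float r = 0.5f;        /* crude seed standing in for vrecpeq_f32(d) */
      r = nr_step(d, r);     /* first refinement */
      r = nr_step(d, r);     /* second refinement (the "2" in nr2recps/nr2fma) */
      printf("1/%.1f ~= %.7f (exact: %.7f)\n", d, r, 1.0f / d);
      return 0;
    }
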
/external/XNNPACK/src/f32-velu/gen/

D | velu-sse41-rr2-p6-x20.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x20():
    139  const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha);  (local)
    151  const __m128 vyGHIJ = _mm_blendv_ps(vxGHIJ, veGHIJ, vxGHIJ);

D | velu-neonfma-rr1-p6-x20.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20():
    130  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);  (local)
    147  const float32x4_t vyGHIJ = vbslq_f32(vmGHIJ, veGHIJ, vxGHIJ);

D | velu-neon-rr2-p6-x20.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x20():
    137  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);  (local)
    154  const float32x4_t vyGHIJ = vbslq_f32(vmGHIJ, veGHIJ, vxGHIJ);

D | velu-neonfma-rr1-p6-x24.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24():
    144  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);  (local)
    164  const float32x4_t vyGHIJ = vbslq_f32(vmGHIJ, veGHIJ, vxGHIJ);

D | velu-wasmsimd-arm-rr2-p6-x20.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20():
    139  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);  (local)
    156  const v128_t vyGHIJ = wasm_v128_bitselect(veGHIJ, vxGHIJ, vsignmGHIJ);

D | velu-neon-rr2-p6-x24.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x24():
    152  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);  (local)
    172  const float32x4_t vyGHIJ = vbslq_f32(vmGHIJ, veGHIJ, vxGHIJ);

D | velu-wasmsimd-x86-rr2-p6-x20.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20():
    154  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);  (local)
    171  const v128_t vyGHIJ = wasm_v128_bitselect(veGHIJ, vxGHIJ, vsignmGHIJ);

D | velu-sse2-rr2-p6-x20.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x20():
    139  const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha);  (local)
    156  const __m128 vyGHIJ = _mm_or_ps(_mm_and_ps(veGHIJ, vmGHIJ), _mm_andnot_ps(vmGHIJ, vxGHIJ));

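The velu hits share one shape: veGHIJ = (vp + vs) * alpha reconstructs alpha * (exp(x) - 1) for the negative lanes, and the second matched line picks veGHIJ or the input per lane sign, via blendv (SSE4.1), vbsl (NEON), bitselect (wasmsimd), or the and/andnot/or sequence on SSE2. A scalar sketch with libm's expm1f() standing in for the rr2/p6 reconstruction, ignoring XNNPACK's prescale/beta parameters (elu_sketch is a hypothetical name):

    #include <math.h>
    #include <stdio.h>

    /* Scalar model of the velu tail above: e = alpha * (exp(x) - 1) for
       negative inputs (veGHIJ = (vpGHIJ + vsGHIJ) * valpha in the SIMD
       code), identity otherwise (the blendv/vbsl/bitselect line). */
    static float elu_sketch(float x, float alpha) {
      const float e = alpha * expm1f(x);  /* expm1f stands in for vp + vs */
      return x < 0.0f ? e : x;            /* per-lane select on sign(x) */
    }

    int main(void) {
      for (int i = -2; i <= 2; i++) {
        printf("elu(%+d) ~= %.6f\n", i, elu_sketch((float) i, 1.0f));
      }
      return 0;
    }
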
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | neon-rr2-lut64-p2-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x20():
     63  …const int32x4_t veGHIJ = vshlq_n_s32(vbicq_s32(vreinterpretq_s32_f32(vnGHIJ), vmovq_n_s32(INT32_C(…  (local)
    112  …const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vaddq_s32(vreinterpretq_s32_f32(vlGHIJ), veGHIJ));

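Here veGHIJ is not the exponential itself but its exponent bits: vn carries the fixed-point n, its low mantissa bits index the 64-entry 2^(j/64) table (yielding vlGHIJ), and the remaining bits, masked off by the vbicq_s32 and shifted into the IEEE-754 exponent field, are added to vlGHIJ's bits to apply the outstanding 2^k scale. For a 64-entry table that shift would be 23 - 6 = 17, but both constants are cut off in the hit above, so treat them as assumptions. A scalar sketch of the final bit-splice (scale_by_pow2 is a hypothetical name):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Scale a normal float l by 2^k without a multiply by adding k to its
       IEEE-754 exponent field; the vaddq_s32 on the reinterpreted bits of
       vlGHIJ and veGHIJ above does the same splice in one SIMD add. */
    static float scale_by_pow2(float l, int k) {
      uint32_t bits;
      memcpy(&bits, &l, sizeof bits);  /* reinterpret, like vreinterpretq */
      bits += (uint32_t) k << 23;      /* k lands in the exponent field */
      float s;
      memcpy(&s, &bits, sizeof s);
      return s;                        /* s == l * 2^k while s stays normal */
    }

    int main(void) {
      const float l = exp2f(5.0f / 64.0f);  /* one 2^(j/64) table entry */
      printf("%.7f vs %.7f\n", scale_by_pow2(l, 3), l * 8.0f);
      return 0;
    }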