/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-x86-rr2-p6-x16.c | 64 v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() local 87 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 124 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 125 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 135 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
|
D | velu-wasmsimd-x86-rr2-p6-x20.c | 67 v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local 94 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 139 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 140 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 153 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x16.c | 106 v128_t vsCDEF = wasm_i32x4_add(vlCDEF, venCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() local 124 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 146 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 147 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 157 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16()
|
D | velu-neonfma-rr1-p6-x16.c | 65 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() local 104 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 105 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 115 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
|
D | velu-wasmsimd-x86-rr2-p6-x24.c | 70 v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() local 101 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 154 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 155 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 171 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x20.c | 118 v128_t vsCDEF = wasm_i32x4_add(vlCDEF, venCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() local 140 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 167 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 168 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 181 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20()
|
D | velu-neon-rr2-p6-x16.c | 66 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() local 110 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 111 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 121 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
|
D | velu-sse41-rr2-p6-x16.c | 64 __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local 112 vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 113 vsCDEF = _mm_sub_ps(vsCDEF, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 123 const __m128 veCDEF = _mm_mul_ps(_mm_add_ps(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
|
D | velu-wasmsimd-arm-rr2-p6-x16.c | 64 v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() local 112 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 113 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 123 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x24.c | 130 v128_t vsCDEF = wasm_i32x4_add(vlCDEF, venCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() local 156 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 188 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 189 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 205 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24()
|
D | velu-sse41-rr2-p6-x20.c | 67 __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() local 124 vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 125 vsCDEF = _mm_sub_ps(vsCDEF, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 138 const __m128 veCDEF = _mm_mul_ps(_mm_add_ps(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
|
D | velu-neonfma-rr1-p6-x20.c | 68 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() local 115 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 116 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 129 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
|
D | velu-sse2-rr2-p6-x16.c | 64 __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() local 112 vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 113 vsCDEF = _mm_sub_ps(vsCDEF, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 123 const __m128 veCDEF = _mm_mul_ps(_mm_add_ps(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
|
D | velu-neon-rr2-p6-x20.c | 69 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() local 122 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 123 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 136 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
|
D | velu-neonfma-rr1-p6-x24.c | 71 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() local 126 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 127 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 143 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
|
D | velu-wasmsimd-arm-rr2-p6-x20.c | 67 v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() local 124 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 125 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20() 138 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20()
|
D | velu-wasmsimd-arm-rr2-lut16-p3-x16.c | 106 v128_t vsCDEF = wasm_i32x4_add(vlCDEF, venCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16() local 134 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16() 135 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16() 145 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16()
|
D | velu-neon-rr2-p6-x24.c | 72 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() local 134 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 135 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 151 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
|
D | velu-sse2-rr2-p6-x20.c | 67 __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() local 124 vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 125 vsCDEF = _mm_sub_ps(vsCDEF, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20() 138 const __m128 veCDEF = _mm_mul_ps(_mm_add_ps(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x20()
|
D | velu-sse41-rr2-p6-x24.c | 70 __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() local 136 vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 137 vsCDEF = _mm_sub_ps(vsCDEF, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 153 const __m128 veCDEF = _mm_mul_ps(_mm_add_ps(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
|
D | velu-wasmsimd-arm-rr2-p6-x24.c | 70 v128_t vsCDEF = wasm_i32x4_shl(vnCDEF, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() local 136 vtCDEF = wasm_f32x4_mul(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 137 vsCDEF = wasm_f32x4_sub(vsCDEF, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24() 153 const v128_t veCDEF = wasm_f32x4_mul(wasm_f32x4_add(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24()
|
D | velu-neon-rr2-lut16-p3-x16.c | 105 float32x4_t vsCDEF = vreinterpretq_f32_s32(vaddq_s32(vlCDEF, venCDEF)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() local 133 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 134 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 144 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16()
|
D | velu-neonfma-rr1-lut16-p3-x16.c | 104 float32x4_t vsCDEF = vreinterpretq_f32_s32(vaddq_s32(vlCDEF, venCDEF)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() local 127 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 128 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 138 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16()
|
D | velu-sse2-rr2-p6-x24.c | 70 __m128 vsCDEF = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnCDEF), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() local 136 vtCDEF = _mm_mul_ps(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 137 vsCDEF = _mm_sub_ps(vsCDEF, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24() 153 const __m128 veCDEF = _mm_mul_ps(_mm_add_ps(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x24()
|
D | velu-neonfma-rr1-lut16-p3-x20.c | 116 float32x4_t vsCDEF = vreinterpretq_f32_s32(vaddq_s32(vlCDEF, venCDEF)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() local 144 vtCDEF = vmulq_f32(vtCDEF, vsCDEF); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 145 vsCDEF = vsubq_f32(vsCDEF, vone); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 158 const float32x4_t veCDEF = vmulq_f32(vaddq_f32(vpCDEF, vsCDEF), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20()
|