/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-x86-rr2-p6-x12.c | 60 v128_t vs89AB = wasm_i32x4_shl(vn89AB, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() local 78 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 107 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 108 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 116 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12()
|
D | velu-wasmsimd-x86-rr2-p6-x16.c | 63 v128_t vs89AB = wasm_i32x4_shl(vn89AB, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() local 85 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 122 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 123 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 134 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x12.c | 92 v128_t vs89AB = wasm_i32x4_add(vl89AB, ven89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() local 106 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 123 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 124 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 132 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12()
|
D | velu-wasmsimd-x86-rr2-p6-x20.c | 66 v128_t vs89AB = wasm_i32x4_shl(vn89AB, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local 92 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 137 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 138 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 152 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x16.c | 104 v128_t vs89AB = wasm_i32x4_add(vl89AB, ven89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() local 122 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 144 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 145 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 156 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16()
|
D | velu-neon-rr2-p6-x12.c | 61 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() local 96 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 97 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 105 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x12()
|
D | velu-neonfma-rr1-p6-x12.c | 60 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() local 91 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 92 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 100 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12()
|
D | velu-wasmsimd-arm-rr2-p6-x12.c | 60 v128_t vs89AB = wasm_i32x4_shl(vn89AB, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() local 98 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 99 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 107 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12()
|
D | velu-sse41-rr2-p6-x12.c | 60 __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() local 98 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 99 vs89AB = _mm_sub_ps(vs89AB, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 107 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12()
|
D | velu-sse2-rr2-p6-x12.c | 60 __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() local 98 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 99 vs89AB = _mm_sub_ps(vs89AB, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 107 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12()
|
D | velu-neonfma-rr1-p6-x16.c | 63 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() local 102 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 103 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 114 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
|
D | velu-wasmsimd-x86-rr2-p6-x24.c | 69 v128_t vs89AB = wasm_i32x4_shl(vn89AB, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() local 99 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 152 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 153 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 170 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x20.c | 116 v128_t vs89AB = wasm_i32x4_add(vl89AB, ven89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() local 138 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 165 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 166 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 180 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20()
|
D | velu-neon-rr2-p6-x16.c | 64 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() local 108 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 109 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 120 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
|
D | velu-sse41-rr2-p6-x16.c | 63 __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local 110 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 111 vs89AB = _mm_sub_ps(vs89AB, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 122 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
|
D | velu-wasmsimd-arm-rr2-lut16-p3-x12.c | 92 v128_t vs89AB = wasm_i32x4_add(vl89AB, ven89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12() local 114 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12() 115 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12() 123 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12()
|
D | velu-wasmsimd-arm-rr2-p6-x16.c | 63 v128_t vs89AB = wasm_i32x4_shl(vn89AB, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() local 110 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 111 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16() 122 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x24.c | 128 v128_t vs89AB = wasm_i32x4_add(vl89AB, ven89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() local 154 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 186 vt89AB = wasm_f32x4_mul(vt89AB, vs89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 187 vs89AB = wasm_f32x4_sub(vs89AB, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 204 const v128_t ve89AB = wasm_f32x4_mul(wasm_f32x4_add(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24()
|
D | velu-sse41-rr2-p6-x20.c | 66 __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() local 122 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 123 vs89AB = _mm_sub_ps(vs89AB, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 137 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
|
D | velu-neonfma-rr1-p6-x20.c | 66 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() local 113 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 114 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 128 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
|
D | velu-neonfma-rr1-lut16-p3-x12.c | 90 float32x4_t vs89AB = vreinterpretq_f32_s32(vaddq_s32(vl89AB, ven89AB)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() local 108 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 109 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 117 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12()
|
D | velu-sse2-rr2-p6-x16.c | 63 __m128 vs89AB = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn89AB), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() local 110 vt89AB = _mm_mul_ps(vt89AB, vs89AB); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 111 vs89AB = _mm_sub_ps(vs89AB, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16() 122 const __m128 ve89AB = _mm_mul_ps(_mm_add_ps(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x16()
|
D | velu-neon-rr2-p6-x20.c | 67 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() local 120 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 121 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x20() 135 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x20()
|
D | velu-neon-rr2-lut16-p3-x12.c | 91 float32x4_t vs89AB = vreinterpretq_f32_s32(vaddq_s32(vl89AB, ven89AB)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() local 113 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 114 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 122 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12()
|
D | velu-neonfma-rr1-p6-x24.c | 69 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() local 124 vt89AB = vmulq_f32(vt89AB, vs89AB); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 125 vs89AB = vsubq_f32(vs89AB, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 142 const float32x4_t ve89AB = vmulq_f32(vaddq_f32(vp89AB, vs89AB), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
|