/external/XNNPACK/src/f32-velu/gen/
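All of the entries below track the same variable through the same algebra: vs0123 holds the scale s = 2^n from the exp(x) = s * exp(t) decomposition. Each kernel folds it into the reduced argument (vt = vt * vs), turns it into s - 1 (vs = vs - vone), and combines it with the polynomial term to produce alpha * (exp(x) - 1) = alpha * (s * (exp(t) - 1) + (s - 1)). A minimal scalar sketch of that shared pattern, assuming Taylor coefficients rather than XNNPACK's tuned minimax set:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Scalar sketch of the vs/vt/vp pattern shared by the kernels below.
     * Valid on the non-saturating negative range (roughly -87 < x <= 0). */
    static float elu_negative_sketch(float x, float alpha) {
      const float log2e = 0x1.715476p+0f;   /* log2(e) */
      const float ln2   = 0x1.62E430p-1f;   /* single-constant (rr1-style) reduction */
      const float magic = 12583039.0f;      /* 1.5*2^23 + 127: pre-biases the exponent */

      float vn = x * log2e + magic;         /* n = round(x * log2(e)), stored as magic + n */

      /* Reconstruct s = 2^n by shifting the biased n into the float exponent
       * field; this is what wasm_i32x4_shl(vn0123, 23), vshlq_n_s32(..., 23),
       * and _mm_slli_epi32(..., 23) do on the vector lanes. */
      uint32_t bits;
      memcpy(&bits, &vn, sizeof bits);
      bits <<= 23;
      float vs;
      memcpy(&vs, &bits, sizeof vs);

      vn -= magic;                          /* recover n */
      float vt = x - vn * ln2;              /* reduced argument: exp(x) = s * exp(t) */

      /* p(t) ~ (exp(t) - 1)/t - 1, degree-5 Horner evaluation. */
      float vp = ((((vt * (1.0f/720) + 1.0f/120) * vt + 1.0f/24) * vt
                   + 1.0f/6) * vt + 0.5f) * vt;

      vt *= vs;                             /* listed line: vt = mul(vt, vs)        */
      vs -= 1.0f;                           /* listed line: vs = sub(vs, vone)      */
      vp = vp * vt + vt;                    /* s * (exp(t) - 1)                     */
      return (vp + vs) * alpha;             /* listed line: ve = mul(vp + vs, alpha) */
    }

The magic-bias constant bakes the IEEE-754 exponent bias into n, which is why a plain left shift by 23 lands a correctly biased exponent.
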
D | velu-wasmsimd-x86-rr2-p6-x8.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8():
     56  v128_t vs0123 = wasm_i32x4_shl(vn0123, 23);  (local)
     68  vs0123 = wasm_v128_andnot(vs0123, vsatm0123);
     89  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
     90  vs0123 = wasm_f32x4_sub(vs0123, vone);
     97  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

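One step in these listings is unique to the x86-flavored WAsm SIMD kernels: a saturation mask vsatm0123, computed earlier in the kernel for lanes at or below the saturation cutoff, is cleared out of the reconstructed scale with andnot so those lanes end up with a zero scale instead of garbage exponent bits. wasm_v128_andnot(a, b) computes a & ~b lane-wise; a scalar restatement of that one step, with hypothetical mask values:

    #include <stdint.h>

    /* One lane of vs0123 = wasm_v128_andnot(vs0123, vsatm0123):
     * keep the scale where the mask is 0, zero it where the mask is all-ones. */
    static uint32_t mask_saturated_lane(uint32_t vs_bits, uint32_t vsatm_bits) {
      return vs_bits & ~vsatm_bits;   /* vsatm_bits == 0xFFFFFFFF on saturated lanes */
    }

The ARM-flavored WAsm SIMD files in this listing have no such line; they appear to handle saturation by clamping the input up front instead.
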
D | velu-wasmsimd-x86-rr2-lut16-p3-x8.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8():
     77  v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123);  (local)
     87  vs0123 = wasm_v128_andnot(vs0123, vsatm0123);
     99  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
    100  vs0123 = wasm_f32x4_sub(vs0123, vone);
    107  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

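The lut16-p3 files reconstruct vs0123 by integer addition instead of a shift: n is computed with 1/16-step granularity, its low four bits select a table entry vl0123 (the lut16 kernels reference XNNPACK's xnn_table_exp2minus_k_over_16), and the remaining bits, already positioned as an exponent adjustment, arrive as ven0123. Because a table entry is a valid float bit pattern and the add only perturbs its exponent field, vl + ven assembles the scale directly. A scalar sketch, simplified to positive exponents (2^(j/16)) to keep the sign convention out of the way:

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Bit pattern of 2^(j/16), standing in for one table entry (the real
     * table is precomputed, with the exponent sign matching the kernel). */
    static uint32_t exp2_j_over_16_bits(int j) {
      float v = exp2f((float)j / 16.0f);
      uint32_t bits;
      memcpy(&bits, &v, sizeof bits);
      return bits;
    }

    /* Mirrors wasm_i32x4_add(vl0123, ven0123) / vaddq_s32(vl0123, ven0123):
     * integer-add the exponent adjustment k << 23 onto the table entry. */
    static float scale_from_lut16(int k, int j) {
      uint32_t vs_bits = exp2_j_over_16_bits(j) + ((uint32_t)k << 23);
      float vs;
      memcpy(&vs, &vs_bits, sizeof vs);
      return vs;   /* == 2^(k + j/16) up to table rounding */
    }
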
D | velu-wasmsimd-x86-rr2-p6-x12.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12():
     59  v128_t vs0123 = wasm_i32x4_shl(vn0123, 23);  (local)
     75  vs0123 = wasm_v128_andnot(vs0123, vsatm0123);
    104  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
    105  vs0123 = wasm_f32x4_sub(vs0123, vone);
    115  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-neonfma-rr1-p6-x8.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8():
     54  float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));  (local)
     77  vt0123 = vmulq_f32(vt0123, vs0123);
     78  vs0123 = vsubq_f32(vs0123, vone);
     85  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);

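The rr1/rr2 tag in these file names describes the argument reduction feeding vt0123, not the scale: the rr1 kernels (the neonfma ones here) subtract n * ln2 with a single constant and a fused multiply-add, while the rr2 kernels split ln2 into a high part whose product is exact in float plus a low correction, Cody-Waite style, since they cannot rely on FMA. A sketch of the two-step version; the split constants follow the customary float decomposition of ln2 and should be treated as illustrative:

    /* Two-step (rr2) reduction: t = x - n*ln2 applied as two corrections so
     * the product n * ln2_hi rounds exactly even without FMA. */
    static float reduce_rr2(float x, float vn) {
      const float minus_ln2_hi = -0x1.62E400p-1f;   /* leading bits of ln2 */
      const float minus_ln2_lo = -0x1.7F7D1Cp-20f;  /* remainder */
      float vt = vn * minus_ln2_hi + x;
      vt = vn * minus_ln2_lo + vt;
      return vt;
    }
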
D | velu-neon-rr2-p6-x8.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x8():
     55  float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));  (local)
     81  vt0123 = vmulq_f32(vt0123, vs0123);
     82  vs0123 = vsubq_f32(vs0123, vone);
     89  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);

D | velu-wasmsimd-x86-rr2-lut16-p3-x12.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12():
     89  v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123);  (local)
    103  vs0123 = wasm_v128_andnot(vs0123, vsatm0123);
    120  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
    121  vs0123 = wasm_f32x4_sub(vs0123, vone);
    131  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-wasmsimd-x86-rr2-p6-x16.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16():
     62  v128_t vs0123 = wasm_i32x4_shl(vn0123, 23);  (local)
     82  vs0123 = wasm_v128_andnot(vs0123, vsatm0123);
    119  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
    120  vs0123 = wasm_f32x4_sub(vs0123, vone);
    133  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-sse41-rr2-p6-x8.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x8():
     56  __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));  (local)
     83  vt0123 = _mm_mul_ps(vt0123, vs0123);
     84  vs0123 = _mm_sub_ps(vs0123, vone);
     91  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);

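The SSE2/SSE4.1 entries spell the same exponent reconstruction through casts: the magic-biased n is reinterpreted as 32-bit integers, shifted left by 23, and reinterpreted back. The casts are free at run time; only the shift becomes an instruction. The listed line, restated as a self-contained helper:

    #include <emmintrin.h>   /* SSE2 */

    /* vs = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn), 23)) */
    static __m128 scale_from_biased_n(__m128 vn) {
      return _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn), 23));
    }
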
D | velu-wasmsimd-arm-rr2-p6-x8.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8():
     56  v128_t vs0123 = wasm_i32x4_shl(vn0123, 23);  (local)
     83  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
     84  vs0123 = wasm_f32x4_sub(vs0123, vone);
     91  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-neonfma-rr1-p6-x12.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12():
     57  float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));  (local)
     88  vt0123 = vmulq_f32(vt0123, vs0123);
     89  vs0123 = vsubq_f32(vs0123, vone);
     99  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);

D | velu-sse2-rr2-p6-x8.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x8():
     56  __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));  (local)
     83  vt0123 = _mm_mul_ps(vt0123, vs0123);
     84  vs0123 = _mm_sub_ps(vs0123, vone);
     91  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);

D | velu-wasmsimd-x86-rr2-p6-x20.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20():
     65  v128_t vs0123 = wasm_i32x4_shl(vn0123, 23);  (local)
     89  vs0123 = wasm_v128_andnot(vs0123, vsatm0123);
    134  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
    135  vs0123 = wasm_f32x4_sub(vs0123, vone);
    151  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-wasmsimd-x86-rr2-lut16-p3-x16.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16():
    101  v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123);  (local)
    119  vs0123 = wasm_v128_andnot(vs0123, vsatm0123);
    141  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
    142  vs0123 = wasm_f32x4_sub(vs0123, vone);
    155  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-neon-rr2-p6-x12.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x12():
     58  float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));  (local)
     93  vt0123 = vmulq_f32(vt0123, vs0123);
     94  vs0123 = vsubq_f32(vs0123, vone);
    104  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);

D | velu-wasmsimd-arm-rr2-lut16-p3-x8.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8():
     77  v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123);  (local)
     93  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
     94  vs0123 = wasm_f32x4_sub(vs0123, vone);
    101  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-wasmsimd-arm-rr2-p6-x12.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12():
     59  v128_t vs0123 = wasm_i32x4_shl(vn0123, 23);  (local)
     95  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
     96  vs0123 = wasm_f32x4_sub(vs0123, vone);
    106  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-sse2-rr2-p6-x12.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x12():
     59  __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));  (local)
     95  vt0123 = _mm_mul_ps(vt0123, vs0123);
     96  vs0123 = _mm_sub_ps(vs0123, vone);
    106  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);

D | velu-sse41-rr2-p6-x12.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x12():
     59  __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));  (local)
     95  vt0123 = _mm_mul_ps(vt0123, vs0123);
     96  vs0123 = _mm_sub_ps(vs0123, vone);
    106  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);

D | velu-neonfma-rr1-lut16-p3-x8.c | in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8():
     75  float32x4_t vs0123 = vreinterpretq_f32_s32(vaddq_s32(vl0123, ven0123));  (local)
     88  vt0123 = vmulq_f32(vt0123, vs0123);
     89  vs0123 = vsubq_f32(vs0123, vone);
     96  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);

D | velu-wasmsimd-x86-rr2-p6-x24.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24():
     68  v128_t vs0123 = wasm_i32x4_shl(vn0123, 23);  (local)
     96  vs0123 = wasm_v128_andnot(vs0123, vsatm0123);
    149  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
    150  vs0123 = wasm_f32x4_sub(vs0123, vone);
    169  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-neonfma-rr1-p6-x16.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16():
     60  float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));  (local)
     99  vt0123 = vmulq_f32(vt0123, vs0123);
    100  vs0123 = vsubq_f32(vs0123, vone);
    113  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);

D | velu-wasmsimd-arm-rr2-p6-x16.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16():
     62  v128_t vs0123 = wasm_i32x4_shl(vn0123, 23);  (local)
    107  vt0123 = wasm_f32x4_mul(vt0123, vs0123);
    108  vs0123 = wasm_f32x4_sub(vs0123, vone);
    121  const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha);

D | velu-sse41-rr2-p6-x16.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x16():
     62  __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23));  (local)
    107  vt0123 = _mm_mul_ps(vt0123, vs0123);
    108  vs0123 = _mm_sub_ps(vs0123, vone);
    121  const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha);

D | velu-neon-rr2-lut16-p3-x8.c | in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8():
     76  float32x4_t vs0123 = vreinterpretq_f32_s32(vaddq_s32(vl0123, ven0123));  (local)
     92  vt0123 = vmulq_f32(vt0123, vs0123);
     93  vs0123 = vsubq_f32(vs0123, vone);
    100  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);

D | velu-neon-rr2-p6-x16.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x16():
     61  float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23));  (local)
    105  vt0123 = vmulq_f32(vt0123, vs0123);
    106  vs0123 = vsubq_f32(vs0123, vone);
    119  const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha);
