/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-x86-rr2-p6-x8.c | 55 v128_t vs0123 = wasm_i32x4_shl(vn0123, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() local 67 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 88 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 89 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 96 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8()
|
D | velu-wasmsimd-x86-rr2-p6-x12.c | 58 v128_t vs0123 = wasm_i32x4_shl(vn0123, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() local 74 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 103 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 104 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 114 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x8.c | 76 v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() local 86 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 98 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 99 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 106 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8()
|
D | velu-neonfma-rr1-p6-x8.c | 53 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() local 76 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 77 vs0123 = vsubq_f32(vs0123, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 84 const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x12.c | 88 v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() local 102 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 119 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 120 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 130 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12()
|
D | velu-wasmsimd-x86-rr2-p6-x16.c | 61 v128_t vs0123 = wasm_i32x4_shl(vn0123, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() local 81 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 118 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 119 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 132 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
|
D | velu-neon-rr2-p6-x8.c | 54 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() local 80 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 81 vs0123 = vsubq_f32(vs0123, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 88 const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x8()
|
D | velu-sse2-rr2-p6-x8.c | 55 __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() local 82 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 83 vs0123 = _mm_sub_ps(vs0123, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 90 const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8()
|
D | velu-sse41-rr2-p6-x8.c | 55 __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() local 82 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 83 vs0123 = _mm_sub_ps(vs0123, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 90 const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8()
|
D | velu-wasmsimd-arm-rr2-p6-x8.c | 55 v128_t vs0123 = wasm_i32x4_shl(vn0123, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() local 82 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 83 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 90 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8()
|
D | velu-wasmsimd-x86-rr2-p6-x20.c | 64 v128_t vs0123 = wasm_i32x4_shl(vn0123, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local 88 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 133 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 134 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 150 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x16.c | 100 v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() local 118 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 140 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 141 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 154 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16()
|
D | velu-neon-rr2-p6-x12.c | 57 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() local 92 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 93 vs0123 = vsubq_f32(vs0123, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 103 const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x12()
|
D | velu-wasmsimd-arm-rr2-p6-x12.c | 58 v128_t vs0123 = wasm_i32x4_shl(vn0123, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() local 94 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 95 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 105 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12()
|
D | velu-sse41-rr2-p6-x12.c | 58 __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() local 94 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 95 vs0123 = _mm_sub_ps(vs0123, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 105 const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12()
|
D | velu-neonfma-rr1-p6-x12.c | 56 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() local 87 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 88 vs0123 = vsubq_f32(vs0123, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 98 const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12()
|
D | velu-wasmsimd-arm-rr2-lut16-p3-x8.c | 76 v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() local 92 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 93 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 100 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8()
|
D | velu-neonfma-rr1-lut16-p3-x8.c | 74 float32x4_t vs0123 = vreinterpretq_f32_s32(vaddq_s32(vl0123, ven0123)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() local 87 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 88 vs0123 = vsubq_f32(vs0123, vone); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 95 const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8()
|
D | velu-neonfma-rr1-p6-x16.c | 59 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() local 98 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 99 vs0123 = vsubq_f32(vs0123, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 112 const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
|
D | velu-neon-rr2-lut16-p3-x8.c | 75 float32x4_t vs0123 = vreinterpretq_f32_s32(vaddq_s32(vl0123, ven0123)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() local 91 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 92 vs0123 = vsubq_f32(vs0123, vone); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 99 const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8()
|
D | velu-sse2-rr2-p6-x12.c | 58 __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() local 94 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 95 vs0123 = _mm_sub_ps(vs0123, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 105 const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12()
|
D | velu-wasmsimd-x86-rr2-p6-x24.c | 67 v128_t vs0123 = wasm_i32x4_shl(vn0123, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() local 95 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 148 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 149 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 168 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x20.c | 112 v128_t vs0123 = wasm_i32x4_add(vl0123, ven0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() local 134 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 161 vt0123 = wasm_f32x4_mul(vt0123, vs0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 162 vs0123 = wasm_f32x4_sub(vs0123, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 178 const v128_t ve0123 = wasm_f32x4_mul(wasm_f32x4_add(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20()
|
D | velu-neon-rr2-p6-x16.c | 60 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() local 104 vt0123 = vmulq_f32(vt0123, vs0123); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 105 vs0123 = vsubq_f32(vs0123, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 118 const float32x4_t ve0123 = vmulq_f32(vaddq_f32(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
|
D | velu-sse41-rr2-p6-x16.c | 61 __m128 vs0123 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn0123), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local 106 vt0123 = _mm_mul_ps(vt0123, vs0123); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 107 vs0123 = _mm_sub_ps(vs0123, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 120 const __m128 ve0123 = _mm_mul_ps(_mm_add_ps(vp0123, vs0123), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
|