| /external/XNNPACK/src/f32-velu/gen/ |
| D | velu-wasmsimd-x86-rr2-p6-x8.c | 56 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() local 69 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 90 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 91 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 97 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8()
|
| D | velu-wasmsimd-x86-rr2-p6-x12.c | 59 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() local 76 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 105 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 106 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 115 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12()
|
| D | velu-wasmsimd-x86-rr2-lut16-p3-x8.c | 78 v128_t vs4567 = wasm_i32x4_add(vl4567, ven4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() local 88 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 100 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 101 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 107 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8()
|
| D | velu-neonfma-rr1-p6-x8.c | 55 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() local 78 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 79 vs4567 = vsubq_f32(vs4567, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8() 85 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x8()
|
| D | velu-wasmsimd-x86-rr2-lut16-p3-x12.c | 90 v128_t vs4567 = wasm_i32x4_add(vl4567, ven4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() local 104 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 121 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 122 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 131 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12()
|
| D | velu-wasmsimd-x86-rr2-p6-x16.c | 62 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() local 83 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 120 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 121 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 133 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16()
|
| D | velu-neon-rr2-p6-x8.c | 56 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() local 82 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 83 vs4567 = vsubq_f32(vs4567, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x8() 89 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x8()
|
| D | velu-sse2-rr2-p6-x8.c | 56 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() local 84 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 85 vs4567 = _mm_sub_ps(vs4567, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8() 91 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x8()
|
| D | velu-sse41-rr2-p6-x8.c | 56 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() local 84 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 85 vs4567 = _mm_sub_ps(vs4567, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 91 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8()
|
| D | velu-wasmsimd-arm-rr2-p6-x8.c | 56 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() local 84 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 85 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8() 91 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8()
|
| D | velu-wasmsimd-x86-rr2-p6-x20.c | 65 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() local 90 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 135 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 136 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 151 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20()
|
| D | velu-wasmsimd-x86-rr2-lut16-p3-x16.c | 102 v128_t vs4567 = wasm_i32x4_add(vl4567, ven4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() local 120 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 142 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 143 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 155 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16()
|
| D | velu-neon-rr2-p6-x12.c | 59 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() local 94 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 95 vs4567 = vsubq_f32(vs4567, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x12() 104 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x12()
|
| D | velu-wasmsimd-arm-rr2-p6-x12.c | 59 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() local 96 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 97 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12() 106 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12()
|
| D | velu-sse41-rr2-p6-x12.c | 59 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() local 96 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 97 vs4567 = _mm_sub_ps(vs4567, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 106 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12()
|
| D | velu-neonfma-rr1-p6-x12.c | 58 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() local 89 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 90 vs4567 = vsubq_f32(vs4567, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12() 99 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x12()
|
| D | velu-wasmsimd-arm-rr2-lut16-p3-x8.c | 78 v128_t vs4567 = wasm_i32x4_add(vl4567, ven4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() local 94 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 95 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 101 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8()
|
| D | velu-neonfma-rr1-lut16-p3-x8.c | 76 float32x4_t vs4567 = vreinterpretq_f32_s32(vaddq_s32(vl4567, ven4567)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() local 89 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 90 vs4567 = vsubq_f32(vs4567, vone); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 96 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8()
|
| D | velu-neonfma-rr1-p6-x16.c | 61 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() local 100 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 101 vs4567 = vsubq_f32(vs4567, vone); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16() 113 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x16()
|
| D | velu-neon-rr2-lut16-p3-x8.c | 77 float32x4_t vs4567 = vreinterpretq_f32_s32(vaddq_s32(vl4567, ven4567)); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() local 93 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 94 vs4567 = vsubq_f32(vs4567, vone); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 100 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8()
|
| D | velu-sse2-rr2-p6-x12.c | 59 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() local 96 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 97 vs4567 = _mm_sub_ps(vs4567, vone); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12() 106 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse2_rr2_p6_x12()
|
| D | velu-wasmsimd-x86-rr2-p6-x24.c | 68 v128_t vs4567 = wasm_i32x4_shl(vn4567, 23); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() local 97 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 150 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 151 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 169 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24()
|
| D | velu-wasmsimd-x86-rr2-lut16-p3-x20.c | 114 v128_t vs4567 = wasm_i32x4_add(vl4567, ven4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() local 136 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 163 vt4567 = wasm_f32x4_mul(vt4567, vs4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 164 vs4567 = wasm_f32x4_sub(vs4567, vone); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 179 const v128_t ve4567 = wasm_f32x4_mul(wasm_f32x4_add(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20()
|
| D | velu-neon-rr2-p6-x16.c | 62 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() local 106 vt4567 = vmulq_f32(vt4567, vs4567); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 107 vs4567 = vsubq_f32(vs4567, vone); in xnn_f32_velu_ukernel__neon_rr2_p6_x16() 119 const float32x4_t ve4567 = vmulq_f32(vaddq_f32(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__neon_rr2_p6_x16()
|
| D | velu-sse41-rr2-p6-x16.c | 62 __m128 vs4567 = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn4567), 23)); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() local 108 vt4567 = _mm_mul_ps(vt4567, vs4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 109 vs4567 = _mm_sub_ps(vs4567, vone); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 121 const __m128 ve4567 = _mm_mul_ps(_mm_add_ps(vp4567, vs4567), valpha); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
|