/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx512f-rr1-lut16-p3-perm-x16.c | 50 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16() local 83 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16() local
|
D | velu-avx2-rr1-lut8-p4-perm-x8.c | 49 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8() local 81 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8() local
|
D | velu-avx2-rr1-lut4-p4-perm-x8.c | 50 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8() local 82 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8() local
|
D | velu-avx2-rr1-lut16-p3-gather-x8.c | 52 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8() local 85 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8() local
|
D | velu-avx-rr2-lut4-p4-perm-x8.c | 53 __m256 ven = _mm256_andnot_ps(vindex_mask, vn); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local 89 __m256 ven = _mm256_andnot_ps(vindex_mask, vn); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8() local
|
D | velu-neonfma-rr1-lut16-p3-x4.c | 51 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local 87 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4() local
|
D | velu-neon-rr2-lut16-p3-x4.c | 52 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local 89 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4() local
|
D | velu-wasmsimd-arm-rr2-lut16-p3-x4.c | 53 const v128_t ven = wasm_i32x4_shl(vn, 19); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local 91 const v128_t ven = wasm_i32x4_shl(vn, 19); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4() local
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x4.c | 53 const v128_t ven = wasm_i32x4_shl(vn, 19); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local 94 const v128_t ven = wasm_i32x4_shl(vn, 19); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4() local
|
D | velu-avx512f-rr1-lut16-p3-perm-x32.c | 101 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32() local 134 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32() local
|
D | velu-sse41-rr2-lut16-p3-x4.c | 53 const __m128i ven = _mm_slli_epi32(_mm_castps_si128(vn), 19); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local 96 const __m128i ven = _mm_slli_epi32(_mm_castps_si128(vn), 19); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4() local
|
D | velu-avx2-rr1-lut4-p4-perm-x16.c | 101 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16() local 133 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16() local
|
D | velu-avx2-rr1-lut8-p4-perm-x16.c | 100 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16() local 132 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16() local
|
D | velu-avx2-rr1-lut16-p3-gather-x16.c | 103 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16() local 136 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16() local
|
D | velu-avx512f-rr1-lut16-p3-perm-x48.c | 118 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48() local 151 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48() local
|
D | velu-sse2-rr2-lut16-p3-x4.c | 53 const __m128i ven = _mm_slli_epi32(_mm_castps_si128(vn), 19); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local 101 const __m128i ven = _mm_slli_epi32(_mm_castps_si128(vn), 19); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4() local
|
D | velu-avx2-rr1-lut4-p4-perm-x24.c | 119 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24() local 151 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24() local
|
D | velu-avx-rr2-lut4-p4-perm-x16.c | 115 __m256 ven = _mm256_andnot_ps(vindex_mask, vn); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16() local 151 __m256 ven = _mm256_andnot_ps(vindex_mask, vn); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16() local
|
D | velu-avx2-rr1-lut8-p4-perm-x24.c | 118 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24() local 150 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24() local
|
D | velu-avx2-rr1-lut16-p3-gather-x24.c | 121 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24() local 154 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24() local
|
/external/XNNPACK/src/f32-velu/ |
D | avx-rr2-lut4-p4-perm.c.in | 110 __m256 ven = _mm256_andnot_ps(vindex_mask, vn); variable 146 __m256 ven = _mm256_andnot_ps(vindex_mask, vn); variable
|
/external/XNNPACK/src/math/ |
D | expm1minus-avx512f-rr1-lut16-p3-perm.c | 67 const __m512i ven = _mm512_slli_epi32(_mm512_castps_si512(vn), 19); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm() local
|
D | expm1minus-avx2-rr1-lut4-p4-perm.c | 67 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 21); in xnn_math_f32_expm1minus__avx2_rr1_lut4_p4_perm() local
|
D | expm1minus-avx2-rr1-lut16-p3-gather.c | 67 const __m256i ven = _mm256_slli_epi32(_mm256_castps_si256(vn), 19); in xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather() local
|
D | expm1minus-scalar-rr2-lut16-p3.c | 62 const uint32_t ven = fp32_to_bits(vn) << 19; in xnn_math_f32_expm1minus__scalar_rr2_lut16_p3() local
|