Home
last modified time | relevance | path

Searched refs:ven3 (Results 1 – 25 of 35) sorted by relevance

12

/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx-rr2-lut4-p4-perm-x32.c72 __m256 ven3 = _mm256_andnot_ps(vindex_mask, vn3); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32() local
74 …128 ven3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven3)), 21)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
83 …8 ven3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven3, 1)), 21)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
92 ven3 = _mm256_insertf128_ps(_mm256_castps128_ps256(ven3_lo), ven3_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
101 __m256 vs3 = _mm256_mul_ps(vl3, ven3); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32()
Dvelu-avx-rr2-lut4-p4-perm-x40.c75 __m256 ven3 = _mm256_andnot_ps(vindex_mask, vn3); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40() local
77 …128 ven3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven3)), 21)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
89 …8 ven3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven3, 1)), 21)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
100 ven3 = _mm256_insertf128_ps(_mm256_castps128_ps256(ven3_lo), ven3_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
111 __m256 vs3 = _mm256_mul_ps(vl3, ven3); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40()
Dvelu-avx-rr2-lut4-p4-perm-x48.c78 __m256 ven3 = _mm256_andnot_ps(vindex_mask, vn3); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48() local
80 …128 ven3_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven3)), 21)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
95 …8 ven3_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven3, 1)), 21)); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
108 ven3 = _mm256_insertf128_ps(_mm256_castps128_ps256(ven3_lo), ven3_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
121 __m256 vs3 = _mm256_mul_ps(vl3, ven3); in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48()
Dvelu-scalar-rr2-lut16-p3-x4.c69 const uint32_t ven3 = fp32_to_bits(vn3) << 19; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4() local
80 float vs3 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx3] + ven3); in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4()
Dvelu-wasm-rr2-lut16-p3-x4.c69 const uint32_t ven3 = fp32_to_bits(vn3) << 19; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4() local
80 float vs3 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx3] + ven3); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4()
Dvelu-scalar-rr2-lut16-p3-x5.c72 const uint32_t ven3 = fp32_to_bits(vn3) << 19; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5() local
86 float vs3 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx3] + ven3); in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5()
Dvelu-wasm-rr2-lut16-p3-x5.c72 const uint32_t ven3 = fp32_to_bits(vn3) << 19; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5() local
86 float vs3 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx3] + ven3); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5()
Dvelu-scalar-rr2-lut16-p3-x6.c75 const uint32_t ven3 = fp32_to_bits(vn3) << 19; in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6() local
92 float vs3 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx3] + ven3); in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6()
Dvelu-wasm-rr2-lut16-p3-x6.c75 const uint32_t ven3 = fp32_to_bits(vn3) << 19; in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6() local
92 float vs3 = fp32_from_bits(xnn_table_exp2minus_k_over_16[vidx3] + ven3); in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6()
Dvelu-avx2-rr1-lut4-p4-perm-x32.c69 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32() local
79 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32()
Dvelu-avx2-rr1-lut16-p3-gather-x32.c74 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32() local
83 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
Dvelu-avx2-rr1-lut8-p4-perm-x32.c68 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32() local
78 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32()
Dvelu-avx512f-rr1-lut16-p3-perm-x64.c65 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64() local
74 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
Dvelu-avx512f-rr1-lut16-p3-perm-x80.c68 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80() local
79 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
Dvelu-avx2-rr1-lut16-p3-gather-x40.c79 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40() local
90 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
Dvelu-avx2-rr1-lut8-p4-perm-x40.c71 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40() local
84 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40()
Dvelu-avx2-rr1-lut4-p4-perm-x40.c72 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40() local
85 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40()
Dvelu-avx2-rr1-lut16-p3-gather-x48.c84 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 19); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48() local
97 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
Dvelu-avx512f-rr1-lut16-p3-perm-x96.c71 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96() local
84 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
Dvelu-avx2-rr1-lut8-p4-perm-x48.c74 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48() local
90 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48()
Dvelu-avx2-rr1-lut4-p4-perm-x48.c75 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48() local
91 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48()
Dvelu-avx512f-rr1-lut16-p3-perm-x112.c74 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112() local
89 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
Dvelu-avx512f-rr1-lut16-p3-perm-x128.c77 const __m512i ven3 = _mm512_slli_epi32(_mm512_castps_si512(vn3), 19); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128() local
94 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
Dvelu-avx2-rr1-lut4-p4-perm-x56.c78 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 21); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56() local
97 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56()
Dvelu-avx2-rr1-lut8-p4-perm-x56.c77 const __m256i ven3 = _mm256_slli_epi32(_mm256_castps_si256(vn3), 20); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56() local
96 __m256 vs3 = _mm256_castsi256_ps(_mm256_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56()

12