Home
last modified time | relevance | path

Searched refs:_mm256_i32gather_epi32 (Results 1 – 25 of 27) sorted by relevance

12

/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx2-rr1-lut16-p3-gather-x72.c72 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
74 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
76 const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
78 const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
80 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
82 const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
84 const __m256i vl6 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx6, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
86 const __m256i vl7 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx7, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
88 const __m256i vl8 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx8, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
224 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
[all …]
Dvelu-avx2-rr1-lut16-p3-gather-x80.c75 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
77 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
79 const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
81 const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
83 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
85 const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
87 const __m256i vl6 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx6, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
89 const __m256i vl7 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx7, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
91 const __m256i vl8 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx8, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
93 const __m256i vl9 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx9, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
[all …]
Dvelu-avx2-rr1-lut16-p3-gather-x64.c69 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
71 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
73 const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
75 const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
77 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
79 const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
81 const __m256i vl6 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx6, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
83 const __m256i vl7 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx7, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
206 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
239 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
Dvelu-avx2-rr1-lut16-p3-gather-x56.c66 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
68 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
70 const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
72 const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
74 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
76 const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
78 const __m256i vl6 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx6, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
188 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
221 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
Dvelu-avx2-rr1-lut16-p3-gather-x48.c63 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
65 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
67 const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
69 const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
71 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
73 const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
170 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
203 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
Dvelu-avx2-rr1-lut16-p3-gather-x40.c60 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
62 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
64 const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
66 const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
68 const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
152 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
185 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
Dvelu-avx2-rr1-lut16-p3-gather-x32.c57 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
59 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
61 const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
63 const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
134 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
167 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
Dvelu-avx2-rr1-lut16-p3-gather-x24.c54 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
56 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
58 const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
116 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
149 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
Dvelu-avx2-rr1-lut16-p3-gather-x16.c51 const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16()
53 const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16()
98 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16()
131 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16()
Dvelu-avx2-rr1-lut16-p3-gather-x8.c47 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8()
80 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float)); in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8()
/external/XNNPACK/src/f32-velu/
Davx2-rr1-lut16-p3-gather.c.in53 …const __m256i vl${N} = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx${N}, sizeof(floa…
94 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));
127 const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));
/external/XNNPACK/src/math/
Dexpm1minus-f32-avx2-rr1-lut16-p3-gather.c71 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_16, vidx, sizeof… in xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather()
Dsigmoid-f32-avx2-rr1-lut64-p2-gather-div.c75 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr1_lut64_p2_gather_div()
Dsigmoid-f32-avx2-rr2-lut64-p2-gather-div.c76 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr2_lut64_p2_gather_div()
Dsigmoid-f32-avx2-rr1-lut64-p2-gather-nr1fma.c75 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr1_lut64_p2_gather_nr1fma()
Dsigmoid-f32-avx2-rr1-lut64-p2-gather-nr2fma.c75 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr1_lut64_p2_gather_nr2fma()
Dsigmoid-f32-avx2-rr2-lut64-p2-gather-nr1fma.c76 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr2_lut64_p2_gather_nr1fma()
Dsigmoid-f32-avx2-rr2-lut64-p2-gather-nr2fma.c76 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr2_lut64_p2_gather_nr2fma()
Dsigmoid-f32-avx2-rr1-lut64-p2-gather-nr2fma1adj.c75 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr1_lut64_p2_gather_nr2fma1adj()
Dsigmoid-f32-avx2-rr2-lut64-p2-gather-nr2fma1adj.c76 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr2_lut64_p2_gather_nr2fma1adj()
/external/libaom/av1/common/x86/
Dwarp_plane_avx2.c1074 _mm256_i32gather_epi32(error_measure_lut, diff_1_lo, 4); in av1_calc_frame_error_avx2()
1076 _mm256_i32gather_epi32(error_measure_lut, diff_1_hi, 4); in av1_calc_frame_error_avx2()
1078 _mm256_i32gather_epi32(error_measure_lut, diff_2_lo, 4); in av1_calc_frame_error_avx2()
1080 _mm256_i32gather_epi32(error_measure_lut, diff_2_hi, 4); in av1_calc_frame_error_avx2()
1082 _mm256_i32gather_epi32(error_measure_lut, diff_3_lo, 4); in av1_calc_frame_error_avx2()
1084 _mm256_i32gather_epi32(error_measure_lut, diff_3_hi, 4); in av1_calc_frame_error_avx2()
1086 _mm256_i32gather_epi32(error_measure_lut, diff_4_lo, 4); in av1_calc_frame_error_avx2()
1088 _mm256_i32gather_epi32(error_measure_lut, diff_4_hi, 4); in av1_calc_frame_error_avx2()
Dselfguided_avx2.c266 const __m256i a_res = _mm256_i32gather_epi32(av1_x_by_xplus1, z, 4); in calc_ab()
403 const __m256i a_res = _mm256_i32gather_epi32(av1_x_by_xplus1, z, 4); in calc_ab_fast()
/external/skia/src/opts/
DSkBitmapProcState_opts.h71 _mm256_i32gather_epi32((const int*)ptr, skvx::bit_pun<__m256i>(ix), 4)); in S32_alpha_D32_filter_DX()
DSkVM_opts.h18 _mm256_i32gather_epi32(ptr, skvx::bit_pun<__m256i>(ix), 4)); in gather32()
/external/clang/test/CodeGen/
Davx2-builtins.c441 return _mm256_i32gather_epi32(b, c, 2); in test_mm256_i32gather_epi32()

12