/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx2-rr1-lut16-p3-gather-x72.c |
     72  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     74  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     76  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     78  const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     80  const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     82  const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     84  const __m256i vl6 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx6, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     86  const __m256i vl7 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx7, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
     88  const __m256i vl8 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx8, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
    224  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72()
    [all …]
|
D | velu-avx2-rr1-lut16-p3-gather-x80.c |
     75  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     77  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     79  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     81  const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     83  const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     85  const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     87  const __m256i vl6 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx6, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     89  const __m256i vl7 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx7, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     91  const __m256i vl8 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx8, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
     93  const __m256i vl9 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx9, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80()
    [all …]
|
D | velu-avx2-rr1-lut16-p3-gather-x64.c |
     69  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
     71  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
     73  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
     75  const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
     77  const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
     79  const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
     81  const __m256i vl6 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx6, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
     83  const __m256i vl7 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx7, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
    206  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
    239  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64()
|
D | velu-avx2-rr1-lut16-p3-gather-x56.c |
     66  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
     68  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
     70  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
     72  const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
     74  const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
     76  const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
     78  const __m256i vl6 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx6, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
    188  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
    221  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56()
|
D | velu-avx2-rr1-lut16-p3-gather-x48.c |
     63  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
     65  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
     67  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
     69  const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
     71  const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
     73  const __m256i vl5 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx5, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
    170  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
    203  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48()
|
D | velu-avx2-rr1-lut16-p3-gather-x40.c |
     60  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
     62  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
     64  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
     66  const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
     68  const __m256i vl4 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx4, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
    152  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
    185  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40()
|
D | velu-avx2-rr1-lut16-p3-gather-x32.c |
     57  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
     59  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
     61  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
     63  const __m256i vl3 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx3, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
    134  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
    167  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32()
|
D | velu-avx2-rr1-lut16-p3-gather-x24.c |
     54  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
     56  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
     58  const __m256i vl2 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx2, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
    116  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
    149  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24()
|
D | velu-avx2-rr1-lut16-p3-gather-x16.c |
     51  const __m256i vl0 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx0, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16()
     53  const __m256i vl1 = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx1, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16()
     98  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16()
    131  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16()
|
D | velu-avx2-rr1-lut16-p3-gather-x8.c |
     47  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8()
     80  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));  in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8()
|
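All of the generated ELU kernels above share one pattern: the low 4 bits of a rounded, scaled input select one of 16 precomputed 2^(-k/16) entries, and a single `_mm256_i32gather_epi32` fetches all 8 lanes' table entries at once instead of 8 scalar loads. Below is a minimal, self-contained sketch of that lookup step; it is not XNNPACK's actual code, and the table is rebuilt at runtime with `exp2f` rather than taken from `xnn_table_exp2minus_k_over_16`.

    #include <immintrin.h>
    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
      // Stand-in for xnn_table_exp2minus_k_over_16: 16 float bit patterns
      // of 2^(-k/16), stored as uint32_t so the gather sees 32-bit ints.
      uint32_t table[16];
      for (int k = 0; k < 16; k++) {
        float v = exp2f(-k / 16.0f);
        memcpy(&table[k], &v, sizeof v);
      }

      // One gather replaces eight scalar table loads. The base pointer is
      // cast to const int* because the intrinsic gathers 32-bit integers;
      // sizeof(float) == 4 is the byte scale applied to each index.
      const __m256i vidx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
      const __m256i vl =
          _mm256_i32gather_epi32((const int*) table, vidx, sizeof(float));
      const __m256 vf = _mm256_castsi256_ps(vl);

      float out[8];
      _mm256_storeu_ps(out, vf);
      for (int i = 0; i < 8; i++) printf("2^(-%d/16) = %f\n", i, out[i]);
      return 0;
    }

Compile with `-mavx2` (and `-lm`); the x16 through x80 variants differ only in how many such gathers are unrolled per loop iteration.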
/external/XNNPACK/src/f32-velu/ |
D | avx2-rr1-lut16-p3-gather.c.in |
     53  …const __m256i vl${N} = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx${N}, sizeof(floa…
     94  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));
    127  const __m256i vl = _mm256_i32gather_epi32(xnn_table_exp2minus_k_over_16, vidx, sizeof(float));
|
/external/XNNPACK/src/math/ |
D | expm1minus-f32-avx2-rr1-lut16-p3-gather.c | 71 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_16, vidx, sizeof… in xnn_math_f32_expm1minus__avx2_rr1_lut16_p3_gather()
|
D | sigmoid-f32-avx2-rr1-lut64-p2-gather-div.c | 75 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr1_lut64_p2_gather_div()
|
D | sigmoid-f32-avx2-rr2-lut64-p2-gather-div.c | 76 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr2_lut64_p2_gather_div()
|
D | sigmoid-f32-avx2-rr1-lut64-p2-gather-nr1fma.c | 75 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr1_lut64_p2_gather_nr1fma()
|
D | sigmoid-f32-avx2-rr1-lut64-p2-gather-nr2fma.c | 75 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr1_lut64_p2_gather_nr2fma()
|
D | sigmoid-f32-avx2-rr2-lut64-p2-gather-nr1fma.c | 76 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr2_lut64_p2_gather_nr1fma()
|
D | sigmoid-f32-avx2-rr2-lut64-p2-gather-nr2fma.c | 76 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr2_lut64_p2_gather_nr2fma()
|
D | sigmoid-f32-avx2-rr1-lut64-p2-gather-nr2fma1adj.c | 75 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr1_lut64_p2_gather_nr2fma1adj()
|
D | sigmoid-f32-avx2-rr2-lut64-p2-gather-nr2fma1adj.c | 76 …const __m256i vl = _mm256_i32gather_epi32((const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof… in xnn_math_f32_sigmoid__avx2_rr2_lut64_p2_gather_nr2fma1adj()
|
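The math/ reference implementations above use the same gather, but against a 64-entry 2^(-k/64) table, and the truncated snippets all show the base pointer cast `(const int*) xnn_table_exp2minus_k_over_64`: the intrinsic's base parameter is `int const*`, while the table holds float bit patterns. A sketch of that shared gather step follows; the `uint32_t` element type and 64-entry size of the extern table are assumptions inferred from the table's name, not copied from XNNPACK's headers.

    #include <immintrin.h>
    #include <stdint.h>

    // Assumed declaration of XNNPACK's 2^(-k/64) table (64 float bit patterns).
    extern const uint32_t xnn_table_exp2minus_k_over_64[64];

    // Gather step shared by the sigmoid variants above: the low 6 bits of the
    // rounded, scaled input select one of 64 table entries per lane.
    static inline __m256i gather_lut64(__m256i vn) {
      const __m256i vidx = _mm256_and_si256(vn, _mm256_set1_epi32(63));
      return _mm256_i32gather_epi32(
          (const int*) xnn_table_exp2minus_k_over_64, vidx, sizeof(float));
    }

The rr1/rr2 and nr1fma/nr2fma/nr2fma1adj/div suffixes vary the range reduction and the reciprocal step, not this lookup.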
/external/libaom/av1/common/x86/ |
D | warp_plane_avx2.c |
    1074  _mm256_i32gather_epi32(error_measure_lut, diff_1_lo, 4);  in av1_calc_frame_error_avx2()
    1076  _mm256_i32gather_epi32(error_measure_lut, diff_1_hi, 4);  in av1_calc_frame_error_avx2()
    1078  _mm256_i32gather_epi32(error_measure_lut, diff_2_lo, 4);  in av1_calc_frame_error_avx2()
    1080  _mm256_i32gather_epi32(error_measure_lut, diff_2_hi, 4);  in av1_calc_frame_error_avx2()
    1082  _mm256_i32gather_epi32(error_measure_lut, diff_3_lo, 4);  in av1_calc_frame_error_avx2()
    1084  _mm256_i32gather_epi32(error_measure_lut, diff_3_hi, 4);  in av1_calc_frame_error_avx2()
    1086  _mm256_i32gather_epi32(error_measure_lut, diff_4_lo, 4);  in av1_calc_frame_error_avx2()
    1088  _mm256_i32gather_epi32(error_measure_lut, diff_4_hi, 4);  in av1_calc_frame_error_avx2()
|
D | selfguided_avx2.c |
    266  const __m256i a_res = _mm256_i32gather_epi32(av1_x_by_xplus1, z, 4);  in calc_ab()
    403  const __m256i a_res = _mm256_i32gather_epi32(av1_x_by_xplus1, z, 4);  in calc_ab_fast()
|
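The libaom hits use the gather for integer cost/ratio tables rather than float reconstruction: eight pixel differences index `error_measure_lut` (or `av1_x_by_xplus1`) in one instruction, with a literal scale of 4 since the tables hold 32-bit ints. A hedged sketch of the frame-error pattern follows; the 512-entry size and the diff-is-already-offset indexing are assumptions for illustration, not libaom's exact definitions.

    #include <immintrin.h>

    // Assumed stand-in for libaom's error_measure_lut: per-difference error
    // costs, indexed by a biased pixel difference in [0, 511].
    extern const int error_measure_lut[512];

    // Eight lookups error_measure_lut[diff[i]] in a single gather; the
    // literal 4 is the element size in bytes.
    static inline __m256i gather_error(__m256i diff) {
      return _mm256_i32gather_epi32(error_measure_lut, diff, 4);
    }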
/external/skia/src/opts/ |
D | SkBitmapProcState_opts.h | 71 _mm256_i32gather_epi32((const int*)ptr, skvx::bit_pun<__m256i>(ix), 4)); in S32_alpha_D32_filter_DX()
|
D | SkVM_opts.h | 18 _mm256_i32gather_epi32(ptr, skvx::bit_pun<__m256i>(ix), 4)); in gather32()
|
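Skia wraps the same intrinsic in generic helpers (`gather32()` in SkVM_opts.h, and the bilinear filter in SkBitmapProcState_opts.h), converting between its `skvx` vector types and `__m256i` via `bit_pun`. A plain-C rendering of the same idea, under the assumption that `ptr` points at 32-bit elements:

    #include <immintrin.h>

    // Generic 8-lane 32-bit gather in the spirit of Skia's gather32() helper:
    // dst[i] = ((const int*) ptr)[ix[i]]. The cast satisfies the intrinsic's
    // int const* base parameter regardless of the caller's element type.
    static inline __m256i gather32(const void* ptr, __m256i ix) {
      return _mm256_i32gather_epi32((const int*) ptr, ix, 4);
    }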
/external/clang/test/CodeGen/ |
D | avx2-builtins.c | 441 return _mm256_i32gather_epi32(b, c, 2); in test_mm256_i32gather_epi32()
|
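The clang CodeGen test pins down the intrinsic's shape: `__m256i _mm256_i32gather_epi32(int const* base, __m256i vindex, const int scale)`, where `scale` must be a compile-time constant of 1, 2, 4, or 8. The test's scale of 2 means each lane loads 4 bytes from `base + 2 * vindex[i]`, so gathered elements may overlap and need not be 4-byte aligned. A minimal sketch of that usage, with a hypothetical wrapper name:

    #include <immintrin.h>

    // Mirrors the test above: 32-bit loads at 2-byte index granularity.
    __m256i load_with_stride2(const int* base, __m256i idx) {
      return _mm256_i32gather_epi32(base, idx, 2);
    }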