/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx-rr2-p6-x8.c | 52 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x8() local 56 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x8() 87 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x8() local 91 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x8()
|
D | velu-avx-rr2-p6-x16.c | 113 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x16() local 117 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x16() 148 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x16() local 152 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x16()
|
D | velu-avx-rr2-lut16-p3-x8.c | 88 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 92 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() 154 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8() local 158 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
|
D | velu-avx-rr2-p6-x24.c | 134 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x24() local 138 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x24() 169 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x24() local 173 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x24()
|
D | velu-avx-rr2-p6-x32.c | 155 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x32() local 159 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x32() 190 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x32() local 194 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x32()
|
D | velu-avx-rr2-p6-x40.c | 176 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x40() local 180 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x40() 211 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_velu_ukernel__avx_rr2_p6_x40() local 215 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_velu_ukernel__avx_rr2_p6_x40()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx-rr2-p5-div-x8.c | 50 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8() local 51 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8() 86 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8() local 87 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x8()
|
D | avx-rr2-p5-nr2-x8.c | 51 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8() local 52 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8() 90 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8() local 91 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x8()
|
D | avx-rr2-p5-div-x16.c | 111 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x16() local 112 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x16() 147 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x16() local 148 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x16()
|
D | avx-rr2-p5-nr2-x16.c | 120 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16() local 121 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16() 159 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16() local 160 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x16()
|
D | avx-rr2-p5-div-x24.c | 131 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x24() local 132 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x24() 167 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x24() local 168 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x24()
|
D | avx-rr2-p5-div-x32.c | 151 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32() local 152 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32() 187 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32() local 188 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x32()
|
D | avx-rr2-p5-nr2-x24.c | 143 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24() local 144 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24() 182 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24() local 183 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x24()
|
D | avx-rr2-p5-nr2-x32.c | 166 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() local 167 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() 205 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32() local 206 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x32()
|
D | avx-rr2-p5-div-x40.c | 171 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40() local 172 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40() 207 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40() local 208 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x40()
|
D | avx-rr2-p5-nr2-x40.c | 189 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40() local 190 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40() 228 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40() local 229 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x40()
|
D | avx-rr2-p5-div-x48.c | 191 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() local 192 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() 227 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48() local 228 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x48()
|
/external/XNNPACK/src/math/ |
D | expm1minus-avx-rr2-p6.c | 62 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_math_f32_expm1minus__avx_rr2_p6() local 63 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_math_f32_expm1minus__avx_rr2_p6()
|
D | sigmoid-avx-rr2-p5-div.c | 64 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_math_f32_sigmoid__avx_rr2_p5_div() local 65 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_math_f32_sigmoid__avx_rr2_p5_div()
|
D | sigmoid-avx-rr2-p5-nr1.c | 65 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_math_f32_sigmoid__avx_rr2_p5_nr1() local 66 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_math_f32_sigmoid__avx_rr2_p5_nr1()
|
D | sigmoid-avx-rr2-p5-nr2.c | 65 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… in xnn_math_f32_sigmoid__avx_rr2_p5_nr2() local 66 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_math_f32_sigmoid__avx_rr2_p5_nr2()
|
D | expm1minus-avx-rr2-lut16-p3.c | 102 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ven_hi)); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3() local 103 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_math_f32_expm1minus__avx_rr2_lut16_p3()
|
D | sigmoid-avx-rr2-lut64-p2-div.c | 106 const __m128 vs_hi = _mm_castsi128_ps(_mm_add_epi32(vl_hi, ve_hi)); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div() local 107 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); in xnn_math_f32_sigmoid__avx_rr2_lut64_p2_div()
|
/external/XNNPACK/src/f32-sigmoid/ |
D | avx-p5.c.in | 63 …const __m128 vs_hi${ABC[N]} = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_… 64 …m256 vs${ABC[N]} = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo${ABC[N]}), vs_hi${ABC[N]}, 1); 134 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… variable 135 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); 179 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… variable 180 const __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1);
|
/external/XNNPACK/src/f32-velu/ |
D | avx-rr2-p6.c.in | 110 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… variable 114 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1); 145 …const __m128 vs_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(vn, 1)… variable 149 __m256 vs = _mm256_insertf128_ps(_mm256_castps128_ps256(vs_lo), vs_hi, 1);
|