/external/XNNPACK/src/f32-sigmoid/gen/
D | scalar-p5-div-x4.c
    132: const float ve3 = vt3 * vp3 + vs3;    in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4() [local]
    138: float vf3 = ve3 / (ve3 + vone);    in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4()
|
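The two hits above are the tail of the p5-div scheme: exp(z) is reconstructed for a non-positive argument z (these kernels evaluate at -|x| and mirror positive inputs at the end) as ve = vt*vp + vs, i.e. 2^n times a degree-5 polynomial, and the sigmoid then follows as ve / (ve + 1). A minimal scalar sketch of that structure, with expf standing in for the polynomial reconstruction; the function name is illustrative, not XNNPACK's:

    #include <math.h>

    /* Sketch only: the generated kernel computes e via vs + vt*vp rather
     * than calling expf, but the surrounding algebra is the same. */
    static float sigmoid_p5_div_sketch(float x) {
      const float z = -fabsf(x);   /* restrict to non-positive inputs */
      const float e = expf(z);     /* kernel: ve = vt*vp + vs */
      float f = e / (e + 1.0f);    /* kernel: vf = ve / (ve + vone) */
      if (x > 0.0f) {
        f = 1.0f - f;              /* mirror for positive inputs */
      }
      return f;
    }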
D | scalar-lut2048-p1-div-x4.c
    88: const uint32_t ve3 = (fp32_to_bits(vn3) & ~vindex_mask) << 12;    in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4() [local]
    99: const float vs3 = fp32_from_bits(xnn_table_exp2_k_over_2048[vidx3] + ve3);    in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4()
|
D | scalar-lut64-p2-div-x4.c
    88: const uint32_t ve3 = (fp32_to_bits(vn3) & ~vindex_mask) << 17;    in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4() [local]
    99: const float vs3 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx3] + ve3);    in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4()
|
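In the lut2048-p1 and lut64-p2 variants the scale vs = 2^(n/2048) or 2^(n/64) is assembled from bits rather than computed: the low mantissa bits of vn (which holds n plus a rounding magic bias) index a table of 2^(k/N) bit patterns, and the bits above the index, shifted so they land in the IEEE-754 exponent field (by 12 for the 11-bit index, by 17 for the 6-bit index, since bit 6 must reach bit 23), are added on top. A sketch of the 64-entry case, assuming vn already carries the magic bias; the helper names are illustrative:

    #include <stdint.h>
    #include <string.h>

    /* Bit-cast helpers equivalent to XNNPACK's fp32_to_bits/fp32_from_bits. */
    static inline uint32_t fp32_to_bits_(float f)   { uint32_t b; memcpy(&b, &f, sizeof b); return b; }
    static inline float fp32_from_bits_(uint32_t b) { float f; memcpy(&f, &b, sizeof f); return f; }

    /* s = 2^(n/64): table[k] holds the bit pattern of 2^(k/64); the bits of
     * vn above the 6-bit index carry the integer exponent, shifted so bit 6
     * reaches bit 23 (the exponent LSB) and added to the table entry. */
    static float scale_from_lut64(float vn, const uint32_t table[64]) {
      const uint32_t vindex_mask = UINT32_C(0x3F);
      const uint32_t vidx = fp32_to_bits_(vn) & vindex_mask;         /* table index */
      const uint32_t ve = (fp32_to_bits_(vn) & ~vindex_mask) << 17;  /* exponent bits */
      return fp32_from_bits_(table[vidx] + ve);
    }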
D | avx2-rr1-p5-div-x32.c
    125: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32() [local]
    131: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32()
    137: __m256 vf3 = _mm256_div_ps(ve3, vd3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32()
|
D | avx2-rr1-p5-div-x40.c
    136: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x40() [local]
    143: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x40()
    150: __m256 vf3 = _mm256_div_ps(ve3, vd3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x40()
|
D | avx2-rr1-p5-nr1fma-x32.c
    125: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32() [local]
    131: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
    151: __m256 vf3 = _mm256_mul_ps(ve3, vr3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32()
|
D | avx2-rr1-p5-nr2fma-x32.c
    125: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32() [local]
    131: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
    155: __m256 vf3 = _mm256_mul_ps(ve3, vr3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32()
|
D | avx2-rr1-p5-nr2fma-x40.c
    136: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40() [local]
    143: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
    171: __m256 vf3 = _mm256_mul_ps(ve3, vr3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40()
|
D | avx2-rr1-p5-nr1fma-x40.c
    136: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40() [local]
    143: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
    166: __m256 vf3 = _mm256_mul_ps(ve3, vr3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40()
|
D | avx2-rr1-p5-div-x48.c
    147: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x48() [local]
    155: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x48()
    163: __m256 vf3 = _mm256_div_ps(ve3, vd3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x48()
|
D | avx2-rr1-p5-nr1fma-x48.c
    147: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48() [local]
    155: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
    181: __m256 vf3 = _mm256_mul_ps(ve3, vr3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48()
|
D | avx2-rr1-p5-div-x56.c
    158: const __m256 ve3 = _mm256_fmadd_ps(vt3, vp3, vs3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() [local]
    167: const __m256 vd3 = _mm256_add_ps(ve3, vone);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
    176: __m256 vf3 = _mm256_div_ps(ve3, vd3);    in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
|
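The avx2-rr1-p5 kernels differ only in how they divide ve by vd = ve + 1: the div variants issue _mm256_div_ps directly, while the nr1fma and nr2fma variants approximate 1/vd with _mm256_rcp_ps (roughly 12-bit accurate) and sharpen it with one or two Newton-Raphson steps, each expressed as two FMAs. A sketch of the single-step refinement, assuming AVX2 and FMA support:

    #include <immintrin.h>

    /* One Newton-Raphson step for the reciprocal: r' = r + r*(1 - d*r).
     * The nr2fma kernels repeat the two-FMA refinement once more before
     * the final vf = ve * vr multiply seen in the hits above. */
    static __m256 reciprocal_nr1fma(__m256 vd) {
      const __m256 vone = _mm256_set1_ps(1.0f);
      __m256 vr = _mm256_rcp_ps(vd);                       /* initial estimate of 1/d */
      const __m256 verr = _mm256_fnmadd_ps(vr, vd, vone);  /* residual: 1 - d*r */
      return _mm256_fmadd_ps(verr, vr, vr);                /* r + r*(1 - d*r) */
    }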
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx2-p5-x32.c
    114: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32() [local]
    123: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
    131: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x32()
|
D | avx2-p5-x40.c
    124: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() [local]
    134: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
    143: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
|
D | avx2-p5-x48.c
    134: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() [local]
    145: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
    155: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
|
D | avx2-p5-x56.c
    144: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() [local]
    156: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
    167: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
|
D | avx2-p5-x64.c
    154: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64() [local]
    167: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64()
    179: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x64()
|
D | avx2-p5-x72.c
    164: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72() [local]
    178: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72()
    191: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x72()
|
D | avx2-p5-x80.c
    174: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80() [local]
    189: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80()
    203: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80()
|
D | avx2-p5-x88.c
    184: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() [local]
    200: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
    215: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
|
D | avx2-p5-x96.c
    194: __m256 ve3 = _mm256_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() [local]
    211: ve3 = _mm256_max_ps(ve3, vmin_exponent);    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
    227: …const __m256 vs3 = _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_castps_si256(_mm256_add_ps(ve3, vm…    in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
|
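The truncated third hit in each avx2-p5 entry rebuilds the scale vs = 2^ve without an int-to-float conversion: after clamping ve to vmin_exponent, adding a magic bias parks the integer exponent in the low bits of the float's pattern, and shifting those bits left by 23 moves them into the IEEE-754 exponent field. A sketch of that step with constants assumed, not copied, from the kernels:

    #include <immintrin.h>

    /* vs = 2^ve for integer-valued ve. With the bias assumed here
     * (0x1.8000FEp23f = 12583039.0f), the low bits of bits(ve + bias) hold
     * ve + 127, so shifting left by 23 yields a float with biased exponent
     * ve + 127, i.e. the value 2^ve. Clamping at -127.0f maps underflowing
     * exponents to +0.0f. */
    static __m256 exp2_int_sketch(__m256 ve) {
      const __m256 vmin_exponent = _mm256_set1_ps(-127.0f);
      const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
      ve = _mm256_max_ps(ve, vmin_exponent);
      const __m256i vbits = _mm256_castps_si256(_mm256_add_ps(ve, vmagic_bias));
      return _mm256_castsi256_ps(_mm256_slli_epi32(vbits, 23));
    }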
D | avx512f-p5-scalef-x64.c
    108: const __m512 ve3 = _mm512_add_ps(vn3, vscalee);    in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64() [local]
    114: vf3 = _mm512_scalef_ps(vf3, ve3);    in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x64()
|
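On AVX-512F the same step needs no bit manipulation: _mm512_scalef_ps(f, e) computes f * 2^floor(e) in hardware, so the combined exponent can be applied directly. A sketch with illustrative names:

    #include <immintrin.h>

    /* Apply the extended exponent: ve = vn + vscalee, then vf * 2^ve. */
    static __m512 apply_extended_exponent(__m512 vf, __m512 vn, __m512 vscalee) {
      const __m512 ve = _mm512_add_ps(vn, vscalee);  /* combine exponents */
      return _mm512_scalef_ps(vf, ve);               /* vf * 2^floor(ve) */
    }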
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-lut64-p2-x4-acc2.c
    83: const uint32_t ve3 = (fp32_to_bits(vn3) & UINT32_C(0xFFFFFFC0)) << 17;    in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2() [local]
    94: const float vs3 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx3] + ve3);    in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2()
|
D | scalar-lut64-p2-x4.c
    82: const uint32_t ve3 = (fp32_to_bits(vn3) & UINT32_C(0xFFFFFFC0)) << 17;    in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4() [local]
    93: const float vs3 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx3] + ve3);    in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4()
|
D | scalar-lut64-p2-x4-acc4.c
    85: const uint32_t ve3 = (fp32_to_bits(vn3) & UINT32_C(0xFFFFFFC0)) << 17;    in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4() [local]
    96: const float vs3 = fp32_from_bits(xnn_table_exp2_k_over_64[vidx3] + ve3);    in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4()
|
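These kernels reuse the lut64-p2 reconstruction shown above (here with the index mask spelled as UINT32_C(0xFFFFFFC0), the complement of 0x3F) inside the first pass of a numerically stable softmax: each element becomes exp(x[i] - max), is stored, and is added to a running sum; the -accN suffixes split that sum across N accumulators to shorten the dependency chain. A behavioral sketch with expf in place of the table scheme; the name and signature are illustrative, not XNNPACK's API:

    #include <math.h>
    #include <stddef.h>

    /* Write exp(x[i] - max) to out[] and return the sum of the written
     * values; max is the precomputed maximum of x[0..n-1]. */
    static float raddstoreexpminusmax_sketch(size_t n, const float* x,
                                             float* out, float max) {
      float sum = 0.0f;
      for (size_t i = 0; i < n; i++) {
        const float e = expf(x[i] - max);
        out[i] = e;
        sum += e;
      }
      return sum;
    }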