/external/XNNPACK/src/f32-sigmoid/gen/
D | scalar-lut2048-p1-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2():
     56  const float vz0 = fabsf(vx0);  (local)
     66  float vn0 = vz0 * vminus_log2e_x2048 + vmagic_bias;
     95  float vt0 = vn0 * vln2_o2048_hi + vz0;
    119  if XNN_UNPREDICTABLE(vz0 > vdenorm_cutoff) {
D | scalar-p5-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x2():
     55  const float vz0 = fabsf(vx0);  (local)
     64  float vn0 = vz0 * vminus_log2e + vmagic_bias;
     78  float vt0 = vn0 * vln2_hi + vz0;
    114  if XNN_UNPREDICTABLE(vz0 > vdenorm_cutoff) {
D | scalar-lut64-p2-div-x2.c | in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2():
     56  const float vz0 = fabsf(vx0);  (local)
     66  float vn0 = vz0 * vminus_log2e_x64 + vmagic_bias;
     95  float vt0 = vn0 * vln2_o64_hi + vz0;
    123  if XNN_UNPREDICTABLE(vz0 > vdenorm_cutoff) {
D | scalar-lut2048-p1-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4():
     58  const float vz0 = fabsf(vx0);  (local)
     70  float vn0 = vz0 * vminus_log2e_x2048 + vmagic_bias;
    109  float vt0 = vn0 * vln2_o2048_hi + vz0;
    143  if XNN_UNPREDICTABLE(vz0 > vdenorm_cutoff) {
D | scalar-p5-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4():
     57  const float vz0 = fabsf(vx0);  (local)
     68  float vn0 = vz0 * vminus_log2e + vmagic_bias;
     88  float vt0 = vn0 * vln2_hi + vz0;
    142  if XNN_UNPREDICTABLE(vz0 > vdenorm_cutoff) {
D | scalar-lut64-p2-div-x4.c | in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4():
     58  const float vz0 = fabsf(vx0);  (local)
     70  float vn0 = vz0 * vminus_log2e_x64 + vmagic_bias;
    109  float vt0 = vn0 * vln2_o64_hi + vz0;
    149  if XNN_UNPREDICTABLE(vz0 > vdenorm_cutoff) {
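All of the scalar references above trace the same four stages of these kernels: take z = |x| with fabsf(), round n = z * -log2(e) (scaled by 64 or 2048 in the LUT variants) using the magic-bias trick, apply a two-constant Cody-Waite reduction t = n * vln2_hi + z (followed by a low-order correction), and flush the result to zero once z passes the denormal cutoff. The following is a minimal sketch of that structure for the p5 (degree-5 polynomial) variant; it is not the generated code — the function name is made up, the polynomial uses plain Taylor coefficients where the kernels use minimax ones, and the LUT kernels replace the polynomial with a table lookup keyed by the low bits of n.

#include <math.h>
#include <stdint.h>
#include <string.h>

/*
 * Illustrative scalar sketch of the p5 sigmoid structure referenced above.
 * Constants are the standard values for this reduction trick, not copied
 * from the generated XNNPACK sources.
 */
static float sigmoid_scalar_p5_sketch(float x) {
  const float vmagic_bias    = 0x1.8000FEp23f;  /* float addition rounds n to an integer */
  const float vminus_log2e   = -0x1.715476p+0f; /* -log2(e)                              */
  const float vln2_hi        = 0x1.62E400p-1f;  /* high part of ln(2) (Cody-Waite)       */
  const float vln2_lo        = 1.4286068e-6f;   /* ln(2) - vln2_hi                       */
  const float vdenorm_cutoff = 0x1.5D589Ep+6f;  /* ~126*ln(2): exp(-|x|) goes denormal   */

  const float vz = fabsf(x);

  /* n = round(|x| * -log2(e)); 2^n is rebuilt by shifting the float bits of vn. */
  float vn = vz * vminus_log2e + vmagic_bias;
  uint32_t vn_bits;
  memcpy(&vn_bits, &vn, sizeof(vn_bits));
  const uint32_t vs_bits = vn_bits << 23;
  float vs;                                     /* vs = 2^n */
  memcpy(&vs, &vs_bits, sizeof(vs));
  vn -= vmagic_bias;

  /* Two-step Cody-Waite reduction: t = |x| + n*ln(2), with n ~= -|x|/ln(2). */
  float vt = vn * vln2_hi + vz;
  vt = vn * vln2_lo + vt;

  /* Degree-5 polynomial for exp(t); Taylor coefficients here, minimax in XNNPACK. */
  float vp = 1.0f / 120.0f;
  vp = vt * vp + 1.0f / 24.0f;
  vp = vt * vp + 1.0f / 6.0f;
  vp = vt * vp + 0.5f;
  vp = vt * vp + 1.0f;

  /* e ~= exp(-|x|) = 2^n * (1 + t*p(t)); sigmoid(-|x|) = e / (e + 1). */
  vt *= vs;
  const float ve = vt * vp + vs;
  float vf = ve / (ve + 1.0f);

  if (vz > vdenorm_cutoff) {
    vf = 0.0f;  /* exp(-|x|) underflows: sigmoid(-|x|) ~= 0 */
  }
  return (x > 0.0f) ? 1.0f - vf : vf;           /* sigmoid(x) = 1 - sigmoid(-x) */
}

The x2/x4 suffix in the file names is the unroll factor: the same body is repeated for vz0, vz1, ... within one loop iteration, which is why every file lands on the same four reference lines for vz0.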
D | avx2-rr1-p5-div-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16():
     55  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     64  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     77  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    113  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr1fma-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16():
     55  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     64  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     77  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    123  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-div-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x24():
     56  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     66  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     82  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    127  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr2fma-x16.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16():
     55  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     64  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     77  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    125  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr1fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24():
     56  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     66  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     82  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    139  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-div-x32.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32():
     57  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     68  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     87  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    141  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr2fma-x24.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24():
     56  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     66  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     82  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    142  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-div-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x40():
     58  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     70  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     92  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    155  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr1fma-x32.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32():
     57  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     68  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     87  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    155  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr2fma-x32.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32():
     57  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     68  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     87  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    159  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr2fma-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40():
     58  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     70  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     92  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    176  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr1fma-x40.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40():
     58  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     70  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     92  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    171  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-div-x48.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x48():
     59  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     72  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     97  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    169  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr1fma-x48.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48():
     59  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     72  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     97  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    187  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-div-x56.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56():
     60  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     74  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
    102  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    183  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr2fma-x48.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48():
     59  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     72  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
     97  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    193  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-div-x64.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64():
     61  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     76  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
    107  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    197  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-nr1fma-x56.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56():
     60  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     74  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
    102  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    203  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
D | avx2-rr1-p5-div-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80():
     63  const __m256 vz0 = _mm256_or_ps(vx0, vsign_mask);  (local)
     80  __m256 vn0 = _mm256_fmadd_ps(vz0, vlog2e, vmagic_bias);
    117  __m256 vt0 = _mm256_fmadd_ps(vn0, vminus_ln2, vz0);
    225  vf0 = _mm256_andnot_ps(_mm256_cmp_ps(vz0, vdenorm_cutoff, _CMP_LT_OS), vf0);
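The AVX2 references are the vectorized form of the same recipe, with two differences visible in the listed lines: vz is built as -|x| by OR-ing in the sign bit rather than with fabsf(), and the reduction is "rr1", a single FMA against one ln(2) constant instead of the hi/lo pair. The _mm256_andnot_ps lines zero every lane whose vz falls below the (negative) denormal cutoff. A hedged one-vector sketch of the div flavor follows; the nr1fma/nr2fma files differ only in replacing the division with one or two Newton-Raphson reciprocal steps, and the x16..x80 suffix is again the unroll factor (2 to 10 vectors of 8 floats). The function name and constants below are illustrative, not copied from the generated sources; build with -mavx2 -mfma.

#include <immintrin.h>

/*
 * Single-vector sketch of the avx2-rr1-p5-div pattern referenced above.
 * The generated kernels load their constants from a shared params struct
 * and use minimax polynomial coefficients; plain Taylor terms are used here.
 */
static __m256 sigmoid_avx2_rr1_p5_div_sketch(__m256 vx) {
  const __m256 vsign_mask     = _mm256_set1_ps(-0.0f);
  const __m256 vmagic_bias    = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vlog2e         = _mm256_set1_ps(0x1.715476p+0f);
  const __m256 vminus_ln2     = _mm256_set1_ps(-0x1.62E430p-1f); /* rr1: one ln(2) constant */
  const __m256 vone           = _mm256_set1_ps(1.0f);
  const __m256 vdenorm_cutoff = _mm256_set1_ps(-0x1.5D589Ep+6f);
  const __m256 vc5 = _mm256_set1_ps(1.0f / 120.0f);
  const __m256 vc4 = _mm256_set1_ps(1.0f / 24.0f);
  const __m256 vc3 = _mm256_set1_ps(1.0f / 6.0f);
  const __m256 vc2 = _mm256_set1_ps(0.5f);
  const __m256 vc1 = _mm256_set1_ps(1.0f);

  const __m256 vz = _mm256_or_ps(vx, vsign_mask);           /* vz = -|x|            */
  __m256 vn = _mm256_fmadd_ps(vz, vlog2e, vmagic_bias);     /* n = round(z*log2(e)) */
  const __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vn), 23));      /* vs = 2^n             */
  vn = _mm256_sub_ps(vn, vmagic_bias);
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);          /* t = z - n*ln(2)      */

  __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);                /* degree-5 polynomial  */
  vp = _mm256_fmadd_ps(vp, vt, vc3);
  vp = _mm256_fmadd_ps(vp, vt, vc2);
  vp = _mm256_fmadd_ps(vp, vt, vc1);

  vt = _mm256_mul_ps(vt, vs);
  const __m256 ve = _mm256_fmadd_ps(vt, vp, vs);            /* e ~= exp(-|x|)            */
  const __m256 vd = _mm256_add_ps(ve, vone);
  __m256 vf = _mm256_div_ps(ve, vd);                        /* sigmoid(-|x|) = e/(e + 1) */

  /* Zero lanes whose exp underflows (vz < cutoff), as in the referenced andnot lines. */
  vf = _mm256_andnot_ps(_mm256_cmp_ps(vz, vdenorm_cutoff, _CMP_LT_OS), vf);
  /* Reflect for positive inputs: sigmoid(x) = 1 - sigmoid(-x). */
  return _mm256_blendv_ps(_mm256_sub_ps(vone, vf), vf, vx);
}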