/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-p5-x4.c | all in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4():
      27   const float vmagic_bias = 0x1.8000FEp23f;   (local)
      61   float vn0 = vx0 * vlog2e + vmagic_bias;
      62   float vn1 = vx1 * vlog2e + vmagic_bias;
      63   float vn2 = vx2 * vlog2e + vmagic_bias;
      64   float vn3 = vx3 * vlog2e + vmagic_bias;
      74   vn0 -= vmagic_bias;
      75   vn1 -= vmagic_bias;
      76   vn2 -= vmagic_bias;
      77   vn3 -= vmagic_bias;
     168   float vn = vx * vlog2e + vmagic_bias;
     [all …]
|
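Every kernel in this listing leans on the same "magic bias" idea: adding 1.5*2^23 (sometimes with a few extra low bits set) to a product of moderate magnitude forces the hardware to round it to an integer, which then sits in the low mantissa bits of the sum; subtracting the bias back yields that integer as a float. The sketch below shows the trick in isolation for a scalar expf; it is illustrative only (Taylor coefficients, a single ln(2) constant) and is not the generated XNNPACK code.

    #include <stdint.h>
    #include <string.h>

    /* Minimal scalar sketch of the magic-bias expf reduction; illustrative
       only, not the generated kernel code. */
    static float expf_sketch(float x) {
      /* The low mantissa bits of this particular bias hold the IEEE exponent
         bias 127, so shifting the biased bits left by 23 yields 2^n directly. */
      const float vmagic_bias = 0x1.8000FEp23f;
      const float vlog2e = 0x1.715476p+0f;       /* log2(e) */
      const float vminus_ln2 = -0x1.62E430p-1f;  /* -ln(2); real kernels split this hi/lo */

      float vn = x * vlog2e + vmagic_bias;       /* low bits now hold n = round(x*log2(e)) */

      uint32_t vn_bits;
      memcpy(&vn_bits, &vn, sizeof vn_bits);
      uint32_t vs_bits = vn_bits << 23;          /* bit pattern of 2^n while 2^n is normal */
      float vs;
      memcpy(&vs, &vs_bits, sizeof vs);

      vn -= vmagic_bias;                         /* n as an ordinary float */
      float vt = vn * vminus_ln2 + x;            /* reduced argument r = x - n*ln(2) */

      /* exp(r) ~= 1 + r*p(r); Taylor coefficients here for illustration,
         the p5 kernels use tuned degree-5 coefficients instead. */
      float vp = 1.0f / 120.0f;
      vp = vp * vt + 1.0f / 24.0f;
      vp = vp * vt + 1.0f / 6.0f;
      vp = vp * vt + 0.5f;
      vp = vp * vt + 1.0f;
      return vs + vs * vt * vp;                  /* 2^n * exp(r) ~= exp(x) */
    }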
D | scalar-lut64-p2-x4-acc2.c | all in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2():
      30   const float vmagic_bias = 0x1.800000p23f;   (local)
      65   float vn0 = vx0 * vlog2e_x64 + vmagic_bias;
      66   float vn1 = vx1 * vlog2e_x64 + vmagic_bias;
      67   float vn2 = vx2 * vlog2e_x64 + vmagic_bias;
      68   float vn3 = vx3 * vlog2e_x64 + vmagic_bias;
      97   vn0 -= vmagic_bias;
      98   vn1 -= vmagic_bias;
      99   vn2 -= vmagic_bias;
     100   vn3 -= vmagic_bias;
     181   float vn = vx * vlog2e_x64 + vmagic_bias;
     [all …]
|
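The lut64-p2 variants use the plain bias 0x1.800000p23f and fold a factor of 64 into the log2(e) constant, so the rounded integer splits into a 6-bit table index (selecting 2^(k/64)) and an integer exponent. A hedged sketch of that split, with exp2f() standing in for the kernels' precomputed 64-entry table; it follows the same scheme but is not the generated code.

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    /* Hedged sketch of a lut64-p2 style reduction, not the generated kernel. */
    static float expf_lut64_sketch(float x) {
      const float vmagic_bias = 0x1.800000p23f;      /* 1.5 * 2^23 */
      const float vlog2e_x64 = 0x1.715476p+6f;       /* 64 * log2(e) */
      const float vminus_ln2_o64 = -0x1.62E430p-7f;  /* -ln(2)/64 */

      float vn = x * vlog2e_x64 + vmagic_bias;

      /* While vn stays in [2^23, 2^24), bits(vn) - bits(bias) equals
         q = round(64 * x * log2(e)) exactly. */
      uint32_t vn_bits, vb_bits;
      memcpy(&vn_bits, &vn, sizeof vn_bits);
      memcpy(&vb_bits, &vmagic_bias, sizeof vb_bits);
      const int32_t q = (int32_t) (vn_bits - vb_bits);

      const uint32_t k = (uint32_t) q & 63;          /* table index: 2^(k/64) */
      const int32_t  m = (q - (int32_t) k) / 64;     /* integer exponent part */

      vn -= vmagic_bias;                             /* q as a float */
      const float vt = vn * vminus_ln2_o64 + x;      /* r = x - q*ln(2)/64 */

      /* |r| <= ln(2)/128, so a degree-2 polynomial suffices. */
      const float vp = (0.5f * vt + 1.0f) * vt;      /* exp(r) - 1 ~= r + r^2/2 */
      const float vs = ldexpf(exp2f((float) k / 64.0f), m);  /* 2^m * 2^(k/64) */
      return vs + vs * vp;
    }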
D | scalar-p5-x4-acc2.c | all in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc2():
      27   const float vmagic_bias = 0x1.8000FEp23f;   (local)
      62   float vn0 = vx0 * vlog2e + vmagic_bias;
      63   float vn1 = vx1 * vlog2e + vmagic_bias;
      64   float vn2 = vx2 * vlog2e + vmagic_bias;
      65   float vn3 = vx3 * vlog2e + vmagic_bias;
      75   vn0 -= vmagic_bias;
      76   vn1 -= vmagic_bias;
      77   vn2 -= vmagic_bias;
      78   vn3 -= vmagic_bias;
     171   float vn = vx * vlog2e + vmagic_bias;
     [all …]
|
D | scalar-lut64-p2-x4.c | all in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4():
      30   const float vmagic_bias = 0x1.800000p23f;   (local)
      64   float vn0 = vx0 * vlog2e_x64 + vmagic_bias;
      65   float vn1 = vx1 * vlog2e_x64 + vmagic_bias;
      66   float vn2 = vx2 * vlog2e_x64 + vmagic_bias;
      67   float vn3 = vx3 * vlog2e_x64 + vmagic_bias;
      96   vn0 -= vmagic_bias;
      97   vn1 -= vmagic_bias;
      98   vn2 -= vmagic_bias;
      99   vn3 -= vmagic_bias;
     178   float vn = vx * vlog2e_x64 + vmagic_bias;
     [all …]
|
D | scalar-p5-x4-acc4.c | all in xnn_f32_raddstoreexpminusmax_ukernel__scalar_p5_x4_acc4():
      27   const float vmagic_bias = 0x1.8000FEp23f;   (local)
      64   float vn0 = vx0 * vlog2e + vmagic_bias;
      65   float vn1 = vx1 * vlog2e + vmagic_bias;
      66   float vn2 = vx2 * vlog2e + vmagic_bias;
      67   float vn3 = vx3 * vlog2e + vmagic_bias;
      77   vn0 -= vmagic_bias;
      78   vn1 -= vmagic_bias;
      79   vn2 -= vmagic_bias;
      80   vn3 -= vmagic_bias;
     175   float vn = vx * vlog2e + vmagic_bias;
     [all …]
|
D | scalar-lut64-p2-x4-acc4.c | all in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4():
      30   const float vmagic_bias = 0x1.800000p23f;   (local)
      67   float vn0 = vx0 * vlog2e_x64 + vmagic_bias;
      68   float vn1 = vx1 * vlog2e_x64 + vmagic_bias;
      69   float vn2 = vx2 * vlog2e_x64 + vmagic_bias;
      70   float vn3 = vx3 * vlog2e_x64 + vmagic_bias;
      99   vn0 -= vmagic_bias;
     100   vn1 -= vmagic_bias;
     101   vn2 -= vmagic_bias;
     102   vn3 -= vmagic_bias;
     185   float vn = vx * vlog2e_x64 + vmagic_bias;
     [all …]
|
D | avx2-p5-x96-acc2.c | all in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2():
      28   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      76   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      77   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      78   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      79   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      80   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      81   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      82   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      83   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      84   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x96-acc3.c | all in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3():
      28   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      77   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      78   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      79   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      80   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      81   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      82   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      83   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      84   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      85   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x96-acc6.c | all in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6():
      28   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      80   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      81   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      82   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      83   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      84   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      85   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      86   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      87   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      88   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x96.c | all in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96():
      28   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      75   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      76   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      77   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      78   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      79   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      80   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      81   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      82   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      83   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
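The avx2-p5 entries above are the eight-lane form of the same rounding step, with the multiply and the bias add fused into one _mm256_fmadd_ps. A minimal sketch of just that step in the same spirit as the generated kernels (it reuses the observation from the scalar sketch that the low mantissa bits of 0x1.8000FEp23f carry the IEEE exponent bias, so a left shift by 23 materializes 2^n); not the full kernel:

    #include <immintrin.h>

    /* AVX2 sketch of the rounding step only (compile with FMA enabled);
       not the generated kernel code. */
    static inline __m256 round_and_scale_sketch(__m256 vx, __m256 *out_vs) {
      const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
      const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f);

      /* One FMA per lane: n = round(x * log2(e)), still carrying the bias. */
      __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);

      /* Shift the biased bits left by 23 to obtain the 2^n bit pattern per lane. */
      *out_vs = _mm256_castsi256_ps(
          _mm256_slli_epi32(_mm256_castps_si256(vn), 23));

      /* Remove the bias to recover n as ordinary floats. */
      return _mm256_sub_ps(vn, vmagic_bias);
    }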
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | scalar-lut2048-p1-div-x4.c | all in xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4():
      30   const float vmagic_bias = 0x1.800000p23f;   (local)
      70   float vn0 = vz0 * vminus_log2e_x2048 + vmagic_bias;
      71   float vn1 = vz1 * vminus_log2e_x2048 + vmagic_bias;
      72   float vn2 = vz2 * vminus_log2e_x2048 + vmagic_bias;
      73   float vn3 = vz3 * vminus_log2e_x2048 + vmagic_bias;
     102   vn0 -= vmagic_bias;
     103   vn1 -= vmagic_bias;
     104   vn2 -= vmagic_bias;
     105   vn3 -= vmagic_bias;
     196   float vn = vz * vminus_log2e_x2048 + vmagic_bias;
     [all …]
|
D | scalar-p5-div-x4.c | all in xnn_f32_sigmoid_ukernel__scalar_p5_div_x4():
      27   const float vmagic_bias = 0x1.8000FEp23f;   (local)
      68   float vn0 = vz0 * vminus_log2e + vmagic_bias;
      69   float vn1 = vz1 * vminus_log2e + vmagic_bias;
      70   float vn2 = vz2 * vminus_log2e + vmagic_bias;
      71   float vn3 = vz3 * vminus_log2e + vmagic_bias;
      81   vn0 -= vmagic_bias;
      82   vn1 -= vmagic_bias;
      83   vn2 -= vmagic_bias;
      84   vn3 -= vmagic_bias;
     194   float vn = vz * vminus_log2e + vmagic_bias;
     [all …]
|
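The f32-sigmoid kernels apply the same exp reduction to a negated argument (hence the vminus_log2e constants in the matches above) and finish with a division. A rough sketch of that outer structure, with plain expf() standing in for the inlined magic-bias exp evaluation shown earlier:

    #include <math.h>

    /* Rough sketch of the sigmoid kernels' outer structure; illustrative only. */
    static float sigmoid_sketch(float x) {
      const float vz = fabsf(x);
      const float ve = expf(-vz);     /* exp of a non-positive argument */
      float vf = ve / (ve + 1.0f);    /* sigmoid(-|x|) = e^-|x| / (1 + e^-|x|) */
      if (x > 0.0f) {
        vf = 1.0f - vf;               /* sigmoid(x) = 1 - sigmoid(-x) */
      }
      return vf;
    }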
D | scalar-lut64-p2-div-x4.c | all in xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4():
      30   const float vmagic_bias = 0x1.800000p23f;   (local)
      70   float vn0 = vz0 * vminus_log2e_x64 + vmagic_bias;
      71   float vn1 = vz1 * vminus_log2e_x64 + vmagic_bias;
      72   float vn2 = vz2 * vminus_log2e_x64 + vmagic_bias;
      73   float vn3 = vz3 * vminus_log2e_x64 + vmagic_bias;
     102   vn0 -= vmagic_bias;
     103   vn1 -= vmagic_bias;
     104   vn2 -= vmagic_bias;
     105   vn3 -= vmagic_bias;
     202   float vn = vz * vminus_log2e_x64 + vmagic_bias;
     [all …]
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x88.c | all in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88():
      29   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      74   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      75   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      76   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      77   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      78   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      79   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      80   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      81   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      82   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x96.c | all in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96():
      29   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      76   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      77   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      78   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      79   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      80   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      81   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      82   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      83   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      84   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x80.c | all in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
      29   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      72   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      73   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      74   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      75   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      76   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      77   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      78   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      79   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      80   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x72.c | all in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
      29   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      70   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      71   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      72   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      73   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      74   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      75   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      76   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      77   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      78   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x96-acc3.c | all in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3():
      27   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      76   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      77   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      78   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      79   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      80   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      81   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      82   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      83   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      84   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x96-acc6.c | all in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6():
      27   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      79   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      80   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      81   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      82   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      83   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      84   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      85   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      86   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      87   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x96.c | all in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96():
      27   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      74   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      75   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      76   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      77   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      78   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      79   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      80   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      81   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      82   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x96-acc2.c | all in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2():
      27   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      75   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      76   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      77   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      78   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      79   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      80   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      81   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      82   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      83   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x80.c | all in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
      27   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      70   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      71   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      72   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      73   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      74   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      75   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      76   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      77   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      78   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x80-acc2.c | all in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
      27   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      71   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      72   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      73   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      74   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      75   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      76   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      77   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      78   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      79   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x72.c | all in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
      27   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      68   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      69   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      70   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      71   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      72   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      73   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      74   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      75   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      76   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|
D | avx2-p5-x72-acc3.c | all in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
      27   const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);   (local)
      70   __m256 vn0 = _mm256_fmadd_ps(vx0, vlog2e, vmagic_bias);
      71   __m256 vn1 = _mm256_fmadd_ps(vx1, vlog2e, vmagic_bias);
      72   __m256 vn2 = _mm256_fmadd_ps(vx2, vlog2e, vmagic_bias);
      73   __m256 vn3 = _mm256_fmadd_ps(vx3, vlog2e, vmagic_bias);
      74   __m256 vn4 = _mm256_fmadd_ps(vx4, vlog2e, vmagic_bias);
      75   __m256 vn5 = _mm256_fmadd_ps(vx5, vlog2e, vmagic_bias);
      76   __m256 vn6 = _mm256_fmadd_ps(vx6, vlog2e, vmagic_bias);
      77   __m256 vn7 = _mm256_fmadd_ps(vx7, vlog2e, vmagic_bias);
      78   __m256 vn8 = _mm256_fmadd_ps(vx8, vlog2e, vmagic_bias);
      [all …]
|