/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx2-p5-x56.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x56():
      101  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      109  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      118  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      126  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      134  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      154  vt6 = _mm256_mul_ps(vt6, vs6);
      162  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);

D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112():
       78  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       86  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
       95  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      103  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      119  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx2-p5-x64.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
      106  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      115  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      125  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      134  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      143  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      152  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      165  vt6 = _mm256_mul_ps(vt6, vs6);
      174  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);

D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128():
       81  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       90  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx2-p5-x72.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
      111  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      121  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      132  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      152  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      162  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      176  vt6 = _mm256_mul_ps(vt6, vs6);
      186  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);

D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144():
       84  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       94  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      105  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      125  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      145  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
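Note: every match above is the same lane (index 6) of one unrolled step of the shared p5 exp core: reduce x - max against ln2 split into hi/lo halves, evaluate a degree-5 polynomial in t with Horner FMA steps, then reconstruct with s = 2^n. A minimal scalar sketch of one lane, assuming round-to-nearest and Taylor stand-ins for the coefficients (names mirror the vector code; the values are not the kernels' exact minimax constants):

    #include <math.h>

    /* Scalar model of one lane of the p5 kernels above. vn/vt/vp/vs and
     * minus_ln2_hi/lo mirror the vector names; c1..c5 are Taylor stand-ins. */
    static float scale_exp_minus_max(float x, float x_max, float scale) {
      const float log2e        = 0x1.715476p+0f;
      const float minus_ln2_hi = -0x1.62E430p-1f;  /* -ln(2), split in two...  */
      const float minus_ln2_lo = 0x1.05C610p-29f;  /* ...for extra precision   */
      const float c1 = 1.0f, c2 = 1.0f/2.0f, c3 = 1.0f/6.0f,
                  c4 = 1.0f/24.0f, c5 = 1.0f/120.0f;

      const float vx = x - x_max;            /* softmax-style shift by the max */
      const float vn = rintf(vx * log2e);    /* n = round(x / ln2)             */
      const float vs = exp2f(vn);            /* s = 2^n (exponent bits in SIMD)*/
      float vt = fmaf(vn, minus_ln2_hi, vx); /* t = x - n*ln2, high part...    */
      vt = fmaf(vn, minus_ln2_lo, vt);       /* ...plus low-order correction   */
      float vp = fmaf(c5, vt, c4);           /* Horner steps, one per FMA above*/
      vp = fmaf(vp, vt, c3);
      vp = fmaf(vp, vt, c2);
      vp = fmaf(vp, vt, c1);
      vt *= vs;                              /* t := t*s                       */
      const float vf = fmaf(vt, vp, vs);     /* f = s + s*t*p(t)  ~  e^(x-max) */
      return vf * scale;
    }

The avx512f scalef variants run the Horner chain one step further, down to vc0, and reconstruct with a scalef instruction instead of the vt*vs / fmadd tail (see the note after the vscaleextexp group).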
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112():
       70  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       78  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
       87  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
       95  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      103  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      119  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
       72  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       81  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
       91  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      100  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
       74  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       84  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
       95  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      105  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      125  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx2-p5-x56.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56():
       76  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       84  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
       93  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      101  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      109  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      117  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      125  vp6 = _mm256_fmadd_ps(vp6, vt6, vc0);
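Note: every vscaleextexp match ends the Horner chain at vc0, and in the avx512f scalef kernels the value 2^vn * p(vt) is then formed with VSCALEFPS rather than by assembling exponent bits. A one-line sketch of that reconstruction step, with a hypothetical wrapper name:

    #include <immintrin.h>

    /* vp6 * 2^vn6 in a single instruction; vn6 is already integral from the
     * rounding step, so the floor scalef applies to its second operand is a
     * no-op here. Hypothetical wrapper name, not an XNNPACK function. */
    static inline __m512 reconstruct(__m512 vp6, __m512 vn6) {
      return _mm512_scalef_ps(vp6, vn6);
    }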
/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx2-p5-x64-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc2():
      105  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      114  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      124  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      133  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      151  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      164  vt6 = _mm256_mul_ps(vt6, vs6);
      173  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);

D | avx2-p5-x64.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64():
      104  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      113  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      123  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      132  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      141  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      150  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      163  vt6 = _mm256_mul_ps(vt6, vs6);
      172  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);

D | avx2-p5-x64-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x64_acc4():
      107  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      116  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      126  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      135  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      144  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      153  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      166  vt6 = _mm256_mul_ps(vt6, vs6);
      175  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);

D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2():
       81  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       90  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx512f-p5-scalef-x128-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4():
       83  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       92  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      102  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      129  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      138  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx512f-p5-scalef-x128.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128():
       80  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       89  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
       99  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      108  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      117  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      126  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx2-p5-x72-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72_acc3():
      111  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      121  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      132  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      142  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      152  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      162  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      176  vt6 = _mm256_mul_ps(vt6, vs6);
      186  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);

D | avx2-p5-x72.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x72():
      109  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      119  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      130  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      140  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      150  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      160  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      174  vt6 = _mm256_mul_ps(vt6, vs6);
      184  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);

D | avx512f-p5-scalef-x144-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
       85  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       95  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      106  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      116  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      126  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      146  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx512f-p5-scalef-x144.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144():
       83  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       93  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      104  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      114  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      124  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      134  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      144  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx2-p5-x80-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
      115  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
      126  vt6 = _mm256_fmadd_ps(vn6, vminus_ln2_lo, vt6);
      138  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);
      149  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      160  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      171  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
      186  vt6 = _mm256_mul_ps(vt6, vs6);
      197  __m256 vf6 = _mm256_fmadd_ps(vt6, vp6, vs6);
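Note: the _acc2/_acc3/_acc4 suffixes give the number of independent vector accumulators the sum of exponentials is split across; shorter add dependency chains improve throughput and rounding behavior, and the partials are combined once at the end. A minimal scalar sketch of the accumulator pattern, using libm expf as a stand-in for the p5 body listed above:

    #include <math.h>
    #include <stddef.h>

    /* Four partial sums, as in the _acc4 variant; expf stands in for the
     * vectorized exp(x - max) evaluation. */
    static float raddexpminusmax(const float* x, size_t n, float x_max) {
      float acc[4] = {0.0f, 0.0f, 0.0f, 0.0f};
      size_t i = 0;
      for (; i + 4 <= n; i += 4) {
        acc[0] += expf(x[i + 0] - x_max);   /* independent chains: the adds */
        acc[1] += expf(x[i + 1] - x_max);   /* into acc[0..3] can overlap   */
        acc[2] += expf(x[i + 2] - x_max);
        acc[3] += expf(x[i + 3] - x_max);
      }
      for (; i < n; i++) acc[0] += expf(x[i] - x_max);  /* remainder */
      return (acc[0] + acc[1]) + (acc[2] + acc[3]);     /* combine once */
    }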
/external/XNNPACK/src/f32-velu/gen/

D | velu-avx2-rr1-p6-x56.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x56():
       86  __m256 vt6 = _mm256_fmadd_ps(vn6, vminus_ln2, vz6);   (local)
       94  __m256 vp6 = _mm256_fmadd_ps(vc6, vt6, vc5);
      102  vp6 = _mm256_fmadd_ps(vp6, vt6, vc4);
      110  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
      118  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
      132  vp6 = _mm256_mul_ps(vp6, vt6);
      133  vt6 = _mm256_mul_ps(vt6, vs6);
      148  vp6 = _mm256_fmadd_ps(vp6, vt6, vt6);

D | velu-avx512f-rr1-p6-x112.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112():
       88  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2, vz6);   (local)
       96  __m512 vp6 = _mm512_fmadd_ps(vc6, vt6, vc5);
      104  vp6 = _mm512_fmadd_ps(vp6, vt6, vc4);
      112  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      134  vp6 = _mm512_mul_ps(vp6, vt6);
      135  vt6 = _mm512_mul_ps(vt6, vs6);
      151  vp6 = _mm512_fmadd_ps(vp6, vt6, vt6);
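Note: the velu kernels reuse the same machinery with a single-constant range reduction (rr1: one vminus_ln2 instead of the hi/lo pair) and a degree-6 polynomial (p6), with the tail rearranged (the mul/mul/fmadd trio above) to produce expm1 rather than exp. A scalar sketch of the negative branch, assuming the standard ELU form alpha*(e^x - 1) and Taylor stand-ins for c2..c6 (the kernels use minimax constants):

    #include <math.h>

    /* Scalar model of one lane of the rr1 p6 ELU kernels, negative branch. */
    static float elu_lane(float x, float alpha) {
      if (x >= 0.0f) return x;
      const float log2e     = 0x1.715476p+0f;
      const float minus_ln2 = -0x1.62E430p-1f;          /* rr1: one constant */
      const float c2 = 1.0f/2.0f,  c3 = 1.0f/6.0f, c4 = 1.0f/24.0f,
                  c5 = 1.0f/120.0f, c6 = 1.0f/720.0f;   /* Taylor stand-ins  */

      const float vn = rintf(x * log2e);
      const float vs = exp2f(vn);           /* s = 2^n                       */
      float vt = fmaf(vn, minus_ln2, x);    /* t = x - n*ln2                 */
      float vp = fmaf(c6, vt, c5);          /* degree-6 Horner chain         */
      vp = fmaf(vp, vt, c4);
      vp = fmaf(vp, vt, c3);
      vp = fmaf(vp, vt, c2);
      vp = vp * vt;                         /* p := p*t  } the mul/mul/fmadd */
      vt = vt * vs;                         /* t := t*s  } tail above        */
      vp = fmaf(vp, vt, vt);                /* p = s*(e^t - 1)               */
      return alpha * (vp + (vs - 1.0f));    /* alpha * expm1(x)              */
    }

For very negative x, vs underflows to zero and the result converges to -alpha, which is the correct ELU limit.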
/external/XNNPACK/src/f32-raddextexp/gen/

D | avx512f-p5-scalef-x128.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128():
       72  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       81  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
       91  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      100  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);

D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2():
       74  __m512 vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_hi, vx6);   (local)
       83  vt6 = _mm512_fmadd_ps(vn6, vminus_ln2_lo, vt6);
       93  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
      102  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
      111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
      120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
      129  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
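Note: unlike the raddexpminusmax kernels, the raddextexp kernels do not subtract a running maximum first; the sum is carried in an extended format so that e^x never has to fit into a single float. A minimal scalar sketch of that idea, assuming a (mantissa, exponent) pair with hypothetical names; the real kernels keep both parts vectorized:

    #include <math.h>

    typedef struct { float m; float e; } extfloat;   /* value = m * 2^e */

    /* Fold one term p * 2^n into the accumulator: rescale both sides to the
     * larger exponent, then add mantissas. Terms far below the running
     * maximum underflow to zero in the rescale, which is the desired
     * behavior. Hypothetical names, not the XNNPACK API. */
    static extfloat extexp_add(extfloat acc, float p, float n) {
      const float e = fmaxf(acc.e, n);
      acc.m = acc.m * exp2f(acc.e - e) + p * exp2f(n - e);
      acc.e = e;
      return acc;
    }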