/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x88.c | 125 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() local 137 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 150 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 162 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 174 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 186 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 202 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 214 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
|
D | avx512f-p5-scalef-x176.c | 94 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() local 106 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 119 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 131 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 143 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 155 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176()
|
D | avx2-p5-x96.c | 130 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local 143 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 157 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 170 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 183 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 196 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 213 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 226 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
|
D | avx512f-p5-scalef-x192.c | 97 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local 110 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 124 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 176 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x176.c | 82 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local 94 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 107 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 119 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 131 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 143 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 155 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
|
D | avx512f-p5-scalef-x192.c | 84 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local 97 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 111 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 124 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x96-acc3.c | 130 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() local 143 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 157 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 170 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 183 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 196 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 213 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 226 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc6.c | 133 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() local 146 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 160 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 173 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 186 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 199 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 216 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 229 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96.c | 128 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() local 141 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 155 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 168 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 181 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 194 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 211 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 224 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
|
D | avx2-p5-x96-acc2.c | 129 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() local 142 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 156 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 169 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 182 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 195 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 212 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 225 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
|
D | avx512f-p5-scalef-x192-acc6.c | 101 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 114 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 128 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 141 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 154 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 180 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx512f-p5-scalef-x192-acc3.c | 98 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 111 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 125 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 138 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 151 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 164 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 177 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192.c | 96 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local 109 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 123 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 136 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 149 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 162 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 175 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc2.c | 97 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 110 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 124 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 176 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-p5-x96-acc2.c | 130 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() local 143 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 157 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 170 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 183 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 196 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 213 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2() 226 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96-acc3.c | 131 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() local 144 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() 158 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() 171 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() 184 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() 197 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() 214 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3() 227 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc6.c | 134 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() local 147 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 161 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 174 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 187 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 200 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 217 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 230 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96.c | 129 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() local 142 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 156 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 169 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 182 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 195 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 212 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96() 225 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96()
|
D | avx512f-p5-scalef-x192.c | 97 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() local 110 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 124 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 176 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc3.c | 99 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 112 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 126 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 139 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 152 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 165 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 178 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192-acc2.c | 98 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 111 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 125 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 138 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 151 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 164 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 177 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc6.c | 102 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 115 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 129 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 142 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 155 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 168 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 181 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 84 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local 97 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 111 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 124 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc2.c | 86 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local 99 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 113 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 126 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 139 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 152 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 165 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc3.c | 88 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local 101 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 115 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 128 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 141 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 154 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
|