/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x88.c | 125 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() local 137 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 150 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 162 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 174 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 186 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 202 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 214 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
|
D | avx512f-p5-scalef-x176.c | 94 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() local 106 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 119 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 131 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 143 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 155 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176()
|
D | avx2-p5-x96.c | 130 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() local 143 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 157 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 170 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 183 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 196 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 213 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96() 226 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96()
|
D | avx512f-p5-scalef-x192.c | 97 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local 110 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 124 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 176 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x176.c | 82 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local 94 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 107 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 119 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 131 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 143 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 155 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
|
D | avx512f-p5-scalef-x192.c | 84 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local 97 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 111 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 124 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x88.c | 88 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() local 100 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 113 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 125 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 137 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 149 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 161 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
|
D | avx2-p5-x96.c | 90 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() local 103 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 117 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 130 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 143 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 156 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 169 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x96.c | 128 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() local 141 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 155 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 168 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 181 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 194 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 211 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 224 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
|
D | avx2-p5-x96-acc6.c | 133 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() local 146 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 160 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 173 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 186 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 199 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 216 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6() 229 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6()
|
D | avx2-p5-x96-acc2.c | 129 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() local 142 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 156 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 169 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 182 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 195 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 212 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2() 225 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96-acc3.c | 130 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() local 143 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 157 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 170 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 183 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 196 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 213 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3() 226 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3()
|
D | avx512f-p5-scalef-x192-acc2.c | 97 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 110 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 124 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 176 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc3.c | 98 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 111 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 125 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 138 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 151 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 164 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 177 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192.c | 96 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local 109 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 123 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 136 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 149 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 162 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 175 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc6.c | 101 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 114 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 128 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 141 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 154 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 180 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192-acc2.c | 86 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local 99 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 113 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 126 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 139 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 152 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 165 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192.c | 84 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local 97 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 111 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 124 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc6.c | 94 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() local 107 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 121 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 134 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 147 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 160 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 173 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx512f-p5-scalef-x192-acc3.c | 88 __m512 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local 101 vt10 = _mm512_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 115 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 128 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 141 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 154 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx2-p5-x96.c | 88 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_hi, vx10); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() local 101 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2_lo, vt10); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 115 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 128 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 141 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 154 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 167 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-rr1-p5-x96-acc2.c | 117 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() local 130 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() 143 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() 156 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() 169 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() 182 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() 195 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
|
D | avx2-rr1-p5-x96.c | 116 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96() local 129 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96() 142 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96() 155 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96() 168 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96() 181 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96() 194 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
|
D | avx2-rr1-p5-x96-acc3.c | 118 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3() local 131 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3() 144 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3() 157 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3() 170 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3() 183 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3() 196 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
|
D | avx2-rr1-p5-x96-acc6.c | 121 __m256 vt10 = _mm256_fmadd_ps(vn10, vminus_ln2, vx10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6() local 134 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6() 147 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6() 160 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6() 173 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6() 186 vt10 = _mm256_mul_ps(vt10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6() 199 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
|