/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x176.c | 119 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() local 131 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 143 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 155 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 182 __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176()
|
D | avx512f-p5-scalef-x192.c | 124 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 176 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 192 __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x88.c | 150 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() local 162 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 174 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 186 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88() 214 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x176.c | 107 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local 119 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 131 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 143 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 155 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 173 __m512 vf10 = _mm512_mul_ps(vp10, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()
|
D | avx512f-p5-scalef-x192.c | 111 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local 124 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 182 __m512 vf10 = _mm512_mul_ps(vp10, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx2-p5-x88.c | 113 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() local 125 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 137 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 149 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 161 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88() 179 __m256 vf10 = _mm256_mul_ps(vp10, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x88()
|
D | avx2-p5-x96.c | 117 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() local 130 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 143 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 156 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 169 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 188 __m256 vf10 = _mm256_mul_ps(vp10, vscalev); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc2.c | 124 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 176 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 192 const __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192.c | 123 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local 136 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 149 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 162 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 175 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 191 const __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc3.c | 125 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 138 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 151 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 164 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 177 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 193 const __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192-acc6.c | 128 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 141 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 154 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 180 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 196 const __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x96.c | 155 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() local 168 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 181 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 194 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96() 224 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc2.c | 125 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 138 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 151 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 164 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 177 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 193 const __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc3.c | 126 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 139 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 152 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 165 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 178 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 194 const __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192.c | 124 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() local 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 176 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 192 const __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc6.c | 129 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 142 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 155 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 168 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 181 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 197 const __m512 vf10 = _mm512_scalef_ps(vp10, vn10); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x96-acc6.c | 161 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() local 174 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 187 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 200 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6() 230 __m256 vf10 = _mm256_fmadd_ps(vt10, vp10, vs10); in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6()
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 111 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local 124 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 137 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 150 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 163 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 215 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp10, vdelta_e10)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()
|
D | avx512f-p5-scalef-x192-acc2.c | 113 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local 126 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 139 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 152 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 165 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 219 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp10, vdelta_e10)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x192-acc3.c | 115 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local 128 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 141 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 154 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 167 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 223 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp10, vdelta_e10)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx512f-p5-scalef-x192-acc6.c | 121 __m512 vp10 = _mm512_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() local 134 vp10 = _mm512_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 147 vp10 = _mm512_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 160 vp10 = _mm512_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 173 vp10 = _mm512_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 235 vaccv4 = _mm512_add_ps(vaccv4, _mm512_scalef_ps(vp10, vdelta_e10)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x96.c | 115 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() local 128 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 141 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 154 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 167 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96() 242 vaccv0 = _mm256_fmadd_ps(vp10, vs10, vaccv0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96()
|
D | avx2-p5-x96-acc3.c | 119 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() local 132 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 145 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 158 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 171 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 252 vaccv1 = _mm256_fmadd_ps(vp10, vs10, vaccv1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc2.c | 117 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() local 130 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 143 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 156 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 169 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 247 vaccv0 = _mm256_fmadd_ps(vp10, vs10, vaccv0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96-acc6.c | 125 __m256 vp10 = _mm256_fmadd_ps(vc5, vt10, vc4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() local 138 vp10 = _mm256_fmadd_ps(vp10, vt10, vc3); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 151 vp10 = _mm256_fmadd_ps(vp10, vt10, vc2); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 164 vp10 = _mm256_fmadd_ps(vp10, vt10, vc1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 177 vp10 = _mm256_fmadd_ps(vp10, vt10, vc0); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 267 vaccv4 = _mm256_fmadd_ps(vp10, vs10, vaccv4); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
|