/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx512f-p5-scalef-x80.c |
      83  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80() local
      89  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
      95  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
     101  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
     107  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()
     116  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x80()

D | avx512f-p5-scalef-x96.c |
      88  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96() local
      95  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
     102  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
     109  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
     116  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()
     126  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x96()

D | avx512f-p5-scalef-x112.c |
      93  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112() local
     101  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
     109  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
     117  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
     125  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()
     136  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112()

D | avx512f-p5-scalef-x128.c |
      98  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128() local
     107  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
     116  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
     125  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
     134  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
     146  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()

D | avx512f-p5-scalef-x144.c |
     103  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() local
     113  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
     123  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
     133  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
     143  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
     156  __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
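All of the hits above share one shape: vt4 is the range-reduced argument, vc5 through vc0 are the coefficients of a degree-5 polynomial approximation of exp(t), the chain of _mm512_fmadd_ps calls is a Horner evaluation, and _mm512_scalef_ps applies the 2^vn4 factor to the polynomial value vp4. The sketch below shows that pattern for a single __m512 vector; it is an illustration only, using plain Taylor coefficients and made-up names (exp_p5_scalef_sketch, vx, vn, vt) rather than XNNPACK's tuned minimax constants and multi-vector unrolling.

    #include <immintrin.h>

    // Single-vector sketch of the p5 + scalef pattern in the hits above.
    // Taylor coefficients are used for clarity; the generated kernels use
    // tuned minimax constants and a higher-precision ln(2) split.
    static __m512 exp_p5_scalef_sketch(__m512 vx) {
      const __m512 vlog2e     = _mm512_set1_ps(1.442695f);    // 1/ln(2)
      const __m512 vminus_ln2 = _mm512_set1_ps(-0.6931472f);  // -ln(2)
      const __m512 vc5 = _mm512_set1_ps(1.0f / 120.0f);
      const __m512 vc4 = _mm512_set1_ps(1.0f / 24.0f);
      const __m512 vc3 = _mm512_set1_ps(1.0f / 6.0f);
      const __m512 vc2 = _mm512_set1_ps(0.5f);
      const __m512 vc1 = _mm512_set1_ps(1.0f);
      const __m512 vc0 = _mm512_set1_ps(1.0f);

      // Range reduction: n = round(x / ln2), t = x - n * ln2.
      const __m512 vn = _mm512_roundscale_ps(
          _mm512_mul_ps(vx, vlog2e), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
      const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vx);

      // Degree-5 Horner evaluation: p ~= exp(t) on the reduced range.
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vc1);
      vp = _mm512_fmadd_ps(vp, vt, vc0);

      // exp(x) ~= p * 2^n; scalef applies the exponent directly, so 2^n never
      // has to be materialized as a separate float that could overflow.
      return _mm512_scalef_ps(vp, vn);
    }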
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x80.c |
      77  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80() local
      83  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
      89  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
      95  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
     101  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()
     113  __m512 vf4 = _mm512_mul_ps(vp4, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x80()

D | avx512f-p5-scalef-x96.c |
      81  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96() local
      88  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
      95  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
     102  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
     109  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()
     122  __m512 vf4 = _mm512_mul_ps(vp4, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x96()

D | avx2-p5-x40.c |
      83  __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40() local
      89  vp4 = _mm256_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
      95  vp4 = _mm256_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
     101  vp4 = _mm256_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
     107  vp4 = _mm256_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()
     119  __m256 vf4 = _mm256_mul_ps(vp4, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x40()

D | avx512f-p5-scalef-x112.c |
      85  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112() local
      93  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
     101  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
     109  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
     117  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()
     131  __m512 vf4 = _mm512_mul_ps(vp4, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112()

D | avx512f-p5-scalef-x128.c |
      89  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() local
      98  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
     107  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
     116  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
     125  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
     140  __m512 vf4 = _mm512_mul_ps(vp4, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()

D | avx2-p5-x48.c |
      87  __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48() local
      94  vp4 = _mm256_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
     101  vp4 = _mm256_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
     108  vp4 = _mm256_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
     115  vp4 = _mm256_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()
     128  __m256 vf4 = _mm256_mul_ps(vp4, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x48()

D | avx512f-p5-scalef-x144.c |
      93  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() local
     103  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
     113  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
     123  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
     133  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
     149  __m512 vf4 = _mm512_mul_ps(vp4, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()

D | avx2-p5-x56.c |
      91  __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56() local
      99  vp4 = _mm256_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
     107  vp4 = _mm256_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
     115  vp4 = _mm256_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
     123  vp4 = _mm256_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
     137  __m256 vf4 = _mm256_mul_ps(vp4, vscalev);  in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56()
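The f32-vscaleextexp hits end differently: vp4 is multiplied by the value part of an "extended exponent" scale (vscalev) instead of being scalef'ed right away, and the exponent parts are combined separately, so scale * exp(x) stays representable even when the scale alone would overflow or underflow a float. The avx2-p5 variants follow the same structure with __m256 vectors. A hedged sketch of that tail, with illustrative names (vscale_value, vscale_exp) rather than the actual XNNPACK parameter layout:

    #include <immintrin.h>

    // Sketch of the extended-exponent tail: the scale is carried as a
    // (value, exponent) pair, and the result is
    // (p * scale_value) * 2^(n + scale_exp), applied with a single scalef.
    static __m512 scaleextexp_tail_sketch(__m512 vp, __m512 vn,
                                          __m512 vscale_value, __m512 vscale_exp) {
      const __m512 vf = _mm512_mul_ps(vp, vscale_value);  // combine mantissa parts
      const __m512 ve = _mm512_add_ps(vn, vscale_exp);    // combine exponent parts
      return _mm512_scalef_ps(vf, ve);
    }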
/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx512f-p5-scalef-x128-acc2.c |
      98  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() local
     107  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
     116  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
     125  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
     134  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
     146  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()

D | avx512f-p5-scalef-x128.c |
      97  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128() local
     106  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
     115  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
     124  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
     133  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
     145  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()

D | avx512f-p5-scalef-x128-acc4.c |
     100  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() local
     109  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
     118  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
     127  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
     136  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
     148  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()

D | avx512f-p5-scalef-x144-acc3.c |
     104  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local
     114  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
     124  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
     134  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
     144  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
     157  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()

D | avx512f-p5-scalef-x144.c |
     102  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() local
     112  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
     122  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
     132  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
     142  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
     155  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
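The f32-raddexpminusmax kernels form the same vf4 = _mm512_scalef_ps(vp4, vn4) terms and then reduce them into one or more running sums; the -acc2/-acc3/-acc4 suffixes name the number of independent accumulators used to shorten the add dependency chain. A minimal single-accumulator sketch, reusing exp_p5_scalef_sketch from the earlier sketch and assuming the row maximum has already been subtracted and that batch is a multiple of 16:

    #include <immintrin.h>
    #include <stddef.h>

    // Sum of exp(x[i]) over the batch; assumes the row maximum was already
    // subtracted from x (so no term overflows) and batch % 16 == 0.
    static float raddexp_sketch(const float* x, size_t batch) {
      __m512 vacc = _mm512_setzero_ps();
      for (size_t i = 0; i < batch; i += 16) {
        const __m512 vf = exp_p5_scalef_sketch(_mm512_loadu_ps(x + i));
        vacc = _mm512_add_ps(vacc, vf);    // accumulate 16 lanes at a time
      }
      return _mm512_reduce_add_ps(vacc);   // horizontal sum to a scalar
    }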
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx512f-p5-scalef-x128-acc2.c |
      99  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() local
     108  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
     117  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
     126  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
     135  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
     147  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()

D | avx512f-p5-scalef-x128.c |
      98  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128() local
     107  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
     116  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
     125  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
     134  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
     146  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()

D | avx512f-p5-scalef-x144-acc3.c |
     105  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local
     115  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
     125  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
     135  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
     145  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
     158  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()

D | avx512f-p5-scalef-x128-acc4.c |
     101  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() local
     110  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
     119  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
     128  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
     137  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
     149  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()

D | avx512f-p5-scalef-x144.c |
     103  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() local
     113  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
     123  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
     133  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
     143  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
     156  const __m512 vf4 = _mm512_scalef_ps(vp4, vn4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
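The f32-raddstoreexpminusmax hits are the same computation again, but each vf4 is also written to the output buffer before being accumulated, i.e. one pass that produces both the softmax numerators and their sum. A sketch of that variant under the same assumptions as the radd sketch above:

    #include <immintrin.h>
    #include <stddef.h>

    // Writes exp(x[i]) to y[i] and returns the sum of all of them;
    // assumes the row maximum was already subtracted and batch % 16 == 0.
    static float raddstoreexp_sketch(const float* x, float* y, size_t batch) {
      __m512 vacc = _mm512_setzero_ps();
      for (size_t i = 0; i < batch; i += 16) {
        const __m512 vf = exp_p5_scalef_sketch(_mm512_loadu_ps(x + i));
        _mm512_storeu_ps(y + i, vf);       // store the numerator
        vacc = _mm512_add_ps(vacc, vf);    // and add it into the denominator
      }
      return _mm512_reduce_add_ps(vacc);
    }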
/external/XNNPACK/src/f32-raddextexp/gen/

D | avx512f-p5-scalef-x128.c |
      89  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() local
      98  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
     107  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
     116  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
     125  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
     165  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()

D | avx512f-p5-scalef-x128-acc2.c |
      91  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() local
     100  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
     109  vp4 = _mm512_fmadd_ps(vp4, vt4, vc2);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
     118  vp4 = _mm512_fmadd_ps(vp4, vt4, vc1);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
     127  vp4 = _mm512_fmadd_ps(vp4, vt4, vc0);  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
     169  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp4, vdelta_e4));  in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
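The f32-raddextexp hits accumulate without subtracting a maximum first: the running sum is kept as a (value, exponent) pair, and each new term p * 2^n is shifted by vdelta_e4 (the gap between its exponent and the accumulator's) before being added, so terms far below the current maximum flush toward zero instead of overflowing. The sketch below shows only that add step under the assumption that the accumulator exponent already dominates; a full kernel first takes max(acc_e, n), rescales both the accumulator and the new term to it, and updates acc_e. Names here are illustrative.

    #include <immintrin.h>

    // One accumulation step of the extended-exponent reduction, assuming
    // vacce >= vn for this term. The accumulator value represents
    // (partial sum) / 2^vacce.
    static __m512 raddextexp_step_sketch(__m512 vaccv, __m512 vacce,
                                         __m512 vp, __m512 vn) {
      const __m512 vdelta_e = _mm512_sub_ps(vn, vacce);  // <= 0 by assumption
      return _mm512_add_ps(vaccv, _mm512_scalef_ps(vp, vdelta_e));
    }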