/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x112():
     95  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    103  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    119  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    138  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128():
    100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    148  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144():
    105  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    125  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    145  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    158  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x160.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160():
    110  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    121  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    132  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    143  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    154  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    168  __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
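All four unroll widths above hit the same code: per 16-lane register, the kernel evaluates a degree-5 polynomial approximation of e^t by Horner's rule and folds the 2^n factor back in with _mm512_scalef_ps; vp6 is simply the accumulator for the seventh register. A minimal sketch of that per-register pattern, with plain Taylor coefficients standing in for the minimax constants the kernel generator embeds (names here are illustrative, not from the source):

    #include <immintrin.h>

    // Single-register sketch; Taylor coefficients approximate e^t on |t| <= ln2/2.
    static __m512 exp_p5_scalef(__m512 vx) {
      const __m512 vlog2e     = _mm512_set1_ps(1.442695f);   // log2(e)
      const __m512 vminus_ln2 = _mm512_set1_ps(-0.6931472f); // -ln(2)
      const __m512 vc5 = _mm512_set1_ps(1.0f / 120.0f);
      const __m512 vc4 = _mm512_set1_ps(1.0f / 24.0f);
      const __m512 vc3 = _mm512_set1_ps(1.0f / 6.0f);
      const __m512 vc2 = _mm512_set1_ps(0.5f);
      const __m512 vc1 = _mm512_set1_ps(1.0f);
      const __m512 vc0 = _mm512_set1_ps(1.0f);

      // Split x = n*ln2 + t with n integral and |t| <= ln2/2.
      const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e),
                                             _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
      const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vx);

      // Degree-5 Horner chain: the five vp6 FMAs listed above.
      __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_fmadd_ps(vp, vt, vc1);
      vp = _mm512_fmadd_ps(vp, vt, vc0);

      // exp(x) = e^t * 2^n, reconstructed in a single instruction.
      return _mm512_scalef_ps(vp, vn);
    }

Because scalef saturates cleanly to 0 and infinity for out-of-range exponents, the scalef-based kernels can avoid the clamping that bit-manipulation reconstructions of 2^n require.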
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x112.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x112():
     87  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
     95  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    103  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    119  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    133  __m512 vf6 = _mm512_mul_ps(vp6, vscalev);

D | avx512f-p5-scalef-x128.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
     91  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    100  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    142  __m512 vf6 = _mm512_mul_ps(vp6, vscalev);

D | avx512f-p5-scalef-x144.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144():
     95  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    105  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    125  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    151  __m512 vf6 = _mm512_mul_ps(vp6, vscalev);

D | avx2-p5-x56.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x56():
     93  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);  (local)
    101  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    109  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    117  vp6 = _mm256_fmadd_ps(vp6, vt6, vc1);
    125  vp6 = _mm256_fmadd_ps(vp6, vt6, vc0);
    139  __m256 vf6 = _mm256_mul_ps(vp6, vscalev);
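The vscaleextexp kernels run the same vp6 chain but stop short of reconstruction: the result stays as a (significand, exponent) pair, so it can survive scale factors whose product with 2^n would overflow or underflow a plain float. A minimal sketch of the final combine, assuming, as the listing suggests, that vscalev/vscalee carry the scale's significand and exponent:

    #include <immintrin.h>

    // Illustrative combine step: multiply significands, add exponents, and
    // apply scalef only once, at the store.
    static void scale_ext_exp(__m512 vp, __m512 vn,
                              __m512 vscalev, __m512 vscalee, float* output) {
      const __m512 vf = _mm512_mul_ps(vp, vscalev);  // combine significands
      const __m512 ve = _mm512_add_ps(vn, vscalee);  // combine exponents (as floats)
      _mm512_storeu_ps(output, _mm512_scalef_ps(vf, ve));
    }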
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2():
    101  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    110  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    119  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    128  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    137  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    149  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x128.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128():
    100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    148  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x144.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144():
    105  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    125  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    145  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    158  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x144-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
    107  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    117  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    137  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    147  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    160  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x128-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4():
    103  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    112  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    121  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    130  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    139  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    151  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
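These raddstoreexpminusmax kernels evaluate the identical vp6 polynomial but consume each vf6 twice, storing it and adding it to a running sum, which yields the softmax numerators and the denominator in one pass. A rough sketch, reusing the hypothetical exp_p5_scalef() helper from the first sketch and assuming n is a multiple of 16:

    #include <stddef.h>
    #include <immintrin.h>

    // One-pass "store each exp(x - max) and sum them" sketch (illustrative).
    static float raddstore_sketch(const float* input, float* output, size_t n,
                                  __m512 vi_max) {
      __m512 vacc = _mm512_setzero_ps();
      for (size_t i = 0; i < n; i += 16) {
        const __m512 vx = _mm512_sub_ps(_mm512_loadu_ps(input + i), vi_max);
        const __m512 vf = exp_p5_scalef(vx);
        _mm512_storeu_ps(output + i, vf);  // softmax numerators
        vacc = _mm512_add_ps(vacc, vf);    // running denominator
      }
      return _mm512_reduce_add_ps(vacc);
    }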
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x128.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128():
     99  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    108  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    117  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    126  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    147  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2():
    100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    148  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x128-acc4.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4():
    102  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    129  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    138  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    150  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x144.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144():
    104  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    114  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    124  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    134  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    144  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    157  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);

D | avx512f-p5-scalef-x144-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3():
    106  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    116  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    126  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    136  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    146  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    159  const __m512 vf6 = _mm512_scalef_ps(vp6, vn6);
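The -acc2/-acc3/-acc4 suffixes here and in the raddstore directory above give the number of independent vector accumulators the sum is split across, so consecutive _mm512_add_ps operations do not serialize on a single register. A sketch of the two-accumulator case, under the same assumptions as before (hypothetical exp_p5_scalef() helper, n a multiple of 32):

    #include <stddef.h>
    #include <immintrin.h>

    // Two independent add chains per iteration; merged once at the end.
    static float raddexp_acc2_sketch(const float* input, size_t n, __m512 vi_max) {
      __m512 vacc0 = _mm512_setzero_ps();
      __m512 vacc1 = _mm512_setzero_ps();
      for (size_t i = 0; i < n; i += 32) {
        const __m512 vx0 = _mm512_sub_ps(_mm512_loadu_ps(input + i), vi_max);
        const __m512 vx1 = _mm512_sub_ps(_mm512_loadu_ps(input + i + 16), vi_max);
        vacc0 = _mm512_add_ps(vacc0, exp_p5_scalef(vx0));  // these two adds have
        vacc1 = _mm512_add_ps(vacc1, exp_p5_scalef(vx1));  // no data dependency
      }
      return _mm512_reduce_add_ps(_mm512_add_ps(vacc0, vacc1));
    }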
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx512f-rr1-p5-scalef-div-x112.c | in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112():
     85  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
     93  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    101  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    117  vp6 = _mm512_fmadd_ps(vp6, vt6, vone);
    125  const __m512 ve6 = _mm512_scalef_ps(vp6, vn6);
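The sigmoid kernel ends the same p5 chain with vone rather than vc0 and divides the reconstructed exponential: with e = exp(-|x|), sigmoid(x) is e / (e + 1), mirrored for positive inputs. A minimal sketch of that tail, again reusing the hypothetical exp_p5_scalef(); a plain min(x, -x) stands in here for however the generated code forms -|x|:

    #include <immintrin.h>

    static __m512 sigmoid_sketch(__m512 vx) {
      const __m512 vzero = _mm512_setzero_ps();
      const __m512 vone  = _mm512_set1_ps(1.0f);
      const __m512 vz = _mm512_min_ps(vx, _mm512_sub_ps(vzero, vx));  // -|x|
      const __m512 ve = exp_p5_scalef(vz);        // e = exp(-|x|), in (0, 1]
      const __m512 vd = _mm512_add_ps(ve, vone);  // d = e + 1
      __m512 vf = _mm512_div_ps(ve, vd);          // sigmoid(-|x|)
      // Mirror for positive inputs: sigmoid(x) = 1 - sigmoid(-x).
      const __mmask16 vpos = _mm512_cmp_ps_mask(vx, vzero, _CMP_GT_OQ);
      return _mm512_mask_sub_ps(vf, vpos, vone, vf);
    }

Evaluating on -|x| keeps e in (0, 1], so neither the exponential nor the divisor can overflow.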
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x128-acc2.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2():
     93  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    102  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    111  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    129  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    171  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));

D | avx512f-p5-scalef-x128.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128():
     91  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  (local)
    100  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    109  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    118  vp6 = _mm512_fmadd_ps(vp6, vt6, vc1);
    127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc0);
    167  vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp6, vdelta_e6));
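raddextexp never collapses vp6 to a plain float at all: the accumulator is an extended-exponent pair, and each term vp6 * 2^vn6 is rescaled to a shared exponent before being added, which is what the vdelta_e6 operand above is for. A hedged sketch of one such accumulation step, with illustrative names:

    #include <immintrin.h>

    // The running sum is held as vaccv * 2^vacce. A new term vp * 2^vn is
    // brought to the common exponent max(vacce, vn) before the add, so the
    // sum never overflows even when an individual 2^vn would.
    static void accumulate_extexp(__m512* vaccv, __m512* vacce,
                                  __m512 vp, __m512 vn) {
      const __m512 vmax_e      = _mm512_max_ps(*vacce, vn);
      const __m512 vdelta_acce = _mm512_sub_ps(*vacce, vmax_e);  // <= 0
      const __m512 vdelta_e    = _mm512_sub_ps(vn, vmax_e);      // <= 0
      *vaccv = _mm512_add_ps(_mm512_scalef_ps(*vaccv, vdelta_acce),
                             _mm512_scalef_ps(vp, vdelta_e));
      *vacce = vmax_e;
    }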
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx512f-rr1-p6-x112.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x112():
     97  __m512 vp6 = _mm512_fmadd_ps(vc6, vt6, vc5);  (local)
    105  vp6 = _mm512_fmadd_ps(vp6, vt6, vc4);
    113  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    121  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    135  vp6 = _mm512_mul_ps(vp6, vt6);
    152  vp6 = _mm512_fmadd_ps(vp6, vt6, vt6);
    167  __m512 vy6 = _mm512_fmadd_ps(vp6, valpha, vs6);

D | velu-avx2-rr1-p6-x56.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x56():
     97  __m256 vp6 = _mm256_fmadd_ps(vc6, vt6, vc5);  (local)
    105  vp6 = _mm256_fmadd_ps(vp6, vt6, vc4);
    113  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    121  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    135  vp6 = _mm256_mul_ps(vp6, vt6);
    151  vp6 = _mm256_fmadd_ps(vp6, vt6, vt6);
    165  const __m256 ve6 = _mm256_fmadd_ps(vp6, valpha, vs6);

D | velu-avx2-rr1-p6-x64.c | in xnn_f32_velu_ukernel__avx2_rr1_p6_x64():
    103  __m256 vp6 = _mm256_fmadd_ps(vc6, vt6, vc5);  (local)
    112  vp6 = _mm256_fmadd_ps(vp6, vt6, vc4);
    121  vp6 = _mm256_fmadd_ps(vp6, vt6, vc3);
    130  vp6 = _mm256_fmadd_ps(vp6, vt6, vc2);
    145  vp6 = _mm256_mul_ps(vp6, vt6);
    163  vp6 = _mm256_fmadd_ps(vp6, vt6, vt6);
    179  const __m256 ve6 = _mm256_fmadd_ps(vp6, valpha, vs6);

D | velu-avx512f-rr1-p6-x128.c | in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128():
    103  __m512 vp6 = _mm512_fmadd_ps(vc6, vt6, vc5);  (local)
    112  vp6 = _mm512_fmadd_ps(vp6, vt6, vc4);
    121  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
    130  vp6 = _mm512_fmadd_ps(vp6, vt6, vc2);
    145  vp6 = _mm512_mul_ps(vp6, vt6);
    165  vp6 = _mm512_fmadd_ps(vp6, vt6, vt6);
    181  __m512 vy6 = _mm512_fmadd_ps(vp6, valpha, vs6);
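The ELU kernels vary the recipe: the polynomial is degree 6 with no constant term (note the _mm*_mul_ps(vp6, vt6) in place of a final Horner step), and the negative branch is assembled as alpha*(e^x - 1) from s = 2^n and t. A single-register reconstruction under the same caveats as before (Taylor coefficients, illustrative names); the generated kernels additionally clamp x on the negative side and blend in x itself for non-negative lanes:

    #include <immintrin.h>

    // Negative-branch ELU sketch: returns alpha * (exp(vx) - 1) for vx <= 0.
    static __m512 elu_neg_sketch(__m512 vx, __m512 valpha) {
      const __m512 vlog2e     = _mm512_set1_ps(1.442695f);
      const __m512 vminus_ln2 = _mm512_set1_ps(-0.6931472f);
      const __m512 vone = _mm512_set1_ps(1.0f);
      const __m512 vc6 = _mm512_set1_ps(1.0f / 720.0f);
      const __m512 vc5 = _mm512_set1_ps(1.0f / 120.0f);
      const __m512 vc4 = _mm512_set1_ps(1.0f / 24.0f);
      const __m512 vc3 = _mm512_set1_ps(1.0f / 6.0f);
      const __m512 vc2 = _mm512_set1_ps(0.5f);

      const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e),
                                             _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
      __m512 vs = _mm512_scalef_ps(vone, vn);           // s = 2^n
      __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vx);  // t = x - n*ln2

      // p(t) = c2*t + c3*t^2 + ... + c6*t^5, i.e. (e^t - 1 - t)/t, approximately.
      __m512 vp = _mm512_fmadd_ps(vc6, vt, vc5);
      vp = _mm512_fmadd_ps(vp, vt, vc4);
      vp = _mm512_fmadd_ps(vp, vt, vc3);
      vp = _mm512_fmadd_ps(vp, vt, vc2);
      vp = _mm512_mul_ps(vp, vt);

      vt = _mm512_mul_ps(vt, vs);                // t  -> s*t
      vs = _mm512_fmsub_ps(vs, valpha, valpha);  // s  -> alpha*(s - 1)
      vp = _mm512_fmadd_ps(vp, vt, vt);          // p  -> s*(e^t - 1)
      return _mm512_fmadd_ps(vp, valpha, vs);    // alpha*(e^x - 1)
    }

Since vy6 = _mm512_fmadd_ps(vp6, valpha, vs6) must land on alpha*(e^x - 1), vs6 in the listed kernels has evidently been pre-scaled to alpha*(s - 1) in the lines between the hits, which is what the fmsub above reproduces.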