/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc2.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 166 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 167 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 168 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 169 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 170 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 171 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 172 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 173 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 174 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() [all …]
|
D | avx512f-p5-scalef-x192.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local 165 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 166 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 167 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 168 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 169 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 170 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 171 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 172 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 173 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x192-acc3.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 167 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 168 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 169 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 170 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 171 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 172 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 173 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 174 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 175 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() [all …]
|
D | avx512f-p5-scalef-x192-acc6.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 170 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 171 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 172 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 173 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 174 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 175 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 176 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 177 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 178 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() [all …]
|
D | avx512f-p5-scalef-x160-acc5.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() local 151 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 152 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 153 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 154 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 155 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 156 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 157 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 158 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 159 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() [all …]
|
D | avx512f-p5-scalef-x160.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() local 147 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 148 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 149 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 150 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 151 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 152 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 153 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 154 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 155 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x160-acc2.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() local 148 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 149 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 150 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 151 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 152 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 153 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 154 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 155 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 156 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() [all …]
|
D | avx512f-p5-scalef-x144.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() local 138 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 139 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 140 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 141 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 142 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 143 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 144 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 145 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 146 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() [all …]
|
D | avx512f-p5-scalef-x144-acc3.c | 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local 140 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 141 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 142 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 143 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 144 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 145 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 146 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 147 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 148 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() [all …]
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local 166 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 167 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 168 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 169 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 170 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 171 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 172 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 173 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 174 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x176.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() local 157 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 158 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 159 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 160 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 161 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 162 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 163 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 164 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 165 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() [all …]
|
D | avx512f-p5-scalef-x160.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() local 148 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 149 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 150 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 151 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 152 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 153 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 154 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 155 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 156 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x144.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() local 139 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 140 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 141 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 142 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 143 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 144 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 145 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 146 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 147 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() [all …]
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 32 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local 153 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 154 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 155 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 156 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 157 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 158 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 159 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 160 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 161 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x176.c | 32 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local 145 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 146 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 147 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 148 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 149 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 150 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 151 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 152 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 153 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() [all …]
|
D | avx512f-p5-scalef-x160.c | 32 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() local 137 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 138 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 139 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 140 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 141 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 142 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 143 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 144 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 145 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x144.c | 32 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() local 129 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 130 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 131 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 132 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 133 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 134 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 135 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 136 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 137 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() [all …]
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc2.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 167 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 168 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 169 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 170 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 171 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 172 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 173 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 174 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 175 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() [all …]
|
D | avx512f-p5-scalef-x192-acc3.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 168 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 169 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 170 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 171 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 172 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 173 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 174 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 175 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 176 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() [all …]
|
D | avx512f-p5-scalef-x192.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() local 166 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 167 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 168 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 169 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 170 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 171 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 172 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 173 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 174 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x160-acc2.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() local 149 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 150 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 151 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 152 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 153 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 154 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 155 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 156 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 157 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() [all …]
|
D | avx512f-p5-scalef-x160.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() local 148 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 149 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 150 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 151 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 152 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 153 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 154 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 155 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 156 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x144.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() local 139 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 140 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 141 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 142 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 143 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 144 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 145 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 146 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 147 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() [all …]
|
D | avx512f-p5-scalef-x144-acc3.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local 141 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 142 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 143 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 144 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 145 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 146 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 147 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 148 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 149 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() [all …]
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local 153 vp0 = _mm512_fmadd_ps(vp0, vt0, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 154 vp1 = _mm512_fmadd_ps(vp1, vt1, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 155 vp2 = _mm512_fmadd_ps(vp2, vt2, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 156 vp3 = _mm512_fmadd_ps(vp3, vt3, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 157 vp4 = _mm512_fmadd_ps(vp4, vt4, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 158 vp5 = _mm512_fmadd_ps(vp5, vt5, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 159 vp6 = _mm512_fmadd_ps(vp6, vt6, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 160 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 161 vp8 = _mm512_fmadd_ps(vp8, vt8, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() [all …]
|