/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

avx512f-p5-scalef-x192.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  127  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  128  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  129  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  130  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  131  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  132  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  133  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  134  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  135  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x192-acc3.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  129  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  130  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  131  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  132  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  133  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  134  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  135  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  136  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  137  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x192-acc2.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  128  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  129  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  130  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  131  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  132  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  133  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  134  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  135  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  136  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x160.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  115  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  116  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  117  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  118  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  119  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  120  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  121  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  122  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  123  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x160-acc2.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  116  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  117  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  118  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  119  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  120  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  121  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  122  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  123  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  124  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x144-acc3.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  111  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  112  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  113  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  114  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  115  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  116  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  117  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  118  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  119  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x144.c (in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  109  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  110  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  111  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  112  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  113  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  114  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  116  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  117  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]
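Every match above is the same step of the same routine: these generated kernels approximate exp(t) on a reduced argument t with a degree-5 polynomial ("p5" in the file names), evaluated by Horner's scheme as one _mm512_fmadd_ps per coefficient, and vc3 is the cubic coefficient. A minimal sketch of that evaluation follows. Only vc3's value is confirmed by this listing; the other coefficients below are stand-ins near the Taylor-series values 1/120, 1/24, 1/2, 1, not XNNPACK's tuned constants.

#include <immintrin.h>

/* Degree-5 Horner evaluation of p(t) ~= exp(t), one FMA per coefficient.
   Compile with -mavx512f. */
static __m512 eval_p5(__m512 vt) {
  const __m512 vc5 = _mm512_set1_ps(0x1.111112p-7f);  /* ~1/120, assumed     */
  const __m512 vc4 = _mm512_set1_ps(0x1.555556p-5f);  /* ~1/24,  assumed     */
  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  /* from the listing    */
  const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f);  /* ~1/2,   assumed     */
  const __m512 vc1 = _mm512_set1_ps(0x1.000000p+0f);  /* ~1,     assumed     */
  const __m512 vone = _mm512_set1_ps(1.0f);

  __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);          /* c5*t + c4           */
  vp = _mm512_fmadd_ps(vp, vt, vc3);                  /* the step matched above */
  vp = _mm512_fmadd_ps(vp, vt, vc2);
  vp = _mm512_fmadd_ps(vp, vt, vc1);
  /* p(t) = ((((c5*t + c4)*t + c3)*t + c2)*t + c1)*t + 1 */
  return _mm512_fmadd_ps(vp, vt, vone);
}

The numbered vp0/vt0, vp1/vt1, ... pairs in the matches are simply this evaluation unrolled across several ZMM vectors per loop iteration.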
/external/XNNPACK/src/f32-vscaleextexp/gen/

avx512f-p5-scalef-x192.c (in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()):
   35  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  114  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  115  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  116  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  117  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  118  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  119  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  121  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  122  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x176.c (in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176()):
   35  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  109  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  110  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  111  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  112  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  113  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  114  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  116  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  117  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x160.c (in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160()):
   35  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  104  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  105  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  106  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  107  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  108  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  109  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  110  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  111  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  112  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x144.c (in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()):
   35  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
   99  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  100  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  101  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  102  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  103  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  104  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  105  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  106  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  107  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]
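The repeated constant is easy to sanity-check: 0x1.555A80p-3 is (1 + 0x555A80/2^24) * 2^-3 = 0.16667461..., essentially 1/3! = 1/6, the cubic Taylor coefficient of exp, presumably nudged off the exact value by polynomial fitting over the reduction interval. A standalone snippet (not XNNPACK code) that prints the comparison:

#include <stdio.h>

int main(void) {
  const float vc3 = 0x1.555A80p-3f;    /* the constant from every match above */
  printf("vc3 = %.8f\n", (double)vc3); /* 0.16667461                          */
  printf("1/6 = %.8f\n", 1.0 / 6.0);   /* 0.16666667                          */
  printf("relative offset = %.2e\n", ((double)vc3 - 1.0 / 6.0) * 6.0);
  return 0;
}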
/external/XNNPACK/src/f32-raddexpminusmax/gen/

avx512f-p5-scalef-x192-acc6.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  131  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  132  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  133  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  134  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  135  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  136  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  137  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  138  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  139  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x192-acc3.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  128  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  129  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  130  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  131  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  132  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  133  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  134  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  135  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  136  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x192.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  126  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  127  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  128  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  129  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  130  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  131  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  132  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  133  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  134  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x192-acc2.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  127  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  128  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  129  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  130  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  131  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  132  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  133  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  134  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  135  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x160-acc5.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  118  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  119  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  120  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  121  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  122  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  123  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  124  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  125  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  126  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x160-acc2.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  115  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  116  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  117  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  118  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  119  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  120  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  121  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  122  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  123  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x160.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  114  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  115  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  116  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  117  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  118  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  119  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  121  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  122  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x144-acc3.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  110  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  111  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  112  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  113  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  114  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  115  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  116  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  117  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  118  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x144.c (in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()):
   33  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  108  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  109  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  110  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  111  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  112  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  113  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  114  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  115  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  116  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]
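The acc2/acc3/acc5/acc6 suffixes in this directory indicate how many partial sums the reduction keeps. A scalar sketch of the idea (a hypothetical helper, not taken from the kernels): independent accumulators break the floating-point add latency chain and are combined only once at the end.

#include <stddef.h>

/* Sum n floats using three independent accumulators (the "acc3" pattern). */
static float radd_acc3(const float* x, size_t n) {
  float acc0 = 0.0f, acc1 = 0.0f, acc2 = 0.0f;
  size_t i = 0;
  for (; i + 3 <= n; i += 3) {
    acc0 += x[i + 0];  /* three dependency chains can be in flight at once */
    acc1 += x[i + 1];
    acc2 += x[i + 2];
  }
  for (; i < n; i++) {
    acc0 += x[i];      /* remainder */
  }
  return (acc0 + acc1) + acc2;  /* combine partial sums once */
}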
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

avx512f-p5-scalef-x192.c (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  127  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  128  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  129  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  130  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  131  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  132  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  133  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  134  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  135  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x176.c (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  121  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  122  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  123  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  124  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  125  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  126  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  127  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  128  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  129  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x160.c (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  115  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  116  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  117  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  118  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  119  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  120  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  121  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  122  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  123  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]

avx512f-p5-scalef-x144.c (in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  109  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  110  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  111  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  112  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  113  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  114  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  115  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  116  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  117  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]
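The "scalef" in every file name refers to the reconstruction step that follows the polynomial: exp(x) is rebuilt as p(t) * 2^n with _mm512_scalef_ps (the AVX-512 VSCALEFPS instruction), which applies the scale factor directly in floating point instead of assembling an exponent with integer bit tricks, and behaves gracefully when n is large enough to overflow or underflow. A minimal sketch, with illustrative variable names:

#include <immintrin.h>

/* vp: polynomial value p(t); vn: n = round(x / ln 2) as a float vector. */
static __m512 reconstruct_exp(__m512 vp, __m512 vn) {
  return _mm512_scalef_ps(vp, vn);  /* p(t) * 2^n, elementwise */
}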
/external/XNNPACK/src/f32-raddextexp/gen/

avx512f-p5-scalef-x192.c (in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192()):
   34  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);  // vc3 defined as a local
  114  vp0 = _mm512_fmadd_ps(vp0, vt0, vc3);
  115  vp1 = _mm512_fmadd_ps(vp1, vt1, vc3);
  116  vp2 = _mm512_fmadd_ps(vp2, vt2, vc3);
  117  vp3 = _mm512_fmadd_ps(vp3, vt3, vc3);
  118  vp4 = _mm512_fmadd_ps(vp4, vt4, vc3);
  119  vp5 = _mm512_fmadd_ps(vp5, vt5, vc3);
  120  vp6 = _mm512_fmadd_ps(vp6, vt6, vc3);
  121  vp7 = _mm512_fmadd_ps(vp7, vt7, vc3);
  122  vp8 = _mm512_fmadd_ps(vp8, vt8, vc3);
  [all …]
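One last decoding note on the x144/x160/x176/x192 suffixes used throughout: they give the number of floats handled per main-loop iteration, i.e. 9, 10, 11, or 12 ZMM vectors of 16 lanes (144 = 9 × 16, 160 = 10 × 16, 176 = 11 × 16, 192 = 12 × 16). That is why every excerpt shows at least vp0 through vp8; in the wider variants the lines elided by "[all …]" presumably continue through vp9, vp10, and vp11.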