/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc2.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 114 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 115 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 116 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 117 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 118 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 119 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 120 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 121 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 122 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() [all …]
|
D | avx512f-p5-scalef-x192.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local 113 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 114 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 115 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 116 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 117 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 118 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 119 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 120 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() 121 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x192-acc3.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 115 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 116 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 117 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 118 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 119 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 120 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 121 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 122 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 123 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() [all …]
|
D | avx512f-p5-scalef-x192-acc6.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local 118 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 119 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 120 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 121 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 122 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 123 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 124 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 125 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 126 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() [all …]
|
D | avx512f-p5-scalef-x160-acc5.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() local 107 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 108 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 109 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 110 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 111 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 112 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 113 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 114 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 115 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() [all …]
|
D | avx512f-p5-scalef-x160.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() local 103 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 104 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 105 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 106 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 107 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 108 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 109 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 110 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 111 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x160-acc2.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() local 104 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 105 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 106 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 107 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 108 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 109 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 110 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 111 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 112 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() [all …]
|
D | avx512f-p5-scalef-x144.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() local 98 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 99 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 100 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 101 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 102 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 103 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 104 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 105 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 106 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() [all …]
|
D | avx512f-p5-scalef-x144-acc3.c | 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local 100 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 101 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 102 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 103 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 104 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 105 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 106 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 107 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 108 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() [all …]
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local 114 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 115 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 116 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 117 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 118 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 119 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 120 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 121 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() 122 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x176.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() local 109 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 110 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 111 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 112 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 113 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 114 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 115 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 116 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() 117 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176() [all …]
|
D | avx512f-p5-scalef-x160.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() local 104 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 105 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 106 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 107 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 108 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 109 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 110 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 111 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 112 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x144.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() local 99 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 100 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 101 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 102 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 103 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 104 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 105 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 106 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 107 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() [all …]
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 37 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local 101 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 102 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 103 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 104 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 105 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 106 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 107 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 108 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() 109 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x176.c | 37 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() local 97 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 98 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 99 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 100 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 101 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 102 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 103 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 104 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() 105 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176() [all …]
|
D | avx512f-p5-scalef-x160.c | 37 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() local 93 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 94 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 95 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 96 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 97 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 98 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 99 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 100 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() 101 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x144.c | 37 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() local 89 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 90 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 91 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 92 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 93 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 94 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 95 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 96 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 97 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() [all …]
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc2.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local 115 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 116 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 117 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 118 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 119 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 120 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 121 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 122 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() 123 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() [all …]
|
D | avx512f-p5-scalef-x192-acc3.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() local 116 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 117 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 118 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 119 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 120 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 121 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 122 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 123 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() 124 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3() [all …]
|
D | avx512f-p5-scalef-x192.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() local 114 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 115 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 116 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 117 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 118 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 119 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 120 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 121 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() 122 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192() [all …]
|
D | avx512f-p5-scalef-x160-acc2.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() local 105 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 106 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 107 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 108 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 109 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 110 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 111 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 112 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 113 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() [all …]
|
D | avx512f-p5-scalef-x160.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() local 104 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 105 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 106 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 107 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 108 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 109 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 110 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 111 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 112 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() [all …]
|
D | avx512f-p5-scalef-x144.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() local 99 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 100 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 101 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 102 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 103 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 104 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 105 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 106 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 107 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() [all …]
|
D | avx512f-p5-scalef-x144-acc3.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local 101 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 102 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 103 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 104 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 105 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 106 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 107 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 108 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 109 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() [all …]
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192.c | 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() local 101 __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 102 __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 103 __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 104 __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 105 __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 106 __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 107 __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 108 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() 109 __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192() [all …]
|