/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x128-acc2.c | 102 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() local 111 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() 120 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() 129 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() 138 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() 150 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
|
D | avx512f-p5-scalef-x128.c | 101 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128() local 110 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128() 119 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128() 128 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128() 137 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128() 149 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128()
|
D | avx512f-p5-scalef-x144.c | 106 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() local 116 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 126 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 136 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 146 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144() 159 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144()
|
D | avx512f-p5-scalef-x144-acc3.c | 108 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local 118 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 128 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 138 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 148 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 161 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
|
D | avx512f-p5-scalef-x128-acc4.c | 104 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() local 113 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() 122 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() 131 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() 140 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() 152 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
|
D | avx512f-p5-scalef-x160-acc2.c | 112 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() local 123 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 134 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 145 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 156 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2() 170 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160_acc2()
|
D | avx512f-p5-scalef-x160.c | 111 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() local 122 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 133 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 144 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 155 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160() 169 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x160()
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x128.c | 92 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() local 101 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 110 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 119 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 128 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128() 143 __m512 vf7 = _mm512_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128()
|
D | avx512f-p5-scalef-x144.c | 96 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() local 106 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 116 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 126 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 136 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144() 152 __m512 vf7 = _mm512_mul_ps(vp7, vscalev); in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x144()
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x128.c | 100 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128() local 109 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128() 118 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128() 127 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128() 136 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128() 148 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128()
|
D | avx512f-p5-scalef-x128-acc2.c | 101 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() local 110 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() 119 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() 128 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() 137 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2() 149 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc2()
|
D | avx512f-p5-scalef-x128-acc4.c | 103 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() local 112 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() 121 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() 130 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() 139 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4() 151 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x128_acc4()
|
D | avx512f-p5-scalef-x144.c | 105 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() local 115 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 125 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 135 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 145 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144() 158 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144()
|
D | avx512f-p5-scalef-x144-acc3.c | 107 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() local 117 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 127 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 137 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 147 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3() 160 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x144_acc3()
|
D | avx512f-p5-scalef-x160-acc5.c | 114 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() local 125 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 136 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 147 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 158 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 172 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
|
D | avx512f-p5-scalef-x160.c | 110 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() local 121 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 132 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 143 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 154 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160() 168 const __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160()
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x128.c | 101 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128() local 110 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128() 119 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128() 128 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128() 137 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128() 149 __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128()
|
D | avx512f-p5-scalef-x144.c | 106 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() local 116 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 126 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 136 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 146 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144() 159 __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x144()
|
D | avx512f-p5-scalef-x160.c | 111 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() local 122 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 133 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 144 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 155 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160() 169 __m512 vf7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160()
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x128-acc2.c | 94 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() local 103 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 112 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 121 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 130 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 172 vaccv1 = _mm512_add_ps(vaccv1, _mm512_scalef_ps(vp7, vdelta_e7)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
|
D | avx512f-p5-scalef-x128.c | 92 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() local 101 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() 110 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() 119 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() 128 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128() 168 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128()
|
D | avx512f-p5-scalef-x144.c | 96 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() local 106 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() 116 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() 126 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() 136 vp7 = _mm512_fmadd_ps(vp7, vt7, vc0); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144() 179 vaccv0 = _mm512_add_ps(vaccv0, _mm512_scalef_ps(vp7, vdelta_e7)); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-avx2-rr1-p6-x64.c | 104 __m256 vp7 = _mm256_fmadd_ps(vc6, vt7, vc5); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64() local 113 vp7 = _mm256_fmadd_ps(vp7, vt7, vc4); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64() 122 vp7 = _mm256_fmadd_ps(vp7, vt7, vc3); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64() 131 vp7 = _mm256_fmadd_ps(vp7, vt7, vc2); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64() 147 vp7 = _mm256_mul_ps(vp7, vt7); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64() 165 vp7 = _mm256_fmadd_ps(vp7, vt7, vt7); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64() 181 const __m256 ve7 = _mm256_fmadd_ps(vp7, valpha, vs7); in xnn_f32_velu_ukernel__avx2_rr1_p6_x64()
|
D | velu-avx512f-rr1-p6-x128.c | 104 __m512 vp7 = _mm512_fmadd_ps(vc6, vt7, vc5); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() local 113 vp7 = _mm512_fmadd_ps(vp7, vt7, vc4); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 122 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 131 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 147 vp7 = _mm512_mul_ps(vp7, vt7); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 166 vp7 = _mm512_fmadd_ps(vp7, vt7, vt7); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128() 183 __m512 vy7 = _mm512_fmadd_ps(vp7, valpha, vs7); in xnn_f32_velu_ukernel__avx512f_rr1_p6_x128()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx512f-rr1-p5-scalef-div-x128.c | 91 __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() local 100 vp7 = _mm512_fmadd_ps(vp7, vt7, vc3); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 109 vp7 = _mm512_fmadd_ps(vp7, vt7, vc2); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 118 vp7 = _mm512_fmadd_ps(vp7, vt7, vc1); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 127 vp7 = _mm512_fmadd_ps(vp7, vt7, vone); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 136 const __m512 ve7 = _mm512_scalef_ps(vp7, vn7); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
|