/external/XNNPACK/src/f32-raddexpminusmax/gen/

D | avx2-p5-x96.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96():
    129  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    142  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    156  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    169  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    182  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    195  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    212  vt11 = _mm256_mul_ps(vt11, vs11);
    225  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx2-p5-x96-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc2():
    130  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    143  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    157  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    170  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    183  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    196  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    213  vt11 = _mm256_mul_ps(vt11, vs11);
    226  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx2-p5-x96-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc3():
    131  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    144  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    158  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    171  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    184  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    197  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    214  vt11 = _mm256_mul_ps(vt11, vs11);
    227  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx2-p5-x96-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x96_acc6():
    134  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    147  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    161  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    174  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    187  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    200  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    217  vt11 = _mm256_mul_ps(vt11, vs11);
    230  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx512f-p5-scalef-x192-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
    98   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    111  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    125  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    138  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    151  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    164  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    177  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192():
    97   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    110  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    124  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    137  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    150  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    163  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    176  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192-acc3.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
    99   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    112  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    126  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    139  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    152  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    165  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    178  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192-acc6.c | in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6():
    102  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    115  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    129  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    142  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    155  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    168  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    181  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);
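Every avx2-p5 entry above hits the same eight statements per block of lanes: a two-step Cody-Waite subtraction of n*ln2 from x (the vminus_ln2_hi / vminus_ln2_lo pair), a degree-5 polynomial in the reduced argument t evaluated by Horner's rule, and the reconstruction exp(x) ~= s + (t*s)*p with s = 2**n. The sketch below is an illustrative stand-alone rendering of one 8-lane slice of that pattern, not the XNNPACK source itself: the constants are the usual values for this approximation but should be read as placeholders, and the denormal-cutoff handling of the real kernels is omitted.

#include <immintrin.h>

// Illustrative sketch (assumed pattern, not the generated kernel): one 8-lane
// slice of the p5 evaluation of exp(vx), where vx = x - max is assumed <= 0.
static __m256 exp_p5_slice(__m256 vx) {
  const __m256 vlog2e        = _mm256_set1_ps(0x1.715476p+0f);   // log2(e)
  const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp+23f);  // rounding bias
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);   // high part of -ln2
  const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);   // low-order correction
  const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f);             // polynomial coefficients
  const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f);
  const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f);
  const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f);
  const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f);

  // n = round(vx * log2(e)) via the magic-bias trick; s = 2**n from the low bits.
  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
  const __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vn), 23));
  vn = _mm256_sub_ps(vn, vmagic_bias);

  // t = vx - n*ln2, subtracted in hi/lo halves for extra precision
  // (the vt11 = fmadd(vn11, vminus_ln2_hi/lo, ...) lines in the listing).
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);

  // Degree-5 polynomial in t, Horner's rule (the vp11 chain in the listing).
  __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
  vp = _mm256_fmadd_ps(vp, vt, vc3);
  vp = _mm256_fmadd_ps(vp, vt, vc2);
  vp = _mm256_fmadd_ps(vp, vt, vc1);

  // exp(vx) ~= s + (t*s)*p (the vt11 = vt11*vs11 and vf11 = fmadd(...) lines).
  vt = _mm256_mul_ps(vt, vs);
  return _mm256_fmadd_ps(vt, vp, vs);
}

Splitting ln2 into a high and a low part keeps the reduced argument accurate to roughly one single-precision ULP even though each step is a fused multiply-add.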
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

D | avx2-p5-x96-acc6.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6():
    135  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    148  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    162  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    175  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    188  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    201  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    218  vt11 = _mm256_mul_ps(vt11, vs11);
    231  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx2-p5-x96-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3():
    132  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    145  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    159  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    172  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    185  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    198  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    215  vt11 = _mm256_mul_ps(vt11, vs11);
    228  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx2-p5-x96-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2():
    131  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    144  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    158  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    171  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    184  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    197  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    214  vt11 = _mm256_mul_ps(vt11, vs11);
    227  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx2-p5-x96.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96():
    130  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    143  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    157  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    170  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    183  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    196  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    213  vt11 = _mm256_mul_ps(vt11, vs11);
    226  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx512f-p5-scalef-x192-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2():
    99   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    112  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    126  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    139  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    152  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    165  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    178  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3():
    100  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    113  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    127  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    140  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    153  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    166  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    179  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192():
    98   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    111  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    125  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    138  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    151  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    164  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    177  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192-acc6.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6():
    103  __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    116  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    130  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    143  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    156  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    169  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    182  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

D | avx2-p5-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96():
    131  __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    144  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    158  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    171  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    184  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    197  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    214  vt11 = _mm256_mul_ps(vt11, vs11);
    227  __m256 vf11 = _mm256_fmadd_ps(vt11, vp11, vs11);

D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192():
    98   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    111  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    125  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    138  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    151  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    164  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    177  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);
/external/XNNPACK/src/f32-vscaleextexp/gen/

D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192():
    85   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    98   vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    112  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    125  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    138  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    151  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    164  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx2-p5-x96.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96():
    91   __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    104  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    118  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    131  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    144  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    157  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    170  vp11 = _mm256_fmadd_ps(vp11, vt11, vc0);
/external/XNNPACK/src/f32-raddextexp/gen/

D | avx512f-p5-scalef-x192.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192():
    85   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    98   vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    112  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    125  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    138  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    151  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    164  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192-acc2.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2():
    87   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    100  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    114  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    127  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    140  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    153  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    166  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192-acc3.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3():
    89   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    102  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    116  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    129  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    142  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    155  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    168  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx512f-p5-scalef-x192-acc6.c | in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6():
    95   __m512 vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    108  vt11 = _mm512_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    122  __m512 vp11 = _mm512_fmadd_ps(vc5, vt11, vc4);
    135  vp11 = _mm512_fmadd_ps(vp11, vt11, vc3);
    148  vp11 = _mm512_fmadd_ps(vp11, vt11, vc2);
    161  vp11 = _mm512_fmadd_ps(vp11, vt11, vc1);
    174  vp11 = _mm512_fmadd_ps(vp11, vt11, vc0);

D | avx2-p5-x96.c | in xnn_f32_raddextexp_ukernel__avx2_p5_x96():
    89   __m256 vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_hi, vx11);   (local)
    102  vt11 = _mm256_fmadd_ps(vn11, vminus_ln2_lo, vt11);
    116  __m256 vp11 = _mm256_fmadd_ps(vc5, vt11, vc4);
    129  vp11 = _mm256_fmadd_ps(vp11, vt11, vc3);
    142  vp11 = _mm256_fmadd_ps(vp11, vt11, vc2);
    155  vp11 = _mm256_fmadd_ps(vp11, vt11, vc1);
    168  vp11 = _mm256_fmadd_ps(vp11, vt11, vc0);
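The avx512f-p5-scalef and the extexp entries differ from the AVX2 p5 kernels in their tail: the Horner chain is carried one step further, folding the constant term vc0 into the polynomial, and the power-of-two factor is applied in lines beyond the vt11/vp11 occurrences listed here, with a scalef-style reconstruction instead of the fmadd-with-vs11 step. A minimal sketch of that tail, assuming the scaling uses _mm512_scalef_ps (as the kernel names suggest) and placeholder coefficients with c0 = 1; this is not the generated source:

#include <immintrin.h>

// Illustrative sketch of the assumed avx512f-p5-scalef tail.
// vt is the reduced argument (x - n*ln2) and vn holds n as a float vector,
// produced exactly as in the listed vt11 lines.
static __m512 exp_p5_scalef_slice(__m512 vt, __m512 vn) {
  // Placeholder degree-5 coefficients; c0 is 1 for exp around zero.
  const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f);
  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);
  const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f);
  const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f);
  const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f);
  const __m512 vc0 = _mm512_set1_ps(1.0f);

  // Horner chain ending in vc0 (the final vp11 = fmadd(vp11, vt11, vc0) line).
  __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
  vp = _mm512_fmadd_ps(vp, vt, vc3);
  vp = _mm512_fmadd_ps(vp, vt, vc2);
  vp = _mm512_fmadd_ps(vp, vt, vc1);
  vp = _mm512_fmadd_ps(vp, vt, vc0);

  // exp(x) ~= p(t) * 2**n, with the power-of-two scaling done in hardware.
  return _mm512_scalef_ps(vp, vn);
}

_mm512_scalef_ps(vp, vn) computes vp * 2**floor(vn) in a single instruction, so the power-of-two factor never has to be materialized as a separate float the way vs11 is in the AVX2 kernels.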