/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x128-acc2.c | 43 __m512 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() local 143 __m512 vmax_e1 = _mm512_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 152 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 175 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 179 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2() 182 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_acce01); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc2()
|
D | avx512f-p5-scalef-x144-acc3.c | 44 __m512 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() local 154 __m512 vmax_e1 = _mm512_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() 164 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() 191 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() 196 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3() 201 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_acce012); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x144_acc3()
|
D | avx512f-p5-scalef-x160-acc2.c | 43 __m512 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() local 161 __m512 vmax_e1 = _mm512_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() 172 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() 199 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() 203 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2() 206 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_acce01); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc2()
|
D | avx512f-p5-scalef-x128-acc4.c | 45 __m512 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() local 147 __m512 vmax_e1 = _mm512_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() 156 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() 183 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() 189 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4() 194 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_acce0123); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x128_acc4()
|
D | avx512f-p5-scalef-x192-acc2.c | 43 __m512 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() local 179 __m512 vmax_e1 = _mm512_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 192 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 223 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 227 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2() 230 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_acce01); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc2()
|
D | avx512f-p5-scalef-x160-acc5.c | 46 __m512 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() local 167 __m512 vmax_e1 = _mm512_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 178 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 211 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 218 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 225 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_acce01234); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
|
D | avx2-p5-x64-acc2.c | 47 __m256 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() local 147 __m256 vmax_e1 = _mm256_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() 160 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_e1), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() 199 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() 203 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2() 206 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_acce01), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc2()
|
D | avx2-p5-x64-acc4.c | 49 __m256 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() local 151 __m256 vmax_e1 = _mm256_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 164 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_e1), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 209 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 215 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 220 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_acce0123), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
|
D | avx2-p5-x80-acc2.c | 47 __m256 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() local 165 __m256 vmax_e1 = _mm256_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 180 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_e1), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 225 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 229 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 232 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_acce01), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
|
D | avx512f-p5-scalef-x192-acc3.c | 44 __m512 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() local 181 __m512 vmax_e1 = _mm512_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 194 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 227 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 232 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3() 237 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_acce012); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3()
|
D | avx2-p5-x72-acc3.c | 48 __m256 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() local 158 __m256 vmax_e1 = _mm256_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 172 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_e1), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 217 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 222 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 227 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_acce012), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
|
D | avx512f-p5-scalef-x192-acc6.c | 47 __m512 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() local 187 __m512 vmax_e1 = _mm512_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 200 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_e1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 239 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 247 const __m512 vmax_acce01 = _mm512_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 254 const __m512 vdelta_acce1 = _mm512_sub_ps(vacce1, vmax_acce012345); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
|
D | avx2-p5-x80-acc5.c | 50 __m256 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() local 171 __m256 vmax_e1 = _mm256_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 186 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_e1), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 240 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 247 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 254 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_acce01234), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
|
D | avx2-p5-x96-acc3.c | 48 __m256 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() local 185 __m256 vmax_e1 = _mm256_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 202 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_e1), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 256 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 261 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 266 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_acce012), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
|
D | avx2-p5-x96-acc2.c | 47 __m256 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() local 183 __m256 vmax_e1 = _mm256_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 200 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_e1), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 251 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 255 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2() 258 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_acce01), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc2()
|
D | avx2-p5-x96-acc6.c | 51 __m256 vacce1 = vminus_inf; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() local 191 __m256 vmax_e1 = _mm256_max_ps(vacce1, vn1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 208 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_e1), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 271 vacce1 = vmax_e1; in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 279 const __m256 vmax_acce01 = _mm256_max_ps(vacce0, vacce1); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 286 const __m256 vdelta_acce1 = _mm256_max_ps(_mm256_sub_ps(vacce1, vmax_acce012345), vmin_exponent); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
|