/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx2-rr1-p5-div-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80():
      194  __m256 vf9 = _mm256_div_ps(ve9, vd9);  (local)
      205  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      216  vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9);
      227  _mm256_storeu_ps(y + 72, vf9);
|
D | avx2-rr1-p5-nr1fma-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80():
      217  __m256 vf9 = _mm256_mul_ps(ve9, vr9);  (local)
      228  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      239  vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9);
      250  _mm256_storeu_ps(y + 72, vf9);
|
D | avx2-rr1-p5-nr2fma-x80.c | in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80():
      227  __m256 vf9 = _mm256_mul_ps(ve9, vr9);  (local)
      238  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      249  vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9);
      260  _mm256_storeu_ps(y + 72, vf9);
|
D | avx-rr2-p5-div-x80.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x80():
      226  __m256 vf9 = _mm256_div_ps(ve9, vd9);  (local)
      237  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      248  vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9);
      259  _mm256_storeu_ps(y + 72, vf9);
|
D | avx-rr2-p5-nr2-x80.c | in xnn_f32_sigmoid_ukernel__avx_rr2_p5_nr2_x80():
      259  __m256 vf9 = _mm256_mul_ps(ve9, vr9);  (local)
      270  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vz9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      281  vf9 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf9), vf9, vx9);
      292  _mm256_storeu_ps(y + 72, vf9);
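All five sigmoid kernels share the reconstruction visible in these vf9 hits: for z = -|x| they form f = e/(e+1), flush lanes where z is below the denormal cutoff to zero, and blend f against 1-f on the sign of x. A minimal scalar model of one lane (illustration only, not XNNPACK code; expf stands in for the degree-5 polynomial and the cutoff constant is approximate):

    #include <math.h>

    static float sigmoid_lane(float x) {
      const float denorm_cutoff = -87.336548f; /* approximate underflow bound */
      const float z = -fabsf(x);               /* range reduction: z <= 0 */
      const float e = expf(z);                 /* stands in for the p5 polynomial */
      float f = e / (e + 1.0f);                /* div variants: vf = _mm256_div_ps(ve, vd) */
      if (z < denorm_cutoff) {                 /* the andnot + _CMP_LT_OS masking step */
        f = 0.0f;
      }
      return (x < 0.0f) ? f : 1.0f - f;        /* _mm256_blendv_ps(1 - f, f, vx) */
    }

The nr1fma/nr2fma/nr2 variants avoid the division: vd is inverted with a hardware reciprocal estimate refined by one or two Newton-Raphson steps, then vf = ve * vr. One refinement step, in the FMA form the nr1fma/nr2fma files use (the AVX-only nr2 variant spells the same step with mul/sub):

    #include <immintrin.h>

    /* vr <- vr + vr * (1 - vr * vd), i.e. vr * (2 - vr * vd); needs AVX + FMA. */
    static __m256 reciprocal_nr1(__m256 vd) {
      const __m256 vone = _mm256_set1_ps(1.0f);
      __m256 vr = _mm256_rcp_ps(vd);                                 /* ~12-bit estimate */
      vr = _mm256_fmadd_ps(_mm256_fnmadd_ps(vr, vd, vone), vr, vr);  /* one NR step */
      return vr;
    }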
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx2-p5-x80.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
      201  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      214  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      226  vf9 = _mm256_mul_ps(vf9, vscale);
      238  _mm256_storeu_ps(output + 72, vf9);
|
D | avx2-p5-x88.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88():
      213  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      227  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      240  vf9 = _mm256_mul_ps(vf9, vscale);
      253  _mm256_storeu_ps(output + 72, vf9);
|
D | avx2-p5-x96.c | in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x96():
      225  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      240  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      254  vf9 = _mm256_mul_ps(vf9, vscale);
      268  _mm256_storeu_ps(output + 72, vf9);
|
D | avx512f-p5-scalef-x160.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x160():
      171  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);  (local)
      183  vf9 = _mm512_mul_ps(vf9, vscale);
      196  _mm512_storeu_ps(output + 144, vf9);
|
D | avx512f-p5-scalef-x176.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x176():
      181  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);  (local)
      194  vf9 = _mm512_mul_ps(vf9, vscale);
      208  _mm512_storeu_ps(output + 144, vf9);
|
D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192():
      191  __m512 vf9 = _mm512_scalef_ps(vp9, vn9);  (local)
      205  vf9 = _mm512_mul_ps(vf9, vscale);
      220  _mm512_storeu_ps(output + 144, vf9);
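Both ISA families here compute y = scale * exp(x - max). The AVX2 kernels rebuild the exponential as vf = vt*vp + vs and zero lanes below the denormal cutoff; the AVX-512 kernels instead use _mm512_scalef_ps(vp, vn) = vp * 2^vn, which covers the whole exponent range without a cutoff mask. Both then multiply by vscale before the store. A scalar model of one lane (illustration only; expf/ldexpf stand in for the polynomial and scalef, and the cutoff constant is approximate):

    #include <math.h>

    static float vscaleexpminusmax_lane(float x, float max, float scale) {
      const float z = x - max;                 /* vx = vi - vi_max */
      const float n = rintf(z * 1.44269504f);  /* round(z / ln 2) */
      const float t = z - n * 0.69314718f;     /* reduced argument */
      const float p = expf(t);                 /* stands in for the p5 polynomial */
      float f = ldexpf(p, (int)n);             /* AVX-512: _mm512_scalef_ps(vp, vn) */
      if (z < -87.336548f) {                   /* AVX2 only: denormal-cutoff mask */
        f = 0.0f;
      }
      return f * scale;                        /* vf = mul(vf, vscale) */
    }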
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x160.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x160():
      163  __m512 vf9 = _mm512_mul_ps(vp9, vscalev);  (local)
      186  vf9 = _mm512_scalef_ps(vf9, ve9);
      199  _mm512_storeu_ps(y + 144, vf9);
|
D | avx512f-p5-scalef-x176.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176():
      172  __m512 vf9 = _mm512_mul_ps(vp9, vscalev);  (local)
      197  vf9 = _mm512_scalef_ps(vf9, ve9);
      211  _mm512_storeu_ps(y + 144, vf9);
|
D | avx512f-p5-scalef-x192.c | in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192():
      181  __m512 vf9 = _mm512_mul_ps(vp9, vscalev);  (local)
      208  vf9 = _mm512_scalef_ps(vf9, ve9);
      223  _mm512_storeu_ps(y + 144, vf9);
|
D | avx2-p5-x80.c | in xnn_f32_vscaleextexp_ukernel__avx2_p5_x80():
      169  __m256 vf9 = _mm256_mul_ps(vp9, vscalev);  (local)
      221  vf9 = _mm256_mul_ps(vf9, vs9);
      233  _mm256_storeu_ps(y + 72, vf9);
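vscaleextexp differs in that the scale is carried in extended-exponent form: judging by the identifiers, vscalev holds a mantissa-like value factor while the power-of-two part travels separately, so extreme scales never overflow or underflow an intermediate float. The vf9 hits show the recombination: multiply the polynomial value by vscalev, then apply the accumulated exponent with _mm512_scalef_ps (AVX-512) or a final multiply by the rebuilt 2^n factor vs9 (AVX2). A scalar sketch under that assumed (value, exponent) split:

    #include <math.h>

    /* Illustration only: y = (scale_v * 2^scale_e) * exp(x). */
    static float vscaleextexp_lane(float x, float scale_v, float scale_e) {
      const float n = rintf(x * 1.44269504f);  /* exponent part of exp(x) */
      const float t = x - n * 0.69314718f;     /* reduced argument */
      const float p = expf(t);                 /* stands in for the p5 polynomial */
      const float f = p * scale_v;             /* vf = _mm512_mul_ps(vp, vscalev) */
      const float e = n + scale_e;             /* combine the two exponents */
      return ldexpf(f, (int)e);                /* vf = _mm512_scalef_ps(vf, ve) */
    }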
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx2-p5-x80.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80():
      200  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      213  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      225  _mm256_storeu_ps(output + 72, vf9);
      238  vacc0 = _mm256_add_ps(vacc0, vf9);
|
D | avx2-p5-x80-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc5():
      204  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      217  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      229  _mm256_storeu_ps(output + 72, vf9);
      242  vacc4 = _mm256_add_ps(vacc4, vf9);
|
D | avx2-p5-x80-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x80_acc2():
      201  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      214  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      226  _mm256_storeu_ps(output + 72, vf9);
      239  vacc1 = _mm256_add_ps(vacc1, vf9);
|
D | avx2-p5-x96-acc6.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc6():
      229  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      244  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      258  _mm256_storeu_ps(output + 72, vf9);
      273  vacc3 = _mm256_add_ps(vacc3, vf9);
|
D | avx2-p5-x96-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc3():
      226  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      241  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      255  _mm256_storeu_ps(output + 72, vf9);
      270  vacc0 = _mm256_add_ps(vacc0, vf9);
|
D | avx2-p5-x96-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96_acc2():
      225  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      240  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      254  _mm256_storeu_ps(output + 72, vf9);
      269  vacc1 = _mm256_add_ps(vacc1, vf9);
|
D | avx2-p5-x96.c | in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x96():
      224  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      239  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      253  _mm256_storeu_ps(output + 72, vf9);
      268  vacc0 = _mm256_add_ps(vacc0, vf9);
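raddstoreexpminusmax both writes each exp(x - max) to the output and folds it into a running sum; the -acc2/-acc3/-acc5/-acc6 suffixes spread that sum over as many independent accumulators (hence vf9 landing in vacc0, vacc1, vacc3, or vacc4 above), shortening the floating-point add dependency chain. A scalar sketch of the two-accumulator pattern (illustration only, not XNNPACK code):

    #include <math.h>
    #include <stddef.h>

    static float raddstoreexp_acc2(const float* input, float* output,
                                   size_t n, float max) {
      float acc[2] = { 0.0f, 0.0f };           /* -acc2: two partial sums */
      for (size_t i = 0; i < n; i++) {
        const float f = expf(input[i] - max);  /* vf = vt*vp + vs, then cutoff mask */
        output[i] = f;                         /* _mm256_storeu_ps(output + ..., vf) */
        acc[i % 2] += f;                       /* vaccK = _mm256_add_ps(vaccK, vf) */
      }
      return acc[0] + acc[1];                  /* final accumulator reduction */
    }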
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx2-p5-x80-acc2.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc2():
      200  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      213  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      225  vacc1 = _mm256_add_ps(vacc1, vf9);
|
D | avx2-p5-x80.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80():
      199  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      212  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      224  vacc0 = _mm256_add_ps(vacc0, vf9);
|
D | avx2-p5-x80-acc5.c | in xnn_f32_raddexpminusmax_ukernel__avx2_p5_x80_acc5():
      203  __m256 vf9 = _mm256_fmadd_ps(vt9, vp9, vs9);  (local)
      216  vf9 = _mm256_andnot_ps(_mm256_cmp_ps(vx9, vdenorm_cutoff, _CMP_LT_OS), vf9);
      228  vacc4 = _mm256_add_ps(vacc4, vf9);
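raddexpminusmax is the same reduction minus the store: each vf goes straight into an accumulator and nothing is written back to memory. The single-accumulator form, as a scalar sketch (illustration only):

    #include <math.h>
    #include <stddef.h>

    static float raddexp(const float* input, size_t n, float max) {
      float acc = 0.0f;
      for (size_t i = 0; i < n; i++) {
        acc += expf(input[i] - max);  /* vacc0 = _mm256_add_ps(vacc0, vf) */
      }
      return acc;
    }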
|