/external/XNNPACK/src/f32-sigmoid/gen/
avx2-rr1-p5-div-x64.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64():
  166  __m256 vf7 = _mm256_div_ps(ve7, vd7);  (local definition)
  175  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  184  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  193  _mm256_storeu_ps(y + 56, vf7);
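Taken together, the four occurrences above form the tail of the kernel: divide e by d to obtain sigmoid(z), zero any lane whose z falls below the denormal cutoff, reflect the result for non-negative inputs, and store the eighth vector of the tile. A minimal scalar model of one vf7 lane, assuming the usual rr1-p5 decomposition z = -|x|, e ~= exp(z), d = e + 1 (the helper name and signature are illustrative, not from XNNPACK):

  // Scalar model of one lane of the sigmoid tail (illustrative).
  // Assumes z = -|x|, e ~= exp(z), and d = e + 1 were computed earlier.
  static float sigmoid_lane(float x, float z, float e, float d,
                            float denorm_cutoff) {
    float f = e / d;          // _mm256_div_ps(ve7, vd7)
    if (z < denorm_cutoff) {  // _mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS)
      f = 0.0f;               // _mm256_andnot_ps zeroes the flagged lanes
    }
    if (x >= 0.0f) {          // _mm256_blendv_ps keeps f only where x < 0
      f = 1.0f - f;           // sigmoid(x) = 1 - sigmoid(-x)
    }
    return f;
  }

The remaining div-variant rows below differ only in tile size (x72, x80) and, for the avx-rr2 rows, in using a two-constant (Cody-Waite) range reduction; the vf7 tail itself is identical.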
avx2-rr1-p5-div-x72.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72():
  179  __m256 vf7 = _mm256_div_ps(ve7, vd7);  (local definition)
  189  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  199  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  209  _mm256_storeu_ps(y + 56, vf7);
avx2-rr1-p5-nr1fma-x64.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64():
  185  __m256 vf7 = _mm256_mul_ps(ve7, vr7);  (local definition)
  194  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  203  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  212  _mm256_storeu_ps(y + 56, vf7);
avx2-rr1-p5-div-x80.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80():
  192  __m256 vf7 = _mm256_div_ps(ve7, vd7);  (local definition)
  203  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  214  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  225  _mm256_storeu_ps(y + 56, vf7);
avx-rr2-p5-div-x64.c, in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x64():
  192  __m256 vf7 = _mm256_div_ps(ve7, vd7);  (local definition)
  201  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  210  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  219  _mm256_storeu_ps(y + 56, vf7);
avx2-rr1-p5-nr2fma-x64.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64():
  193  __m256 vf7 = _mm256_mul_ps(ve7, vr7);  (local definition)
  202  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  211  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  220  _mm256_storeu_ps(y + 56, vf7);
avx2-rr1-p5-nr2fma-x72.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72():
  209  __m256 vf7 = _mm256_mul_ps(ve7, vr7);  (local definition)
  219  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  229  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  239  _mm256_storeu_ps(y + 56, vf7);
avx2-rr1-p5-nr1fma-x72.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72():
  200  __m256 vf7 = _mm256_mul_ps(ve7, vr7);  (local definition)
  210  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  220  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  230  _mm256_storeu_ps(y + 56, vf7);
avx2-rr1-p5-nr1fma-x80.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80():
  215  __m256 vf7 = _mm256_mul_ps(ve7, vr7);  (local definition)
  226  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  237  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  248  _mm256_storeu_ps(y + 56, vf7);
avx512f-rr1-p5-scalef-div-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128():
  154  __m512 vf7 = _mm512_div_ps(ve7, vd7);  (local definition)
  163  vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone, vf7);
  172  _mm512_storeu_ps(y + 112, vf7);
avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128():
  163  __m512 vf7 = _mm512_div_ps(ve7, vd7);  (local definition)
  172  vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone, vf7);
  181  _mm512_storeu_ps(y + 112, vf7);
avx512f-rr1-lut16-p3-perm-scalef-div-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128():
  157  __m512 vf7 = _mm512_div_ps(ve7, vd7);  (local definition)
  166  vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone, vf7);
  175  _mm512_storeu_ps(y + 112, vf7);
avx-rr2-p5-div-x72.c, in xnn_f32_sigmoid_ukernel__avx_rr2_p5_div_x72():
  208  __m256 vf7 = _mm256_div_ps(ve7, vd7);  (local definition)
  218  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  228  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  238  _mm256_storeu_ps(y + 56, vf7);
avx2-rr1-p5-nr2fma-x80.c, in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80():
  225  __m256 vf7 = _mm256_mul_ps(ve7, vr7);  (local definition)
  236  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vz7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  247  vf7 = _mm256_blendv_ps(_mm256_sub_ps(vone, vf7), vf7, vx7);
  258  _mm256_storeu_ps(y + 56, vf7);
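The nr1fma and nr2fma rows above replace the long-latency _mm256_div_ps with a multiply by an approximate reciprocal of d: _mm256_rcp_ps supplies an estimate good to about 12 bits, and one (nr1fma) or two (nr2fma) Newton-Raphson steps refine it with FMAs before vf7 = _mm256_mul_ps(ve7, vr7). A sketch of the one-step refinement, assuming the step is expressed as r + r*(1 - d*r) (helper name illustrative, not XNNPACK's code):

  #include <immintrin.h>

  // One Newton-Raphson refinement of a reciprocal estimate (sketch).
  // Compile with FMA support (e.g. -mavx2 -mfma).
  static __m256 reciprocal_nr1fma(__m256 vd) {
    const __m256 vone = _mm256_set1_ps(1.0f);
    __m256 vr = _mm256_rcp_ps(vd);                      // ~12-bit estimate of 1/d
    const __m256 verr = _mm256_fnmadd_ps(vr, vd, vone); // 1 - d*r
    vr = _mm256_fmadd_ps(verr, vr, vr);                 // r + r*(1 - d*r)
    return vr;                                          // used as vr7 above
  }

Whether this beats the div variants depends on the microarchitecture, which is presumably why XNNPACK generates both and selects among them at setup time.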
avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128():
  175  __m512 vf7 = _mm512_mul_ps(ve7, vr7);  (local definition)
  185  vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone, vf7);
  194  _mm512_storeu_ps(y + 112, vf7);
avx512f-rr1-p5-scalef-nr1fma-x128.c, in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128():
  172  __m512 vf7 = _mm512_mul_ps(ve7, vr7);  (local definition)
  182  vf7 = _mm512_mask_sub_ps(vf7, _mm512_testn_epi32_mask(_mm512_castps_si512(vx7), vsign_mask), vone, vf7);
  191  _mm512_storeu_ps(y + 112, vf7);
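In the AVX512 rows the two-instruction AVX2 tail (andnot flush plus blendv reflection) collapses into a single masked subtract: a k-mask flags the lanes whose sign bit is clear, and only those lanes are rewritten as 1 - f. Note also that no vdenorm_cutoff line appears here; the scalef-based reconstruction seems to make the separate denormal flush unnecessary. An illustrative sketch (helper name and layout are assumptions, not XNNPACK's code):

  #include <immintrin.h>
  #include <stdint.h>

  // AVX512 sign-handling tail (sketch). knonneg is set where x >= 0;
  // those lanes become 1 - f, matching blendv(1 - f, f, x) above.
  static __m512 sigmoid_reflect(__m512 vx, __m512 vf) {
    const __m512i vsign_mask = _mm512_set1_epi32(INT32_MIN);  // 0x80000000
    const __m512 vone = _mm512_set1_ps(1.0f);
    const __mmask16 knonneg =
        _mm512_testn_epi32_mask(_mm512_castps_si512(vx), vsign_mask);
    return _mm512_mask_sub_ps(vf, knonneg, vone, vf);
  }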
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/
avx2-p5-x64.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x64():
  175  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);  (local definition)
  186  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  196  vf7 = _mm256_mul_ps(vf7, vscale);
  206  _mm256_storeu_ps(output + 56, vf7);
avx2-p5-x72.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x72():
  187  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);  (local definition)
  199  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  210  vf7 = _mm256_mul_ps(vf7, vscale);
  221  _mm256_storeu_ps(output + 56, vf7);
avx2-p5-x80.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x80():
  199  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);  (local definition)
  212  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  224  vf7 = _mm256_mul_ps(vf7, vscale);
  236  _mm256_storeu_ps(output + 56, vf7);
avx2-p5-x88.c, in xnn_f32_vscaleexpminusmax_ukernel__avx2_p5_x88():
  211  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);  (local definition)
  225  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  238  vf7 = _mm256_mul_ps(vf7, vscale);
  251  _mm256_storeu_ps(output + 56, vf7);
avx512f-p5-scalef-x128.c, in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x128():
  149  __m512 vf7 = _mm512_scalef_ps(vp7, vn7);  (local definition)
  159  vf7 = _mm512_mul_ps(vf7, vscale);
  170  _mm512_storeu_ps(output + 112, vf7);
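The vscaleexpminusmax rows compute scale * exp(x - max), the scaled numerator of a softmax. In the AVX2 variants a single FMA reconstructs the exponential from the range reduction (vt7*vp7 + vs7, with vs7 = 2^n), lanes whose input sits below the denormal cutoff are flushed to zero, and the result is multiplied by vscale; the AVX512 variant instead applies the exponent directly with _mm512_scalef_ps(vp7, vn7). A scalar model of one AVX2 lane, assuming (as in the p5 kernels) that t has already been pre-multiplied by s (names illustrative):

  // Scalar model of one vscaleexpminusmax lane (illustrative).
  // x is the shifted input (input - max); s = 2^n from range reduction;
  // t is the reduced argument pre-multiplied by s; p is the degree-5
  // polynomial evaluated at the reduced argument.
  static float scaleexp_lane(float x, float s, float t, float p,
                             float scale, float denorm_cutoff) {
    float f = t * p + s;     // _mm256_fmadd_ps(vt7, vp7, vs7): f ~= exp(x)
    if (x < denorm_cutoff) {
      f = 0.0f;              // flush lanes where exp(x) underflows
    }
    return f * scale;        // _mm256_mul_ps(vf7, vscale)
  }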
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
avx2-p5-x64.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64():
  174  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);  (local definition)
  185  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  195  _mm256_storeu_ps(output + 56, vf7);
  206  vacc0 = _mm256_add_ps(vacc0, vf7);
avx2-p5-x64-acc2.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc2():
  175  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);  (local definition)
  186  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  196  _mm256_storeu_ps(output + 56, vf7);
  207  vacc1 = _mm256_add_ps(vacc1, vf7);
avx2-p5-x64-acc4.c, in xnn_f32_raddstoreexpminusmax_ukernel__avx2_p5_x64_acc4():
  177  __m256 vf7 = _mm256_fmadd_ps(vt7, vp7, vs7);  (local definition)
  188  vf7 = _mm256_andnot_ps(_mm256_cmp_ps(vx7, vdenorm_cutoff, _CMP_LT_OS), vf7);
  198  _mm256_storeu_ps(output + 56, vf7);
  209  vacc3 = _mm256_add_ps(vacc3, vf7);
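The raddstoreexpminusmax rows share their first three steps with vscaleexpminusmax, but instead of scaling they add each stored vector into a running sum, the softmax denominator. The -acc2 and -acc4 variants spread that sum across two or four accumulators so consecutive _mm256_add_ps operations do not form one long dependency chain, which is why vf7 lands in vacc0, vacc1, or vacc3 depending on the variant. A sketch of the store-and-accumulate tail with two accumulators (helper name illustrative, not XNNPACK's code):

  #include <immintrin.h>

  // Store two exp vectors and fold them into separate accumulators
  // (sketch of the -acc2 idea). The final cross-accumulator add is done
  // once, after the main loop, in the real kernels.
  static __m256 radd_tail(float* output, __m256 vf0, __m256 vf1,
                          __m256 vacc0, __m256 vacc1) {
    _mm256_storeu_ps(output, vf0);
    _mm256_storeu_ps(output + 8, vf1);
    vacc0 = _mm256_add_ps(vacc0, vf0);   // independent of the vacc1 add
    vacc1 = _mm256_add_ps(vacc1, vf1);
    return _mm256_add_ps(vacc0, vacc1);  // final reduction
  }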
/external/XNNPACK/src/f32-vscaleextexp/gen/
avx512f-p5-scalef-x128.c, in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x128():
  143  __m512 vf7 = _mm512_mul_ps(vp7, vscalev);  (local definition)
  162  vf7 = _mm512_scalef_ps(vf7, ve7);
  173  _mm512_storeu_ps(y + 112, vf7);
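vscaleextexp keeps each value in extended-exponent form: a mantissa-like part (vp7) paired with a separate floating-point exponent (ve7). The scale is applied to the mantissa part first, and _mm512_scalef_ps then multiplies by 2^e in a single instruction, so the full product is never materialized as an ordinary float until this point, which sidesteps overflow of the intermediate. A sketch (helper name illustrative):

  #include <immintrin.h>

  // Apply scale to an extended-exponent value (p, e) and fold the
  // exponent back in: result = (p * scale) * 2^e (sketch).
  static __m512 scaleextexp_tail(__m512 vp, __m512 ve, __m512 vscale) {
    __m512 vf = _mm512_mul_ps(vp, vscale);  // _mm512_mul_ps(vp7, vscalev)
    return _mm512_scalef_ps(vf, ve);        // _mm512_scalef_ps(vf7, ve7)
  }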