/external/XNNPACK/src/f32-dwconv/gen/ |
D | up16x25-minmax-fma3-acc2.c | 34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() 181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2() [all …]
|
D | up16x25-minmax-fma3.c | 34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() 181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3() [all …]
|
D | up16x25-minmax-avx.c | 34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() 181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx() [all …]
|
D | up16x25-minmax-avx-acc2.c | 34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() 181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2() [all …]
|
D | up16x9-minmax-fma3-acc2.c | 34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 88 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 91 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 92 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 95 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 96 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 100 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() 101 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() [all …]
|
D | up16x9-minmax-fma3.c | 34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 88 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 91 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 92 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 95 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 96 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 100 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 101 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() [all …]
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx2-rr1-p5-nr1fma-x80.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80() [all …]
|
D | avx2-rr1-p5-div-x80.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80() [all …]
|
D | avx2-rr1-p5-div-x64.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64() [all …]
|
D | avx2-rr1-p5-nr1fma-x64.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64() [all …]
|
D | avx2-rr1-p5-div-x72.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72() [all …]
|
D | avx2-rr1-p5-nr2fma-x72.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72() [all …]
|
D | avx2-rr1-p5-nr1fma-x72.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72() [all …]
|
D | avx2-rr1-p5-nr2fma-x80.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80() [all …]
|
D | avx2-rr1-p5-div-x56.c | 28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() 37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56() [all …]
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx2-p5-x96-acc6.c | 27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() 40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6() [all …]
|
D | avx2-p5-x64-acc4.c | 27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() 40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4() [all …]
|
D | avx2-p5-x72-acc3.c | 27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() 40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3() [all …]
|
D | avx2-p5-x80-acc5.c | 27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() 40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5() [all …]
|
D | avx2-p5-x80-acc2.c | 27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() 40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2() [all …]
|
D | avx2-p5-x96-acc3.c | 27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() 40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3() [all …]
|
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | fma3-nr1fma1adj-x64.c | 29 const __m256 vhalf = _mm256_broadcast_ss(&params->fma.half); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 31 const __m256 vx0 = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 32 const __m256 vx1 = _mm256_loadu_ps(x + 8); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 33 const __m256 vx2 = _mm256_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 34 const __m256 vx3 = _mm256_loadu_ps(x + 24); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 35 const __m256 vx4 = _mm256_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 36 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 37 const __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 38 const __m256 vx7 = _mm256_loadu_ps(x + 56); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 41 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() [all …]
|
D | fma3-nr1fma1adj-x56.c | 29 const __m256 vhalf = _mm256_broadcast_ss(&params->fma.half); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 31 const __m256 vx0 = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 32 const __m256 vx1 = _mm256_loadu_ps(x + 8); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 33 const __m256 vx2 = _mm256_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 34 const __m256 vx3 = _mm256_loadu_ps(x + 24); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 35 const __m256 vx4 = _mm256_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 36 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 37 const __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 40 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 41 const __m256 vrsqrtx1 = _mm256_rsqrt_ps(vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() [all …]
|
D | fma3-nr1fma1adj-x48.c | 29 const __m256 vhalf = _mm256_broadcast_ss(&params->fma.half); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 31 const __m256 vx0 = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 32 const __m256 vx1 = _mm256_loadu_ps(x + 8); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 33 const __m256 vx2 = _mm256_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 34 const __m256 vx3 = _mm256_loadu_ps(x + 24); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 35 const __m256 vx4 = _mm256_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 36 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 39 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 40 const __m256 vrsqrtx1 = _mm256_rsqrt_ps(vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 41 const __m256 vrsqrtx2 = _mm256_rsqrt_ps(vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() [all …]
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx2-p5-x96.c | 29 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 30 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 31 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 35 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 36 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 38 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 39 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 40 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 41 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() 42 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96() [all …]
|