
Searched refs: __m256 (results 1–25 of 498, sorted by relevance)


/external/XNNPACK/src/f32-dwconv/gen/
up16x25-minmax-fma3-acc2.c
34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3_acc2()
[all …]
up16x25-minmax-fma3.c
34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__fma3()
[all …]
up16x25-minmax-avx.c
34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
[all …]
up16x25-minmax-avx-acc2.c
34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx_acc2()
[all …]
up16x9-minmax-fma3-acc2.c
34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
88 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
91 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
92 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
95 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
96 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
100 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
101 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2()
[all …]
up16x9-minmax-fma3.c
34 const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
87 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
88 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
91 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
92 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
95 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
96 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
100 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
101 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
[all …]
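
Every depthwise-convolution kernel above follows the same shape: broadcast the clamp bounds once from params->sse.min/max, load the bias and per-tap weights with aligned _mm256_load_ps, load the input rows with unaligned _mm256_loadu_ps, multiply-accumulate across the 9 or 25 taps, then clamp before storing. The sketch below shows that pattern for a single 8-channel tap with illustrative names and weight layout; it is not the XNNPACK microkernel itself, and the fma3 variants fuse the separate multiply and add shown here into _mm256_fmadd_ps.

#include <immintrin.h>

// Minimal sketch of the dwconv minmax pattern: one 8-channel tap, then clamp.
// Assumes 'w' holds 8 bias values followed by 8 tap weights (illustrative layout).
static void dwconv_tap8(const float* i0, const float* w,
                        float output_min, float output_max, float* o) {
  const __m256 vmin = _mm256_set1_ps(output_min);  // kernels broadcast params->sse.min
  const __m256 vmax = _mm256_set1_ps(output_max);  // and params->sse.max up front

  __m256 vacc = _mm256_loadu_ps(w);            // bias
  const __m256 vi0 = _mm256_loadu_ps(i0);      // input row (unaligned)
  const __m256 vk0 = _mm256_loadu_ps(w + 8);   // tap weights
  vacc = _mm256_add_ps(vacc, _mm256_mul_ps(vi0, vk0));  // AVX form; FMA3 fuses this

  vacc = _mm256_max_ps(vacc, vmin);            // clamp to [output_min, output_max]
  vacc = _mm256_min_ps(vacc, vmax);
  _mm256_storeu_ps(o, vacc);
}
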
/external/XNNPACK/src/f32-sigmoid/gen/
avx2-rr1-p5-nr1fma-x80.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80()
[all …]
avx2-rr1-p5-div-x80.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80()
[all …]
avx2-rr1-p5-div-x64.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64()
[all …]
avx2-rr1-p5-nr1fma-x64.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64()
[all …]
avx2-rr1-p5-div-x72.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72()
[all …]
avx2-rr1-p5-nr2fma-x72.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72()
[all …]
avx2-rr1-p5-nr1fma-x72.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72()
[all …]
avx2-rr1-p5-nr2fma-x80.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80()
[all …]
avx2-rr1-p5-div-x56.c
28 const __m256 vsign_mask = _mm256_set1_ps(-0.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
29 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
30 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
31 const __m256 vminus_ln2 = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
32 const __m256 vc5 = _mm256_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
33 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
34 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
35 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
36 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
37 const __m256 vone = _mm256_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56()
[all …]
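
These sigmoid kernels differ only in the unroll factor (x56 through x80) and in how the final reciprocal is formed (_mm256_div_ps in the div variants, one or two Newton-Raphson FMA steps in nr1fma/nr2fma). The constants matched above all belong to the shared rr1-p5 scheme: evaluate exp(-|x|) with one round of range reduction and a degree-5 polynomial, form e / (e + 1), and select per sign. Below is a single-vector sketch of that scheme, assuming AVX2+FMA and omitting the denormal/underflow handling the real kernels include.

#include <immintrin.h>

// Sketch of the avx2-rr1-p5 sigmoid evaluation for one __m256 of inputs.
static inline __m256 sigmoid8(__m256 vx) {
  const __m256 vsign_mask  = _mm256_set1_ps(-0.0f);
  const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vlog2e      = _mm256_set1_ps(0x1.715476p0f);
  const __m256 vminus_ln2  = _mm256_set1_ps(-0x1.62E43p-1f);
  const __m256 vc5  = _mm256_set1_ps(0x1.0F9F9Cp-7f);
  const __m256 vc4  = _mm256_set1_ps(0x1.573A1Ap-5f);
  const __m256 vc3  = _mm256_set1_ps(0x1.555A80p-3f);
  const __m256 vc2  = _mm256_set1_ps(0x1.FFFDC6p-2f);
  const __m256 vc1  = _mm256_set1_ps(0x1.FFFFF6p-1f);
  const __m256 vone = _mm256_set1_ps(1.0f);

  // z = -|x|, so the exponential below never overflows.
  const __m256 vz = _mm256_or_ps(vx, vsign_mask);

  // n = round(z * log2(e)) via the magic-bias trick; 2^n is rebuilt by shifting
  // the biased integer bits into the exponent field.
  __m256 vn = _mm256_fmadd_ps(vz, vlog2e, vmagic_bias);
  const __m256 vs = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vn), 23));
  vn = _mm256_sub_ps(vn, vmagic_bias);

  // t = z - n*ln2: single-step range reduction ("rr1").
  const __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2, vz);

  // Degree-5 polynomial approximation of exp(t) ("p5"), then e = s * (1 + t*p).
  __m256 vp = _mm256_fmadd_ps(vc5, vt, vc4);
  vp = _mm256_fmadd_ps(vp, vt, vc3);
  vp = _mm256_fmadd_ps(vp, vt, vc2);
  vp = _mm256_fmadd_ps(vp, vt, vc1);
  const __m256 ve = _mm256_fmadd_ps(_mm256_mul_ps(vt, vs), vp, vs);

  // f = e / (e + 1) = sigmoid(-|x|); nr1fma/nr2fma replace this division with
  // a reciprocal estimate plus Newton-Raphson steps.
  const __m256 vf = _mm256_div_ps(ve, _mm256_add_ps(ve, vone));

  // For x > 0, sigmoid(x) = 1 - sigmoid(-x); select on the sign of x.
  return _mm256_blendv_ps(_mm256_sub_ps(vone, vf), vf, vx);
}
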
/external/XNNPACK/src/f32-raddextexp/gen/
avx2-p5-x96-acc6.c
27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc6()
[all …]
avx2-p5-x64-acc4.c
27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x64_acc4()
[all …]
avx2-p5-x72-acc3.c
27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x72_acc3()
[all …]
avx2-p5-x80-acc5.c
27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc5()
[all …]
avx2-p5-x80-acc2.c
27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x80_acc2()
[all …]
avx2-p5-x96-acc3.c
27 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
28 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
29 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
33 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
34 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
35 const __m256 vminus_inf = _mm256_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
37 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
38 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
39 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
40 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx2_p5_x96_acc3()
[all …]
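
The raddextexp kernels sum exponentials without overflow or underflow by carrying each partial result as a pair: a mantissa-like value and a separately tracked exponent, which is where the vmin_exponent and vminus_inf constants matched above come in (they guard the running maximum exponent during accumulation). The sketch below shows only the per-vector evaluation in that pair form; the polynomial is truncated to the c0…c3 terms visible above, whereas the actual p5 kernels use all six coefficients and then merge the per-accumulator pairs.

#include <immintrin.h>

// Sketch (not the XNNPACK kernel): evaluate exp(x) in "extended exponent"
// form, returning a value p and a separate scale n so that exp(x) ~= p * 2^n.
static inline void extexp8(__m256 vx, __m256* out_p, __m256* out_n) {
  const __m256 vlog2e        = _mm256_set1_ps(0x1.715476p+0f);
  const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f);
  const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f);
  const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vc0 = _mm256_set1_ps(1.0f);
  const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f);
  const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f);
  const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f);

  // n = round(x * log2(e)), extracted with the magic-bias trick.
  __m256 vn = _mm256_fmadd_ps(vx, vlog2e, vmagic_bias);
  vn = _mm256_sub_ps(vn, vmagic_bias);

  // t = x - n*ln2, with ln2 split into hi/lo parts (Cody-Waite) for precision.
  __m256 vt = _mm256_fmadd_ps(vn, vminus_ln2_hi, vx);
  vt = _mm256_fmadd_ps(vn, vminus_ln2_lo, vt);

  // Truncated polynomial in t approximating exp(t) on the reduced range.
  __m256 vp = _mm256_fmadd_ps(vc3, vt, vc2);
  vp = _mm256_fmadd_ps(vp, vt, vc1);
  vp = _mm256_fmadd_ps(vp, vt, vc0);

  *out_p = vp;  // exp(x) ~= p * 2^n, with n kept as a separate float
  *out_n = vn;
}
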
/external/XNNPACK/src/f32-vsqrt/gen/
fma3-nr1fma1adj-x64.c
29 const __m256 vhalf = _mm256_broadcast_ss(&params->fma.half); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
31 const __m256 vx0 = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
32 const __m256 vx1 = _mm256_loadu_ps(x + 8); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
33 const __m256 vx2 = _mm256_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
34 const __m256 vx3 = _mm256_loadu_ps(x + 24); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
35 const __m256 vx4 = _mm256_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
36 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
37 const __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
38 const __m256 vx7 = _mm256_loadu_ps(x + 56); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
41 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
[all …]
fma3-nr1fma1adj-x56.c
29 const __m256 vhalf = _mm256_broadcast_ss(&params->fma.half); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
31 const __m256 vx0 = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
32 const __m256 vx1 = _mm256_loadu_ps(x + 8); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
33 const __m256 vx2 = _mm256_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
34 const __m256 vx3 = _mm256_loadu_ps(x + 24); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
35 const __m256 vx4 = _mm256_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
36 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
37 const __m256 vx6 = _mm256_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
40 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
41 const __m256 vrsqrtx1 = _mm256_rsqrt_ps(vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
[all …]
fma3-nr1fma1adj-x48.c
29 const __m256 vhalf = _mm256_broadcast_ss(&params->fma.half); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
31 const __m256 vx0 = _mm256_loadu_ps(x); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
32 const __m256 vx1 = _mm256_loadu_ps(x + 8); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
33 const __m256 vx2 = _mm256_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
34 const __m256 vx3 = _mm256_loadu_ps(x + 24); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
35 const __m256 vx4 = _mm256_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
36 const __m256 vx5 = _mm256_loadu_ps(x + 40); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
39 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
40 const __m256 vrsqrtx1 = _mm256_rsqrt_ps(vx1); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
41 const __m256 vrsqrtx2 = _mm256_rsqrt_ps(vx2); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
[all …]
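
The vsqrt kernels start from the ~12-bit _mm256_rsqrt_ps estimate and refine it; the nr1fma1adj suffix stands for one Newton-Raphson step on the reciprocal square root followed by one residual adjustment of the product, all done with FMA3. The single-vector sketch below shows that refinement; the real kernels unroll it over 48 to 64 elements per iteration, read vhalf from params->fma.half, and special-case inputs such as zero that this sketch does not handle.

#include <immintrin.h>

// Sketch of the fma3 nr1fma1adj square-root refinement for one __m256.
static inline __m256 vsqrt8(__m256 vx) {
  const __m256 vhalf = _mm256_set1_ps(0.5f);  // kernels broadcast params->fma.half

  const __m256 vrsqrtx = _mm256_rsqrt_ps(vx);         // r ~= 1/sqrt(x), ~12-bit estimate
  __m256 vsqrtx     = _mm256_mul_ps(vrsqrtx, vx);     // s = r*x ~= sqrt(x)
  __m256 vhalfrsqrt = _mm256_mul_ps(vrsqrtx, vhalf);  // h = r/2

  // One Newton-Raphson step: residual = 1/2 - s*h; scale s and h by (1 + residual).
  const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrt, vhalf);
  vhalfrsqrt = _mm256_fmadd_ps(vhalfrsqrt, vresidual, vhalfrsqrt);
  vsqrtx     = _mm256_fmadd_ps(vsqrtx, vresidual, vsqrtx);

  // Final adjustment: add (x - s*s) * h, one Newton step on f(s) = s*s - x
  // using h ~= 1/(2*sqrt(x)) as the derivative reciprocal.
  const __m256 vadjustment = _mm256_fnmadd_ps(vsqrtx, vsqrtx, vx);
  return _mm256_fmadd_ps(vhalfrsqrt, vadjustment, vsqrtx);
}
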
/external/XNNPACK/src/f32-vscaleextexp/gen/
avx2-p5-x96.c
29 const __m256 vlog2e = _mm256_set1_ps(0x1.715476p+0f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
30 const __m256 vminus_ln2_hi = _mm256_set1_ps(-0x1.62E43p-1f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
31 const __m256 vminus_ln2_lo = _mm256_set1_ps(0x1.05C61p-29f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
35 const __m256 vmin_exponent = _mm256_set1_ps(-127.0f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
36 const __m256 vmagic_bias = _mm256_set1_ps(0x1.8000FEp23f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
38 const __m256 vc0 = _mm256_set1_ps(1.0f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
39 const __m256 vc1 = _mm256_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
40 const __m256 vc2 = _mm256_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
41 const __m256 vc3 = _mm256_set1_ps(0x1.555A80p-3f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
42 const __m256 vc4 = _mm256_set1_ps(0x1.573A1Ap-5f); in xnn_f32_vscaleextexp_ukernel__avx2_p5_x96()
[all …]
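
vscaleextexp shares its constant set with the raddextexp kernels above: exp is evaluated in the (value, exponent) pair form and the carried exponent is only folded back into the float at the end, clamped at vmin_exponent so the 2^n scale stays representable. The sketch below shows just that reconstruction step with illustrative names; the actual kernel also folds in the caller-supplied scale before reconstructing.

#include <immintrin.h>

// Sketch: fold a separately carried exponent n back into the value p,
// i.e. compute p * 2^n, clamping n from below as the matched constants suggest.
// Assumes n stays within the single-precision exponent range after clamping.
static inline __m256 scale_by_pow2(__m256 vp, __m256 vn) {
  const __m256 vmagic_bias   = _mm256_set1_ps(0x1.8000FEp23f);
  const __m256 vmin_exponent = _mm256_set1_ps(-127.0f);

  // Clamp n, then build 2^n by moving the biased integer into the exponent field.
  const __m256 vnc = _mm256_max_ps(vn, vmin_exponent);
  const __m256 vbiased = _mm256_add_ps(vnc, vmagic_bias);
  const __m256 vscale = _mm256_castsi256_ps(
      _mm256_slli_epi32(_mm256_castps_si256(vbiased), 23));
  return _mm256_mul_ps(vp, vscale);
}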
