Home
last modified time | relevance | path

Searched refs:__m512 (Results 1 – 25 of 322) sorted by relevance

12345678910>>...13

/external/XNNPACK/src/f32-dwconv/gen/
Dup32x25-minmax-avx512f-acc2.c33 const __m512 vmax = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.max)); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
34 const __m512 vmin = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.min)); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
166 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
167 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
170 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
171 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
174 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
175 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
179 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
180 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
[all …]
Dup32x25-minmax-avx512f.c33 const __m512 vmax = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.max)); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
34 const __m512 vmin = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.min)); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
166 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
167 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
170 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
171 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
174 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
175 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
179 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
180 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
[all …]
Dup32x9-minmax-avx512f-acc2.c33 const __m512 vmax = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.max)); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
34 const __m512 vmin = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.min)); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
86 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
87 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
90 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
91 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
94 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
95 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
99 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
100 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
[all …]
/external/XNNPACK/src/f32-vsqrt/gen/
Davx512f-nr1fma1adj-x128.c28 const __m512 vhalf = _mm512_set1_ps(params->fma.half); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
36 const __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
37 const __m512 vx7 = _mm512_loadu_ps(x + 112); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
40 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
[all …]
Davx512f-nr1fma1adj-x112.c28 const __m512 vhalf = _mm512_set1_ps(params->fma.half); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
36 const __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
39 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
40 const __m512 vrsqrtx1 = _mm512_rsqrt14_ps(vx1); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
[all …]
Davx512f-nr1fma1adj-x96.c28 const __m512 vhalf = _mm512_set1_ps(params->fma.half); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
38 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
39 const __m512 vrsqrtx1 = _mm512_rsqrt14_ps(vx1); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
40 const __m512 vrsqrtx2 = _mm512_rsqrt14_ps(vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
[all …]
/external/XNNPACK/src/f32-raddextexp/gen/
Davx512f-p5-scalef-x192-acc6.c27 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
28 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
29 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
33 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
34 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
35 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
38 const __m512 vminus_inf = _mm512_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6()
[all …]
Davx512f-p5-scalef-x160-acc5.c27 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
28 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
29 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
33 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
34 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
35 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
38 const __m512 vminus_inf = _mm512_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5()
[all …]
/external/XNNPACK/src/f32-sigmoid/gen/
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128()
[all …]
Davx512f-rr1-lut16-p3-perm-scalef-div-x112.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112()
[all …]
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112()
[all …]
Davx512f-rr2-lut32-p2-perm2-scalef-div-x128.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128()
[all …]
Davx512f-rr2-lut32-p2-perm2-scalef-div-x96.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96()
[all …]
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128()
[all …]
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112()
[all …]
Davx512f-rr1-lut16-p3-perm-scalef-div-x128.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128()
[all …]
Davx512f-rr1-lut16-p3-perm-scalef-div-x96.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96()
[all …]
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80()
[all …]
Davx512f-rr2-lut32-p2-perm2-scalef-div-x112.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112()
[all …]
Davx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96()
[all …]
Davx512f-rr2-lut32-p2-perm2-scalef-div-x80.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80()
[all …]
Davx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80()
[all …]
Davx512f-rr1-p5-scalef-div-x128.c28 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
29 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
30 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
31 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
32 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
33 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
34 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
35 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
38 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
39 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128()
[all …]
/external/XNNPACK/src/f32-raddexpminusmax/gen/
Davx512f-p5-scalef-x192-acc6.c26 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
27 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
28 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
32 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
33 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
34 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
37 const __m512 vi_max = _mm512_set1_ps(max); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
[all …]
Davx512f-p5-scalef-x160-acc5.c26 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
27 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
28 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
32 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
33 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
34 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
37 const __m512 vi_max = _mm512_set1_ps(max); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5()
[all …]

12345678910>>...13