/external/XNNPACK/src/f32-dwconv/gen/ |
D | up32x25-minmax-avx512f-acc2.c | 33 const __m512 vmax = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.max)); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 34 const __m512 vmin = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.min)); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 166 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 167 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 170 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 171 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 174 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 175 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 179 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 180 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() [all …]
|
D | up32x25-minmax-avx512f.c | 33 const __m512 vmax = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.max)); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 34 const __m512 vmin = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.min)); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 166 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 167 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 170 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 171 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 174 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 175 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 179 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 180 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() [all …]
|
D | up32x9-minmax-avx512f-acc2.c | 33 const __m512 vmax = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.max)); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 34 const __m512 vmin = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.min)); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 86 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 87 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 90 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 91 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 94 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 95 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 99 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 100 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() [all …]
|
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx512f-nr1fma1adj-x128.c | 28 const __m512 vhalf = _mm512_set1_ps(params->fma.half); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 36 const __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 37 const __m512 vx7 = _mm512_loadu_ps(x + 112); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 40 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() [all …]
|
D | avx512f-nr1fma1adj-x112.c | 28 const __m512 vhalf = _mm512_set1_ps(params->fma.half); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 36 const __m512 vx6 = _mm512_loadu_ps(x + 96); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 39 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 40 const __m512 vrsqrtx1 = _mm512_rsqrt14_ps(vx1); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() [all …]
|
D | avx512f-nr1fma1adj-x96.c | 28 const __m512 vhalf = _mm512_set1_ps(params->fma.half); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 30 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 31 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 32 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 33 const __m512 vx3 = _mm512_loadu_ps(x + 48); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 34 const __m512 vx4 = _mm512_loadu_ps(x + 64); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 35 const __m512 vx5 = _mm512_loadu_ps(x + 80); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 38 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 39 const __m512 vrsqrtx1 = _mm512_rsqrt14_ps(vx1); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 40 const __m512 vrsqrtx2 = _mm512_rsqrt14_ps(vx2); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() [all …]
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192-acc6.c | 27 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 28 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 29 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 33 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 34 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 35 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() 38 const __m512 vminus_inf = _mm512_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc6() [all …]
|
D | avx512f-p5-scalef-x160-acc5.c | 27 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 28 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 29 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 31 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 32 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 33 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 34 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 35 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 36 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() 38 const __m512 vminus_inf = _mm512_set1_ps(-INFINITY); in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x160_acc5() [all …]
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x128.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() 43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128() [all …]
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x112.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() 43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112() [all …]
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x112.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() 47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112() [all …]
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x128.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() 47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128() [all …]
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x96.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() 47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96() [all …]
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x128.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() 47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128() [all …]
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x112.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() 43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112() [all …]
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x128.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() 43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128() [all …]
|
D | avx512f-rr1-lut16-p3-perm-scalef-div-x96.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() 43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96() [all …]
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x80.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() 47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80() [all …]
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x112.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() 47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112() [all …]
|
D | avx512f-rr2-lut32-p2-perm2-scalef-nr1fma-x96.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() 47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96() [all …]
|
D | avx512f-rr2-lut32-p2-perm2-scalef-div-x80.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p18f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 30 const __m512 vtable_hi = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 35 const __m512 vtable_lo = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 40 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 41 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 42 const __m512 vc2 = _mm512_set1_ps(0x1.000000p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 43 const __m512 vc1 = _mm512_set1_ps(0x1.0000F6p-0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 44 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() 47 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80() [all …]
|
D | avx512f-rr1-lut16-p3-perm-scalef-nr1fma-x80.c | 28 const __m512 vmagic_bias = _mm512_set1_ps(0x1.800000p19f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 29 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 30 const __m512 vtable = _mm512_set_ps( in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 35 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 36 const __m512 vc3 = _mm512_set1_ps(0x1.55559Ap-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 37 const __m512 vc2 = _mm512_set1_ps(0x1.00021Ep-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 38 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 41 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 42 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() 43 const __m512 vx2 = _mm512_loadu_ps(x + 32); in xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80() [all …]
|
D | avx512f-rr1-p5-scalef-div-x128.c | 28 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 29 const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 30 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 31 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 32 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 33 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 34 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 35 const __m512 vone = _mm512_set1_ps(1.0f); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 38 const __m512 vx0 = _mm512_loadu_ps(x); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() 39 const __m512 vx1 = _mm512_loadu_ps(x + 16); in xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128() [all …]
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc6.c | 26 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 27 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 28 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 32 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 33 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 34 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() 37 const __m512 vi_max = _mm512_set1_ps(max); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() [all …]
|
D | avx512f-p5-scalef-x160-acc5.c | 26 const __m512 vlog2e = _mm512_set1_ps(0x1.715476p+0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 27 const __m512 vminus_ln2_hi = _mm512_set1_ps(-0x1.62E43p-1f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 28 const __m512 vminus_ln2_lo = _mm512_set1_ps(0x1.05C61p-29f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 30 const __m512 vc0 = _mm512_set1_ps(1.0f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 31 const __m512 vc1 = _mm512_set1_ps(0x1.FFFFF6p-1f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 32 const __m512 vc2 = _mm512_set1_ps(0x1.FFFDC6p-2f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 33 const __m512 vc3 = _mm512_set1_ps(0x1.555A80p-3f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 34 const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 35 const __m512 vc5 = _mm512_set1_ps(0x1.0F9F9Cp-7f); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() 37 const __m512 vi_max = _mm512_set1_ps(max); in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x160_acc5() [all …]
|