
Searched refs: _mm_add_ps (Results 1 – 25 of 167) sorted by relevance
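(Note: _mm_add_ps is the SSE intrinsic that adds two __m128 vectors of four floats lane-wise. The kernels below target pre-FMA SSE, so every multiply-add is spelled as the two-intrinsic pattern sketched here; this helper is illustrative, not XNNPACK code.)

#include <xmmintrin.h>

/* a*b + c with two roundings; the pattern behind nearly every match below. */
static inline __m128 madd_ps(__m128 a, __m128 b, __m128 c) {
  return _mm_add_ps(_mm_mul_ps(a, b), c);
}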


/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
sse2-p5-x20-acc2.c
62 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
63 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
64 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
65 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
66 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
85 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
86 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
87 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
88 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
89 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
[all …]
sse2-p5-x20.c
61 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
62 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
63 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
64 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
65 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
84 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
85 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
86 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
87 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
88 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
[all …]
sse2-p5-x20-acc5.c
65 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
66 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
67 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
68 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
69 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
88 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
89 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
90 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
91 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
92 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
[all …]
sse2-p5-x16-acc4.c
62 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
63 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
64 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
65 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
82 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
83 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
84 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
85 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
87 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
88 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
[all …]
sse2-p5-x16-acc2.c
60 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
61 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
62 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
63 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
80 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
81 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
82 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
83 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
85 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
86 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
[all …]
sse2-p5-x16.c
59 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
60 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
61 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
62 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
79 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
80 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
81 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
82 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
84 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
85 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
[all …]
sse2-p5-x12-acc3.c
59 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
60 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
61 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
76 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
77 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
78 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
80 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
82 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
85 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
[all …]
sse2-p5-x12.c
57 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
58 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
59 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
74 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
75 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
76 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
78 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
79 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
80 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
83 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
[all …]
sse2-p5-x12-acc2.c
58 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
59 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
60 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
75 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
76 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
77 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
79 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
80 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
81 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
84 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
[all …]
sse2-p5-x8-acc2.c
56 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
57 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
70 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
71 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
73 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
74 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
77 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
78 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
80 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
81 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
[all …]
sse2-p5-x8.c
55 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
56 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
69 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
70 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
72 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
73 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
76 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
77 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
79 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
80 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
[all …]
sse2-p5-x4.c
53 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
64 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
66 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
69 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
71 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
73 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
75 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
83 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
94 vacc0 = _mm_add_ps(vacc0, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
107 __m128 vn = _mm_add_ps(_mm_mul_ps(vx, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4()
[all …]
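(All raddstoreexpminusmax matches above repeat one exp(x - max) core per 4-float block; the variants differ only in unroll width (x4..x20) and accumulator count (accN). A minimal sketch of that core, assuming the usual constant values (log2e, the magic rounding bias, the hi/lo Cody-Waite split of -ln2, and degree-5 minimax coefficients c1..c5), none of which are copied from the source:)

#include <emmintrin.h>

static inline __m128 exp_block(__m128 vx, __m128 vlog2e, __m128 vmagic_bias,
                               __m128 vminus_ln2_hi, __m128 vminus_ln2_lo,
                               __m128 vc1, __m128 vc2, __m128 vc3,
                               __m128 vc4, __m128 vc5) {
  /* n := round(x * log2e), captured in the low mantissa bits by the magic bias. */
  __m128 vn = _mm_add_ps(_mm_mul_ps(vx, vlog2e), vmagic_bias);
  /* Rebuild s = 2^n by shifting those bits into the exponent field. */
  const __m128 vs = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vn), 23));
  vn = _mm_sub_ps(vn, vmagic_bias);
  /* Cody-Waite reduction: t := x - n*ln2, applied in hi and lo halves. */
  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vx);
  vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt);
  /* Degree-5 polynomial in Horner form: p := c1 + t*(c2 + t*(c3 + t*(c4 + t*c5))). */
  __m128 vp = _mm_add_ps(_mm_mul_ps(vc5, vt), vc4);
  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc3);
  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc2);
  vp = _mm_add_ps(_mm_mul_ps(vp, vt), vc1);
  /* Reconstruct e^x = s * (1 + t*p(t)). */
  vt = _mm_mul_ps(vt, vs);
  return _mm_add_ps(_mm_mul_ps(vt, vp), vs);
}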
/external/XNNPACK/src/f32-sigmoid/gen/
sse41-p5-div-x24.c
71 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
72 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
73 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
74 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
75 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
76 __m128 vnKLMN = _mm_add_ps(_mm_mul_ps(vzKLMN, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
97 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
98 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
99 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
100 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
[all …]
sse41-p5-div-x20.c
69 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
70 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
71 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
72 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
73 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
92 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
93 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
94 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
95 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
96 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
[all …]
sse2-p5-div-x24.c
71 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
72 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
73 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
74 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
75 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
76 __m128 vnKLMN = _mm_add_ps(_mm_mul_ps(vzKLMN, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
97 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
98 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
99 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
100 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
[all …]
sse2-p5-div-x20.c
69 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
70 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
71 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
72 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
73 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
92 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
93 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
94 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
95 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
96 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
[all …]
sse41-p5-div-x16.c
67 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
68 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
69 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
70 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
87 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
88 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
89 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
90 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
92 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
93 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
[all …]
sse41-p5-div-x12.c
65 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
66 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
67 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
82 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
83 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
84 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
86 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
87 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
88 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
91 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
[all …]
sse2-p5-div-x16.c
67 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
68 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
69 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
70 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
87 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
88 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
89 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
90 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
92 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
93 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
[all …]
sse2-p5-div-x12.c
65 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
66 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
67 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
82 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
83 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
84 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
86 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
87 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
88 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
91 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
[all …]
sse41-p5-div-x8.c
63 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
64 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
77 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
78 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
80 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
84 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
85 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
87 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
[all …]
sse2-p5-div-x8.c
63 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
64 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
77 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
78 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
80 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
84 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
85 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
87 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8()
[all …]
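(The sigmoid p5-div kernels reuse the same exp core on a z derived from x (note the vz operands above) and then finish with one division per block. A sketch of that tail, assuming ve already holds exp(-|x|); the per-lane reflection for positive inputs is omitted:)

#include <xmmintrin.h>

/* Given e = exp(-|x|), sigma(-|x|) = e / (e + 1); for x > 0 the kernel then
   uses sigma(x) = 1 - sigma(-x). */
static inline __m128 sigmoid_from_exp(__m128 ve) {
  const __m128 vone = _mm_set1_ps(1.0f);
  const __m128 vd = _mm_add_ps(ve, vone);
  return _mm_div_ps(ve, vd);
}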
/external/XNNPACK/src/f32-dwconv/gen/
up8x25-sse.c
98 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
99 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi0x4567, vk0x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse()
107 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
108 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi1x4567, vk1x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse()
116 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
117 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi2x4567, vk2x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse()
125 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
126 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi3x4567, vk3x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse()
134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse()
135 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi4x4567, vk4x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse()
[all …]
up8x25-sse-acc2.c
98 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
99 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi0x4567, vk0x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
116 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
117 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi2x4567, vk2x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
125 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
126 vacc4567p1 = _mm_add_ps(vacc4567p1, _mm_mul_ps(vi3x4567, vk3x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
135 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi4x4567, vk4x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
143 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
144 vacc4567p1 = _mm_add_ps(vacc4567p1, _mm_mul_ps(vi5x4567, vk5x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2()
[all …]
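(The up8x25 depthwise-convolution kernels accumulate input*weight across 25 taps for 8 channels at a time, held in two __m128 accumulators. The -acc2 variant alternates taps between two accumulator pairs (note p0 vs p1 above) to shorten the add dependency chain, summing the pairs at the end. A hypothetical per-tap helper:)

#include <xmmintrin.h>

/* One tap: acc += input * weight for 8 channels (two 4-lane registers). */
static inline void dwconv_tap(__m128* vacc_lo, __m128* vacc_hi,
                              const float* vi, const float* vk) {
  *vacc_lo = _mm_add_ps(*vacc_lo, _mm_mul_ps(_mm_loadu_ps(vi), _mm_loadu_ps(vk)));
  *vacc_hi = _mm_add_ps(*vacc_hi, _mm_mul_ps(_mm_loadu_ps(vi + 4), _mm_loadu_ps(vk + 4)));
}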
/external/XNNPACK/src/f32-gemm/gen/
4x8s4-sse.c
85 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__sse()
86 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__sse()
87 vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__sse()
88 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__sse()
89 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c0)); in xnn_f32_gemm_ukernel_4x8s4__sse()
90 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1, vb4567c0)); in xnn_f32_gemm_ukernel_4x8s4__sse()
91 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2, vb4567c0)); in xnn_f32_gemm_ukernel_4x8s4__sse()
92 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3, vb4567c0)); in xnn_f32_gemm_ukernel_4x8s4__sse()
102 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0, vb0123c1)); in xnn_f32_gemm_ukernel_4x8s4__sse()
103 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c1)); in xnn_f32_gemm_ukernel_4x8s4__sse()
[all …]
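(The 4x8s4 GEMM microkernel keeps a 4x8 tile of C in eight __m128 accumulators and, per sub-step, multiplies four A registers against one 8-wide slice of B; between the four sub-steps it rotates each A register lane-wise (the "s4" shuffle, not shown). A sketch of one sub-step:)

#include <xmmintrin.h>

/* One of four sub-steps: vacc[r] covers row r, columns 0-3 and 4-7. */
static inline void gemm_substep(__m128 vacc[4][2], const __m128 va[4],
                                __m128 vb_lo, __m128 vb_hi) {
  for (int r = 0; r < 4; r++) {
    vacc[r][0] = _mm_add_ps(vacc[r][0], _mm_mul_ps(va[r], vb_lo));
    vacc[r][1] = _mm_add_ps(vacc[r][1], _mm_mul_ps(va[r], vb_hi));
  }
}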
