/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | sse2-p5-x20-acc2.c | 62 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 63 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 64 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 65 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 66 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 85 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 86 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 87 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 88 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() 89 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() [all …]
|
D | sse2-p5-x20.c | 61 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 62 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 63 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 64 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 65 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 84 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 85 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 86 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 87 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() 88 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() [all …]
|
D | sse2-p5-x20-acc5.c | 65 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 66 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 67 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 68 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 69 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vxGHIJ, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 88 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 89 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 90 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 91 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() 92 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vxGHIJ); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() [all …]
|
D | sse2-p5-x16-acc4.c | 62 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 63 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 64 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 65 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 82 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 83 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 84 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 85 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 87 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() 88 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4() [all …]
|
D | sse2-p5-x16-acc2.c | 60 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 61 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 62 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 63 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 80 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 81 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 82 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 83 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 85 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() 86 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2() [all …]
|
D | sse2-p5-x16.c | 59 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 60 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 61 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 62 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vxCDEF, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 79 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 80 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 81 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 82 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vxCDEF); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 84 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() 85 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16() [all …]
|
D | sse2-p5-x12-acc3.c | 59 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 60 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 61 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 76 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 77 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 78 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 80 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 82 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() 85 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3() [all …]
|
D | sse2-p5-x12.c | 57 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 58 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 59 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 74 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 75 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 76 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 78 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 79 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 80 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() 83 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12() [all …]
|
D | sse2-p5-x12-acc2.c | 58 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 59 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 60 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vx89AB, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 75 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 76 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 77 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vx89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 79 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 80 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 81 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() 84 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2() [all …]
|
D | sse2-p5-x8-acc2.c | 56 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 57 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 70 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 71 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 73 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 74 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 77 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 78 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 80 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() 81 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2() [all …]
|
D | sse2-p5-x8.c | 55 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 56 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vx4567, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 69 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 70 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vx4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 72 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 73 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 76 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 77 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 79 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() 80 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8() [all …]
|
D | sse2-p5-x4.c | 53 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vx0123, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 64 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vx0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 66 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 69 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 71 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 73 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc2); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 75 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc1); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 83 __m128 vf0123 = _mm_add_ps(_mm_mul_ps(vt0123, vp0123), vs0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 94 vacc0 = _mm_add_ps(vacc0, vf0123); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() 107 __m128 vn = _mm_add_ps(_mm_mul_ps(vx, vlog2e), vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x4() [all …]
|
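All of the raddstoreexpminusmax matches above are the same idiom: plain SSE2 has no fused multiply-add, so every FMA in the exp(x - max) evaluation is spelled as _mm_add_ps(_mm_mul_ps(a, b), c). The digits in the vector names (vn0123, vn4567, ...) label the lanes each register covers, and the -xN / -accM file suffixes give the unroll factor and the number of partial accumulators. The two steps visible in the excerpts are n = round(x * log2e) via a magic-bias add, and t = x - n*ln2 with ln2 split into high and low parts. A minimal single-vector sketch of that scheme follows; the function name is mine, the hex constants are the ones commonly used across XNNPACK's p5 kernels (quoted from memory, so verify against the source), and the polynomial uses plain Taylor coefficients rather than the minimax-fitted values in the generated files.

#include <emmintrin.h>  /* SSE2 */

/* Sketch only: exp(x) for one __m128 of 4 floats, valid for x in roughly
 * [-87, 0] (the range seen after subtracting the row maximum). Not the
 * XNNPACK kernel; it just spells out the mul+add pattern in the excerpts. */
static __m128 exp_p5_sse2_sketch(__m128 vx) {
  /* Adding 0x1.8000FEp23f rounds x*log2(e) to an integer held in the low
   * mantissa bits, biased by 127, so a left shift by 23 yields 2**n. */
  const __m128 vmagic_bias = _mm_set1_ps(0x1.8000FEp23f);
  const __m128 vlog2e = _mm_set1_ps(0x1.715476p+0f);
  /* -ln(2) split into high and low parts (Cody-Waite), so n*ln2 cancels x
   * without rounding error in the high step. */
  const __m128 vminus_ln2_hi = _mm_set1_ps(-0x1.62E400p-1f);
  const __m128 vminus_ln2_lo = _mm_set1_ps(-0x1.7F7D1Cp-20f);

  __m128 vn = _mm_add_ps(_mm_mul_ps(vx, vlog2e), vmagic_bias);   /* n = round(x*log2e) */
  const __m128 vs = _mm_castsi128_ps(
      _mm_slli_epi32(_mm_castps_si128(vn), 23));                 /* s = 2**n */
  vn = _mm_sub_ps(vn, vmagic_bias);

  __m128 vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_hi), vx);     /* t = x - n*ln2_hi */
  vt = _mm_add_ps(_mm_mul_ps(vn, vminus_ln2_lo), vt);            /* t -= n*ln2_lo */

  /* exp(t) ~ 1 + t*(c1 + t*(c2 + t*(c3 + t*(c4 + t*c5)))); Taylor coefficients
   * here for clarity, minimax hex-float constants in the generated kernels. */
  __m128 vp = _mm_set1_ps(1.0f / 120.0f);
  vp = _mm_add_ps(_mm_mul_ps(vp, vt), _mm_set1_ps(1.0f / 24.0f));
  vp = _mm_add_ps(_mm_mul_ps(vp, vt), _mm_set1_ps(1.0f / 6.0f));
  vp = _mm_add_ps(_mm_mul_ps(vp, vt), _mm_set1_ps(0.5f));
  vp = _mm_add_ps(_mm_mul_ps(vp, vt), _mm_set1_ps(1.0f));

  /* exp(x) = s * (1 + t*p(t)), reconstructed without forming 1.0 explicitly. */
  vt = _mm_mul_ps(vt, vs);
  return _mm_add_ps(_mm_mul_ps(vt, vp), vs);
}

The generated kernels additionally flush the result to zero once x drops below a denormal cutoff and, in these raddstore variants, keep one running _mm_add_ps sum (or several, in the -accM files) of the exp vectors to produce the softmax denominator.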
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-p5-div-x24.c | 71 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 72 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 73 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 74 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 75 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 76 __m128 vnKLMN = _mm_add_ps(_mm_mul_ps(vzKLMN, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 97 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 98 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 99 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 100 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() [all …]
|
D | sse41-p5-div-x20.c | 69 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 70 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 71 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 72 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 73 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 92 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 93 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 94 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 95 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 96 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() [all …]
|
D | sse2-p5-div-x24.c | 71 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 72 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 73 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 74 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 75 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 76 __m128 vnKLMN = _mm_add_ps(_mm_mul_ps(vzKLMN, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 97 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 98 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 99 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() 100 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24() [all …]
|
D | sse2-p5-div-x20.c | 69 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 70 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 71 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 72 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 73 __m128 vnGHIJ = _mm_add_ps(_mm_mul_ps(vzGHIJ, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 92 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 93 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 94 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 95 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() 96 __m128 vtGHIJ = _mm_add_ps(_mm_mul_ps(vnGHIJ, vminus_ln2_hi), vzGHIJ); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20() [all …]
|
D | sse41-p5-div-x16.c | 67 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 68 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 69 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 70 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 87 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 88 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 89 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 90 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 92 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 93 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() [all …]
|
D | sse41-p5-div-x12.c | 65 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 66 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 67 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 82 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 83 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 84 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 86 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 87 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 88 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 91 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() [all …]
|
D | sse2-p5-div-x16.c | 67 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 68 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 69 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 70 __m128 vnCDEF = _mm_add_ps(_mm_mul_ps(vzCDEF, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 87 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 88 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 89 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 90 __m128 vtCDEF = _mm_add_ps(_mm_mul_ps(vnCDEF, vminus_ln2_hi), vzCDEF); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 92 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() 93 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16() [all …]
|
D | sse2-p5-div-x12.c | 65 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 66 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 67 __m128 vn89AB = _mm_add_ps(_mm_mul_ps(vz89AB, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 82 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 83 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 84 __m128 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_hi), vz89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 86 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 87 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 88 vt89AB = _mm_add_ps(_mm_mul_ps(vn89AB, vminus_ln2_lo), vt89AB); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() 91 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12() [all …]
|
D | sse41-p5-div-x8.c | 63 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 64 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 77 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 78 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 80 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 84 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 85 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 87 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() [all …]
|
D | sse2-p5-div-x8.c | 63 __m128 vn0123 = _mm_add_ps(_mm_mul_ps(vz0123, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 64 __m128 vn4567 = _mm_add_ps(_mm_mul_ps(vz4567, vlog2e), vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 77 __m128 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_hi), vz0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 78 __m128 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_hi), vz4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 80 vt0123 = _mm_add_ps(_mm_mul_ps(vn0123, vminus_ln2_lo), vt0123); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 81 vt4567 = _mm_add_ps(_mm_mul_ps(vn4567, vminus_ln2_lo), vt4567); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 84 __m128 vp0123 = _mm_add_ps(_mm_mul_ps(vc5, vt0123), vc4); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 85 __m128 vp4567 = _mm_add_ps(_mm_mul_ps(vc5, vt4567), vc4); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 87 vp0123 = _mm_add_ps(_mm_mul_ps(vp0123, vt0123), vc3); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() 88 vp4567 = _mm_add_ps(_mm_mul_ps(vp4567, vt4567), vc3); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x8() [all …]
|
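The sigmoid kernels reuse the same exp evaluation on z = -|x| and finish with a true division, which is what the -div suffix refers to: sigmoid(z) = e^z / (e^z + 1), reflected to 1 - result for positive inputs. The SSE4.1 files differ from the SSE2 ones only in how that final selection is done (blendv versus and/andnot), which is why their exp excerpts above are identical. A hedged sketch, reusing the exp helper from the previous section (again, names and structure are illustrative, not the library's):

/* Sketch only: sigmoid(x) for 4 floats, built on exp_p5_sse2_sketch() above.
 * Mirrors the shape of the p5-div kernels; the real code also flushes the
 * exp result to zero below a denormal cutoff, omitted here. */
static __m128 sigmoid_p5_div_sse2_sketch(__m128 vx) {
  const __m128 vsign_mask = _mm_set1_ps(-0.0f);
  const __m128 vone = _mm_set1_ps(1.0f);

  const __m128 vz = _mm_or_ps(vx, vsign_mask);        /* z = -|x|, so z <= 0 */
  const __m128 ve = exp_p5_sse2_sketch(vz);

  __m128 vf = _mm_div_ps(ve, _mm_add_ps(ve, vone));   /* sigmoid(z) = e/(e+1) */

  /* Reflect for x > 0: sigmoid(x) = 1 - sigmoid(-x). SSE2 has no blendv, so
   * select on the sign bit of x with an integer compare plus and/andnot. */
  const __m128 vm = _mm_castsi128_ps(
      _mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx)));
  return _mm_or_ps(_mm_and_ps(vm, vf),
                   _mm_andnot_ps(vm, _mm_sub_ps(vone, vf)));
}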
/external/XNNPACK/src/f32-dwconv/gen/ |
D | up8x25-sse.c | 98 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse() 99 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi0x4567, vk0x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse() 107 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi1x0123, vk1x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse() 108 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi1x4567, vk1x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse() 116 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse() 117 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi2x4567, vk2x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse() 125 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse() 126 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi3x4567, vk3x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse() 134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse() 135 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi4x4567, vk4x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse() [all …]
|
D | up8x25-sse-acc2.c | 98 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi0x0123, vk0x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 99 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi0x4567, vk0x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 116 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi2x0123, vk2x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 117 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi2x4567, vk2x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 125 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi3x0123, vk3x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 126 vacc4567p1 = _mm_add_ps(vacc4567p1, _mm_mul_ps(vi3x4567, vk3x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 134 vacc0123p0 = _mm_add_ps(vacc0123p0, _mm_mul_ps(vi4x0123, vk4x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 135 vacc4567p0 = _mm_add_ps(vacc4567p0, _mm_mul_ps(vi4x4567, vk4x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 143 vacc0123p1 = _mm_add_ps(vacc0123p1, _mm_mul_ps(vi5x0123, vk5x0123)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() 144 vacc4567p1 = _mm_add_ps(vacc4567p1, _mm_mul_ps(vi5x4567, vk5x4567)); in xnn_f32_dwconv_ukernel_up8x25__sse_acc2() [all …]
|
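In the depthwise-convolution kernels the same mul+add pair is simply a multiply-accumulate: for each of the 25 taps of the 5x5 window, one input vector and one kernel vector per 4-channel group are multiplied and added into a running accumulator that starts from the bias. The plain up8x25-sse file keeps a single accumulator pair (vacc0123p0 / vacc4567p0); the -acc2 variant alternates between two pairs (p0 and p1) to shorten the dependency chain and merges them once at the end. A reduced sketch with a flat, hypothetical weight layout (the real kernels read XNNPACK's packed weights and keep 8 channels in flight):

#include <stddef.h>
#include <xmmintrin.h>  /* SSE */

/* Sketch only: the per-tap multiply-accumulate at the core of the up8x25
 * dwconv kernels, reduced to 4 channels and a caller-supplied tap count.
 * The layout of inputs/weights here is illustrative, not XNNPACK's. */
static __m128 dwconv_4ch_sketch(const float* const* inputs,  /* inputs[k]: 4 floats for tap k */
                                const float* weights,        /* weights[4*k + c], flat layout */
                                const float* bias,           /* 4 floats */
                                size_t taps) {
  __m128 vacc_p0 = _mm_loadu_ps(bias);
  __m128 vacc_p1 = _mm_setzero_ps();  /* second accumulator, as in the -acc2 variant */

  for (size_t k = 0; k < taps; k++) {
    const __m128 vi = _mm_loadu_ps(inputs[k]);
    const __m128 vk = _mm_loadu_ps(weights + 4 * k);
    /* No FMA in SSE: multiply, then add into the accumulator. */
    if (k % 2 == 0) {
      vacc_p0 = _mm_add_ps(vacc_p0, _mm_mul_ps(vi, vk));
    } else {
      vacc_p1 = _mm_add_ps(vacc_p1, _mm_mul_ps(vi, vk));
    }
  }
  /* Merge the partial accumulators (done once, after all taps). */
  return _mm_add_ps(vacc_p0, vacc_p1);
}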
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x8s4-sse.c | 85 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__sse() 86 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__sse() 87 vacc2x0123 = _mm_add_ps(vacc2x0123, _mm_mul_ps(va2, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__sse() 88 vacc3x0123 = _mm_add_ps(vacc3x0123, _mm_mul_ps(va3, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__sse() 89 vacc0x4567 = _mm_add_ps(vacc0x4567, _mm_mul_ps(va0, vb4567c0)); in xnn_f32_gemm_ukernel_4x8s4__sse() 90 vacc1x4567 = _mm_add_ps(vacc1x4567, _mm_mul_ps(va1, vb4567c0)); in xnn_f32_gemm_ukernel_4x8s4__sse() 91 vacc2x4567 = _mm_add_ps(vacc2x4567, _mm_mul_ps(va2, vb4567c0)); in xnn_f32_gemm_ukernel_4x8s4__sse() 92 vacc3x4567 = _mm_add_ps(vacc3x4567, _mm_mul_ps(va3, vb4567c0)); in xnn_f32_gemm_ukernel_4x8s4__sse() 102 vacc0x0123 = _mm_add_ps(vacc0x0123, _mm_mul_ps(va0, vb0123c1)); in xnn_f32_gemm_ukernel_4x8s4__sse() 103 vacc1x0123 = _mm_add_ps(vacc1x0123, _mm_mul_ps(va1, vb0123c1)); in xnn_f32_gemm_ukernel_4x8s4__sse() [all …]
|
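Finally, the GEMM excerpt is the update of a 4x8 output tile: each accumulator row picks up acc += a * b against the same block of packed B, again as a separate multiply and add. The s4 in the file name indicates that instead of broadcasting one A element at a time, the kernel loads four A elements per row and rotates the vector one lane between four consecutive steps (vb0123c0 ... vb0123c3 name the B block used at each rotation). One such step, reduced to a single row, might look like the following; the identifiers and the packing of b_panel are illustrative:

#include <xmmintrin.h>  /* SSE */

/* Sketch only: one "s4" step of the 4x8 GEMM micro-kernel, reduced to one
 * accumulator row. va holds 4 consecutive A elements; between steps it is
 * rotated one lane so each element eventually meets its slice of packed B. */
static inline __m128 gemm_s4_step(__m128 vacc, __m128* va, const float* b_panel) {
  const __m128 vb = _mm_loadu_ps(b_panel);         /* 4 packed B values for this step */
  vacc = _mm_add_ps(vacc, _mm_mul_ps(*va, vb));    /* acc += a * b (mul + add, no FMA) */
  /* Rotate A one lane: (a0,a1,a2,a3) -> (a1,a2,a3,a0). */
  *va = _mm_shuffle_ps(*va, *va, _MM_SHUFFLE(0, 3, 2, 1));
  return vacc;
}

Four such calls per row, with B advancing by one packed panel each time, bring every A lane past every B column block, after which the rotation leaves va back in its original lane order for the next group of four.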